From dd71bde9b0a7eb7dce534cba7180899cbf50b02e Mon Sep 17 00:00:00 2001 From: "E. Joshua Rigler" <erigler@usgs.gov> Date: Tue, 28 May 2024 14:10:01 -0600 Subject: [PATCH] Pull inputs in run(), not run_as_update() For years, when invoking geomag-algorithms' update mechanism, inputs were pulled and checked from the run_as_update() method inside the Controller class. I never understood this. It seemed to break the logic of the update mechanism if no inputs were available for the current interval/gap. This might even be the source of noted issues where running back-filling scripts didn't behave as expected, and those scripts needed to be run multiple times. What's more, the run_as_update() method logically seems like the most appropriate place to read *outputs* and check for gaps, whereas the run() method seems like the most appropriate place to read inputs, apply algorithms, and write out outputs. In any case, this change should not break any existing code. It should only allow the update mechanism to complete, every time, and as originally intended, rather than be short-circuited when input data are missing for the current interval (but might be available for previous intervals). 
--- geomagio/Controller.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 9d50ceb2..d9feb299 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -285,8 +285,12 @@ class Controller(object): channels=input_channels, interval=input_interval, ) - if timeseries.count() == 0: - # no data to process + if not algorithm.can_produce_data( + starttime=timeseries[0].stats.starttime, + endtime=timeseries[0].stats.endtime, + stream=timeseries, + ): + # don't process if nothing will be produced return # pre-process if next_starttime and realtime: @@ -416,18 +420,6 @@ class Controller(object): ] ] for output_gap in output_gaps: - input_timeseries = self._get_input_timeseries( - algorithm=algorithm, - observatory=observatory, - starttime=output_gap[0], - endtime=output_gap[1], - channels=input_channels, - interval=input_interval, - ) - if not algorithm.can_produce_data( - starttime=output_gap[0], endtime=output_gap[1], stream=input_timeseries - ): - continue # check for fillable gap at start if output_gap[0] == starttime: # found fillable gap at start, recurse to previous interval @@ -469,7 +461,6 @@ class Controller(object): starttime=gap_starttime, endtime=gap_endtime, input_channels=input_channels, - input_timeseries=input_timeseries, output_channels=output_channels, input_interval=input_interval, output_interval=output_interval, -- GitLab