From dd71bde9b0a7eb7dce534cba7180899cbf50b02e Mon Sep 17 00:00:00 2001
From: "E. Joshua Rigler" <erigler@usgs.gov>
Date: Tue, 28 May 2024 14:10:01 -0600
Subject: [PATCH 1/2] Pull inputs in run(), not run_as_update()

For years, when invoking geomag-algorithms' update mechanism, inputs
were pulled and checked from the run_as_update() method inside the
Controller class. I never understood this. It seemed to break the
logic of the update mechanism if no inputs were available for the
current interval/gap. This might even be the source of noted issues
where running back-filling scripts didn't behave as expected, and
those scripts needed to be run multiple times.

What's more, the run_as_update() method logically seems like the
most appropriate place to read *outputs* and check for gaps. Whereas
the run() method seems like the most appropriate place to read
inputs, apply algorithms, and write out outputs.

In any case, this change should not break any existing code. It
should only allow the update mechanism to complete, every time, and
as originally intended, rather than be short-circuited when input
data are missing for the current interval (but might be available
for previous intervals).
---
 geomagio/Controller.py | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index 9d50ceb2..d9feb299 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -285,8 +285,12 @@ class Controller(object):
             channels=input_channels,
             interval=input_interval,
         )
-        if timeseries.count() == 0:
-            # no data to process
+        if not algorithm.can_produce_data(
+            starttime=timeseries[0].stats.starttime,
+            endtime=timeseries[0].stats.endtime,
+            stream=timeseries,
+        ):
+            # don't process if nothing will be produced
             return
         # pre-process
         if next_starttime and realtime:
@@ -416,18 +420,6 @@ class Controller(object):
                 ]
             ]
         for output_gap in output_gaps:
-            input_timeseries = self._get_input_timeseries(
-                algorithm=algorithm,
-                observatory=observatory,
-                starttime=output_gap[0],
-                endtime=output_gap[1],
-                channels=input_channels,
-                interval=input_interval,
-            )
-            if not algorithm.can_produce_data(
-                starttime=output_gap[0], endtime=output_gap[1], stream=input_timeseries
-            ):
-                continue
             # check for fillable gap at start
             if output_gap[0] == starttime:
                 # found fillable gap at start, recurse to previous interval
@@ -469,7 +461,6 @@ class Controller(object):
                 starttime=gap_starttime,
                 endtime=gap_endtime,
                 input_channels=input_channels,
-                input_timeseries=input_timeseries,
                 output_channels=output_channels,
                 input_interval=input_interval,
                 output_interval=output_interval,
-- 
GitLab


From 2bd15768bc0354f6d0f9beceb550e02bbe5c43f4 Mon Sep 17 00:00:00 2001
From: "E. Joshua Rigler" <erigler@usgs.gov>
Date: Tue, 28 May 2024 16:15:09 -0600
Subject: [PATCH 2/2] Add can_produce_data() method to SqDistAlgorithm

This simply returns True from can_produce_data(), which is appropriate
because 1) a stateful algorithm should be able to produce data as long
as it starts from a valid state, and 2) SqDistAlgorithm itself checks
this state. If we ever implement a different stateful algorithm, it
should do something similar.
---
 geomagio/algorithm/SqDistAlgorithm.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/geomagio/algorithm/SqDistAlgorithm.py b/geomagio/algorithm/SqDistAlgorithm.py
index 246baa05..5855ab87 100644
--- a/geomagio/algorithm/SqDistAlgorithm.py
+++ b/geomagio/algorithm/SqDistAlgorithm.py
@@ -306,6 +306,25 @@ class SqDistAlgorithm(Algorithm):
         out += self.create_trace(channel + "_Sigma", trace.stats, sigmahat)
         return out
 
+    def can_produce_data(self, starttime, endtime, stream):
+        """Can Produce data
+
+        A stateful algorithm can produce data as long as it starts with a valid
+        state. Such a check is performed in SqDistAlgorithm.process_one(), so
+        there is little need to reproduce that here, and we simply return True,
+        thereby overriding the base Algorithm class's default behavior.
+
+        Parameters
+        ----------
+        starttime: UTCDateTime
+            start time of requested output
+        endtime: UTCDateTime
+            end time of requested output
+        stream: obspy.core.Stream
+            The input stream we want to make certain has data for the algorithm
+        """
+        return True
+
     @classmethod
     def additive(
         cls,
-- 
GitLab