From 69637b4d88115a64e6d22fc4c48b6ee9f809a20e Mon Sep 17 00:00:00 2001
From: Alex Wernle <awernle@usgs.gov>
Date: Tue, 1 Nov 2022 12:40:25 -0600
Subject: [PATCH] Changed dst_tot to calculate average regardless of nan values
 and added a new stream that counts available observatories pertimestep

---
 geomagio/algorithm/AverageAlgorithm.py       | 23 ++++++++++++--------
 test/algorithm_test/AverageAlgorithm_test.py | 15 +++++++------
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/geomagio/algorithm/AverageAlgorithm.py b/geomagio/algorithm/AverageAlgorithm.py
index d6536c76b..9dbf6f7bf 100644
--- a/geomagio/algorithm/AverageAlgorithm.py
+++ b/geomagio/algorithm/AverageAlgorithm.py
@@ -63,18 +63,14 @@ class AverageAlgorithm(Algorithm):
             if ts.stats.npts != self._npts:
                 raise AlgorithmException("Received timeseries have different lengths")
 
-            if numpy.isnan(ts.data).all():
-                raise AlgorithmException(
-                    "Trace for %s observatory is completely empty." % (ts.stats.station)
-                )
-
             if ts.stats.starttime != self._stt:
                 raise AlgorithmException(
                     "Received timeseries have different starttimes"
                 )
 
     def process(self, timeseries):
-        """averages a channel across multiple stations
+        """averages a channel across multiple stations and counts available 
+        observatories per timestep
 
         Parameters
         ----------
@@ -82,7 +78,7 @@ class AverageAlgorithm(Algorithm):
         Returns
         -------
         out_stream:
-            new stream object containing the averaged values.
+            new stream object containing the averaged values and available observatories.
         """
         self.observatories = self.observatories or [t.stats.station for t in timeseries]
 
@@ -116,9 +112,13 @@ class AverageAlgorithm(Algorithm):
             combined.append(ts.data * latcorr)
 
         # after looping over stations, compute average
-        dst_tot = numpy.mean(combined, axis=0)
+        dst_tot = numpy.nanmean(combined, axis=0)
 
-        # Create a stream from the trace function
+        # Stack timeseries and count non-nan values per timestep
+        ts_counter = numpy.column_stack(timeseries)
+        channel_count = numpy.count_nonzero(~numpy.isnan(ts_counter), axis=1)
+
+        # create metadata
         new_stats = obspy.core.Stats()
         new_stats.station = "USGS"
         new_stats.channel = self.outchannel
@@ -127,7 +127,12 @@ class AverageAlgorithm(Algorithm):
         new_stats.starttime = timeseries[0].stats.starttime
         new_stats.npts = timeseries[0].stats.npts
         new_stats.delta = timeseries[0].stats.delta
+        new_stats2 = new_stats.copy()
+        new_stats2.channel = new_stats.channel + "_count"
+
+        # create streams from the trace function
         stream = obspy.core.Stream((obspy.core.Trace(dst_tot, new_stats),))
+        stream += obspy.core.Trace(numpy.asarray(channel_count), new_stats2)
 
         # return averaged values as a stream
         return stream
diff --git a/test/algorithm_test/AverageAlgorithm_test.py b/test/algorithm_test/AverageAlgorithm_test.py
index a93540cd9..1a0038126 100644
--- a/test/algorithm_test/AverageAlgorithm_test.py
+++ b/test/algorithm_test/AverageAlgorithm_test.py
@@ -8,18 +8,19 @@ from numpy.testing import assert_array_equal, assert_equal
 
 def test_process():
     """AverageAlgorithm_test.test_process()
-    confirms that the output of the algorithm is the average of three
-    different stations.
+    confirms that the output of the algorithm is the average of two 
+    stations and ignores the third.
 
     """
-    # Make three test data arrays of 1's, 3's, and 5's
-    # the average of the three arrays should be 3
-    test_data1 = np.ones(5)
+    # Make three test data arrays of NAN's, 3's, and 5's
+    # the average of the three arrays should be 4
+    test_data1 = np.zeros(5)
+    test_data1[:] = np.nan
     test_data2 = np.ones(5) * 5
     test_data3 = np.ones(5) * 3
 
-    # Create expected solution array of 3's
-    expected_solution = np.ones(5) * 3
+    # Create expected solution array of 4's
+    expected_solution = np.ones(5) * 4
 
     # Create timeseries with first trace that uses test_data1
     timeseries = Stream()
-- 
GitLab