From aa9bac5e7b578960e1eb8fbee12e794f2d40fcd3 Mon Sep 17 00:00:00 2001
From: Jeremy Fee <jmfee@usgs.gov>
Date: Fri, 14 Feb 2020 13:41:44 -0700
Subject: [PATCH] Remove conversion factor arguments, add tests, fix how
 metadata is accessed and make convert method "public"

---
 geomagio/Controller.py           |  19 +++--
 geomagio/edge/MiniSeedFactory.py | 135 ++++++++++++++++---------------
 test/Metadata_test.py            |  79 ++++++++++++++++++
 3 files changed, 158 insertions(+), 75 deletions(-)
 create mode 100644 test/Metadata_test.py

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index 6b95ff5e5..d2d5601cd 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -334,8 +334,6 @@ def get_input_factory(args):
                 port=args.input_port,
                 locationCode=args.locationcode,
                 convert_channels=args.convert_voltbin,
-                volt_conv=args.volt_conversion,
-                bin_conv=args.bin_conversion,
                 **input_factory_args)
     elif input_type == 'goes':
         # TODO: deal with other goes arguments
@@ -690,14 +688,6 @@ def parse_args(args):
                     --outchannels U
                     "
                 """)
-    input_group.add_argument('--volt-conversion',
-            default=100.0,
-            metavar='NT',
-            help='Conversion factor (nT/V) for volts')
-    input_group.add_argument('--bin-conversion',
-            default=500.0,
-            metavar='NT',
-            help='Conversion factor (nT/bin) for bins')
 
     # Output group
     output_group = parser.add_argument_group('Output', 'How data is written.')
@@ -849,6 +839,15 @@ def parse_args(args):
     if '--enable-deprecated-arguments' in args:
         add_deprecated_args(deprecated, input_type_group, output_type_group)
 
+    deprecated.add_argument('--volt-conversion',
+            default=100.0,
+            metavar='NT',
+            help='(Deprecated, Unused) Conversion factor (nT/V) for volts')
+    deprecated.add_argument('--bin-conversion',
+            default=500.0,
+            metavar='NT',
+            help='(Deprecated, Unused) Conversion factor (nT/bin) for bins')
+
     return parser.parse_args(args)
 
 
diff --git a/geomagio/edge/MiniSeedFactory.py b/geomagio/edge/MiniSeedFactory.py
index 97edd82c1..a27f1580f 100644
--- a/geomagio/edge/MiniSeedFactory.py
+++ b/geomagio/edge/MiniSeedFactory.py
@@ -66,7 +66,7 @@ class MiniSeedFactory(TimeseriesFactory):
     def __init__(self, host='cwbpub.cr.usgs.gov', port=2061, write_port=7981,
             observatory=None, channels=None, type=None, interval=None,
             observatoryMetadata=None, locationCode=None,
-            convert_channels=None, volt_conv=100, bin_conv=500):
+            convert_channels=None):
         TimeseriesFactory.__init__(self, observatory, channels, type, interval)
 
         self.client = miniseed.Client(host, port)
@@ -77,8 +77,6 @@ class MiniSeedFactory(TimeseriesFactory):
         self.port = port
         self.write_port = write_port
         self.convert_channels = convert_channels
-        self.volt_conv = volt_conv
-        self.bin_conv = bin_conv
         self.write_client = MiniSeedInputClient(self.host, self.write_port)
 
     def get_timeseries(self, starttime, endtime, observatory=None,
@@ -185,6 +183,64 @@ class MiniSeedFactory(TimeseriesFactory):
         # close socket
         self.write_client.close()
 
+    def get_calculated_timeseries(self, starttime, endtime, observatory,
+            channel, type, interval, components):
+        """Calculate a single channel using multiple component channels.
+
+        Each component is loaded, converted using ``data * scale + offset``,
+        and the converted components are summed into one output trace.
+
+        Parameters
+        ----------
+        starttime: obspy.core.UTCDateTime
+            the starttime of the requested data
+        endtime: obspy.core.UTCDateTime
+            the endtime of the requested data
+        observatory : str
+            observatory code
+        channel : str
+            channel name assigned to the calculated trace (e.g. U, V, or W)
+        type : str
+            data type {definitive, quasi-definitive, variation}
+        interval : str
+            interval length {'day', 'hour', 'minute', 'second', 'tenhertz'}
+        components: list
+            each component is a dictionary with the following keys:
+                channel: str
+                offset: float
+                scale: float
+
+        Returns
+        -------
+        obspy.core.trace
+            timeseries trace of the converted channel data
+        """
+        # sum channels
+        stats = None
+        converted = None
+        for component in components:
+            # load component
+            data = self._get_timeseries(starttime, endtime, observatory,
+                        component["channel"], type, interval)[0]
+            # convert to nT
+            nt = data.data * component["scale"] + component["offset"]
+            # add to converted
+            if converted is None:
+                # first component: its stats are the template for the output
+                converted = nt
+                stats = obspy.core.Stats(data.stats)
+            else:
+                converted += nt
+        # set channel parameter to U, V, or W
+        stats.channel = channel
+        # create empty trace with adapted stats
+        out = TimeseriesUtility.create_empty_trace(stats.starttime,
+                stats.endtime, stats.station, stats.channel,
+                stats.data_type, stats.data_interval,
+                stats.network, stats.station, stats.location)
+        out.data = converted
+        return out
+
     def _convert_stream_to_masked(self, timeseries, channel):
         """convert geomag edge traces in a timeseries stream to a MaskedArray
             This allows for gaps and splitting.
@@ -482,80 +538,29 @@ class MiniSeedFactory(TimeseriesFactory):
         out = obspy.core.Stream()
         metadata = get_instrument(observatory, starttime, endtime)
         # loop in case request spans different configurations
-        for instrument in metadata:
+        for entry in metadata:
+            entry_endtime = entry["end_time"]
+            entry_starttime = entry["start_time"]
+            instrument = entry["instrument"]
             instrument_channels = instrument["channels"]
-            instrument_endtime = instrument["end_time"]
-            instrument_starttime = instrument["start_time"]
             if channel not in instrument_channels:
                 # no idea how to convert
                 continue
             # determine metadata overlap with request
             start = (starttime
-                    if instrument_starttime is None or
-                        instrument_starttime < starttime
-                    else instrument_starttime)
+                    if entry_starttime is None or
+                        entry_starttime < starttime
+                    else entry_starttime)
             end = (endtime
-                    if instrument_endtime is None or
-                        instrument_endtime > endtime
-                    else instrument_endtime)
+                    if entry_endtime is None or
+                        entry_endtime > endtime
+                    else entry_endtime)
             # now convert
-            out += self._get_converted_timeseries(start, end,
+            out += self.get_calculated_timeseries(start, end,
                     observatory, channel, type, interval,
                     instrument_channels[channel])
         return out
 
-    def _get_converted_timeseries(self, starttime, endtime, observatory,
-            channel, type, interval, components):
-        """Generate a single channel using multiple components.
-
-        Parameters
-        ----------
-        starttime: obspy.core.UTCDateTime
-            the starttime of the requested data
-        endtime: obspy.core.UTCDateTime
-            the endtime of the requested data
-        observatory : str
-            observatory code
-        channel : str
-            single character channel {H, E, D, Z, F}
-        type : str
-            data type {definitive, quasi-definitive, variation}
-        interval : str
-            interval length {'day', 'hour', 'minute', 'second', 'tenhertz'}
-        components: list
-            each component is a dictionary with the following keys:
-                channel: str
-                offset: float
-                scale: float
-
-        Returns
-        -------
-        obspy.core.trace
-            timeseries trace of the converted channel data
-        """
-        # sum channels
-        stats = None
-        converted = None
-        for component in components:
-            # load component
-            data = self._get_timeseries(starttime, endtime, observatory,
-                        component["channel"], type, interval)
-            # save stats from first component
-            stats = stats or obspy.core.Stats(data.stats)
-            # convert to nT
-            nt = data.data * component["scale"] + component["offset"]
-            # add to converted
-            converted = converted and converted + nt or nt
-        # set channel parameter to U, V, or W
-        stats.channel = channel
-        # create empty trace with adapted stats
-        out = TimeseriesUtility.create_empty_trace(stats.starttime,
-                stats.endtime, stats.station, stats.channel,
-                stats.data_type, stats.data_interval,
-                stats.network, stats.station, stats.location)
-        out.data = converted
-        return out
-
     def _post_process(self, timeseries, starttime, endtime, channels):
         """Post process a timeseries stream after the raw data is
                 is fetched from querymom. Specifically changes
diff --git a/test/Metadata_test.py b/test/Metadata_test.py
new file mode 100644
index 000000000..01e5e4171
--- /dev/null
+++ b/test/Metadata_test.py
@@ -0,0 +1,79 @@
+from obspy import UTCDateTime
+from geomagio.Metadata import get_instrument
+from numpy.testing import assert_equal
+
+
+METADATA1 = {
+    "station": "TST",
+    "start_time": None,
+    "end_time": UTCDateTime("2020-02-02T00:00:00Z"),
+}
+
+METADATA2 = {
+    "station": "TST",
+    "start_time": UTCDateTime("2020-02-02T00:00:00Z"),
+    "end_time": UTCDateTime("2020-02-03T00:00:00Z"),
+}
+
+METADATA3 = {
+    "station": "TST",
+    "start_time": UTCDateTime("2020-02-03T00:00:00Z"),
+    "end_time": None,
+}
+
+TEST_METADATA = [METADATA1, METADATA2, METADATA3]
+
+
+def test_get_instrument_after():
+    """Request an interval after the last entry, that has end_time None"""
+    matches = get_instrument(
+        "TST",
+        UTCDateTime("2021-02-02T00:00:00Z"),
+        UTCDateTime("2022-01-02T00:00:00Z"),
+        TEST_METADATA,
+    )
+    assert_equal(matches, [METADATA3])
+
+
+def test_get_instrument_before():
+    """Request an interval before the first entry, that has start_time None"""
+    matches = get_instrument(
+        "TST",
+        UTCDateTime("2019-02-02T00:00:00Z"),
+        UTCDateTime("2020-01-02T00:00:00Z"),
+        TEST_METADATA,
+    )
+    assert_equal(matches, [METADATA1])
+
+
+def test_get_instrument_inside():
+    """Request an interval that is wholly contained by one entry"""
+    matches = get_instrument(
+        "TST",
+        UTCDateTime("2020-02-02T01:00:00Z"),
+        UTCDateTime("2020-02-02T02:00:00Z"),
+        TEST_METADATA,
+    )
+    assert_equal(matches, [METADATA2])
+
+
+def test_get_instrument_span():
+    """Request a time interval that spans multiple entries"""
+    matches = get_instrument(
+        "TST",
+        UTCDateTime("2020-01-02T00:00:00Z"),
+        UTCDateTime("2020-02-02T01:00:00Z"),
+        TEST_METADATA,
+    )
+    assert_equal(matches, [METADATA1, METADATA2])
+
+
+def test_get_instrument_unknown():
+    """Request an unknown observatory"""
+    matches = get_instrument(
+        "OTHER",
+        UTCDateTime("2020-01-02T00:00:00Z"),
+        UTCDateTime("2020-02-02T01:00:00Z"),
+        TEST_METADATA,
+    )
+    assert_equal(matches, [])
-- 
GitLab