diff --git a/etc/filter/BOU20200831vday.day b/etc/filter/BOU20200831vday.day
new file mode 100644
index 0000000000000000000000000000000000000000..94858b73c0a6ade10b0f89955ec2f3fa866efc70
--- /dev/null
+++ b/etc/filter/BOU20200831vday.day
@@ -0,0 +1,26 @@
+ Format IAGA-2002 |
+ Source of Data United States Geological Survey (USGS) |
+ Station Name Boulder |
+ IAGA CODE BOU |
+ Geodetic Latitude 40.137 |
+ Geodetic Longitude 254.763 |
+ Elevation 1682 |
+ Reported HEZF |
+ Sensor Orientation HDZF |
+ Digital Sampling 0.01 second |
+ Data Interval Type filtered 1-minute (00:15-01:45) |
+ Data Type variation |
+ # DECBAS 5527 (Baseline declination value in |
+ # tenths of minutes East (0-216,000)). |
+ # Vector 1-minute values are computed from 1-second values using |
+ # the INTERMAGNET gaussian filter centered on the minute. Scalar |
+ # 1-minute values are computed from 1-second values using the |
+ # INTERMAGNET gaussian filter centered on the minute. |
+ # CONDITIONS OF USE: The Conditions of Use for data provided |
+ # through INTERMAGNET and acknowledgement templates can be found at |
+ # www.intermagnet.org |
+DATE TIME DOY BOUH BOUE BOUZ BOUF |
+2020-08-27 11:59:30.000 240 20817.44 -110.62 46800.86 51739.31
+2020-08-28 11:59:30.000 241 20817.73 -111.55 46799.29 51738.09
+2020-08-29 11:59:30.000 242 20804.21 -109.16 46800.15 51733.70
+2020-08-30 11:59:30.000 243 20808.16 -109.66 46803.17 51738.29
diff --git a/etc/filter/BOU20200831vhor.hor b/etc/filter/BOU20200831vhor.hor
new file mode 100644
index 0000000000000000000000000000000000000000..c721fe096ba82490efbb294325da3986d9163b63
--- /dev/null
+++ b/etc/filter/BOU20200831vhor.hor
@@ -0,0 +1,26 @@
+ Format IAGA-2002 |
+ Source of Data United States Geological Survey (USGS) |
+ Station Name Boulder |
+ IAGA CODE BOU |
+ Geodetic Latitude 40.137 |
+ Geodetic Longitude 254.763 |
+ Elevation 1682 |
+ Reported HEZF |
+ Sensor Orientation HDZF |
+ Digital Sampling 0.01 second |
+ Data Interval Type filtered 1-minute (00:15-01:45) |
+ Data Type variation |
+ # DECBAS 5527 (Baseline declination value in |
+ # tenths of minutes East (0-216,000)). |
+ # Vector 1-minute values are computed from 1-second values using |
+ # the INTERMAGNET gaussian filter centered on the minute. Scalar |
+ # 1-minute values are computed from 1-second values using the |
+ # INTERMAGNET gaussian filter centered on the minute. |
+ # CONDITIONS OF USE: The Conditions of Use for data provided |
+ # through INTERMAGNET and acknowledgement templates can be found at |
+ # www.intermagnet.org |
+DATE TIME DOY BOUH BOUE BOUZ BOUF |
+2020-08-31 00:29:30.000 244 20778.61 -99.10 46814.71 51737.42
+2020-08-31 01:29:30.000 244 20777.91 -102.77 46814.63 51737.11
+2020-08-31 02:29:30.000 244 20789.41 -96.59 46814.35 51741.37
+2020-08-31 03:29:30.000 244 20813.68 -73.96 46808.12 51745.12
diff --git a/etc/filter/LLO20200106vhor.hor b/etc/filter/LLO20200106vhor.hor
deleted file mode 100644
index bb6a60d077cd341939eff9e729a2c40585b85e76..0000000000000000000000000000000000000000
--- a/etc/filter/LLO20200106vhor.hor
+++ /dev/null
@@ -1,9 +0,0 @@
- Format IAGA-2002 |
- IAGA CODE LLO |
- Reported UVWNUL |
-DATE TIME DOY LLOU LLOV LLOW LLONUL |
-2020-01-06 00:00:00.000 006 8330.00 -18971.07 39294.07 99999.00
-2020-01-06 01:00:00.000 006 8341.78 -18959.85 39294.98 99999.00
-2020-01-06 02:00:00.000 006 8363.88 -18962.53 39294.24 99999.00
-2020-01-06 03:00:00.000 006 8305.76 -18932.89 39305.85 99999.00
-2020-01-06 04:00:00.000 006 8320.99 -18990.56 39275.92 99999.00
diff --git a/etc/filter/coeffs.json b/etc/filter/coeffs.json
index 32708e153259678d2223b2fac9552964f2fbd4bb..e5c7b364cf101f29c4527535ac33441d55ac0a28 100644
--- a/etc/filter/coeffs.json
+++ b/etc/filter/coeffs.json
@@ -123,5 +123,6 @@
     -0.000004277196717501353,
     -0.0000012056376791654907,
     7.2383512291811e-20
-  ]
+  ],
+  "type": "firfilter"
 }
\ No newline at end of file
diff --git a/etc/filter/day_filter_min.mseed b/etc/filter/day_filter_min.mseed
new file mode 100644
index 0000000000000000000000000000000000000000..412c009d4a756661707eee3533de147cff41957b
Binary files /dev/null and b/etc/filter/day_filter_min.mseed differ
diff --git a/etc/filter/hor_filter_min.mseed b/etc/filter/hor_filter_min.mseed
new file mode 100644
index 0000000000000000000000000000000000000000..1917ade005d7bc1518a8f073c38ee86acf450fc0
Binary files /dev/null and b/etc/filter/hor_filter_min.mseed differ
diff --git a/geomagio/algorithm/FilterAlgorithm.py b/geomagio/algorithm/FilterAlgorithm.py
index 99b7177bdafa51ec4541f24d12193cb498f92eae..49353c0cd980c592d4d64c46fed27ddb178e4dac 100644
--- a/geomagio/algorithm/FilterAlgorithm.py
+++ b/geomagio/algorithm/FilterAlgorithm.py
@@ -17,22 +17,76 @@ STEPS = [
         "input_sample_period": 0.1,
         "output_sample_period": 1.0,
         "window": sps.firwin(123, 0.25, window="blackman", fs=10.0),
+        "type": "firfilter",
     },
     {
         # one second to one minute filter
         "name": "Intermagnet One Minute",
         "input_sample_period": 1.0,
         "output_sample_period": 60.0,
         "window": sps.get_window(window=("gaussian", 15.8734), Nx=91),
+        "type": "firfilter",
     },
     {
         # one minute to one hour filter
         "name": "One Hour",
         "input_sample_period": 60.0,
         "output_sample_period": 3600.0,
-        "window": sps.windows.boxcar(91),
+        "window": sps.windows.boxcar(60),
+        "type": "average",
+    },
+    {
+        # one minute to one day filter
+        "name": "One Day",
+        "input_sample_period": 60.0,
+        "output_sample_period": 86400,
+        "window": sps.windows.boxcar(1440),
+        "type": "average",
     },
 ]
+
+
+def get_nearest_time(step, output_time, left=True):
+    interval_start = output_time - (
+        output_time.timestamp % step["output_sample_period"]
+    )
+    # shift interval right if needed
+    if interval_start != output_time and not left:
+        interval_start += step["output_sample_period"]
+    # position center of filter, data around interval
+    half_width = get_step_time_shift(step)
+    if step["type"] == "average":
+        filter_center = interval_start + half_width
+        data_start = interval_start
+        data_end = (interval_start + step["output_sample_period"]) - step[
+            "input_sample_period"
+        ]
+    else:
+        filter_center = interval_start
+        data_start = filter_center - half_width
+        data_end = filter_center + half_width
+    return {
+        "time": filter_center,
+        "data_start": data_start,
+        "data_end": data_end,
+    }
+
+
+def get_step_time_shift(step):
+    """Calculates the time shift generated in each filtering step

+    Parameters
+    ----------
+    step: dict
+        Dictionary object holding information about a given filter step
+    Returns
+    -------
+    shift: float
+        Time shift value
+    """
+    input_sample_period = step["input_sample_period"]
+    numtaps = len(step["window"])
+    shift = input_sample_period * ((numtaps - 1) / 2)
+    return shift
+
+
 class FilterAlgorithm(Algorithm):
     """
     Filter Algorithm that filters and downsamples data
@@ -58,7 +112,7 @@ class FilterAlgorithm(Algorithm):
         self.load_state()
         # ensure correctly aligned coefficients in each step
         self.steps = (
-            self.steps and [self._prepare_step(step) for step in self.steps] or []
+            self.steps and [self._validate_step(step) for step in self.steps] or []
         )

     def load_state(self):
@@ -78,6 +132,7 @@ class FilterAlgorithm(Algorithm):
                 "input_sample_period": self.input_sample_period,
                 "output_sample_period": self.output_sample_period,
                 "window": data["window"],
+                "type": data["type"],
             }
         ]

@@ -87,7 +142,7 @@ class FilterAlgorithm(Algorithm):
         """
         if self.coeff_filename is None:
             return
-        data = {"window": list(self.window)}
+        data = {"window": list(self.window), "type": self.type}
         with open(self.coeff_filename, "w") as f:
             f.write(json.dumps(data))

@@ -105,22 +160,24 @@ class FilterAlgorithm(Algorithm):
         steps = []
         for step in STEPS:
-            if self.input_sample_period <= step["input_sample_period"]:
-                if self.output_sample_period >= step["output_sample_period"]:
-                    steps.append(step)
+            if (
+                self.input_sample_period <= step["input_sample_period"]
+                and self.output_sample_period >= step["output_sample_period"]
+            ):
+                if (
+                    step["type"] == "average"
+                    and step["output_sample_period"] != self.output_sample_period
+                ):
+                    continue
+                steps.append(step)
         return steps

-    def _prepare_step(self, step) -> Dict:
-        window = step["window"]
-        if len(window) % 2 == 1:
+    def _validate_step(self, step):
+        """Verifies that firfilter steps have an odd number of coefficients"""
+        if step["type"] == "firfilter" and len(step["window"]) % 2 != 1:
+            raise ValueError("Firfilter requires an odd number of coefficients")
+        else:
             return step
-        sys.stderr.write("Even number of taps. Appending center coefficient.")
-        new_window = np.array(window)
-        i = len(window) // 2
-        np.insert(new_window, i + 1, np.average(window[i : i + 2]))
-        new_step = dict(step)
-        new_step["window"] = new_window
-        return new_step

     def can_produce_data(self, starttime, endtime, stream):
         """Can Produce data
@@ -202,39 +259,55 @@ class FilterAlgorithm(Algorithm):
         decimation = int(output_sample_period / input_sample_period)
         numtaps = len(window)
         window = window / sum(window)
-        # first output timestamp is in the center of the filter window
-        filter_time_shift = input_sample_period * (numtaps // 2)
         out = Stream()
         for trace in stream:
-            # data to filter
-            data = trace.data
-            starttime = trace.stats.starttime + filter_time_shift
-            # align with the output period
-            misalignment = starttime.timestamp % output_sample_period
-            if misalignment != 0:
-                # skip incomplete input
-                starttime = (starttime - misalignment) + output_sample_period
-                input_starttime = starttime - filter_time_shift
-                offset = int(
-                    1e-6
-                    + (input_starttime - trace.stats.starttime) / input_sample_period
-                )
-                print(
-                    f"Skipping {offset} input samples to align output", file=sys.stderr
-                )
-                data = data[offset:]
-            # check there is still enough data for filter
-            if len(data) < numtaps:
-                continue
+            starttime, data = self.align_trace(step, trace)
+            # check that there is still enough data to filter
+            if len(data) < numtaps:
+                continue
             filtered = self.firfilter(data, window, decimation)
             stats = Stats(trace.stats)
-            stats.starttime = starttime
             stats.delta = output_sample_period
+            stats.starttime = starttime
             stats.npts = len(filtered)
             trace_out = self.create_trace(stats.channel, stats, filtered)
             out += trace_out
         return out

+    def align_trace(self, step, trace):
+        """Aligns trace to handle trailing or missing values.
+        Parameters
+        ----------
+        step: dict
+            Dictionary object holding information about a given filter step
+        trace: obspy.core.trace
+            trace holding data and stats (starttime/endtime) to manipulate in alignment
+        Returns
+        -------
+        filter_start["time"]: UTCDateTime
+            shifted time for filtered output
+        data: numpy array
+            trimmed data if input trace is misaligned
+        """
+        data = trace.data
+        start = trace.stats.starttime
+        filter_start = get_nearest_time(step=step, output_time=start, left=False)
+        while filter_start["data_start"] < start:
+            # filter needs more data, shift one output right
+            filter_start = get_nearest_time(
+                step=step,
+                output_time=filter_start["time"] + step["output_sample_period"],
+                left=False,
+            )
+
+        if start != filter_start["data_start"]:
+            offset = int(
+                1e-6
+                + (filter_start["data_start"] - start) / step["input_sample_period"]
+            )
+            data = data[offset:]
+        return filter_start["time"], data
+
     @staticmethod
     def firfilter(data, window, step, allowed_bad=0.1):
         """Run fir filter for a numpy array.
@@ -304,12 +377,11 @@ class FilterAlgorithm(Algorithm):
            end of input required to generate requested output.
""" steps = self.get_filter_steps() - # calculate start/end from step array - for step in steps: - half = len(step["window"]) // 2 - half_step = half * step["input_sample_period"] - start = start - half_step - end = end + half_step + # calculate start/end from inverted step array + for step in reversed(steps): + start_interval = get_nearest_time(step=step, output_time=start, left=False) + end_interval = get_nearest_time(step=step, output_time=end, left=True) + start, end = start_interval["data_start"], end_interval["data_end"] return (start, end) @classmethod diff --git a/test/algorithm_test/FilterAlgorithm_test.py b/test/algorithm_test/FilterAlgorithm_test.py index d0062d357b80a1887c713fdc930053bab7972322..6a1947183d7d685bad3a6b2edec43bd51db86cc7 100644 --- a/test/algorithm_test/FilterAlgorithm_test.py +++ b/test/algorithm_test/FilterAlgorithm_test.py @@ -3,8 +3,9 @@ import json from numpy.testing import assert_almost_equal, assert_equal import numpy as np from obspy import read, UTCDateTime +import pytest -from geomagio.algorithm import FilterAlgorithm +from geomagio.algorithm.FilterAlgorithm import FilterAlgorithm, get_nearest_time import geomagio.iaga2002 as i2 @@ -48,6 +49,8 @@ def test_second(): assert_almost_equal(u_filt.data, u.data, 2) assert_almost_equal(v_filt.data, v.data, 2) assert_almost_equal(w_filt.data, w.data, 2) + assert_equal(filtered[0].stats.starttime, UTCDateTime("2020-01-06T00:00:00Z")) + assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-01-06T04:00:00Z")) def test_minute(): @@ -89,47 +92,96 @@ def test_minute(): assert_almost_equal(u_filt.data, u.data, 2) assert_almost_equal(v_filt.data, v.data, 2) assert_almost_equal(w_filt.data, w.data, 2) + assert_equal(filtered[0].stats.starttime, UTCDateTime("2020-01-06T00:00:00Z")) + assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-01-06T04:00:00Z")) def test_hour(): """algorithm_test.FilterAlgorithm_test.test_hour() - Tests algorithm for 10Hz to hour. + Tests algorithm for 1min to hour. 
""" - f = FilterAlgorithm(input_sample_period=0.1, output_sample_period=3600.0) + f = FilterAlgorithm(input_sample_period=60.0, output_sample_period=3600.0) - # generation of 10HZ_filter_hor.mseed - # starttime = UTCDateTime('2020-01-06T00:00:00Z') - # endtime = UTCDateTime('2020-01-06T04:00:00Z') - # m = MiniSeedFactory(port=2061, host='...', - # convert_channels=['U', 'V', 'W']) - # f = FilterAlgorithm(input_sample_period=0.1, + # generation of hor_filter_min.mseed + # starttime = UTCDateTime("2020-08-31T00:00:00Z") + # endtime = UTCDateTime("2020-08-31T03:00:00Z") + # e = EdgeFactory() + # f = FilterAlgorithm(input_sample_period=60.0, # output_sample_period=3600.0) # starttime, endtime = f.get_input_interval(starttime,endtime) - # LLO = m.get_timeseries(observatory='LLO', + # BOU = e.get_timeseries(observatory='BOU', # starttime=starttime,endtime=endtime, - # channels=['U_Volt', 'U_Bin', 'V_Volt', - # 'V_Bin', 'W_Volt', 'W_Bin'], - # interval='tenhertz', type='variaton') - # LLO.write('10HZ_filter_hor.mseed') + # channels=["H", "E", "Z", "F"], + # interval="minute", type='variaton') + # LLO.write('hour_filter_min.mseed') - llo = read("etc/filter/10HZ_filter_hor.mseed") - filtered = f.process(llo) + bou = read("etc/filter/hor_filter_min.mseed") + filtered = f.process(bou) - with open("etc/filter/LLO20200106vhor.hor", "r") as f: + with open("etc/filter/BOU20200831vhor.hor", "r") as f: iaga = i2.StreamIAGA2002Factory(stream=f) - LLO = iaga.get_timeseries(starttime=None, endtime=None, observatory="LLO") + BOU = iaga.get_timeseries(starttime=None, endtime=None, observatory="BOU") - u = LLO.select(channel="U")[0] - v = LLO.select(channel="V")[0] - w = LLO.select(channel="W")[0] + h = BOU.select(channel="H")[0] + e = BOU.select(channel="E")[0] + z = BOU.select(channel="Z")[0] + f = BOU.select(channel="F")[0] - u_filt = filtered.select(channel="U")[0] - v_filt = filtered.select(channel="V")[0] - w_filt = filtered.select(channel="W")[0] + h_filt = filtered.select(channel="H")[0] + e_filt = filtered.select(channel="E")[0] + z_filt = filtered.select(channel="Z")[0] + f_filt = filtered.select(channel="F")[0] + + assert_almost_equal(h_filt.data, h.data, 2) + assert_almost_equal(e_filt.data, e.data, 2) + assert_almost_equal(z_filt.data, z.data, 2) + assert_almost_equal(f_filt.data, f.data, 2) + assert_equal(filtered[0].stats.starttime, UTCDateTime("2020-08-31T00:29:30")) + assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-08-31T03:29:30")) - assert_almost_equal(u_filt.data, u.data, 2) - assert_almost_equal(v_filt.data, v.data, 2) - assert_almost_equal(w_filt.data, w.data, 2) + +def test_day(): + """algorithm_test.FilterAlgorithm_test.test_hour() + Tests algorithm for 1min to day. 
+ """ + f = FilterAlgorithm(input_sample_period=60.0, output_sample_period=86400.0) + + # generation of day_filter_min.mseed + # starttime = UTCDateTime("2020-08-27T00:00:00Z") + # endtime = UTCDateTime("2020-08-30T00:00:00Z") + # e = EdgeFactory() + # f = FilterAlgorithm(input_sample_period=60.0, + # output_sample_period=86400.0) + # starttime, endtime = f.get_input_interval(starttime,endtime) + # BOU = e.get_timeseries(observatory='BOU', + # starttime=starttime,endtime=endtime, + # channels=["H", "E", "Z", "F"], + # interval="minute", type='variaton') + # LLO.write('day_filter_min.mseed') + + bou = read("etc/filter/day_filter_min.mseed") + filtered = f.process(bou) + + with open("etc/filter/BOU20200831vday.day", "r") as f: + iaga = i2.StreamIAGA2002Factory(stream=f) + BOU = iaga.get_timeseries(starttime=None, endtime=None, observatory="BOU") + + h = BOU.select(channel="H")[0] + e = BOU.select(channel="E")[0] + z = BOU.select(channel="Z")[0] + f = BOU.select(channel="F")[0] + + h_filt = filtered.select(channel="H")[0] + e_filt = filtered.select(channel="E")[0] + z_filt = filtered.select(channel="Z")[0] + f_filt = filtered.select(channel="F")[0] + + assert_almost_equal(h_filt.data, h.data, 2) + assert_almost_equal(e_filt.data, e.data, 2) + assert_almost_equal(z_filt.data, z.data, 2) + assert_almost_equal(f_filt.data, f.data, 2) + assert_equal(filtered[0].stats.starttime, UTCDateTime("2020-08-27T11:59:30")) + assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-08-30T11:59:30")) def test_custom(): @@ -175,6 +227,8 @@ def test_custom(): assert_almost_equal(u_filt.data, u.data, 2) assert_almost_equal(v_filt.data, v.data, 2) assert_almost_equal(w_filt.data, w.data, 2) + assert_equal(filtered[0].stats.starttime, UTCDateTime("2020-01-06T00:00:00Z")) + assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-01-06T04:00:00Z")) def test_starttime_shift(): @@ -216,9 +270,62 @@ def test_starttime_shift(): assert_equal(filtered[0].stats.endtime, UTCDateTime("2020-01-01T00:13:00Z")) -def test_prepare_step(): - """algorithm_test.FilterAlgorithm_test.test_custom() - Tests algorithm for 10Hz to second with custom filter coefficients. 
+def test_align_trace(): + """algorithm_test.FilterAlgorithm_test.test_align_trace() + Tests algorithm for minute to hour with expected behavior, trailing samples, and missing samples + """ + f = FilterAlgorithm(input_sample_period=60.0, output_sample_period=3600.0) + bou = read("etc/filter/hor_filter_min.mseed") + step = f.get_filter_steps()[0] + # check intial assumptions + starttime, _ = f.align_trace(step, bou[0]) + assert_equal(starttime, UTCDateTime("2020-08-31T00:29:30")) + # check for filtered product producing the correct interval with trailing samples + trimmed = bou.copy().trim( + starttime=UTCDateTime("2020-08-31T01:00:00"), + endtime=UTCDateTime("2020-08-31T02:04:00"), + ) + starttime, _ = f.align_trace(step, trimmed[0]) + assert_equal(starttime, UTCDateTime("2020-08-31T01:29:30")) + # test for skipped sample when not enough data is given for first interval + trimmed = bou.copy().trim( + starttime=UTCDateTime("2020-08-31T01:30:00"), endtime=bou[0].stats.endtime + ) + starttime, _ = f.align_trace(step, trimmed[0]) + assert_equal(starttime, UTCDateTime("2020-08-31T02:29:30")) + + +def test_get_nearest__oneday_average(): + """algorithm_test.FilterAlgorithm_test.test_get_nearest__oneday_average() + Tests get_nearest_time for minute to day + """ + f = FilterAlgorithm(input_sample_period=60.0, output_sample_period=86400.0) + step = f.get_filter_steps()[0] + time = UTCDateTime("2020-08-20T01:00:00") + aligned = get_nearest_time(step=step, output_time=time) + # filter is average for day, should be first/last minute samples of 2020-08-20 + assert_equal(aligned["data_start"], UTCDateTime("2020-08-20T00:00:00")) + assert_equal(aligned["time"], UTCDateTime("2020-08-20T11:59:30")) + assert_equal(aligned["data_end"], UTCDateTime("2020-08-20T23:59:00")) + + +def test_get_nearest__intermagnet_minute(): + """algorithm_test.FilterAlgorithm_test.test_get_nearest__intermagnet_minute() + Tests get_nearest_time for second to minute + """ + f = FilterAlgorithm(input_sample_period=1.0, output_sample_period=60.0) + step = f.get_filter_steps()[0] + time = UTCDateTime("2020-08-20T01:00:13") + aligned = get_nearest_time(step=step, output_time=time) + # filter uses 91 samples, should be 01:00:00 +/- 45 seconds + assert_equal(aligned["data_start"], UTCDateTime("2020-08-20T00:59:15")) + assert_equal(aligned["time"], UTCDateTime("2020-08-20T01:00:00")) + assert_equal(aligned["data_end"], UTCDateTime("2020-08-20T01:00:45")) + + +def test_validate_step(): + """algorithm_test.FilterAlgorithm_test.test_validate_steps() + Validates algorithm steps 10 Hz to second with custom coefficients. 
""" with open("etc/filter/coeffs.json", "rb") as f: step = json.loads(f.read()) @@ -227,14 +334,12 @@ def test_prepare_step(): half = numtaps // 2 # check initial assumption assert_equal(numtaps % 2, 1) - # expect step to be unchanged when window has odd length - unchanged = f._prepare_step(step) - assert_equal(unchanged, step) - # expect step to be extended when window has event length - even_step = {"window": np.delete(step["window"], numtaps // 2, 0)} - assert_equal(len(even_step["window"]) % 2, 0) - prepared = f._prepare_step(step) - assert_equal(len(prepared["window"]) % 2, 1) - # value is inserted in middle - assert_equal(prepared["window"][: half + 1], step["window"][: half + 1]) - assert_equal(prepared["window"][half:], step["window"][half:]) + f._validate_step(step) + # expect step to raise a value error when window has an even length + step = { + "window": np.delete(step["window"], numtaps // 2, 0), + "type": "firfilter", + } + assert_equal(len(step["window"]) % 2, 0) + with pytest.raises(ValueError): + f._validate_step(step)