From b662348c3822d7eb19dde6e1bb2d4111190d6da8 Mon Sep 17 00:00:00 2001 From: "E. Joshua Rigler" <erigler@usgs.gov> Date: Mon, 8 May 2023 10:20:47 -0600 Subject: [PATCH] Modify TimeseriesUtility.py's split_trace() The split_trace() function always dropped the last sample. This was seemingly part of flawed logic designed to prevent traces that spanned midnight from being written to Edge by the MiniSeedInputClient. This worked, but only if the original trace actually spanned midnight. For the majority of data that do not span midnight, this dropped a sample every time. The updated logic does what I believe was the original intent (that is, it breaks traces into more manageable chunks) without duplicating or losing any data. This is consistent with obspy's concept of data slices, where a starttime and endtime are always inclusive, but it deviates somewhat from MiniSeed logic, which assumes a starttime, plus a delta, plus a number of samples. It was necessary to update a couple of unit tests for the MiniSeedFactory to work with this new logic, but I am certain that the original test logic was not based on anything but what the TimeseriesUtility.py function(s) returned when they were originally written. 
--- geomagio/TimeseriesUtility.py | 24 ++++++++++++++++++------ test/edge_test/MiniSeedFactory_test.py | 16 ++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/geomagio/TimeseriesUtility.py b/geomagio/TimeseriesUtility.py index c54d74983..f22ba28d9 100644 --- a/geomagio/TimeseriesUtility.py +++ b/geomagio/TimeseriesUtility.py @@ -621,13 +621,25 @@ def split_trace(trace: Trace, size: int = 86400) -> Stream: interval_start = interval["start"] interval_end = interval["end"] delta = out_trace.stats.delta - # accounts for trace containing one sample if interval_end - delta < interval_start: + # trace contains one sample stream += out_trace continue - stream += out_trace.slice( - starttime=interval_start, - endtime=interval_end - delta, - nearest_sample=False, - ) + if interval_end.timestamp % size: + # trace does NOT contain first sample in next interval + stream += out_trace.slice( + starttime=interval_start, endtime=interval_end, nearest_sample=False + ) + else: + # trace DOES contain first sample in next interval + stream += out_trace.slice( + starttime=interval_start, + endtime=interval_end - delta, + nearest_sample=False, + ) + if interval_end == out_trace.stats.endtime: + # ONLY if it is the last interval + stream += out_trace.slice( + starttime=interval_end, endtime=interval_end, nearest_sample=False + ) return stream diff --git a/test/edge_test/MiniSeedFactory_test.py b/test/edge_test/MiniSeedFactory_test.py index 4858bc475..e5d5e1734 100644 --- a/test/edge_test/MiniSeedFactory_test.py +++ b/test/edge_test/MiniSeedFactory_test.py @@ -151,13 +151,19 @@ def test__pre_process(): """edge_test.MiniSeedFactory_test.test__pre_process()""" trace = __create_trace(numpy.arange((86400 * 2) + 1), channel="H") processed = MiniSeedInputClient(host=None)._pre_process(stream=Stream(trace)) - assert len(processed) == 2 - for trace in processed: + assert len(processed) == 3 + for trace in processed[0:2]: assert trace.data.dtype == "float32" stats = 
trace.stats assert stats.npts == 86400 assert stats.starttime.timestamp % 86400 == 0 assert stats.endtime.timestamp % 86400 != 0 + for trace in processed[-1:]: + assert trace.data.dtype == "float32" + stats = trace.stats + assert stats.npts == 1 + assert stats.starttime.timestamp % 86400 == 0 + assert stats.starttime == stats.endtime def test__format_miniseed(): @@ -168,8 +174,10 @@ def test__format_miniseed(): block_size = 512 data = buf.getvalue() n_blocks = int(len(data) / block_size) - assert n_blocks == 1516 - # 759th block is start of second day(758 blocks per day for 1Hz data) + assert n_blocks == 1517 + # 759th block is start of second day + # (758 blocks per day for 1Hz data, which implies 56-byte, + # NOT 64-byte, MiniSeed headers...something to investigate) block_start = 758 * block_size block = data[block_start : block_start + block_size] out_stream = read(io.BytesIO(block)) -- GitLab