Commit b662348c authored by Erin (Josh) Rigler

Modify TimeseriesUtility.py's split_trace()

The split_trace() function always dropped the last sample. This was
seemingly part of flawed logic designed to prevent traces that spanned
midnight from being written to Edge by the MiniSeedInputClient. That
worked, but only if the original trace actually spanned midnight; for
the majority of data, which do not span midnight, it dropped a sample
every time. The updated logic does what I believe was the original
intent (that is, it breaks traces into more manageable chunks) without
duplicating or losing any data. This is consistent with obspy's concept
of data slices, where starttime and endtime are always inclusive, but
it deviates somewhat from MiniSeed logic, which assumes a starttime,
plus a delta, plus a number of samples.
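
For context, a minimal sketch (not part of this commit) of the inclusive
slicing behavior described above, using a made-up 1 Hz trace and standard
obspy Trace.slice semantics:

import numpy
from obspy import Trace, UTCDateTime

delta = 1.0  # 1 Hz data
start = UTCDateTime("2024-01-01T00:00:00Z")
trace = Trace(numpy.arange((86400 * 2) + 1, dtype="float64"))
trace.stats.starttime = start
trace.stats.delta = delta
midnight = start + 86400

# obspy slices are inclusive on both ends, so both of these slices contain
# the midnight sample; writing both would duplicate that sample:
day1_inclusive = trace.slice(starttime=start, endtime=midnight, nearest_sample=False)
day2 = trace.slice(starttime=midnight, endtime=midnight + 86400, nearest_sample=False)
assert day1_inclusive.stats.npts == 86401
assert day2.stats.npts == 86401

# ending the first slice one delta early avoids the duplicate without
# dropping data, because the next slice starts exactly at midnight:
day1 = trace.slice(starttime=start, endtime=midnight - delta, nearest_sample=False)
assert day1.stats.npts == 86400
assert day1.stats.npts + day2.stats.npts == trace.stats.npts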

It was necessary to update a couple of unit tests for the
MiniSeedFactory to work with this new logic, but I am certain the
original test expectations were based on nothing more than what the
TimeseriesUtility.py function(s) returned when the tests were first
written.
parent e14d9838
1 merge request: !226 Modify TimeseriesUtility.py's split_trace()
@@ -621,13 +621,25 @@ def split_trace(trace: Trace, size: int = 86400) -> Stream:
         interval_start = interval["start"]
         interval_end = interval["end"]
         delta = out_trace.stats.delta
-        # accounts for trace containing one sample
         if interval_end - delta < interval_start:
+            # trace contains one sample
             stream += out_trace
             continue
-        stream += out_trace.slice(
-            starttime=interval_start,
-            endtime=interval_end - delta,
-            nearest_sample=False,
-        )
+        if interval_end.timestamp % size:
+            # trace does NOT contain first sample in next interval
+            stream += out_trace.slice(
+                starttime=interval_start, endtime=interval_end, nearest_sample=False
+            )
+        else:
+            # trace DOES contain first sample in next interval
+            stream += out_trace.slice(
+                starttime=interval_start,
+                endtime=interval_end - delta,
+                nearest_sample=False,
+            )
+            if interval_end == out_trace.stats.endtime:
+                # ONLY if it is the last interval
+                stream += out_trace.slice(
+                    starttime=interval_end, endtime=interval_end, nearest_sample=False
+                )
     return stream
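
As a usage sketch of the updated function (not part of the diff): a 1 Hz
trace covering two full days plus one extra sample should now split into
two day-long traces plus a single-sample trace at the final midnight
boundary, which is what the updated unit test below expects. The import
path geomagio.TimeseriesUtility is assumed here.

import numpy
from obspy import Trace, UTCDateTime
from geomagio.TimeseriesUtility import split_trace  # assumed import path

trace = Trace(numpy.arange((86400 * 2) + 1, dtype="float64"))
trace.stats.starttime = UTCDateTime("2024-01-01T00:00:00Z")
trace.stats.delta = 1.0

stream = split_trace(trace, size=86400)
# no samples duplicated or lost: 86400 + 86400 + 1 == 172801
assert [tr.stats.npts for tr in stream] == [86400, 86400, 1]
assert sum(tr.stats.npts for tr in stream) == trace.stats.npts
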
@@ -151,13 +151,19 @@ def test__pre_process():
     """edge_test.MiniSeedFactory_test.test__pre_process()"""
     trace = __create_trace(numpy.arange((86400 * 2) + 1), channel="H")
     processed = MiniSeedInputClient(host=None)._pre_process(stream=Stream(trace))
-    assert len(processed) == 2
-    for trace in processed:
+    assert len(processed) == 3
+    for trace in processed[0:2]:
         assert trace.data.dtype == "float32"
         stats = trace.stats
         assert stats.npts == 86400
         assert stats.starttime.timestamp % 86400 == 0
         assert stats.endtime.timestamp % 86400 != 0
+    for trace in processed[-1:]:
+        assert trace.data.dtype == "float32"
+        stats = trace.stats
+        assert stats.npts == 1
+        assert stats.starttime.timestamp % 86400 == 0
+        assert stats.starttime == stats.endtime
 
 
 def test__format_miniseed():
@@ -168,8 +174,10 @@ def test__format_miniseed():
     block_size = 512
     data = buf.getvalue()
     n_blocks = int(len(data) / block_size)
-    assert n_blocks == 1516
-    # 759th block is start of second day(758 blocks per day for 1Hz data)
+    assert n_blocks == 1517
+    # 759th block is start of second day
+    # (758 blocks per day for 1Hz data, which implies 56-byte,
+    # NOT 64-byte, MiniSeed headers...something to investigate)
     block_start = 758 * block_size
     block = data[block_start : block_start + block_size]
     out_stream = read(io.BytesIO(block))
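
A quick back-of-the-envelope check of the block counts above. This is
hedged: it assumes uncompressed FLOAT32 MiniSeed encoding at 4 bytes per
sample and a 56-byte record header (48-byte fixed header plus an 8-byte
Blockette 1000), per the "56-byte, NOT 64-byte" note in the test comment:

# 512-byte records with 56 header bytes leave 456 bytes of data per record
samples_per_block = (512 - 56) // 4              # 114 float32 samples per block
blocks_per_day = -(-86400 // samples_per_block)  # ceil(86400 / 114) == 758
total_blocks = 2 * blocks_per_day + 1            # two full days + the 1-sample trace
assert (samples_per_block, blocks_per_day, total_blocks) == (114, 758, 1517)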