diff --git a/geomagio/algorithm/MetadataAlgorithm.py b/geomagio/algorithm/MetadataAlgorithm.py index ef61e73f49947ba3f3c6eb2b427767014ccac422..c9970cf664a3a987dc7451712a85cf859771c74b 100644 --- a/geomagio/algorithm/MetadataAlgorithm.py +++ b/geomagio/algorithm/MetadataAlgorithm.py @@ -2,7 +2,6 @@ from pydantic import BaseModel from obspy import UTCDateTime, Stream from datetime import timedelta from enum import Enum -from typing import List from ..metadata.flag.Flag import Flag from ..metadata import Metadata, MetadataFactory, MetadataCategory @@ -18,7 +17,7 @@ class DataFactory(str, Enum): class MetadataAlgorithm(BaseModel): factory: DataFactory = DataFactory.MINISEED observatory: str - channels: List[str] + channels: str metadata_token: str metadata_url: str type: str @@ -47,6 +46,7 @@ class MetadataAlgorithm(BaseModel): return data_factory.get_timeseries( starttime=self.starttime, endtime=self.endtime, + add_empty_channels=True, ) except Exception as e: raise ValueError(f"Failed to retrieve data stream from {self.factory}: {e}") @@ -103,26 +103,28 @@ class MetadataAlgorithm(BaseModel): def split_stream_by_day(self, stream: Stream) -> list[Stream]: """Split stream into daily streams to prevent metadata from overlapping.""" daily_streams = [] + # get min and max time + current_time = min(trace.stats.starttime for trace in stream) + end_time = max(trace.stats.endtime for trace in stream) + + # loop through each day and slice the stream accordingly + while current_time <= end_time: + day_endtime = min( + UTCDateTime( + current_time.year, current_time.month, current_time.day, 23, 59, 59 + ), + end_time, + ) + + # slice stream for the current day + daily_stream = stream.slice( + starttime=current_time, endtime=day_endtime, nearest_sample=True + ) + + if daily_stream: + daily_streams.append(daily_stream) - for trace in stream: - current_time = trace.stats.starttime - trace_endtime = trace.stats.endtime - - while current_time <= trace_endtime: - day_endtime = min( - UTCDateTime( - current_time.year, current_time.month, current_time.day, 23, 59 - ), - trace_endtime, - ) - - daily_streams.append( - stream.slice( - starttime=current_time, endtime=day_endtime, nearest_sample=True - ) - ) - - current_time += timedelta(days=1) + current_time += timedelta(days=1) return daily_streams