From b221ec1db777d993dead9def3dfca3f93b9d596a Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 13 Dec 2024 09:28:18 -0800 Subject: [PATCH] network encoding for maximum portability. code cleanup --- geomagio/imagcdf/ImagCDFFactory.py | 86 ++++++------------------------ 1 file changed, 17 insertions(+), 69 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 0c63bb70..89c1f6a6 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory): try: # Initialize the CDF writer cdf_spec = { - "Compressed": 9, # Enable compression (0-9) - "Majority": CDFWriter.ROW_MAJOR, # Data layout - gets set automatically - "Encoding": CDFWriter.HOST_ENCODING, # gets set automatically + "Compressed": 9, # Enable compression (1-9) + "Majority": CDFWriter.ROW_MAJOR, + "Encoding": CDFWriter.NETWORK_ENCODING, # XDR encoding - if a CDF must be portable between two or more different types of computers, use network encoding. "Checksum": True, # Disable checksum for faster writes (optional) "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } @@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory): for urlInterval in urlIntervals: interval_start = urlInterval["start"] interval_end = urlInterval["end"] - # Removes last data point ex: if endtime = 02:00:00, this could return 01:59:00 as last data point. 
- # if interval_start != interval_end: - # interval_end = interval_end - delta url = self._get_url( observatory=observatory, date=interval_start, @@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory): try: # Read CDF data and merge cdf = CDFReader(url_file) - # file_stream = self._read_cdf(cdf, channels) timeseries = self._read_cdf(cdf, channels) - # Attempt to select only requested channelws (redundant as read_cdf can more efficiently filter) - # selected = Stream() - # for ch in channels: - # selected += file_stream.select(channel=ch) - # timeseries += selected except Exception as e: print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) @@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory): timeseries.sort() return timeseries - # Removed - cdflib takes a file path as an input more efficiently than taking in byte data. - # def parse_string(self, data: str, **kwargs): - # """ - # Parse ImagCDF binary data into an ObsPy Stream. - - # This method writes the provided binary data to a temporary file, - # reads the file using `cdflib`, and converts the data into an ObsPy - # Stream. - - # Parameters - # ---------- - # data : bytes - # Binary data containing ImagCDF content. - - # Returns - # ------- - # Stream - # An ObsPy Stream object with the parsed geomagnetic time series data. - - # Raises - # ------ - # TimeseriesFactoryException - # If an error occurs while parsing the ImagCDF data. 
- # """ - # # Create a temporary file to store the CDF data - # with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: - # tmp_file_name = tmp_file.name - # tmp_file.write(data) - # channels = kwargs.get('channels', []) - # try: - # # Read the CDF from the temporary file - # cdf = CDFReader(tmp_file_name) - # stream = self._read_cdf(cdf, channels) - # # no cdf.close() method required - # except Exception as e: - # raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}") - # finally: - # # Clean up the temporary file - # os.remove(tmp_file_name) - - # return stream - def _create_global_attributes( self, timeseries: Stream, channels: List[str] ) -> dict: @@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory): units = "Celsius" validmin = -273.15 # absolute zero validmax = 79_999 - depend_0 = "DataTimes" #can be used for nonstandard element + depend_0 = "DataTimes" # can be used for nonstandard element # elif channel in [REAL_TEMPERATURES]: # units = "Celsius" # fieldnam = f"Temperature {temperature_index} {trace.stats.location}" @@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory): f"{', '.join(missing_global_attrs)}" ) raise TimeseriesFactoryException(error_message) - + # Map global attributes to Stream-level metadata observatory = global_attrs.get("IagaCode", [""])[0] station_name = global_attrs.get("ObservatoryName", [""])[0] @@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory): # Read data variables and associate them with time variables for var in cdf.cdf_info().zVariables: - + # Skip time variables if var.endswith("Times"): continue - # Map the variable name back to a standard channel code - # Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. - # Temperatures are named like Temperature1, Temperature2, ... 
- # Extract channel name by removing known prefixes + # Map the variable name back to a standard channel code by removing known prefixes + # Names are like GeomagneticFieldH, GeomagneticFieldD, Temperature1, Temperature2, ... if var.startswith("GeomagneticField"): channel = var.replace("GeomagneticField", "") - elif var.startswith("Temperature"): - # Temperature variables may not map directly to a geomagnetic channel - # but to temperature sensors. We can just use the label from LABLAXIS if needed - channel = attrs.get("LABLAXIS", var) + # elif var.startswith("Temperature"): + # # Temperature variables may not map directly to a geomagnetic channel + # # but to temperature sensors. We can just use the label from LABLAXIS if needed + # channel = attrs.get("LABLAXIS", var) else: # fallback if naming doesn't match expected patterns channel = var - if channels and channel not in channels: continue + if channels and channel not in channels: + continue data = cdf.varget(var) attrs = cdf.varattsget(var) @@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory): # continue times = [] if matched_time_key in time_vars: - times = time_vars[matched_time_key] + times = time_vars[matched_time_key] # Determine delta (sample interval) if len(times) > 1: @@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory): "VALIDMAX", "DISPLAY_TYPE", "LABLAXIS", - "DEPEND_0" + "DEPEND_0", ] # Validate presence of required variable attributes missing_var_attrs = [] @@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory): base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path, "etc","imagcdf", filename) + return os.path.join(base_path, "etc", "imagcdf", filename) return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme -- GitLab