Skip to content
Snippets Groups Projects
Commit b221ec1d authored by Shavers, Nicholas H
Browse files

network encoding for maximum portability. code cleanup

parent 066e59b4
No related branches found
No related tags found
1 merge request: !368 "Imagcdf factory mvp"
...@@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory):
try: try:
# Initialize the CDF writer # Initialize the CDF writer
cdf_spec = { cdf_spec = {
"Compressed": 9, # Enable compression (0-9) "Compressed": 9, # Enable compression (1-9)
"Majority": CDFWriter.ROW_MAJOR, # Data layout - gets set automatically "Majority": CDFWriter.ROW_MAJOR,
"Encoding": CDFWriter.HOST_ENCODING, # gets set automatically "Encoding": CDFWriter.NETWORK_ENCODING, # XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded.
"Checksum": True, # Enable checksum to verify data integrity (set to False for faster writes) "Checksum": True, # Enable checksum to verify data integrity (set to False for faster writes)
"rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables.
} }
...@@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory):
for urlInterval in urlIntervals: for urlInterval in urlIntervals:
interval_start = urlInterval["start"] interval_start = urlInterval["start"]
interval_end = urlInterval["end"] interval_end = urlInterval["end"]
# Removes last data point ex: if endtime = 02:00:00, this could return 01:59:00 as last data point.
# if interval_start != interval_end:
# interval_end = interval_end - delta
url = self._get_url( url = self._get_url(
observatory=observatory, observatory=observatory,
date=interval_start, date=interval_start,
...@@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory):
try: try:
# Read CDF data and merge # Read CDF data and merge
cdf = CDFReader(url_file) cdf = CDFReader(url_file)
# file_stream = self._read_cdf(cdf, channels)
timeseries = self._read_cdf(cdf, channels) timeseries = self._read_cdf(cdf, channels)
# Attempt to select only requested channels (redundant as read_cdf can more efficiently filter)
# selected = Stream()
# for ch in channels:
# selected += file_stream.select(channel=ch)
# timeseries += selected
except Exception as e: except Exception as e:
print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr)
...@@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory):
timeseries.sort() timeseries.sort()
return timeseries return timeseries
# Removed - cdflib takes a file path as an input more efficiently than taking in byte data.
# def parse_string(self, data: str, **kwargs):
# """
# Parse ImagCDF binary data into an ObsPy Stream.
# This method writes the provided binary data to a temporary file,
# reads the file using `cdflib`, and converts the data into an ObsPy
# Stream.
# Parameters
# ----------
# data : bytes
# Binary data containing ImagCDF content.
# Returns
# -------
# Stream
# An ObsPy Stream object with the parsed geomagnetic time series data.
# Raises
# ------
# TimeseriesFactoryException
# If an error occurs while parsing the ImagCDF data.
# """
# # Create a temporary file to store the CDF data
# with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file:
# tmp_file_name = tmp_file.name
# tmp_file.write(data)
# channels = kwargs.get('channels', [])
# try:
# # Read the CDF from the temporary file
# cdf = CDFReader(tmp_file_name)
# stream = self._read_cdf(cdf, channels)
# # no cdf.close() method required
# except Exception as e:
# raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}")
# finally:
# # Clean up the temporary file
# os.remove(tmp_file_name)
# return stream
def _create_global_attributes( def _create_global_attributes(
self, timeseries: Stream, channels: List[str] self, timeseries: Stream, channels: List[str]
) -> dict: ) -> dict:
...@@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory):
units = "Celsius" units = "Celsius"
validmin = -273.15 # absolute zero validmin = -273.15 # absolute zero
validmax = 79_999 validmax = 79_999
depend_0 = "DataTimes" #can be used for nonstandard element depend_0 = "DataTimes" # can be used for nonstandard element
# elif channel in [REAL_TEMPERATURES]: # elif channel in [REAL_TEMPERATURES]:
# units = "Celsius" # units = "Celsius"
# fieldnam = f"Temperature {temperature_index} {trace.stats.location}" # fieldnam = f"Temperature {temperature_index} {trace.stats.location}"
...@@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory):
f"{', '.join(missing_global_attrs)}" f"{', '.join(missing_global_attrs)}"
) )
raise TimeseriesFactoryException(error_message) raise TimeseriesFactoryException(error_message)
# Map global attributes to Stream-level metadata # Map global attributes to Stream-level metadata
observatory = global_attrs.get("IagaCode", [""])[0] observatory = global_attrs.get("IagaCode", [""])[0]
station_name = global_attrs.get("ObservatoryName", [""])[0] station_name = global_attrs.get("ObservatoryName", [""])[0]
...@@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory):
# Read data variables and associate them with time variables # Read data variables and associate them with time variables
for var in cdf.cdf_info().zVariables: for var in cdf.cdf_info().zVariables:
# Skip time variables # Skip time variables
if var.endswith("Times"): if var.endswith("Times"):
continue continue
# Map the variable name back to a standard channel code # Map the variable name back to a standard channel code by removing known prefixes
# Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. # Names are like GeomagneticFieldH, GeomagneticFieldD, Temperature1, Temperature2, ...
# Temperatures are named like Temperature1, Temperature2, ...
# Extract channel name by removing known prefixes
if var.startswith("GeomagneticField"): if var.startswith("GeomagneticField"):
channel = var.replace("GeomagneticField", "") channel = var.replace("GeomagneticField", "")
elif var.startswith("Temperature"): # elif var.startswith("Temperature"):
# Temperature variables may not map directly to a geomagnetic channel # # Temperature variables may not map directly to a geomagnetic channel
# but to temperature sensors. We can just use the label from LABLAXIS if needed # # but to temperature sensors. We can just use the label from LABLAXIS if needed
channel = attrs.get("LABLAXIS", var) # channel = attrs.get("LABLAXIS", var)
else: else:
# fallback if naming doesn't match expected patterns # fallback if naming doesn't match expected patterns
channel = var channel = var
if channels and channel not in channels: continue if channels and channel not in channels:
continue
data = cdf.varget(var) data = cdf.varget(var)
attrs = cdf.varattsget(var) attrs = cdf.varattsget(var)
...@@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory):
# continue # continue
times = [] times = []
if matched_time_key in time_vars: if matched_time_key in time_vars:
times = time_vars[matched_time_key] times = time_vars[matched_time_key]
# Determine delta (sample interval) # Determine delta (sample interval)
if len(times) > 1: if len(times) > 1:
...@@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory):
"VALIDMAX", "VALIDMAX",
"DISPLAY_TYPE", "DISPLAY_TYPE",
"LABLAXIS", "LABLAXIS",
"DEPEND_0" "DEPEND_0",
] ]
# Validate presence of required variable attributes # Validate presence of required variable attributes
missing_var_attrs = [] missing_var_attrs = []
...@@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory): ...@@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory):
base_path = self.urlTemplate[7:] base_path = self.urlTemplate[7:]
if not base_path or base_path == "{obs}_{dt}_{t}.cdf": if not base_path or base_path == "{obs}_{dt}_{t}.cdf":
base_path = os.getcwd() # Default to current working directory base_path = os.getcwd() # Default to current working directory
return os.path.join(base_path, "etc","imagcdf", filename) return os.path.join(base_path, "etc", "imagcdf", filename)
return os.path.join(self.urlTemplate, filename) return os.path.join(self.urlTemplate, filename)
# Unsupported URL scheme # Unsupported URL scheme
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment