From cfd231ce361bfdd7f78d30c6745dca0f03c6dc34 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Tue, 3 Dec 2024 13:49:18 -0800 Subject: [PATCH 01/30] poc --- geomagio/ImagCDFFactory.py | 753 +++++++++++++++++++++++++++++++++++++ poetry.lock | 19 + pyproject.toml | 1 + 3 files changed, 773 insertions(+) create mode 100644 geomagio/ImagCDFFactory.py diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py new file mode 100644 index 00000000..5d104acd --- /dev/null +++ b/geomagio/ImagCDFFactory.py @@ -0,0 +1,753 @@ +"""ImagCDFFactory Implementation Using cdflib + +This module provides the ImagCDFFactory class for creating and writing +geomagnetic time series data in the ImagCDF format using the cdflib library. +The ImagCDF format is based on NASA's Common Data Format (CDF) and is designed +to store geomagnetic data with high precision. + +References: +- ImagCDF Format Documentation: https://intermagnet.org/docs/technical/im_tn_8_ImagCDF.pdf +- CDF Library: http://cdf.gsfc.nasa.gov/ +""" + +from __future__ import absolute_import, print_function +from io import BytesIO +import os +import sys +from typing import List, Optional, Union + +import numpy as np +from obspy import Stream, Trace, UTCDateTime + +from geomagio.TimeseriesFactory import TimeseriesFactory + +from .geomag_types import DataInterval, DataType +from .TimeseriesFactoryException import TimeseriesFactoryException +from . import TimeseriesUtility +from . import Util + +import cdflib +import tempfile + + +class IMCDFPublicationLevel: + """Handles publication levels and mapping between data types and levels. + + The ImagCDF format uses publication levels to describe the processing + level of the data. This class maps data types (e.g., 'variation', 'definitive') + to their corresponding publication levels as defined in the ImagCDF documentation. + + Publication Levels: + 1: Raw data with no processing. + 2: Edited data with preliminary baselines. 
+ 3: Data suitable for initial bulletins or quasi-definitive publication. + 4: Definitive data with no further changes expected. + + Reference: + - ImagCDF Documentation Section 4.2: Attributes that Uniquely Identify the Data + """ + + class PublicationLevel: + LEVEL_1 = "1" + LEVEL_2 = "2" + LEVEL_3 = "3" + LEVEL_4 = "4" + + TYPE_TO_LEVEL = { + "none": PublicationLevel.LEVEL_1, + "variation": PublicationLevel.LEVEL_1, + "reported": PublicationLevel.LEVEL_1, + "provisional": PublicationLevel.LEVEL_2, + "adjusted": PublicationLevel.LEVEL_2, + "quasi-definitive": PublicationLevel.LEVEL_3, + "quasidefinitive": PublicationLevel.LEVEL_3, + "definitive": PublicationLevel.LEVEL_4, + } + + def __init__(self, data_type: Optional[str] = None): + """Initialize with a data type to determine the publication level.""" + if data_type: + self.level = self.TYPE_TO_LEVEL.get(data_type.lower()) + else: + raise ValueError("data_type must be provided.") + + if not self.level: + raise ValueError(f"Unsupported data_type: {data_type}") + + def to_string(self) -> str: + """Return the publication level as a string.""" + return self.level + + +class ImagCDFFactory(TimeseriesFactory): + """Factory for creating and writing ImagCDF formatted CDF files. + + This class extends the TimeseriesFactory to support writing geomagnetic + time series data to files in the ImagCDF format using the cdflib library. + """ + + def __init__( + self, + observatory: Optional[str] = None, + channels: List[str] = ("H", "D", "Z", "F"), + type: DataType = "variation", + interval: DataInterval = "minute", + urlTemplate="file://{obs}_{dt}_{t}.cdf", + urlInterval: int = -1, + ): + """ + Initialize the ImagCDFFactory with default parameters. + + Parameters: + - observatory: IAGA code of the observatory (e.g., 'BOU'). + - channels: List of geomagnetic elements (e.g., ['H', 'D', 'Z', 'F']). + - type: Data type indicating the processing level (e.g., 'variation', 'definitive'). 
+ - interval: Data interval (e.g., 'minute', 'second'). + - urlTemplate: Template for generating file URLs or paths. + - urlInterval: Interval size for splitting data into multiple files. + """ + super().__init__( + observatory=observatory, + channels=channels, + type=type, + interval=interval, + urlTemplate=urlTemplate, + urlInterval=urlInterval, + ) + + def parse_string(self, data: str, **kwargs): + """Parse ImagCDF formatted string data into a Stream. + + Note: Parsing from strings is not implemented in this factory. + """ + raise NotImplementedError('"parse_string" not implemented') + + def write_file(self, fh, timeseries: Stream, channels: List[str]): + """Write the timeseries data to a file handle in ImagCDF format. + + Parameters: + - fh: File handle to write the data. + - timeseries: ObsPy Stream containing the geomagnetic data. + - channels: List of channels to include in the output file. + """ + # Create a temporary file to write the CDF data + with tempfile.NamedTemporaryFile(delete=False) as tmp_file: + tmp_file_path = tmp_file.name + ".cdf" + + try: + # Initialize the CDF writer + cdf_writer = cdflib.cdfwrite.CDF(tmp_file_path, cdf_spec=None) + + # Write global attributes (metadata that applies to the entire file) + global_attrs = self._create_global_attributes(timeseries, channels) + cdf_writer.write_globalattrs(global_attrs) + + # Write time variables for each channel + time_vars = self._create_time_stamp_variables(timeseries) + for ts_name, ts_data in time_vars.items(): + var_spec = { + "Variable": ts_name, + "Data_Type": 33, # CDF TT2000 data type + "Num_Elements": 1, + "Rec_Vary": True, + "Dim_Sizes": [], + "Var_Type": "zVariable", + } + print(f"Writing time variable {ts_name} with data length: {len(ts_data)}") + cdf_writer.write_var( + var_spec=var_spec, + var_attrs=self._create_time_var_attrs(ts_name), + var_data=ts_data, + ) + + # Write geomagnetic data variables + for trace in timeseries: + channel = trace.stats.channel + var_name = 
f"GeomagneticField{channel}" + var_spec = { + "Variable": var_name, + "Data_Type": self._get_cdf_data_type(trace), + "Num_Elements": 1, + "Rec_Vary": True, + "Dim_Sizes": [], + "Var_Type": "zVariable", + } + print(f"Writing data variable {var_name} with data shape: {trace.data.shape}") + cdf_writer.write_var( + var_spec=var_spec, + var_attrs=self._create_var_attrs(trace), + var_data=trace.data, + ) + + # Copy the temporary CDF file to the final file handle + with open(tmp_file_path, "rb") as tmp: + cdf_data = tmp.read() + fh.write(cdf_data) + + cdf_writer.close() + + finally: + # Cleanup the temporary file + print(tmp_file_path) + + def put_timeseries( + self, + timeseries: Stream, + starttime: Optional[UTCDateTime] = None, + endtime: Optional[UTCDateTime] = None, + channels: Optional[List[str]] = None, + type: Optional[DataType] = None, + interval: Optional[DataInterval] = None, + ): + """ + Store timeseries data in ImagCDF format using cdflib. + + This method writes the timeseries data to one or more files, depending + on the specified urlInterval. + + Parameters: + - timeseries: ObsPy Stream containing the geomagnetic data. + - starttime: Start time of the data to be written. + - endtime: End time of the data to be written. + - channels: List of channels to include in the output file. + - type: Data type indicating the processing level. + - interval: Data interval of the data. 
+ """ + if len(timeseries) == 0: + # No data to store + return + + channels = channels or self.channels + type = type or self.type + interval = interval or self.interval + + # Extract metadata from the first trace + stats = timeseries[0].stats + delta = stats.delta # Sample rate + observatory = stats.station + starttime = starttime or stats.starttime + endtime = endtime or stats.endtime + + # Split data into intervals if necessary + urlIntervals = Util.get_intervals( + starttime=starttime, endtime=endtime, size=self.urlInterval + ) + for urlInterval in urlIntervals: + interval_start = urlInterval["start"] + interval_end = urlInterval["end"] + if interval_start != interval_end: + interval_end = interval_end - delta + url = self._get_url( + observatory=observatory, + date=interval_start, + type=type, + interval=interval, + channels=channels, + ) + + # Handle 'stdout' output + if url == 'stdout': + # Write directly to stdout + fh = sys.stdout.buffer + url_data = timeseries.slice( + starttime=interval_start, + endtime=interval_end, + ) + self.write_file(fh, url_data, channels) + continue # Proceed to next interval if any + + # Handle 'file://' output + elif url.startswith('file://'): + # Get the file path from the URL + url_file = Util.get_file_from_url(url, createParentDirectory=False) + url_data = timeseries.slice( + starttime=interval_start, + endtime=interval_end, + ) + + # Check if the file already exists to merge data + if os.path.isfile(url_file): + try: + # Read existing data to merge with new data + existing_cdf = cdflib.cdfread.CDF(url_file) + existing_stream = self._read_cdf(existing_cdf) + existing_cdf.close() + existing_data = existing_stream + + # Merge existing data with new data + for trace in existing_data: + new_trace = url_data.select( + network=trace.stats.network, + station=trace.stats.station, + channel=trace.stats.channel, + ) + if new_trace: + trace.data = np.concatenate((trace.data, new_trace[0].data)) + url_data = existing_data + url_data + 
except Exception as e: + # Log the exception if needed + print(f"Warning: Could not read existing CDF file '{url_file}': {e}", file=sys.stderr) + # Proceed with new data + + # Pad the data with NaNs to ensure it fits the interval + url_data.trim( + starttime=interval_start, + endtime=interval_end, + nearest_sample=False, + pad=True, + fill_value=np.nan, + ) + + # Write the data to the CDF file + with open(url_file, "wb") as fh: + self.write_file(fh, url_data, channels) + + else: + # Unsupported URL scheme encountered + raise TimeseriesFactoryException("Unsupported URL scheme in urlTemplate") + + def _create_global_attributes(self, timeseries: Stream, channels: List[str]) -> dict: + """ + Create a dictionary of global attributes for the ImagCDF file. + + These attributes apply to all the data in the file and include metadata + such as observatory information, data publication level, and format + descriptions. + + References: + - ImagCDF Documentation Section 4: ImagCDF Global Attributes + """ + stats = timeseries[0].stats if len(timeseries) > 0 else None + + # Extract metadata from stats or fallback to defaults + observatory_name = getattr(stats, 'station_name', None) or self.observatory or "Unknown Observatory" + station = getattr(stats, 'station', None) or "Unknown Iaga Code" + institution = getattr(stats, 'agency_name', None) or "Unknown Institution" + latitude = getattr(stats, 'geodetic_latitude', None) or 0.0 + longitude = getattr(stats, 'geodetic_longitude', None) or 0.0 + elevation = getattr(stats, 'elevation', None) or 99_999.0 + vector_orientation = getattr(stats, 'sensor_orientation', None) or "" + data_interval_type = getattr(stats, 'data_interval_type', None) or self.interval + publication_level = IMCDFPublicationLevel(data_type=self.type).to_string() + global_attrs = { + 'FormatDescription': {0: 'INTERMAGNET CDF Format'}, + 'FormatVersion': {0: '1.2'}, + 'Title': {0: 'Geomagnetic time series data'}, + 'IagaCode': {0: station}, + 'ElementsRecorded': {0: 
''.join(channels)}, + 'PublicationLevel': {0: publication_level}, + 'PublicationDate': {0: UTCDateTime.now().strftime("%Y-%m-%dT%H:%M:%SZ")}, + 'ObservatoryName': {0: observatory_name}, + 'Latitude': {0: latitude}, + 'Longitude': {0: longitude}, + 'Elevation': {0: elevation}, + 'Institution': {0: institution}, + 'VectorSensOrient': {0: vector_orientation}, #remove F - because its a calculation, not an element? + 'StandardLevel': {0: 'None'}, # Set to 'None' + # Temporarily Omit 'StandardName', 'StandardVersion', 'PartialStandDesc' + 'Source': {0: 'institute'}, # "institute" - if the named institution provided the data, “INTERMAGNET†- if the data file has been created by INTERMAGNET from another data source, “WDC†- if the World Data Centre has created the file from another data source + # 'TermsOfUse': {0: self.getINTERMAGNETTermsOfUse()}, + # 'UniqueIdentifier': {0: ''}, + # 'ParentIdentifiers': {0: ''}, + # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov + } + return global_attrs + + def _create_time_stamp_variables(self, timeseries: Stream) -> dict: + vector_times = None + scalar_times = None + + for trace in timeseries: + channel = trace.stats.channel + times = [ + (trace.stats.starttime + trace.stats.delta * i).datetime + for i in range(trace.stats.npts) + ] + # Convert timestamps to TT2000 format required by CDF + tt2000_times = cdflib.cdfepoch.timestamp_to_tt2000([time.timestamp() for time in times]) + # tt2000_times = cdflib.cdfepoch.compute_tt2000(times) #this does not work + + if channel in self._get_vector_elements(): + if vector_times is None: + vector_times = tt2000_times + else: + if not np.array_equal(vector_times, tt2000_times): + raise ValueError("Time stamps for vector channels are not the same.") + elif channel in self._get_scalar_elements(): + if scalar_times is None: + scalar_times = tt2000_times + else: + if not np.array_equal(scalar_times, tt2000_times): + raise ValueError("Time stamps for scalar channels are not the same.") + else: 
+ # Handle other channels if necessary + pass + + time_vars = {} + if vector_times is not None: + time_vars['GeomagneticVectorTimes'] = vector_times + if scalar_times is not None: + time_vars['GeomagneticScalarTimes'] = scalar_times + + return time_vars + + + def _create_var_spec( + self, + var_name: str, + data_type: str, + num_elements: int, + var_type: str, + dim_sizes: List[int], + sparse: bool, + compress: int, + pad: Optional[Union[str, np.ndarray]], + ) -> dict: + """ + Create a variable specification dictionary for cdflib. + + This is used to define the properties of a variable when writing it to + the CDF file. + + Parameters: + - var_name: Name of the variable. + - data_type: CDF data type. + - num_elements: Number of elements per record. + - var_type: Variable type ('zVariable' or 'rVariable'). + - dim_sizes: Dimensions of the variable (empty list for 0D). + - sparse: Whether the variable uses sparse records. + - compress: Compression level. + - pad: Pad value for sparse records. + + Reference: + - CDF User's Guide: Variable Specification + """ + var_spec = { + 'Variable': var_name, + 'Data_Type': data_type, + 'Num_Elements': num_elements, + 'Rec_Vary': True, + 'Var_Type': var_type, + 'Dim_Sizes': dim_sizes, + 'Sparse': 'no_sparse' if not sparse else 'pad_sparse', + 'Compress': compress, + 'Pad': pad, + } + return var_spec + + def _create_var_attrs(self, trace: Trace) -> dict: + print(trace.stats) + channel = trace.stats.channel + fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. 
+ units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†+ if channel == 'D': + units = 'Degrees of arc' + validmin = -360.0 + validmax = 360.0 # A full circle representation + elif channel == 'I': + units = 'Degrees of arc' + validmin = -90.0 + validmax = 90.0 #The magnetic field vector can point straight down (+90°), horizontal (0°), or straight up (-90°). + elif 'Temperature' in channel: + units = 'Celsius' + fieldnam = f"Temperature {trace.stats.location}" + elif channel == 'F': + units = 'nT' + validmin = 0.0 # negative magnetic field intestity not physically meaningful. + validmax = 79_999.0 + elif channel in ['X', 'Y', 'Z', 'H', 'E', 'V', 'G']: + units = 'nT' + validmin = -79_999.0 + validmax = 79_999.0 + + if channel in self._get_vector_elements(): + depend_0 = 'GeomagneticVectorTimes' + elif channel in self._get_scalar_elements(): + depend_0 = 'GeomagneticScalarTimes' + else: + depend_0 = None # Handle other cases if necessary + + + var_attrs = { + 'FIELDNAM': fieldnam, + 'UNITS': units, + 'FILLVAL': 99999.0, + 'VALIDMIN': validmin, + 'VALIDMAX': validmax, + 'DEPEND_0': depend_0, + 'DISPLAY_TYPE': 'time_series', + 'LABLAXIS': channel, + } + return var_attrs + + + def _create_time_var_attrs(self, ts_name: str) -> dict: + """ + Create a dictionary of time variable attributes. + + These attributes provide metadata for time variables. + Note: None of these attributes are required for the time stamp variables GeomagneticVectorTimes and GeomagneticScalarTimes. + Reference: + - ImagCDF Documentation Section 3: ImagCDF Data + """ + # var_attrs = { + # 'UNITS': 'TT2000', + # 'DISPLAY_TYPE': 'time_series', + # 'LABLAXIS': 'Time', + # } + # return var_attrs + return {} + + def _get_cdf_data_type(self, trace: Trace) -> int: + """ + Map ObsPy trace data type to CDF data type. + + Determines the appropriate CDF data type based on the NumPy data type + of the trace data. + + Returns: + - CDF_DOUBLE (45) for floating-point data. + - CDF_INT4 (41) for integer data. 
+ + Reference: + - CDF Data Types: http://cdf.gsfc.nasa.gov/html/cdfdatatypes.html + """ + # CDF data type constants + CDF_DOUBLE = 45 # CDF_DOUBLE corresponds to 64-bit float + CDF_INT4 = 41 # CDF_INT4 corresponds to 32-bit int + + if trace.data.dtype in [np.float32, np.float64]: + return CDF_DOUBLE + elif trace.data.dtype in [np.int32, np.int64]: + return CDF_INT4 + else: + # Default to double precision float + return CDF_DOUBLE + + def _read_cdf(self, cdf: cdflib.cdfread.CDF) -> Stream: + """ + Read CDF data into an ObsPy Stream. + + This method reads the data variables and their corresponding time + variables from a CDF file and constructs an ObsPy Stream. + + Parameters: + - cdf: cdflib CDF object representing the open CDF file. + + Returns: + - An ObsPy Stream containing the data from the CDF file. + """ + stream = Stream() + # Read time variables + time_vars = {} + for var in cdf.cdf_info()['zVariables']: + if var.endswith('Time'): + time_data = cdf.varget(var) + # Convert TT2000 to UTCDateTime + utc_times = [UTCDateTime(t) for t in cdflib.cdfepoch.to_datetime(time_data)] + time_vars[var] = utc_times + + # Read data variables + for var in cdf.cdf_info()['zVariables']: + if not var.endswith('Time'): + data = cdf.varget(var) + attrs = cdf.varattsget(var) + if 'DEPEND_0' in attrs: + ts_name = attrs['DEPEND_0'] + if ts_name in time_vars: + times = time_vars[ts_name] + if len(times) > 1: + delta = times[1] - times[0] # Calculate sample interval + else: + delta = 60 if self.interval == 'minute' else 1 + trace = Trace( + data=data, + header={ + 'station': self.observatory, + 'channel': var, + 'starttime': times[0], + 'delta': delta, + } + ) + stream += trace + return stream + + @staticmethod + def getINTERMAGNETTermsOfUse() -> str: + """ + Return the INTERMAGNET Terms of Use. + + These terms should be included in the 'TermsOfUse' global attribute + as per the ImagCDF specification. 
+ + Reference: + - ImagCDF Documentation Section 4.5: Attributes that Relate to Publication of the Data + """ + return ( + "CONDITIONS OF USE FOR DATA PROVIDED THROUGH INTERMAGNET:\n" + "The data made available through INTERMAGNET are provided for\n" + "your use and are not for commercial use or sale or distribution\n" + "to third parties without the written permission of the institute\n" + "(http://www.intermagnet.org/Institutes_e.html) operating\n" + "the observatory. Publications making use of the data\n" + "should include an acknowledgment statement of the form given below.\n" + "A citation reference should be sent to the INTERMAGNET Secretary\n" + "(secretary@intermagnet.org) for inclusion in a publications list\n" + "on the INTERMAGNET website.\n" + "\n" + " ACKNOWLEDGEMENT OF DATA FROM OBSERVATORIES\n" + " PARTICIPATING IN INTERMAGNET\n" + "We offer two acknowledgement templates. The first is for cases\n" + "where data from many observatories have been used and it is not\n" + "practical to list them all, or each of their operating institutes.\n" + "The second is for cases where research results have been produced\n" + "using a smaller set of observatories.\n" + "\n" + " Suggested Acknowledgement Text (template 1)\n" + "The results presented in this paper rely on data collected\n" + "at magnetic observatories. We thank the national institutes that\n" + "support them and INTERMAGNET for promoting high standards of\n" + "magnetic observatory practice (www.intermagnet.org).\n" + "\n" + " Suggested Acknowledgement Text (template 2)\n" + "The results presented in this paper rely on the data\n" + "collected at <observatory name>. 
We thank <institute name>,\n" + "for supporting its operation and INTERMAGNET for promoting high\n" + "standards of magnetic observatory practice (www.intermagnet.org).\n" + ) + + def _get_url( + self, + observatory: str, + date: UTCDateTime, + type: DataType = "variation", + interval: DataInterval = "minute", + channels: Optional[List[str]] = None, + ) -> str: + """ + Generate the file URL specific to ImagCDF conventions. + + This method constructs the filename based on the ImagCDF naming + conventions, which include the observatory code, date-time formatted + according to the data interval, and the publication level. + + Parameters: + - observatory: IAGA code of the observatory. + - date: Start date for the file. + - type: Data type indicating the processing level. + - interval: Data interval (e.g., 'minute', 'second'). + - channels: List of channels (optional). + + Returns: + - The formatted file URL or path. + + Reference: + - ImagCDF Documentation Section 5: ImagCDF File Names + """ + # Get the publication level for the type + publication_level = IMCDFPublicationLevel(data_type=type).to_string() + + # Determine filename date format based on interval + if interval == "year": + date_format = date.strftime("%Y") + elif interval == "month": + date_format = date.strftime("%Y%m") + elif interval == "day": + date_format = date.strftime("%Y%m%d") + elif interval == "hour": + date_format = date.strftime("%Y%m%d_%H") + elif interval == "minute": + date_format = date.strftime("%Y%m%d_%H%M") + elif interval == "second": + date_format = date.strftime("%Y%m%d_%H%M%S") + else: + raise ValueError(f"Unsupported interval: {interval}") + + # Default filename following ImagCDF convention + # Filename format: [iaga-code]_[date-time]_[publication-level].cdf + filename = f"{observatory.lower()}_{date_format}_{publication_level}.cdf" + + # If the urlTemplate explicitly specifies 'stdout', return 'stdout' + if self.urlTemplate.lower() == "stdout": + return "stdout" + + # Prepare 
parameters for templating + params = { + "date": date.datetime, + "i": self._get_interval_abbreviation(interval), + "interval": self._get_interval_name(interval), + "minute": date.hour * 60 + date.minute, + "month": date.strftime("%b").lower(), + "MONTH": date.strftime("%b").upper(), + "obs": observatory.lower(), + "OBS": observatory.upper(), + "t": publication_level, + "type": self._get_type_name(type), + "julian": date.strftime("%j"), + "year": date.strftime("%Y"), + "ymd": date.strftime("%Y%m%d"), + "dt": date_format, # Add the date-time formatted string + } + + # Attempt to use the template provided in urlTemplate + if "{" in self.urlTemplate and "}" in self.urlTemplate: + try: + return self.urlTemplate.format(**params) + except KeyError as e: + raise TimeseriesFactoryException(f"Invalid placeholder in urlTemplate: {e}") + + # If the urlTemplate doesn't support placeholders, assume 'file://' scheme + if self.urlTemplate.startswith("file://"): + base_path = self.urlTemplate[7:] # Strip "file://" + if not base_path or base_path == "{obs}_{dt}_{t}.cdf": + base_path = os.getcwd() # Default to current working directory + return os.path.join(base_path, filename) + + # Unsupported URL scheme + raise TimeseriesFactoryException( + f"Unsupported URL scheme in urlTemplate: {self.urlTemplate}" + ) + + # Placeholder methods for interval and type names/abbreviations + def _get_interval_abbreviation(self, interval: DataInterval) -> str: + """Get the abbreviation for the data interval.""" + abbreviations = { + "year": "yr", + "month": "mon", + "day": "day", + "hour": "hr", + "minute": "min", + "second": "sec", + } + return abbreviations.get(interval, "min") + + def _get_interval_name(self, interval: DataInterval) -> str: + """Get the full name for the data interval.""" + names = { + "year": "yearly", + "month": "monthly", + "day": "daily", + "hour": "hourly", + "minute": "minute", + "second": "second", + } + return names.get(interval, "minute") + + def _get_type_name(self, 
type: DataType) -> str: + """Get the full name for the data type.""" + type_names = { + "variation": "variation", + "definitive": "definitive", + "quasi-definitive": "quasi-definitive", + "provisional": "provisional", + "adjusted": "adjusted", + "none": "none", + } + return type_names.get(type, "variation") + + + def _get_vector_elements(self): + return {'X', 'Y', 'Z', 'H', 'D', 'E', 'V', 'I', 'F'} + + def _get_scalar_elements(self): + return {'S', 'G'} \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 3d7add0e..90b76110 100644 --- a/poetry.lock +++ b/poetry.lock @@ -153,6 +153,25 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cdflib" +version = "1.3.2" +description = "A python CDF reader toolkit" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cdflib-1.3.2-py3-none-any.whl", hash = "sha256:49af97acc328c586ac5b7c27fd8e67bccf24af82c5bd8c37d8cfe048a0c1752a"}, + {file = "cdflib-1.3.2.tar.gz", hash = "sha256:97f27ac629e4c0ac1367eb8f4edd7a1d184190272ab98a6401e999f3a2e05687"}, +] + +[package.dependencies] +numpy = ">=1.21" + +[package.extras] +dev = ["ipython", "pre-commit"] +docs = ["astropy", "netcdf4", "sphinx", "sphinx-automodapi", "sphinx-copybutton", "sphinx-rtd-theme", "xarray"] +tests = ["astropy", "h5netcdf", "hypothesis", "netcdf4", "pytest (>=3.9)", "pytest-cov", "pytest-remotedata", "xarray"] + [[package]] name = "certifi" version = "2024.12.14" diff --git a/pyproject.toml b/pyproject.toml index c163d9b0..51a5ee31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ SQLAlchemy = "1.4.41" SQLAlchemy-Utc = "^0.14.0" uvicorn = {extras = ["standard"], version = "^0.22.0"} netcdf4 = "^1.7.2" +cdflib = "^1.3.2" [tool.poetry.dev-dependencies] -- GitLab From b94e4eeb111d2394126896622fc570990ec4e17f Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 4 Dec 
2024 15:50:52 -0800 Subject: [PATCH 02/30] poc with working cdf write --- geomagio/ImagCDFFactory.py | 96 ++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 45 deletions(-) diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 5d104acd..3189d15a 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -15,7 +15,7 @@ from io import BytesIO import os import sys from typing import List, Optional, Union - +from datetime import datetime, timezone import numpy as np from obspy import Stream, Trace, UTCDateTime @@ -121,74 +121,80 @@ class ImagCDFFactory(TimeseriesFactory): Note: Parsing from strings is not implemented in this factory. """ raise NotImplementedError('"parse_string" not implemented') - def write_file(self, fh, timeseries: Stream, channels: List[str]): - """Write the timeseries data to a file handle in ImagCDF format. - - Parameters: - - fh: File handle to write the data. - - timeseries: ObsPy Stream containing the geomagnetic data. - - channels: List of channels to include in the output file. 
- """ # Create a temporary file to write the CDF data with tempfile.NamedTemporaryFile(delete=False) as tmp_file: tmp_file_path = tmp_file.name + ".cdf" try: # Initialize the CDF writer - cdf_writer = cdflib.cdfwrite.CDF(tmp_file_path, cdf_spec=None) + cdf_writer = cdflib.cdfwrite.CDF(tmp_file_path) - # Write global attributes (metadata that applies to the entire file) + # Write global attributes global_attrs = self._create_global_attributes(timeseries, channels) cdf_writer.write_globalattrs(global_attrs) - # Write time variables for each channel + # Time variables time_vars = self._create_time_stamp_variables(timeseries) for ts_name, ts_data in time_vars.items(): + # Define time variable specification var_spec = { - "Variable": ts_name, - "Data_Type": 33, # CDF TT2000 data type - "Num_Elements": 1, - "Rec_Vary": True, - "Dim_Sizes": [], - "Var_Type": "zVariable", + 'Variable': ts_name, + 'Data_Type': 33, # CDF_TIME_TT2000 + 'Num_Elements': 1, + 'Rec_Vary': True, + 'Var_Type': 'zVariable', + 'Dim_Sizes': [], + 'Sparse': 'no_sparse', + 'Compress': 6, + 'Pad': None, } - print(f"Writing time variable {ts_name} with data length: {len(ts_data)}") - cdf_writer.write_var( - var_spec=var_spec, - var_attrs=self._create_time_var_attrs(ts_name), - var_data=ts_data, - ) + # Define time variable attributes + var_attrs = self._create_time_var_attrs(ts_name) + + # Write time variable + cdf_writer.write_var(var_spec, var_attrs, ts_data) - # Write geomagnetic data variables + # Data variables for trace in timeseries: channel = trace.stats.channel var_name = f"GeomagneticField{channel}" + data_type = self._get_cdf_data_type(trace) + num_elements = 1 + CDF_CHAR = 51 + CDF_UCHAR = 52 + if data_type in [CDF_CHAR, CDF_UCHAR]: # Handle string types + num_elements = len(trace.data[0]) if len(trace.data) > 0 else 1 + var_spec = { - "Variable": var_name, - "Data_Type": self._get_cdf_data_type(trace), - "Num_Elements": 1, - "Rec_Vary": True, - "Dim_Sizes": [], - "Var_Type": "zVariable", + 
'Variable': var_name, + 'Data_Type': data_type, + 'Num_Elements': num_elements, + 'Rec_Vary': True, + 'Var_Type': 'zVariable', + 'Dim_Sizes': [], + 'Sparse': 'no_sparse', + 'Compress': 6, + 'Pad': None, } - print(f"Writing data variable {var_name} with data shape: {trace.data.shape}") - cdf_writer.write_var( - var_spec=var_spec, - var_attrs=self._create_var_attrs(trace), - var_data=trace.data, - ) + + var_attrs = self._create_var_attrs(trace) + + # Write data variable + cdf_writer.write_var(var_spec, var_attrs, trace.data) + + # Close the CDF writer + cdf_writer.close() # Copy the temporary CDF file to the final file handle with open(tmp_file_path, "rb") as tmp: cdf_data = tmp.read() fh.write(cdf_data) - cdf_writer.close() - finally: # Cleanup the temporary file - print(tmp_file_path) + os.remove(tmp_file_path) + def put_timeseries( self, @@ -336,11 +342,11 @@ class ImagCDFFactory(TimeseriesFactory): 'IagaCode': {0: station}, 'ElementsRecorded': {0: ''.join(channels)}, 'PublicationLevel': {0: publication_level}, - 'PublicationDate': {0: UTCDateTime.now().strftime("%Y-%m-%dT%H:%M:%SZ")}, + 'PublicationDate': {0: [cdflib.cdfepoch.timestamp_to_tt2000(datetime.timestamp(datetime.now(timezone.utc))), "cdf_time_tt2000"]}, 'ObservatoryName': {0: observatory_name}, - 'Latitude': {0: latitude}, - 'Longitude': {0: longitude}, - 'Elevation': {0: elevation}, + 'Latitude': {0: np.array([latitude], dtype=np.float64)}, + 'Longitude': {0: np.array([longitude], dtype=np.float64)}, + 'Elevation': {0: np.array([elevation], dtype=np.float64)}, 'Institution': {0: institution}, 'VectorSensOrient': {0: vector_orientation}, #remove F - because its a calculation, not an element? 
'StandardLevel': {0: 'None'}, # Set to 'None' @@ -436,7 +442,7 @@ class ImagCDFFactory(TimeseriesFactory): return var_spec def _create_var_attrs(self, trace: Trace) -> dict: - print(trace.stats) + # print(trace.stats) channel = trace.stats.channel fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†-- GitLab From dc9cc949873e649788e69a21dd835b8888b4fdce Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 5 Dec 2024 11:25:57 -0800 Subject: [PATCH 03/30] minor adjustments for spec. temperature channel compatibility. --- geomagio/Controller.py | 5 ++ geomagio/ImagCDFFactory.py | 112 +++++++++++++++---------------------- geomagio/api/ws/Element.py | 3 + 3 files changed, 53 insertions(+), 67 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 564b54c9..40e6b173 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -7,6 +7,7 @@ from typing import List, Optional, Tuple, Union from obspy.core import Stream, UTCDateTime +from geomagio.ImagCDFFactory import ImagCDFFactory from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm from .DerivedTimeseriesFactory import DerivedTimeseriesFactory @@ -27,6 +28,7 @@ from . import vbf from . import xml from . import covjson from . 
import netcdf +from geomagio.ImagCDFFactory import ImagCDFFactory class Controller(object): @@ -630,6 +632,8 @@ def get_output_factory(args): ) elif output_type == "plot": output_factory = PlotTimeseriesFactory() + elif output_type == "imagcdf": + output_factory = ImagCDFFactory() else: # stream compatible factories if output_type == "binlog": @@ -1040,6 +1044,7 @@ def parse_args(args): "xml", "covjson", "netcdf", + "imagcdf", ), # TODO: set default to 'iaga2002' help="Output format", diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 3189d15a..9644c23e 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -6,8 +6,9 @@ The ImagCDF format is based on NASA's Common Data Format (CDF) and is designed to store geomagnetic data with high precision. References: -- ImagCDF Format Documentation: https://intermagnet.org/docs/technical/im_tn_8_ImagCDF.pdf -- CDF Library: http://cdf.gsfc.nasa.gov/ +- ImagCDF Technical Note: https://intermagnet.org/docs/technical/im_tn_8_ImagCDF.pdf +- ImagCDF Official Documentation: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdfv1-2-intermagnet-exchange-format +- CDF: http://cdf.gsfc.nasa.gov/ """ from __future__ import absolute_import, print_function @@ -20,6 +21,7 @@ import numpy as np from obspy import Stream, Trace, UTCDateTime from geomagio.TimeseriesFactory import TimeseriesFactory +from geomagio.api.ws.Element import TEMPERATURE_ELEMENTS_ID from .geomag_types import DataInterval, DataType from .TimeseriesFactoryException import TimeseriesFactoryException @@ -121,6 +123,7 @@ class ImagCDFFactory(TimeseriesFactory): Note: Parsing from strings is not implemented in this factory. 
""" raise NotImplementedError('"parse_string" not implemented') + def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data with tempfile.NamedTemporaryFile(delete=False) as tmp_file: @@ -154,18 +157,24 @@ class ImagCDFFactory(TimeseriesFactory): # Write time variable cdf_writer.write_var(var_spec, var_attrs, ts_data) - + + # Data variables + temperature_index = 0 for trace in timeseries: channel = trace.stats.channel - var_name = f"GeomagneticField{channel}" + if channel in TEMPERATURE_ELEMENTS_ID: + temperature_index += 1 #MUST INCREMENT INDEX BEFORE USING + var_name = f"Temperature{temperature_index}" + else: + var_name = f"GeomagneticField{channel}" data_type = self._get_cdf_data_type(trace) num_elements = 1 CDF_CHAR = 51 CDF_UCHAR = 52 if data_type in [CDF_CHAR, CDF_UCHAR]: # Handle string types num_elements = len(trace.data[0]) if len(trace.data) > 0 else 1 - + var_spec = { 'Variable': var_name, 'Data_Type': data_type, @@ -178,7 +187,7 @@ class ImagCDFFactory(TimeseriesFactory): 'Pad': None, } - var_attrs = self._create_var_attrs(trace) + var_attrs = self._create_var_attrs(trace, temperature_index) # Write data variable cdf_writer.write_var(var_spec, var_attrs, trace.data) @@ -362,7 +371,8 @@ class ImagCDFFactory(TimeseriesFactory): def _create_time_stamp_variables(self, timeseries: Stream) -> dict: vector_times = None scalar_times = None - + temperature_times = {} + temperature_index = 1 for trace in timeseries: channel = trace.stats.channel times = [ @@ -371,7 +381,6 @@ class ImagCDFFactory(TimeseriesFactory): ] # Convert timestamps to TT2000 format required by CDF tt2000_times = cdflib.cdfepoch.timestamp_to_tt2000([time.timestamp() for time in times]) - # tt2000_times = cdflib.cdfepoch.compute_tt2000(times) #this does not work if channel in self._get_vector_elements(): if vector_times is None: @@ -385,16 +394,21 @@ class ImagCDFFactory(TimeseriesFactory): else: if not np.array_equal(scalar_times, 
tt2000_times): raise ValueError("Time stamps for scalar channels are not the same.") - else: - # Handle other channels if necessary - pass + elif channel in TEMPERATURE_ELEMENTS_ID: + ts_key = f"Temperature{temperature_index}Times" + if ts_key not in temperature_times: + temperature_times[ts_key] = tt2000_times + temperature_index += 1 + else: + temperature_times[ts_key] = tt2000_times time_vars = {} if vector_times is not None: time_vars['GeomagneticVectorTimes'] = vector_times if scalar_times is not None: time_vars['GeomagneticScalarTimes'] = scalar_times - + if temperature_times: + time_vars.update(temperature_times) return time_vars @@ -441,8 +455,7 @@ class ImagCDFFactory(TimeseriesFactory): } return var_spec - def _create_var_attrs(self, trace: Trace) -> dict: - # print(trace.stats) + def _create_var_attrs(self, trace: Trace, temperature_index: Optional[int] = None) -> dict: channel = trace.stats.channel fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†@@ -454,10 +467,12 @@ class ImagCDFFactory(TimeseriesFactory): units = 'Degrees of arc' validmin = -90.0 validmax = 90.0 #The magnetic field vector can point straight down (+90°), horizontal (0°), or straight up (-90°). - elif 'Temperature' in channel: + elif channel in TEMPERATURE_ELEMENTS_ID: units = 'Celsius' - fieldnam = f"Temperature {trace.stats.location}" - elif channel == 'F': + fieldnam = f"Temperature {temperature_index} {trace.stats.location}" + validmin = -273.15 + validmax = 79_999 + elif channel in ['F','S']: units = 'nT' validmin = 0.0 # negative magnetic field intestity not physically meaningful. 
validmax = 79_999.0 @@ -466,12 +481,13 @@ class ImagCDFFactory(TimeseriesFactory): validmin = -79_999.0 validmax = 79_999.0 + # Determine DEPEND_0 based on channel type if channel in self._get_vector_elements(): depend_0 = 'GeomagneticVectorTimes' elif channel in self._get_scalar_elements(): depend_0 = 'GeomagneticScalarTimes' - else: - depend_0 = None # Handle other cases if necessary + elif channel in TEMPERATURE_ELEMENTS_ID: + depend_0 = f"Temperature{temperature_index}Times" var_attrs = { @@ -486,7 +502,6 @@ class ImagCDFFactory(TimeseriesFactory): } return var_attrs - def _create_time_var_attrs(self, ts_name: str) -> dict: """ Create a dictionary of time variable attributes. @@ -546,7 +561,7 @@ class ImagCDFFactory(TimeseriesFactory): stream = Stream() # Read time variables time_vars = {} - for var in cdf.cdf_info()['zVariables']: + for var in cdf.cdf_info().zVariables: if var.endswith('Time'): time_data = cdf.varget(var) # Convert TT2000 to UTCDateTime @@ -554,7 +569,7 @@ class ImagCDFFactory(TimeseriesFactory): time_vars[var] = utc_times # Read data variables - for var in cdf.cdf_info()['zVariables']: + for var in cdf.cdf_info().zVariables: if not var.endswith('Time'): data = cdf.varget(var) attrs = cdf.varattsget(var) @@ -635,14 +650,16 @@ class ImagCDFFactory(TimeseriesFactory): This method constructs the filename based on the ImagCDF naming conventions, which include the observatory code, date-time formatted - according to the data interval, and the publication level. + according to the data interval, and the publication level. + + [iaga-code]_[date-time]_[publication-level].cdf Parameters: - observatory: IAGA code of the observatory. - date: Start date for the file. - type: Data type indicating the processing level. - interval: Data interval (e.g., 'minute', 'second'). - - channels: List of channels (optional). + - channels: List of channels (optional). Not Implemented Returns: - The formatted file URL or path. 
@@ -653,7 +670,7 @@ class ImagCDFFactory(TimeseriesFactory): # Get the publication level for the type publication_level = IMCDFPublicationLevel(data_type=type).to_string() - # Determine filename date format based on interval + # Format of Date/Time Portion of Filename based on interval see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#example-data-file:~:text=Format%20of%20Date,%EF%83%81 if interval == "year": date_format = date.strftime("%Y") elif interval == "month": @@ -667,10 +684,9 @@ class ImagCDFFactory(TimeseriesFactory): elif interval == "second": date_format = date.strftime("%Y%m%d_%H%M%S") else: - raise ValueError(f"Unsupported interval: {interval}") + raise ValueError(f"Unsupported interval: {interval}") #tenhertz currently not supported - # Default filename following ImagCDF convention - # Filename format: [iaga-code]_[date-time]_[publication-level].cdf + # Filename following ImagCDF convention, see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdf-file-names filename = f"{observatory.lower()}_{date_format}_{publication_level}.cdf" # If the urlTemplate explicitly specifies 'stdout', return 'stdout' @@ -696,7 +712,7 @@ class ImagCDFFactory(TimeseriesFactory): } # Attempt to use the template provided in urlTemplate - if "{" in self.urlTemplate and "}" in self.urlTemplate: + if "{" in self.urlTemplate and "}" in self.urlTemplate: try: return self.urlTemplate.format(**params) except KeyError as e: @@ -714,44 +730,6 @@ class ImagCDFFactory(TimeseriesFactory): f"Unsupported URL scheme in urlTemplate: {self.urlTemplate}" ) - # Placeholder methods for interval and type names/abbreviations - def _get_interval_abbreviation(self, interval: DataInterval) -> str: - """Get the abbreviation for the data interval.""" - abbreviations = { - "year": "yr", - "month": "mon", - "day": "day", - "hour": "hr", - "minute": "min", - "second": "sec", - } - return abbreviations.get(interval, "min") - - def 
_get_interval_name(self, interval: DataInterval) -> str: - """Get the full name for the data interval.""" - names = { - "year": "yearly", - "month": "monthly", - "day": "daily", - "hour": "hourly", - "minute": "minute", - "second": "second", - } - return names.get(interval, "minute") - - def _get_type_name(self, type: DataType) -> str: - """Get the full name for the data type.""" - type_names = { - "variation": "variation", - "definitive": "definitive", - "quasi-definitive": "quasi-definitive", - "provisional": "provisional", - "adjusted": "adjusted", - "none": "none", - } - return type_names.get(type, "variation") - - def _get_vector_elements(self): return {'X', 'Y', 'Z', 'H', 'D', 'E', 'V', 'I', 'F'} diff --git a/geomagio/api/ws/Element.py b/geomagio/api/ws/Element.py index ae5c4ead..25b876c5 100644 --- a/geomagio/api/ws/Element.py +++ b/geomagio/api/ws/Element.py @@ -45,3 +45,6 @@ ELEMENTS = [ ] ELEMENT_INDEX = {e.id: e for e in ELEMENTS} + + +TEMPERATURE_ELEMENTS_ID = [element.id for element in ELEMENTS if "Temperature" in element.name] \ No newline at end of file -- GitLab From 41e88066399371437ef80b805a7e4b3e8ce4bc74 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 5 Dec 2024 13:05:31 -0800 Subject: [PATCH 04/30] use DataTimes variable name on non-unique time sets --- geomagio/ImagCDFFactory.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 9644c23e..157ad0cf 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -19,6 +19,7 @@ from typing import List, Optional, Union from datetime import datetime, timezone import numpy as np from obspy import Stream, Trace, UTCDateTime +from sqlalchemy import true from geomagio.TimeseriesFactory import TimeseriesFactory from geomagio.api.ws.Element import TEMPERATURE_ELEMENTS_ID @@ -87,6 +88,8 @@ class ImagCDFFactory(TimeseriesFactory): This class extends the 
TimeseriesFactory to support writing geomagnetic time series data to files in the ImagCDF format using the cdflib library. """ + + isUniqueTimes=True #used to determine depend_0 and CDF Time Variable Name def __init__( self, @@ -138,7 +141,7 @@ class ImagCDFFactory(TimeseriesFactory): cdf_writer.write_globalattrs(global_attrs) # Time variables - time_vars = self._create_time_stamp_variables(timeseries) + time_vars = self._create_time_stamp_variables(timeseries) #modifies self.isUniqueTimes for ts_name, ts_data in time_vars.items(): # Define time variable specification var_spec = { @@ -186,8 +189,7 @@ class ImagCDFFactory(TimeseriesFactory): 'Compress': 6, 'Pad': None, } - - var_attrs = self._create_var_attrs(trace, temperature_index) + var_attrs = self._create_var_attrs(trace, temperature_index, self.isUniqueTimes) # Write data variable cdf_writer.write_var(var_spec, var_attrs, trace.data) @@ -409,7 +411,15 @@ class ImagCDFFactory(TimeseriesFactory): time_vars['GeomagneticScalarTimes'] = scalar_times if temperature_times: time_vars.update(temperature_times) - return time_vars + + last_times = [] + self.isUniqueTimes = len(time_vars) == 1 #true if only one set of times, else default to false. + for index, times in enumerate(time_vars.values()): + if index > 0: + self.isUniqueTimes = not np.array_equal(last_times, times) + last_times = times + + return time_vars if self.isUniqueTimes else {"DataTimes": last_times} def _create_var_spec( @@ -455,7 +465,7 @@ class ImagCDFFactory(TimeseriesFactory): } return var_spec - def _create_var_attrs(self, trace: Trace, temperature_index: Optional[int] = None) -> dict: + def _create_var_attrs(self, trace: Trace, temperature_index: Optional[int] = None, isUniqueTimes: Optional[bool] = True) -> dict: channel = trace.stats.channel fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. 
units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†@@ -496,7 +506,7 @@ class ImagCDFFactory(TimeseriesFactory): 'FILLVAL': 99999.0, 'VALIDMIN': validmin, 'VALIDMAX': validmax, - 'DEPEND_0': depend_0, + 'DEPEND_0': depend_0 if isUniqueTimes else "DataTimes", 'DISPLAY_TYPE': 'time_series', 'LABLAXIS': channel, } -- GitLab From 68798d807806a6d681af5b93d770fd4e23e9ce92 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 5 Dec 2024 15:51:48 -0800 Subject: [PATCH 05/30] with compression at the file level --- geomagio/ImagCDFFactory.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 157ad0cf..0acf6ece 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -129,12 +129,20 @@ class ImagCDFFactory(TimeseriesFactory): def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data - with tempfile.NamedTemporaryFile(delete=False) as tmp_file: - tmp_file_path = tmp_file.name + ".cdf" + with tempfile.NamedTemporaryFile(delete=False, suffix='.cdf') as tmp_file: + tmp_file_path = tmp_file.name try: # Initialize the CDF writer - cdf_writer = cdflib.cdfwrite.CDF(tmp_file_path) + cdf_spec = { + 'Compressed': 9, # Enable compression (0-9) + # 'Majority': 'row_major', # Data layout - gets set automatically + # 'Encoding': 'ibmpc', # Corrupts CDF - gets set automatically + 'Checksum': True, # Disable checksum for faster writes (optional) + 'rDim_sizes': [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. 
+ } + + cdf_writer = cdflib.cdfwrite.CDF(path=tmp_file_path, cdf_spec=cdf_spec, delete=True) # Write global attributes global_attrs = self._create_global_attributes(timeseries, channels) @@ -206,7 +214,6 @@ class ImagCDFFactory(TimeseriesFactory): # Cleanup the temporary file os.remove(tmp_file_path) - def put_timeseries( self, timeseries: Stream, @@ -288,7 +295,7 @@ class ImagCDFFactory(TimeseriesFactory): # Read existing data to merge with new data existing_cdf = cdflib.cdfread.CDF(url_file) existing_stream = self._read_cdf(existing_cdf) - existing_cdf.close() + # existing_cdf.close() #no close method? existing_data = existing_stream # Merge existing data with new data @@ -480,7 +487,7 @@ class ImagCDFFactory(TimeseriesFactory): elif channel in TEMPERATURE_ELEMENTS_ID: units = 'Celsius' fieldnam = f"Temperature {temperature_index} {trace.stats.location}" - validmin = -273.15 + validmin = -273.15 #absolute zero validmax = 79_999 elif channel in ['F','S']: units = 'nT' -- GitLab From a0403695353a53a9be004df2c4b2e6ef1f63424e Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 6 Dec 2024 15:43:01 -0800 Subject: [PATCH 06/30] poc for reading cdf into obspy stream. 
working for imag to iaga --- geomagio/Controller.py | 3 + geomagio/ImagCDFFactory.py | 296 +++++++++++++++++++++++++++---------- 2 files changed, 219 insertions(+), 80 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 40e6b173..96ebb28c 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -547,6 +547,8 @@ def get_input_factory(args): locationCode=args.locationcode, **input_factory_args, ) + elif input_type == "imagcdf": + input_factory = ImagCDFFactory(**input_factory_args) else: # stream compatible factories if input_type == "iaga2002": @@ -837,6 +839,7 @@ def parse_args(args): "xml", "covjson", "netcdf", + "imagcdf" ), default="edge", help='Input format (Default "edge")', diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 0acf6ece..6ba44f3e 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -8,7 +8,8 @@ to store geomagnetic data with high precision. References: - ImagCDF Technical Note: https://intermagnet.org/docs/technical/im_tn_8_ImagCDF.pdf - ImagCDF Official Documentation: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdfv1-2-intermagnet-exchange-format -- CDF: http://cdf.gsfc.nasa.gov/ +- CDF User Guide: https://spdf.gsfc.nasa.gov/pub/software/cdf/doc/cdf_User_Guide.pdf +- CDFLib Docs: [https://cdflib.readthedocs.io/en/latest/#, https://cdflib.readthedocs.io/en/stable/modules/index.html] """ from __future__ import absolute_import, print_function @@ -16,7 +17,7 @@ from io import BytesIO import os import sys from typing import List, Optional, Union -from datetime import datetime, timezone +from datetime import datetime, timezone, tzinfo import numpy as np from obspy import Stream, Trace, UTCDateTime from sqlalchemy import true @@ -330,6 +331,84 @@ class ImagCDFFactory(TimeseriesFactory): # Unsupported URL scheme encountered raise TimeseriesFactoryException("Unsupported URL scheme in urlTemplate") + def get_timeseries( + self, + starttime: 
UTCDateTime, + endtime: UTCDateTime, + add_empty_channels: bool = True, + observatory: Optional[str] = None, + channels: Optional[List[str]] = None, + type: Optional[DataType] = None, + interval: Optional[DataInterval] = None, + ) -> Stream: + observatory = observatory or self.observatory + channels = channels or self.channels + type = type or self.type + interval = interval or self.interval + + timeseries = Stream() + urlIntervals = Util.get_intervals( + starttime=starttime, endtime=endtime, size=self.urlInterval + ) + + for urlInterval in urlIntervals: + url = self._get_url( + observatory=observatory, + date=urlInterval["start"], + type=type, + interval=interval, + channels=channels, + ) + if url == 'stdout': + continue # stdout is not a valid input source + if not url.startswith("file://"): + raise TimeseriesFactoryException("Only file urls are supported for reading ImagCDF") + + url_file = Util.get_file_from_url(url, createParentDirectory=False) + if not os.path.isfile(url_file): + # If file doesn't exist, skip + continue + + try: + # Read CDF data and merge + cdf = cdflib.cdfread.CDF(url_file) + file_stream = self._read_cdf(cdf) + # Attempt to select only requested channels + selected = Stream() + for ch in channels: + selected += file_stream.select(channel=ch) + timeseries += selected + except Exception as e: + print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) + + # After reading all intervals, merge and trim + timeseries.merge(fill_value=np.nan) + timeseries.trim( + starttime=starttime, + endtime=endtime, + nearest_sample=False, + pad=True, + fill_value=np.nan, + ) + + # If requested, add empty channels not present in data + if add_empty_channels: + present_channels = {tr.stats.channel for tr in timeseries} + missing_channels = set(channels) - present_channels + for ch in missing_channels: + empty_trace = self._get_empty_trace( + starttime=starttime, + endtime=endtime, + observatory=observatory, + channel=ch, + data_type=type, + 
interval=interval, + ) + timeseries += empty_trace + + timeseries.sort() + return timeseries + def _create_global_attributes(self, timeseries: Stream, channels: List[str]) -> dict: """ Create a dictionary of global attributes for the ImagCDF file. @@ -344,14 +423,19 @@ class ImagCDFFactory(TimeseriesFactory): stats = timeseries[0].stats if len(timeseries) > 0 else None # Extract metadata from stats or fallback to defaults - observatory_name = getattr(stats, 'station_name', None) or self.observatory or "Unknown Observatory" - station = getattr(stats, 'station', None) or "Unknown Iaga Code" - institution = getattr(stats, 'agency_name', None) or "Unknown Institution" + observatory_name = getattr(stats, 'station_name', None) or self.observatory or "" + station = getattr(stats, 'station', None) or "" + institution = getattr(stats, 'agency_name', None) or "" latitude = getattr(stats, 'geodetic_latitude', None) or 0.0 longitude = getattr(stats, 'geodetic_longitude', None) or 0.0 elevation = getattr(stats, 'elevation', None) or 99_999.0 + conditions_of_use = getattr(stats, 'conditions_of_use', None) or "" vector_orientation = getattr(stats, 'sensor_orientation', None) or "" data_interval_type = getattr(stats, 'data_interval_type', None) or self.interval + data_type = getattr(stats, 'data_type', None) or "variation" + sensor_sampling_rate = getattr(stats, 'sensor_sampling_rate', None) or 0.0 + comments = getattr(stats, 'filter_comments', None) or [''] + declination_base = getattr(stats, 'declination_base', None) or 0.0 publication_level = IMCDFPublicationLevel(data_type=self.type).to_string() global_attrs = { 'FormatDescription': {0: 'INTERMAGNET CDF Format'}, @@ -370,10 +454,14 @@ class ImagCDFFactory(TimeseriesFactory): 'StandardLevel': {0: 'None'}, # Set to 'None' # Temporarily Omit 'StandardName', 'StandardVersion', 'PartialStandDesc' 'Source': {0: 'institute'}, # "institute" - if the named institution provided the data, “INTERMAGNET†- if the data file has been created 
by INTERMAGNET from another data source, “WDC†- if the World Data Centre has created the file from another data source - # 'TermsOfUse': {0: self.getINTERMAGNETTermsOfUse()}, + 'TermsOfUse': {0: conditions_of_use}, # 'UniqueIdentifier': {0: ''}, # 'ParentIdentifiers': {0: ''}, # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov + 'SensorSamplingRate': {0: sensor_sampling_rate}, + 'DataType': {0: data_type}, + 'Comments': {0: comments}, + 'DeclinationBase': {0: declination_base}, } return global_attrs @@ -382,10 +470,12 @@ class ImagCDFFactory(TimeseriesFactory): scalar_times = None temperature_times = {} temperature_index = 1 + print(timeseries[0].stats) + for trace in timeseries: channel = trace.stats.channel times = [ - (trace.stats.starttime + trace.stats.delta * i).datetime + (trace.stats.starttime + trace.stats.delta * i).datetime.replace(tzinfo=timezone.utc) for i in range(trace.stats.npts) ] # Convert timestamps to TT2000 format required by CDF @@ -473,7 +563,7 @@ class ImagCDFFactory(TimeseriesFactory): return var_spec def _create_var_attrs(self, trace: Trace, temperature_index: Optional[int] = None, isUniqueTimes: Optional[bool] = True) -> dict: - channel = trace.stats.channel + channel = trace.stats.channel.upper() fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†if channel == 'D': @@ -516,6 +606,7 @@ class ImagCDFFactory(TimeseriesFactory): 'DEPEND_0': depend_0 if isUniqueTimes else "DataTimes", 'DISPLAY_TYPE': 'time_series', 'LABLAXIS': channel, + 'DATA_INTERVAL_TYPE': trace.stats.data_interval_type } return var_attrs @@ -576,83 +667,126 @@ class ImagCDFFactory(TimeseriesFactory): - An ObsPy Stream containing the data from the CDF file. 
""" stream = Stream() - # Read time variables + + # Extract global attributes + global_attrs = cdf.globalattsget() + + # Map global attributes to Stream-level metadata + observatory = global_attrs.get('IagaCode', [''])[0] + station_name = global_attrs.get('ObservatoryName', [''])[0] + institution = global_attrs.get('Institution', [''])[0] + latitude = global_attrs.get('Latitude', [0.0])[0] + longitude = global_attrs.get('Longitude', [0.0])[0] + elevation = global_attrs.get('Elevation', [99_999.0])[0] + sensor_sampling_rate = global_attrs.get('SensorSamplingRate', [0.0])[0] + sensor_orientation = global_attrs.get('VectorSensOrient', [''])[0] + data_type = global_attrs.get('DataType', ['variation'])[0] + publication_level = global_attrs.get('PublicationLevel', ['1'])[0] + comments = global_attrs.get('Comments', ['']) + terms_of_use = global_attrs.get('TermsOfUse', [''])[0] + declination_base = global_attrs.get('DeclinationBase', [0.0])[0] + + # Identify all time variables time_vars = {} for var in cdf.cdf_info().zVariables: - if var.endswith('Time'): + if var.lower().endswith('times'): time_data = cdf.varget(var) - # Convert TT2000 to UTCDateTime - utc_times = [UTCDateTime(t) for t in cdflib.cdfepoch.to_datetime(time_data)] + unix_times = cdflib.cdfepoch.unixtime(time_data) + utc_times = [UTCDateTime(t) for t in unix_times] time_vars[var] = utc_times - # Read data variables + # Read data variables and associate them with time variables for var in cdf.cdf_info().zVariables: - if not var.endswith('Time'): - data = cdf.varget(var) - attrs = cdf.varattsget(var) - if 'DEPEND_0' in attrs: - ts_name = attrs['DEPEND_0'] - if ts_name in time_vars: - times = time_vars[ts_name] - if len(times) > 1: - delta = times[1] - times[0] # Calculate sample interval - else: - delta = 60 if self.interval == 'minute' else 1 - trace = Trace( - data=data, - header={ - 'station': self.observatory, - 'channel': var, - 'starttime': times[0], - 'delta': delta, - } - ) - stream += trace - return 
stream - - @staticmethod - def getINTERMAGNETTermsOfUse() -> str: - """ - Return the INTERMAGNET Terms of Use. - - These terms should be included in the 'TermsOfUse' global attribute - as per the ImagCDF specification. + # Skip time variables + if var.lower().endswith('times'): + continue + + data = cdf.varget(var) + attrs = cdf.varattsget(var) + + # Determine DEPEND_0 (the time variable name) and validate + ts_name = attrs.get('DEPEND_0') + if not ts_name: + # If no DEPEND_0, skip this variable as we cannot map times + continue + + # If we used a DataTimes fallback or similar, ensure we handle it case-insensitively + # and also confirm that time_vars keys are checked properly. + # The ImagCDF can have DataTimes, GeomagneticVectorTimes, etc. + # So try exact match first, if not found, attempt case-insensitive. + matched_time_key = None + for tkey in time_vars.keys(): + if tkey == ts_name: + matched_time_key = tkey + break + if tkey.lower() == ts_name.lower(): + matched_time_key = tkey + break + + if matched_time_key not in time_vars: + # If we cannot find the matching time variable, skip this variable + continue + + times = time_vars[matched_time_key] + + # Determine delta (sample interval) + if len(times) > 1: + # delta as a float of seconds + delta = (times[1].timestamp - times[0].timestamp) + else: + # if only one sample, use default based on interval + # convert minute, second, etc. to numeric delta + if self.interval == 'minute': + delta = 60.0 + elif self.interval == 'second': + delta = 1.0 + elif self.interval == 'hour': + delta = 3600.0 + else: + # fallback, set delta to 60 + delta = 60.0 + + # Map the variable name back to a standard channel code + # Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. + # Temperatures are named like Temperature1, Temperature2, ... 
+ # Extract channel name by removing known prefixes + if var.startswith("GeomagneticField"): + channel = var.replace("GeomagneticField", "") + elif var.startswith("Temperature"): + # Temperature variables may not map directly to a geomagnetic channel + # but to temperature sensors. We can just use the label from LABLAXIS if needed + channel = attrs.get('LABLAXIS', var) + else: + # fallback if naming doesn't match expected patterns + channel = var + + time_attrs =cdf.varattsget(var) + data_interval = time_attrs.get('DATA_INTERVAL_TYPE', ['']) + # Create a trace + trace = Trace( + data=data, + header={ + 'station': observatory, + 'channel': channel, + 'starttime': times[0], + 'delta': delta, + 'geodetic_latitude': latitude, + 'geodetic_longitude': longitude, + 'elevation': elevation, + 'sensor_orientation': "".join(sensor_orientation), + 'data_type': data_type, + 'station_name': station_name, + 'agency_name': institution, + 'conditions_of_use': terms_of_use, + 'sensor_sampling_rate': sensor_sampling_rate, + 'data_interval_type': data_interval, + 'declination_base': declination_base, + 'filter_comments': comments + } + ) + stream += trace - Reference: - - ImagCDF Documentation Section 4.5: Attributes that Relate to Publication of the Data - """ - return ( - "CONDITIONS OF USE FOR DATA PROVIDED THROUGH INTERMAGNET:\n" - "The data made available through INTERMAGNET are provided for\n" - "your use and are not for commercial use or sale or distribution\n" - "to third parties without the written permission of the institute\n" - "(http://www.intermagnet.org/Institutes_e.html) operating\n" - "the observatory. 
Publications making use of the data\n" - "should include an acknowledgment statement of the form given below.\n" - "A citation reference should be sent to the INTERMAGNET Secretary\n" - "(secretary@intermagnet.org) for inclusion in a publications list\n" - "on the INTERMAGNET website.\n" - "\n" - " ACKNOWLEDGEMENT OF DATA FROM OBSERVATORIES\n" - " PARTICIPATING IN INTERMAGNET\n" - "We offer two acknowledgement templates. The first is for cases\n" - "where data from many observatories have been used and it is not\n" - "practical to list them all, or each of their operating institutes.\n" - "The second is for cases where research results have been produced\n" - "using a smaller set of observatories.\n" - "\n" - " Suggested Acknowledgement Text (template 1)\n" - "The results presented in this paper rely on data collected\n" - "at magnetic observatories. We thank the national institutes that\n" - "support them and INTERMAGNET for promoting high standards of\n" - "magnetic observatory practice (www.intermagnet.org).\n" - "\n" - " Suggested Acknowledgement Text (template 2)\n" - "The results presented in this paper rely on the data\n" - "collected at <observatory name>. 
We thank <institute name>,\n" - "for supporting its operation and INTERMAGNET for promoting high\n" - "standards of magnetic observatory practice (www.intermagnet.org).\n" - ) + return stream def _get_url( self, @@ -725,7 +859,7 @@ class ImagCDFFactory(TimeseriesFactory): "julian": date.strftime("%j"), "year": date.strftime("%Y"), "ymd": date.strftime("%Y%m%d"), - "dt": date_format, # Add the date-time formatted string + "dt": date_format, } # Attempt to use the template provided in urlTemplate @@ -751,4 +885,6 @@ class ImagCDFFactory(TimeseriesFactory): return {'X', 'Y', 'Z', 'H', 'D', 'E', 'V', 'I', 'F'} def _get_scalar_elements(self): - return {'S', 'G'} \ No newline at end of file + return {'S', 'G'} + + -- GitLab From ee606ebb6445d443f368c12d3b931893ecbc7d16 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 9 Dec 2024 13:10:09 -0800 Subject: [PATCH 07/30] general clean up --- geomagio/ImagCDFFactory.py | 56 ++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 6ba44f3e..3d8ba136 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -31,6 +31,8 @@ from . import TimeseriesUtility from . import Util import cdflib +from cdflib.cdfwrite import CDF as CDFWriter +from cdflib.cdfread import CDF as CDFReader import tempfile @@ -48,7 +50,7 @@ class IMCDFPublicationLevel: 4: Definitive data with no further changes expected. 
Reference: - - ImagCDF Documentation Section 4.2: Attributes that Uniquely Identify the Data + - ImagCDF Technical Documentation: Attributes that Uniquely Identify the Data """ class PublicationLevel: @@ -137,13 +139,13 @@ class ImagCDFFactory(TimeseriesFactory): # Initialize the CDF writer cdf_spec = { 'Compressed': 9, # Enable compression (0-9) - # 'Majority': 'row_major', # Data layout - gets set automatically - # 'Encoding': 'ibmpc', # Corrupts CDF - gets set automatically + 'Majority': CDFWriter.ROW_MAJOR, # Data layout - gets set automatically + 'Encoding': CDFWriter.IBMPC_ENCODING, # gets set automatically 'Checksum': True, # Disable checksum for faster writes (optional) 'rDim_sizes': [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } - cdf_writer = cdflib.cdfwrite.CDF(path=tmp_file_path, cdf_spec=cdf_spec, delete=True) + cdf_writer = CDFWriter(path=tmp_file_path, cdf_spec=cdf_spec, delete=True) # Write global attributes global_attrs = self._create_global_attributes(timeseries, channels) @@ -155,13 +157,13 @@ class ImagCDFFactory(TimeseriesFactory): # Define time variable specification var_spec = { 'Variable': ts_name, - 'Data_Type': 33, # CDF_TIME_TT2000 + 'Data_Type': CDFWriter.CDF_TIME_TT2000, # CDF_TIME_TT2000 'Num_Elements': 1, 'Rec_Vary': True, 'Var_Type': 'zVariable', 'Dim_Sizes': [], 'Sparse': 'no_sparse', - 'Compress': 6, + 'Compress': 9, 'Pad': None, } # Define time variable attributes @@ -182,9 +184,7 @@ class ImagCDFFactory(TimeseriesFactory): var_name = f"GeomagneticField{channel}" data_type = self._get_cdf_data_type(trace) num_elements = 1 - CDF_CHAR = 51 - CDF_UCHAR = 52 - if data_type in [CDF_CHAR, CDF_UCHAR]: # Handle string types + if data_type in [CDFWriter.CDF_CHAR, CDFWriter.CDF_UCHAR]: # Handle string types num_elements = len(trace.data[0]) if len(trace.data) > 0 else 1 var_spec = { @@ -195,7 +195,7 @@ class ImagCDFFactory(TimeseriesFactory): 'Var_Type': 'zVariable', 'Dim_Sizes': [], 'Sparse': 
'no_sparse', - 'Compress': 6, + 'Compress': 9, 'Pad': None, } var_attrs = self._create_var_attrs(trace, temperature_index, self.isUniqueTimes) @@ -294,7 +294,7 @@ class ImagCDFFactory(TimeseriesFactory): if os.path.isfile(url_file): try: # Read existing data to merge with new data - existing_cdf = cdflib.cdfread.CDF(url_file) + existing_cdf = CDFReader(url_file) existing_stream = self._read_cdf(existing_cdf) # existing_cdf.close() #no close method? existing_data = existing_stream @@ -371,7 +371,7 @@ class ImagCDFFactory(TimeseriesFactory): try: # Read CDF data and merge - cdf = cdflib.cdfread.CDF(url_file) + cdf = CDFReader(url_file) file_stream = self._read_cdf(cdf) # Attempt to select only requested channels selected = Stream() @@ -418,7 +418,7 @@ class ImagCDFFactory(TimeseriesFactory): descriptions. References: - - ImagCDF Documentation Section 4: ImagCDF Global Attributes + - ImagCDF Technical Documentation: ImagCDF Global Attributes """ stats = timeseries[0].stats if len(timeseries) > 0 else None @@ -458,10 +458,10 @@ class ImagCDFFactory(TimeseriesFactory): # 'UniqueIdentifier': {0: ''}, # 'ParentIdentifiers': {0: ''}, # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov - 'SensorSamplingRate': {0: sensor_sampling_rate}, - 'DataType': {0: data_type}, - 'Comments': {0: comments}, - 'DeclinationBase': {0: declination_base}, + 'SensorSamplingRate': {0: sensor_sampling_rate}, #Optional + 'DataType': {0: data_type},#Optional + 'Comments': {0: comments}, #Optional + 'DeclinationBase': {0: declination_base}, #Optional } return global_attrs @@ -470,7 +470,6 @@ class ImagCDFFactory(TimeseriesFactory): scalar_times = None temperature_times = {} temperature_index = 1 - print(timeseries[0].stats) for trace in timeseries: channel = trace.stats.channel @@ -615,9 +614,9 @@ class ImagCDFFactory(TimeseriesFactory): Create a dictionary of time variable attributes. These attributes provide metadata for time variables. 
- Note: None of these attributes are required for the time stamp variables GeomagneticVectorTimes and GeomagneticScalarTimes. + Note: None of these attributes are required for the time stamp variables. Reference: - - ImagCDF Documentation Section 3: ImagCDF Data + - ImagCDF Technical Documentation: ImagCDF Data """ # var_attrs = { # 'UNITS': 'TT2000', @@ -639,21 +638,18 @@ class ImagCDFFactory(TimeseriesFactory): - CDF_INT4 (41) for integer data. Reference: - - CDF Data Types: http://cdf.gsfc.nasa.gov/html/cdfdatatypes.html + - See CDF for more data types """ - # CDF data type constants - CDF_DOUBLE = 45 # CDF_DOUBLE corresponds to 64-bit float - CDF_INT4 = 41 # CDF_INT4 corresponds to 32-bit int if trace.data.dtype in [np.float32, np.float64]: - return CDF_DOUBLE + return CDFWriter.CDF_DOUBLE elif trace.data.dtype in [np.int32, np.int64]: - return CDF_INT4 + return CDFWriter.CDF_INT4 else: # Default to double precision float - return CDF_DOUBLE + return CDFWriter.CDF_DOUBLE - def _read_cdf(self, cdf: cdflib.cdfread.CDF) -> Stream: + def _read_cdf(self, cdf: CDFReader) -> Stream: """ Read CDF data into an ObsPy Stream. @@ -677,7 +673,7 @@ class ImagCDFFactory(TimeseriesFactory): institution = global_attrs.get('Institution', [''])[0] latitude = global_attrs.get('Latitude', [0.0])[0] longitude = global_attrs.get('Longitude', [0.0])[0] - elevation = global_attrs.get('Elevation', [99_999.0])[0] + elevation = global_attrs.get('Elevation', [99_999.0])[0] #default to 99_999 per technical documents. sensor_sampling_rate = global_attrs.get('SensorSamplingRate', [0.0])[0] sensor_orientation = global_attrs.get('VectorSensOrient', [''])[0] data_type = global_attrs.get('DataType', ['variation'])[0] @@ -816,7 +812,7 @@ class ImagCDFFactory(TimeseriesFactory): - The formatted file URL or path. 
Reference: - - ImagCDF Documentation Section 5: ImagCDF File Names + - ImagCDF Technical Documentation: ImagCDF File Names """ # Get the publication level for the type publication_level = IMCDFPublicationLevel(data_type=type).to_string() -- GitLab From efa2c0dba84474303572fee39926c03575049b85 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 9 Dec 2024 13:15:17 -0800 Subject: [PATCH 08/30] mvp for reading writing imagcdf --- geomagio/ImagCDFFactory.py | 407 +++++++++++++++++++++---------------- 1 file changed, 228 insertions(+), 179 deletions(-) diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index 3d8ba136..ea947903 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -91,8 +91,8 @@ class ImagCDFFactory(TimeseriesFactory): This class extends the TimeseriesFactory to support writing geomagnetic time series data to files in the ImagCDF format using the cdflib library. """ - - isUniqueTimes=True #used to determine depend_0 and CDF Time Variable Name + + isUniqueTimes = True # used to determine depend_0 and CDF Time Variable Name def __init__( self, @@ -129,20 +129,20 @@ class ImagCDFFactory(TimeseriesFactory): Note: Parsing from strings is not implemented in this factory. 
""" raise NotImplementedError('"parse_string" not implemented') - + def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data - with tempfile.NamedTemporaryFile(delete=False, suffix='.cdf') as tmp_file: + with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: tmp_file_path = tmp_file.name try: # Initialize the CDF writer cdf_spec = { - 'Compressed': 9, # Enable compression (0-9) - 'Majority': CDFWriter.ROW_MAJOR, # Data layout - gets set automatically - 'Encoding': CDFWriter.IBMPC_ENCODING, # gets set automatically - 'Checksum': True, # Disable checksum for faster writes (optional) - 'rDim_sizes': [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. + "Compressed": 9, # Enable compression (0-9) + "Majority": CDFWriter.ROW_MAJOR, # Data layout - gets set automatically + "Encoding": CDFWriter.IBMPC_ENCODING, # gets set automatically + "Checksum": True, # Disable checksum for faster writes (optional) + "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. 
} cdf_writer = CDFWriter(path=tmp_file_path, cdf_spec=cdf_spec, delete=True) @@ -152,53 +152,59 @@ class ImagCDFFactory(TimeseriesFactory): cdf_writer.write_globalattrs(global_attrs) # Time variables - time_vars = self._create_time_stamp_variables(timeseries) #modifies self.isUniqueTimes + time_vars = self._create_time_stamp_variables( + timeseries + ) # modifies self.isUniqueTimes for ts_name, ts_data in time_vars.items(): # Define time variable specification var_spec = { - 'Variable': ts_name, - 'Data_Type': CDFWriter.CDF_TIME_TT2000, # CDF_TIME_TT2000 - 'Num_Elements': 1, - 'Rec_Vary': True, - 'Var_Type': 'zVariable', - 'Dim_Sizes': [], - 'Sparse': 'no_sparse', - 'Compress': 9, - 'Pad': None, + "Variable": ts_name, + "Data_Type": CDFWriter.CDF_TIME_TT2000, # CDF_TIME_TT2000 + "Num_Elements": 1, + "Rec_Vary": True, + "Var_Type": "zVariable", + "Dim_Sizes": [], + "Sparse": "no_sparse", + "Compress": 9, + "Pad": None, } # Define time variable attributes var_attrs = self._create_time_var_attrs(ts_name) # Write time variable cdf_writer.write_var(var_spec, var_attrs, ts_data) - - + # Data variables temperature_index = 0 for trace in timeseries: channel = trace.stats.channel if channel in TEMPERATURE_ELEMENTS_ID: - temperature_index += 1 #MUST INCREMENT INDEX BEFORE USING + temperature_index += 1 # MUST INCREMENT INDEX BEFORE USING var_name = f"Temperature{temperature_index}" else: var_name = f"GeomagneticField{channel}" data_type = self._get_cdf_data_type(trace) num_elements = 1 - if data_type in [CDFWriter.CDF_CHAR, CDFWriter.CDF_UCHAR]: # Handle string types + if data_type in [ + CDFWriter.CDF_CHAR, + CDFWriter.CDF_UCHAR, + ]: # Handle string types num_elements = len(trace.data[0]) if len(trace.data) > 0 else 1 - + var_spec = { - 'Variable': var_name, - 'Data_Type': data_type, - 'Num_Elements': num_elements, - 'Rec_Vary': True, - 'Var_Type': 'zVariable', - 'Dim_Sizes': [], - 'Sparse': 'no_sparse', - 'Compress': 9, - 'Pad': None, + "Variable": var_name, + 
"Data_Type": data_type, + "Num_Elements": num_elements, + "Rec_Vary": True, + "Var_Type": "zVariable", + "Dim_Sizes": [], + "Sparse": "no_sparse", + "Compress": 9, + "Pad": None, } - var_attrs = self._create_var_attrs(trace, temperature_index, self.isUniqueTimes) + var_attrs = self._create_var_attrs( + trace, temperature_index, self.isUniqueTimes + ) # Write data variable cdf_writer.write_var(var_spec, var_attrs, trace.data) @@ -271,7 +277,7 @@ class ImagCDFFactory(TimeseriesFactory): ) # Handle 'stdout' output - if url == 'stdout': + if url == "stdout": # Write directly to stdout fh = sys.stdout.buffer url_data = timeseries.slice( @@ -282,7 +288,7 @@ class ImagCDFFactory(TimeseriesFactory): continue # Proceed to next interval if any # Handle 'file://' output - elif url.startswith('file://'): + elif url.startswith("file://"): # Get the file path from the URL url_file = Util.get_file_from_url(url, createParentDirectory=False) url_data = timeseries.slice( @@ -307,11 +313,16 @@ class ImagCDFFactory(TimeseriesFactory): channel=trace.stats.channel, ) if new_trace: - trace.data = np.concatenate((trace.data, new_trace[0].data)) + trace.data = np.concatenate( + (trace.data, new_trace[0].data) + ) url_data = existing_data + url_data except Exception as e: # Log the exception if needed - print(f"Warning: Could not read existing CDF file '{url_file}': {e}", file=sys.stderr) + print( + f"Warning: Could not read existing CDF file '{url_file}': {e}", + file=sys.stderr, + ) # Proceed with new data # Pad the data with NaNs to ensure it fits the interval @@ -329,7 +340,9 @@ class ImagCDFFactory(TimeseriesFactory): else: # Unsupported URL scheme encountered - raise TimeseriesFactoryException("Unsupported URL scheme in urlTemplate") + raise TimeseriesFactoryException( + "Unsupported URL scheme in urlTemplate" + ) def get_timeseries( self, @@ -359,10 +372,12 @@ class ImagCDFFactory(TimeseriesFactory): interval=interval, channels=channels, ) - if url == 'stdout': + if url == "stdout": 
continue # stdout is not a valid input source if not url.startswith("file://"): - raise TimeseriesFactoryException("Only file urls are supported for reading ImagCDF") + raise TimeseriesFactoryException( + "Only file urls are supported for reading ImagCDF" + ) url_file = Util.get_file_from_url(url, createParentDirectory=False) if not os.path.isfile(url_file): @@ -409,7 +424,9 @@ class ImagCDFFactory(TimeseriesFactory): timeseries.sort() return timeseries - def _create_global_attributes(self, timeseries: Stream, channels: List[str]) -> dict: + def _create_global_attributes( + self, timeseries: Stream, channels: List[str] + ) -> dict: """ Create a dictionary of global attributes for the ImagCDF file. @@ -423,45 +440,58 @@ class ImagCDFFactory(TimeseriesFactory): stats = timeseries[0].stats if len(timeseries) > 0 else None # Extract metadata from stats or fallback to defaults - observatory_name = getattr(stats, 'station_name', None) or self.observatory or "" - station = getattr(stats, 'station', None) or "" - institution = getattr(stats, 'agency_name', None) or "" - latitude = getattr(stats, 'geodetic_latitude', None) or 0.0 - longitude = getattr(stats, 'geodetic_longitude', None) or 0.0 - elevation = getattr(stats, 'elevation', None) or 99_999.0 - conditions_of_use = getattr(stats, 'conditions_of_use', None) or "" - vector_orientation = getattr(stats, 'sensor_orientation', None) or "" - data_interval_type = getattr(stats, 'data_interval_type', None) or self.interval - data_type = getattr(stats, 'data_type', None) or "variation" - sensor_sampling_rate = getattr(stats, 'sensor_sampling_rate', None) or 0.0 - comments = getattr(stats, 'filter_comments', None) or [''] - declination_base = getattr(stats, 'declination_base', None) or 0.0 + observatory_name = ( + getattr(stats, "station_name", None) or self.observatory or "" + ) + station = getattr(stats, "station", None) or "" + institution = getattr(stats, "agency_name", None) or "" + latitude = getattr(stats, 
"geodetic_latitude", None) or 0.0 + longitude = getattr(stats, "geodetic_longitude", None) or 0.0 + elevation = getattr(stats, "elevation", None) or 99_999.0 + conditions_of_use = getattr(stats, "conditions_of_use", None) or "" + vector_orientation = getattr(stats, "sensor_orientation", None) or "" + data_interval_type = getattr(stats, "data_interval_type", None) or self.interval + data_type = getattr(stats, "data_type", None) or "variation" + sensor_sampling_rate = getattr(stats, "sensor_sampling_rate", None) or 0.0 + comments = getattr(stats, "filter_comments", None) or [""] + declination_base = getattr(stats, "declination_base", None) or 0.0 publication_level = IMCDFPublicationLevel(data_type=self.type).to_string() global_attrs = { - 'FormatDescription': {0: 'INTERMAGNET CDF Format'}, - 'FormatVersion': {0: '1.2'}, - 'Title': {0: 'Geomagnetic time series data'}, - 'IagaCode': {0: station}, - 'ElementsRecorded': {0: ''.join(channels)}, - 'PublicationLevel': {0: publication_level}, - 'PublicationDate': {0: [cdflib.cdfepoch.timestamp_to_tt2000(datetime.timestamp(datetime.now(timezone.utc))), "cdf_time_tt2000"]}, - 'ObservatoryName': {0: observatory_name}, - 'Latitude': {0: np.array([latitude], dtype=np.float64)}, - 'Longitude': {0: np.array([longitude], dtype=np.float64)}, - 'Elevation': {0: np.array([elevation], dtype=np.float64)}, - 'Institution': {0: institution}, - 'VectorSensOrient': {0: vector_orientation}, #remove F - because its a calculation, not an element? 
- 'StandardLevel': {0: 'None'}, # Set to 'None' + "FormatDescription": {0: "INTERMAGNET CDF Format"}, + "FormatVersion": {0: "1.2"}, + "Title": {0: "Geomagnetic time series data"}, + "IagaCode": {0: station}, + "ElementsRecorded": {0: "".join(channels)}, + "PublicationLevel": {0: publication_level}, + "PublicationDate": { + 0: [ + cdflib.cdfepoch.timestamp_to_tt2000( + datetime.timestamp(datetime.now(timezone.utc)) + ), + "cdf_time_tt2000", + ] + }, + "ObservatoryName": {0: observatory_name}, + "Latitude": {0: np.array([latitude], dtype=np.float64)}, + "Longitude": {0: np.array([longitude], dtype=np.float64)}, + "Elevation": {0: np.array([elevation], dtype=np.float64)}, + "Institution": {0: institution}, + "VectorSensOrient": { + 0: vector_orientation + }, # remove F - because its a calculation, not an element? + "StandardLevel": {0: "None"}, # Set to 'None' # Temporarily Omit 'StandardName', 'StandardVersion', 'PartialStandDesc' - 'Source': {0: 'institute'}, # "institute" - if the named institution provided the data, “INTERMAGNET†- if the data file has been created by INTERMAGNET from another data source, “WDC†- if the World Data Centre has created the file from another data source - 'TermsOfUse': {0: conditions_of_use}, + "Source": { + 0: "institute" + }, # "institute" - if the named institution provided the data, “INTERMAGNET†- if the data file has been created by INTERMAGNET from another data source, “WDC†- if the World Data Centre has created the file from another data source + "TermsOfUse": {0: conditions_of_use}, # 'UniqueIdentifier': {0: ''}, # 'ParentIdentifiers': {0: ''}, - # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov - 'SensorSamplingRate': {0: sensor_sampling_rate}, #Optional - 'DataType': {0: data_type},#Optional - 'Comments': {0: comments}, #Optional - 'DeclinationBase': {0: declination_base}, #Optional + # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov + "SensorSamplingRate": {0: sensor_sampling_rate}, # Optional + 
"DataType": {0: data_type}, # Optional + "Comments": {0: comments}, # Optional + "DeclinationBase": {0: declination_base}, # Optional } return global_attrs @@ -474,24 +504,32 @@ class ImagCDFFactory(TimeseriesFactory): for trace in timeseries: channel = trace.stats.channel times = [ - (trace.stats.starttime + trace.stats.delta * i).datetime.replace(tzinfo=timezone.utc) + (trace.stats.starttime + trace.stats.delta * i).datetime.replace( + tzinfo=timezone.utc + ) for i in range(trace.stats.npts) ] # Convert timestamps to TT2000 format required by CDF - tt2000_times = cdflib.cdfepoch.timestamp_to_tt2000([time.timestamp() for time in times]) + tt2000_times = cdflib.cdfepoch.timestamp_to_tt2000( + [time.timestamp() for time in times] + ) if channel in self._get_vector_elements(): if vector_times is None: vector_times = tt2000_times else: if not np.array_equal(vector_times, tt2000_times): - raise ValueError("Time stamps for vector channels are not the same.") + raise ValueError( + "Time stamps for vector channels are not the same." + ) elif channel in self._get_scalar_elements(): if scalar_times is None: scalar_times = tt2000_times else: if not np.array_equal(scalar_times, tt2000_times): - raise ValueError("Time stamps for scalar channels are not the same.") + raise ValueError( + "Time stamps for scalar channels are not the same." + ) elif channel in TEMPERATURE_ELEMENTS_ID: ts_key = f"Temperature{temperature_index}Times" if ts_key not in temperature_times: @@ -502,21 +540,22 @@ class ImagCDFFactory(TimeseriesFactory): time_vars = {} if vector_times is not None: - time_vars['GeomagneticVectorTimes'] = vector_times + time_vars["GeomagneticVectorTimes"] = vector_times if scalar_times is not None: - time_vars['GeomagneticScalarTimes'] = scalar_times + time_vars["GeomagneticScalarTimes"] = scalar_times if temperature_times: time_vars.update(temperature_times) - + last_times = [] - self.isUniqueTimes = len(time_vars) == 1 #true if only one set of times, else default to false. 
+ self.isUniqueTimes = ( + len(time_vars) == 1 + ) # true if only one set of times, else default to false. for index, times in enumerate(time_vars.values()): if index > 0: self.isUniqueTimes = not np.array_equal(last_times, times) last_times = times - - return time_vars if self.isUniqueTimes else {"DataTimes": last_times} + return time_vars if self.isUniqueTimes else {"DataTimes": last_times} def _create_var_spec( self, @@ -549,63 +588,69 @@ class ImagCDFFactory(TimeseriesFactory): - CDF User's Guide: Variable Specification """ var_spec = { - 'Variable': var_name, - 'Data_Type': data_type, - 'Num_Elements': num_elements, - 'Rec_Vary': True, - 'Var_Type': var_type, - 'Dim_Sizes': dim_sizes, - 'Sparse': 'no_sparse' if not sparse else 'pad_sparse', - 'Compress': compress, - 'Pad': pad, + "Variable": var_name, + "Data_Type": data_type, + "Num_Elements": num_elements, + "Rec_Vary": True, + "Var_Type": var_type, + "Dim_Sizes": dim_sizes, + "Sparse": "no_sparse" if not sparse else "pad_sparse", + "Compress": compress, + "Pad": pad, } return var_spec - def _create_var_attrs(self, trace: Trace, temperature_index: Optional[int] = None, isUniqueTimes: Optional[bool] = True) -> dict: + def _create_var_attrs( + self, + trace: Trace, + temperature_index: Optional[int] = None, + isUniqueTimes: Optional[bool] = True, + ) -> dict: channel = trace.stats.channel.upper() - fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. - units = '' # Must be one of “nTâ€, “Degrees of arc†or “Celsius†- if channel == 'D': - units = 'Degrees of arc' - validmin = -360.0 - validmax = 360.0 # A full circle representation - elif channel == 'I': - units = 'Degrees of arc' - validmin = -90.0 - validmax = 90.0 #The magnetic field vector can point straight down (+90°), horizontal (0°), or straight up (-90°). 
+ fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. + units = "" # Must be one of “nTâ€, “Degrees of arc†or “Celsius†+ if channel == "D": + units = "Degrees of arc" + validmin = -360.0 + validmax = 360.0 # A full circle representation + elif channel == "I": + units = "Degrees of arc" + validmin = -90.0 + validmax = 90.0 # The magnetic field vector can point straight down (+90°), horizontal (0°), or straight up (-90°). elif channel in TEMPERATURE_ELEMENTS_ID: - units = 'Celsius' + units = "Celsius" fieldnam = f"Temperature {temperature_index} {trace.stats.location}" - validmin = -273.15 #absolute zero + validmin = -273.15 # absolute zero validmax = 79_999 - elif channel in ['F','S']: - units = 'nT' - validmin = 0.0 # negative magnetic field intestity not physically meaningful. + elif channel in ["F", "S"]: + units = "nT" + validmin = ( + 0.0 # negative magnetic field intestity not physically meaningful. 
+ ) validmax = 79_999.0 - elif channel in ['X', 'Y', 'Z', 'H', 'E', 'V', 'G']: - units = 'nT' + elif channel in ["X", "Y", "Z", "H", "E", "V", "G"]: + units = "nT" validmin = -79_999.0 validmax = 79_999.0 # Determine DEPEND_0 based on channel type if channel in self._get_vector_elements(): - depend_0 = 'GeomagneticVectorTimes' + depend_0 = "GeomagneticVectorTimes" elif channel in self._get_scalar_elements(): - depend_0 = 'GeomagneticScalarTimes' + depend_0 = "GeomagneticScalarTimes" elif channel in TEMPERATURE_ELEMENTS_ID: depend_0 = f"Temperature{temperature_index}Times" - var_attrs = { - 'FIELDNAM': fieldnam, - 'UNITS': units, - 'FILLVAL': 99999.0, - 'VALIDMIN': validmin, - 'VALIDMAX': validmax, - 'DEPEND_0': depend_0 if isUniqueTimes else "DataTimes", - 'DISPLAY_TYPE': 'time_series', - 'LABLAXIS': channel, - 'DATA_INTERVAL_TYPE': trace.stats.data_interval_type + "FIELDNAM": fieldnam, + "UNITS": units, + "FILLVAL": 99999.0, + "VALIDMIN": validmin, + "VALIDMAX": validmax, + "DEPEND_0": depend_0 if isUniqueTimes else "DataTimes", + "DISPLAY_TYPE": "time_series", + "LABLAXIS": channel, + "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, } return var_attrs @@ -619,9 +664,9 @@ class ImagCDFFactory(TimeseriesFactory): - ImagCDF Technical Documentation: ImagCDF Data """ # var_attrs = { - # 'UNITS': 'TT2000', - # 'DISPLAY_TYPE': 'time_series', - # 'LABLAXIS': 'Time', + # 'UNITS': 'TT2000', + # 'DISPLAY_TYPE': 'time_series', + # 'LABLAXIS': 'Time', # } # return var_attrs return {} @@ -666,26 +711,28 @@ class ImagCDFFactory(TimeseriesFactory): # Extract global attributes global_attrs = cdf.globalattsget() - + # Map global attributes to Stream-level metadata - observatory = global_attrs.get('IagaCode', [''])[0] - station_name = global_attrs.get('ObservatoryName', [''])[0] - institution = global_attrs.get('Institution', [''])[0] - latitude = global_attrs.get('Latitude', [0.0])[0] - longitude = global_attrs.get('Longitude', [0.0])[0] - elevation = 
global_attrs.get('Elevation', [99_999.0])[0] #default to 99_999 per technical documents. - sensor_sampling_rate = global_attrs.get('SensorSamplingRate', [0.0])[0] - sensor_orientation = global_attrs.get('VectorSensOrient', [''])[0] - data_type = global_attrs.get('DataType', ['variation'])[0] - publication_level = global_attrs.get('PublicationLevel', ['1'])[0] - comments = global_attrs.get('Comments', ['']) - terms_of_use = global_attrs.get('TermsOfUse', [''])[0] - declination_base = global_attrs.get('DeclinationBase', [0.0])[0] + observatory = global_attrs.get("IagaCode", [""])[0] + station_name = global_attrs.get("ObservatoryName", [""])[0] + institution = global_attrs.get("Institution", [""])[0] + latitude = global_attrs.get("Latitude", [0.0])[0] + longitude = global_attrs.get("Longitude", [0.0])[0] + elevation = global_attrs.get("Elevation", [99_999.0])[ + 0 + ] # default to 99_999 per technical documents. + sensor_sampling_rate = global_attrs.get("SensorSamplingRate", [0.0])[0] + sensor_orientation = global_attrs.get("VectorSensOrient", [""])[0] + data_type = global_attrs.get("DataType", ["variation"])[0] + publication_level = global_attrs.get("PublicationLevel", ["1"])[0] + comments = global_attrs.get("Comments", [""]) + terms_of_use = global_attrs.get("TermsOfUse", [""])[0] + declination_base = global_attrs.get("DeclinationBase", [0.0])[0] # Identify all time variables time_vars = {} for var in cdf.cdf_info().zVariables: - if var.lower().endswith('times'): + if var.lower().endswith("times"): time_data = cdf.varget(var) unix_times = cdflib.cdfepoch.unixtime(time_data) utc_times = [UTCDateTime(t) for t in unix_times] @@ -694,14 +741,14 @@ class ImagCDFFactory(TimeseriesFactory): # Read data variables and associate them with time variables for var in cdf.cdf_info().zVariables: # Skip time variables - if var.lower().endswith('times'): + if var.lower().endswith("times"): continue data = cdf.varget(var) attrs = cdf.varattsget(var) # Determine DEPEND_0 (the time 
variable name) and validate - ts_name = attrs.get('DEPEND_0') + ts_name = attrs.get("DEPEND_0") if not ts_name: # If no DEPEND_0, skip this variable as we cannot map times continue @@ -728,15 +775,15 @@ class ImagCDFFactory(TimeseriesFactory): # Determine delta (sample interval) if len(times) > 1: # delta as a float of seconds - delta = (times[1].timestamp - times[0].timestamp) + delta = times[1].timestamp - times[0].timestamp else: # if only one sample, use default based on interval # convert minute, second, etc. to numeric delta - if self.interval == 'minute': + if self.interval == "minute": delta = 60.0 - elif self.interval == 'second': + elif self.interval == "second": delta = 1.0 - elif self.interval == 'hour': + elif self.interval == "hour": delta = 3600.0 else: # fallback, set delta to 60 @@ -751,34 +798,34 @@ class ImagCDFFactory(TimeseriesFactory): elif var.startswith("Temperature"): # Temperature variables may not map directly to a geomagnetic channel # but to temperature sensors. 
We can just use the label from LABLAXIS if needed - channel = attrs.get('LABLAXIS', var) + channel = attrs.get("LABLAXIS", var) else: # fallback if naming doesn't match expected patterns channel = var - time_attrs =cdf.varattsget(var) - data_interval = time_attrs.get('DATA_INTERVAL_TYPE', ['']) + time_attrs = cdf.varattsget(var) + data_interval = time_attrs.get("DATA_INTERVAL_TYPE", [""]) # Create a trace trace = Trace( data=data, header={ - 'station': observatory, - 'channel': channel, - 'starttime': times[0], - 'delta': delta, - 'geodetic_latitude': latitude, - 'geodetic_longitude': longitude, - 'elevation': elevation, - 'sensor_orientation': "".join(sensor_orientation), - 'data_type': data_type, - 'station_name': station_name, - 'agency_name': institution, - 'conditions_of_use': terms_of_use, - 'sensor_sampling_rate': sensor_sampling_rate, - 'data_interval_type': data_interval, - 'declination_base': declination_base, - 'filter_comments': comments - } + "station": observatory, + "channel": channel, + "starttime": times[0], + "delta": delta, + "geodetic_latitude": latitude, + "geodetic_longitude": longitude, + "elevation": elevation, + "sensor_orientation": "".join(sensor_orientation), + "data_type": data_type, + "station_name": station_name, + "agency_name": institution, + "conditions_of_use": terms_of_use, + "sensor_sampling_rate": sensor_sampling_rate, + "data_interval_type": data_interval, + "declination_base": declination_base, + "filter_comments": comments, + }, ) stream += trace @@ -797,8 +844,8 @@ class ImagCDFFactory(TimeseriesFactory): This method constructs the filename based on the ImagCDF naming conventions, which include the observatory code, date-time formatted - according to the data interval, and the publication level. - + according to the data interval, and the publication level. 
+ [iaga-code]_[date-time]_[publication-level].cdf Parameters: @@ -831,7 +878,9 @@ class ImagCDFFactory(TimeseriesFactory): elif interval == "second": date_format = date.strftime("%Y%m%d_%H%M%S") else: - raise ValueError(f"Unsupported interval: {interval}") #tenhertz currently not supported + raise ValueError( + f"Unsupported interval: {interval}" + ) # tenhertz currently not supported # Filename following ImagCDF convention, see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdf-file-names filename = f"{observatory.lower()}_{date_format}_{publication_level}.cdf" @@ -859,11 +908,13 @@ class ImagCDFFactory(TimeseriesFactory): } # Attempt to use the template provided in urlTemplate - if "{" in self.urlTemplate and "}" in self.urlTemplate: + if "{" in self.urlTemplate and "}" in self.urlTemplate: try: return self.urlTemplate.format(**params) except KeyError as e: - raise TimeseriesFactoryException(f"Invalid placeholder in urlTemplate: {e}") + raise TimeseriesFactoryException( + f"Invalid placeholder in urlTemplate: {e}" + ) # If the urlTemplate doesn't support placeholders, assume 'file://' scheme if self.urlTemplate.startswith("file://"): @@ -878,9 +929,7 @@ class ImagCDFFactory(TimeseriesFactory): ) def _get_vector_elements(self): - return {'X', 'Y', 'Z', 'H', 'D', 'E', 'V', 'I', 'F'} - - def _get_scalar_elements(self): - return {'S', 'G'} - + return {"X", "Y", "Z", "H", "D", "E", "V", "I", "F"} + def _get_scalar_elements(self): + return {"S", "G"} -- GitLab From 8a4949e890dfc2b561dfbbb927d5192a26a75a3c Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 9 Dec 2024 13:45:42 -0800 Subject: [PATCH 09/30] lint + poetry clean up --- geomagio/api/ws/Element.py | 4 ++- poetry.lock | 69 +++++++++++++------------------------- 2 files changed, 27 insertions(+), 46 deletions(-) diff --git a/geomagio/api/ws/Element.py b/geomagio/api/ws/Element.py index 25b876c5..e5ab3e62 100644 --- 
a/geomagio/api/ws/Element.py +++ b/geomagio/api/ws/Element.py @@ -47,4 +47,6 @@ ELEMENTS = [ ELEMENT_INDEX = {e.id: e for e in ELEMENTS} -TEMPERATURE_ELEMENTS_ID = [element.id for element in ELEMENTS if "Temperature" in element.name] \ No newline at end of file +TEMPERATURE_ELEMENTS_ID = [ + element.id for element in ELEMENTS if "Temperature" in element.name +] diff --git a/poetry.lock b/poetry.lock index 90b76110..56e23f59 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,13 +38,13 @@ docs = ["sphinx (==7.2.6)", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "alembic" -version = "1.14.0" +version = "1.13.3" description = "A database migration tool for SQLAlchemy." optional = false python-versions = ">=3.8" files = [ - {file = "alembic-1.14.0-py3-none-any.whl", hash = "sha256:99bd884ca390466db5e27ffccff1d179ec5c05c965cfefc0607e69f9e411cb25"}, - {file = "alembic-1.14.0.tar.gz", hash = "sha256:b00892b53b3642d0b8dbedba234dbf1924b69be83a9a769d5a624b01094e304b"}, + {file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"}, + {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"}, ] [package.dependencies] @@ -153,25 +153,6 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "cdflib" -version = "1.3.2" -description = "A python CDF reader toolkit" -optional = false -python-versions = ">=3.8" -files = [ - {file = "cdflib-1.3.2-py3-none-any.whl", hash = "sha256:49af97acc328c586ac5b7c27fd8e67bccf24af82c5bd8c37d8cfe048a0c1752a"}, - {file = "cdflib-1.3.2.tar.gz", hash = "sha256:97f27ac629e4c0ac1367eb8f4edd7a1d184190272ab98a6401e999f3a2e05687"}, -] - -[package.dependencies] -numpy = ">=1.21" - -[package.extras] -dev = ["ipython", "pre-commit"] -docs = ["astropy", "netcdf4", "sphinx", "sphinx-automodapi", "sphinx-copybutton", 
"sphinx-rtd-theme", "xarray"] -tests = ["astropy", "h5netcdf", "hypothesis", "netcdf4", "pytest (>=3.9)", "pytest-cov", "pytest-remotedata", "xarray"] - [[package]] name = "certifi" version = "2024.12.14" @@ -711,13 +692,13 @@ files = [ [[package]] name = "dparse" -version = "0.6.4" +version = "0.6.3" description = "A parser for Python dependency files" optional = false -python-versions = ">=3.7" +python-versions = ">=3.6" files = [ - {file = "dparse-0.6.4-py3-none-any.whl", hash = "sha256:fbab4d50d54d0e739fbb4dedfc3d92771003a5b9aa8545ca7a7045e3b174af57"}, - {file = "dparse-0.6.4.tar.gz", hash = "sha256:90b29c39e3edc36c6284c82c4132648eaf28a01863eb3c231c2512196132201a"}, + {file = "dparse-0.6.3-py3-none-any.whl", hash = "sha256:0d8fe18714056ca632d98b24fbfc4e9791d4e47065285ab486182288813a5318"}, + {file = "dparse-0.6.3.tar.gz", hash = "sha256:27bb8b4bcaefec3997697ba3f6e06b2447200ba273c0b085c3d012a04571b528"}, ] [package.dependencies] @@ -725,20 +706,18 @@ packaging = "*" tomli = {version = "*", markers = "python_version < \"3.11\""} [package.extras] -all = ["pipenv", "poetry", "pyyaml"] conda = ["pyyaml"] -pipenv = ["pipenv"] -poetry = ["poetry"] +pipenv = ["pipenv (<=2022.12.19)"] [[package]] name = "et-xmlfile" -version = "2.0.0" +version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" optional = false -python-versions = ">=3.8" +python-versions = ">=3.6" files = [ - {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, - {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, + {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, + {file = "et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, ] [[package]] @@ -1795,13 +1774,13 @@ typing-extensions = ">=3.7.4" 
[[package]] name = "packaging" -version = "24.2" +version = "24.1" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, - {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] [[package]] @@ -2499,23 +2478,23 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo [[package]] name = "setuptools" -version = "75.3.0" +version = "75.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, - {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, + {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, + {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", 
"jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] [[package]] name = "six" @@ -2645,13 +2624,13 @@ SQLAlchemy = ">=0.9.0" [[package]] name = "starlette" -version = "0.41.3" +version = "0.41.0" description = "The little ASGI library that shines." 
optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, - {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, + {file = "starlette-0.41.0-py3-none-any.whl", hash = "sha256:a0193a3c413ebc9c78bff1c3546a45bb8c8bcb4a84cae8747d650a65bd37210a"}, + {file = "starlette-0.41.0.tar.gz", hash = "sha256:39cbd8768b107d68bfe1ff1672b38a2c38b49777de46d2a592841d58e3bf7c2a"}, ] [package.dependencies] -- GitLab From f2e1ec26e0946a9eba4ed660ce7da82923927764 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 9 Dec 2024 13:46:21 -0800 Subject: [PATCH 10/30] lint + poetry clean up --- poetry.lock | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/poetry.lock b/poetry.lock index 56e23f59..a4aac078 100644 --- a/poetry.lock +++ b/poetry.lock @@ -153,6 +153,25 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cdflib" +version = "1.3.2" +description = "A python CDF reader toolkit" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cdflib-1.3.2-py3-none-any.whl", hash = "sha256:49af97acc328c586ac5b7c27fd8e67bccf24af82c5bd8c37d8cfe048a0c1752a"}, + {file = "cdflib-1.3.2.tar.gz", hash = "sha256:97f27ac629e4c0ac1367eb8f4edd7a1d184190272ab98a6401e999f3a2e05687"}, +] + +[package.dependencies] +numpy = ">=1.21" + +[package.extras] +dev = ["ipython", "pre-commit"] +docs = ["astropy", "netcdf4", "sphinx", "sphinx-automodapi", "sphinx-copybutton", "sphinx-rtd-theme", "xarray"] +tests = ["astropy", "h5netcdf", "hypothesis", "netcdf4", "pytest (>=3.9)", "pytest-cov", "pytest-remotedata", "xarray"] + [[package]] name = "certifi" version = "2024.12.14" -- GitLab From d6a2066dbb30f65e2f64b325e4694481de0c7a76 Mon Sep 17 00:00:00 
2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 11 Dec 2024 10:06:53 -0800 Subject: [PATCH 11/30] parse_string method implemented for streaming cdf file via input-file command --- geomagio/Controller.py | 6 ++--- geomagio/ImagCDFFactory.py | 51 ++++++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 96ebb28c..0985597c 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -503,7 +503,7 @@ def get_input_factory(args): input_type = args.input # stream/url arguments if args.input_file is not None: - if input_type in ["netcdf", "miniseed"]: + if input_type in ["netcdf", "miniseed", "imagcdf"]: input_stream = open(args.input_file, "rb") else: input_stream = open(args.input_file, "r") @@ -547,8 +547,6 @@ def get_input_factory(args): locationCode=args.locationcode, **input_factory_args, ) - elif input_type == "imagcdf": - input_factory = ImagCDFFactory(**input_factory_args) else: # stream compatible factories if input_type == "iaga2002": @@ -573,6 +571,8 @@ def get_input_factory(args): input_factory = xml.XMLFactory(**input_factory_args) elif input_type == "covjson": input_factory = covjson.CovJSONFactory(**input_factory_args) + elif input_type == "imagcdf": + input_factory = ImagCDFFactory(**input_factory_args) # wrap stream if input_stream is not None: input_factory = StreamTimeseriesFactory( diff --git a/geomagio/ImagCDFFactory.py b/geomagio/ImagCDFFactory.py index ea947903..af603201 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/ImagCDFFactory.py @@ -164,7 +164,7 @@ class ImagCDFFactory(TimeseriesFactory): "Rec_Vary": True, "Var_Type": "zVariable", "Dim_Sizes": [], - "Sparse": "no_sparse", + "Sparse": "no_sparse", # no_sparse because there should not be time gaps. 
"Compress": 9, "Pad": None, } @@ -424,6 +424,47 @@ class ImagCDFFactory(TimeseriesFactory): timeseries.sort() return timeseries + def parse_string(self, data: str, **kwargs): + """ + Parse ImagCDF binary data into an ObsPy Stream. + + This method writes the provided binary data to a temporary file, + reads the file using `cdflib`, and converts the data into an ObsPy + Stream. + + Parameters + ---------- + data : bytes + Binary data containing ImagCDF content. + + Returns + ------- + Stream + An ObsPy Stream object with the parsed geomagnetic time series data. + + Raises + ------ + TimeseriesFactoryException + If an error occurs while parsing the ImagCDF data. + """ + # Create a temporary file to store the CDF data + with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: + tmp_file_name = tmp_file.name + tmp_file.write(data) + + try: + # Read the CDF from the temporary file + cdf = CDFReader(tmp_file_name) + stream = self._read_cdf(cdf) + # no cdf.close() method required + except Exception as e: + raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}") + finally: + # Clean up the temporary file + os.remove(tmp_file_name) + + return stream + def _create_global_attributes( self, timeseries: Stream, channels: List[str] ) -> dict: @@ -753,18 +794,12 @@ class ImagCDFFactory(TimeseriesFactory): # If no DEPEND_0, skip this variable as we cannot map times continue - # If we used a DataTimes fallback or similar, ensure we handle it case-insensitively - # and also confirm that time_vars keys are checked properly. - # The ImagCDF can have DataTimes, GeomagneticVectorTimes, etc. - # So try exact match first, if not found, attempt case-insensitive. + # The ImagCDF can have DataTimes, GeomagneticVectorTimes, GeomagneticScalarTimes, TemperatureNTimes (for N > 0), etc. 
matched_time_key = None for tkey in time_vars.keys(): if tkey == ts_name: matched_time_key = tkey break - if tkey.lower() == ts_name.lower(): - matched_time_key = tkey - break if matched_time_key not in time_vars: # If we cannot find the matching time variable, skip this variable -- GitLab From 72bbdf58a81acbb3635366d96602b67fd274f1fa Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 12 Dec 2024 12:31:37 -0800 Subject: [PATCH 12/30] tweaks to accept output_args. temperature-type elements as a nonstandard geomagnetic field element instead of as Temperature cdf variable --- geomagio/Controller.py | 6 +- geomagio/{ => imagcdf}/ImagCDFFactory.py | 97 ++++++++++++++---------- 2 files changed, 61 insertions(+), 42 deletions(-) rename geomagio/{ => imagcdf}/ImagCDFFactory.py (93%) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 0985597c..4e212579 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -7,7 +7,7 @@ from typing import List, Optional, Tuple, Union from obspy.core import Stream, UTCDateTime -from geomagio.ImagCDFFactory import ImagCDFFactory +from geomagio.imagcdf.ImagCDFFactory import ImagCDFFactory from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm from .DerivedTimeseriesFactory import DerivedTimeseriesFactory @@ -28,7 +28,7 @@ from . import vbf from . import xml from . import covjson from . 
import netcdf -from geomagio.ImagCDFFactory import ImagCDFFactory +from geomagio.imagcdf.ImagCDFFactory import ImagCDFFactory class Controller(object): @@ -635,7 +635,7 @@ def get_output_factory(args): elif output_type == "plot": output_factory = PlotTimeseriesFactory() elif output_type == "imagcdf": - output_factory = ImagCDFFactory() + output_factory = ImagCDFFactory(**output_factory_args) else: # stream compatible factories if output_type == "binlog": diff --git a/geomagio/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py similarity index 93% rename from geomagio/ImagCDFFactory.py rename to geomagio/imagcdf/ImagCDFFactory.py index af603201..a7838f14 100644 --- a/geomagio/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -25,10 +25,10 @@ from sqlalchemy import true from geomagio.TimeseriesFactory import TimeseriesFactory from geomagio.api.ws.Element import TEMPERATURE_ELEMENTS_ID -from .geomag_types import DataInterval, DataType -from .TimeseriesFactoryException import TimeseriesFactoryException -from . import TimeseriesUtility -from . import Util +from ..geomag_types import DataInterval, DataType +from ..TimeseriesFactoryException import TimeseriesFactoryException +from .. import TimeseriesUtility +from .. import Util import cdflib from cdflib.cdfwrite import CDF as CDFWriter @@ -93,6 +93,7 @@ class ImagCDFFactory(TimeseriesFactory): """ isUniqueTimes = True # used to determine depend_0 and CDF Time Variable Name + NONSTANDARD_ELEMENTS = TEMPERATURE_ELEMENTS_ID def __init__( self, @@ -123,13 +124,6 @@ class ImagCDFFactory(TimeseriesFactory): urlInterval=urlInterval, ) - def parse_string(self, data: str, **kwargs): - """Parse ImagCDF formatted string data into a Stream. - - Note: Parsing from strings is not implemented in this factory. 
- """ - raise NotImplementedError('"parse_string" not implemented') - def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: @@ -164,7 +158,7 @@ class ImagCDFFactory(TimeseriesFactory): "Rec_Vary": True, "Var_Type": "zVariable", "Dim_Sizes": [], - "Sparse": "no_sparse", # no_sparse because there should not be time gaps. + "Sparse": "no_sparse", # no_sparse because there should not be time gaps. (Time stamps must represent a regular time series with no missing values in the series) "Compress": 9, "Pad": None, } @@ -178,11 +172,10 @@ class ImagCDFFactory(TimeseriesFactory): temperature_index = 0 for trace in timeseries: channel = trace.stats.channel - if channel in TEMPERATURE_ELEMENTS_ID: - temperature_index += 1 # MUST INCREMENT INDEX BEFORE USING - var_name = f"Temperature{temperature_index}" - else: - var_name = f"GeomagneticField{channel}" + var_name = f"GeomagneticField{channel}" + # if channel in REAL_TEMPERATURE: + # temperature_index += 1 # MUST INCREMENT INDEX BEFORE USING + # var_name = f"Temperature{temperature_index}" data_type = self._get_cdf_data_type(trace) num_elements = 1 if data_type in [ @@ -258,8 +251,8 @@ class ImagCDFFactory(TimeseriesFactory): observatory = stats.station starttime = starttime or stats.starttime endtime = endtime or stats.endtime - # Split data into intervals if necessary + urlIntervals = Util.get_intervals( starttime=starttime, endtime=endtime, size=self.urlInterval ) @@ -295,7 +288,6 @@ class ImagCDFFactory(TimeseriesFactory): starttime=interval_start, endtime=interval_end, ) - # Check if the file already exists to merge data if os.path.isfile(url_file): try: @@ -325,13 +317,13 @@ class ImagCDFFactory(TimeseriesFactory): ) # Proceed with new data - # Pad the data with NaNs to ensure it fits the interval + # Pad the data to ensure it fits the interval url_data.trim( starttime=interval_start, 
endtime=interval_end, nearest_sample=False, pad=True, - fill_value=np.nan, + fill_value=99_999, # FILLVAL ) # Write the data to the CDF file @@ -539,6 +531,7 @@ class ImagCDFFactory(TimeseriesFactory): def _create_time_stamp_variables(self, timeseries: Stream) -> dict: vector_times = None scalar_times = None + nonstandard_times = None temperature_times = {} temperature_index = 1 @@ -571,14 +564,21 @@ class ImagCDFFactory(TimeseriesFactory): raise ValueError( "Time stamps for scalar channels are not the same." ) - elif channel in TEMPERATURE_ELEMENTS_ID: - ts_key = f"Temperature{temperature_index}Times" - if ts_key not in temperature_times: - temperature_times[ts_key] = tt2000_times - temperature_index += 1 + # elif channel in REAL_TEMPERATURES: + # ts_key = f"Temperature{temperature_index}Times" + # if ts_key not in temperature_times: + # temperature_times[ts_key] = tt2000_times + # temperature_index += 1 + # else: + # temperature_times[ts_key] = tt2000_times + elif channel in self.NONSTANDARD_ELEMENTS: + if scalar_times is None: + nonstandard_times = tt2000_times else: - temperature_times[ts_key] = tt2000_times - + if not np.array_equal(scalar_times, tt2000_times): + raise ValueError( + "Time stamps for nonstandard channels are not the same." + ) time_vars = {} if vector_times is not None: time_vars["GeomagneticVectorTimes"] = vector_times @@ -587,16 +587,28 @@ class ImagCDFFactory(TimeseriesFactory): if temperature_times: time_vars.update(temperature_times) + isNonStandard = nonstandard_times is not None + if isNonStandard: + time_vars["NonstandardTimes"] = nonstandard_times + last_times = [] + self.isUniqueTimes = ( len(time_vars) == 1 - ) # true if only one set of times, else default to false. 
+ ) # (true if is a single time stamp variable in the file) for index, times in enumerate(time_vars.values()): if index > 0: self.isUniqueTimes = not np.array_equal(last_times, times) last_times = times - - return time_vars if self.isUniqueTimes else {"DataTimes": last_times} + if self.isUniqueTimes and isNonStandard and len(time_vars) > 2: + raise ValueError( + f"Time stamps must be the same for all channels when using nonstandard channels: {','.join(self.NONSTANDARD_ELEMENTS)}" + ) + return ( + {"DataTimes": last_times} + if isNonStandard or not self.isUniqueTimes + else time_vars + ) def _create_var_spec( self, @@ -660,9 +672,13 @@ class ImagCDFFactory(TimeseriesFactory): validmax = 90.0 # The magnetic field vector can point straight down (+90°), horizontal (0°), or straight up (-90°). elif channel in TEMPERATURE_ELEMENTS_ID: units = "Celsius" - fieldnam = f"Temperature {temperature_index} {trace.stats.location}" validmin = -273.15 # absolute zero validmax = 79_999 + # elif channel in [REAL_TEMPERATURES]: + # units = "Celsius" + # fieldnam = f"Temperature {temperature_index} {trace.stats.location}" + # validmin = -273.15 # absolute zero + # validmax = 79_999 elif channel in ["F", "S"]: units = "nT" validmin = ( @@ -675,12 +691,14 @@ class ImagCDFFactory(TimeseriesFactory): validmax = 79_999.0 # Determine DEPEND_0 based on channel type - if channel in self._get_vector_elements(): + if not isUniqueTimes: + depend_0 = "DataTimes" + elif channel in self._get_vector_elements(): depend_0 = "GeomagneticVectorTimes" elif channel in self._get_scalar_elements(): depend_0 = "GeomagneticScalarTimes" - elif channel in TEMPERATURE_ELEMENTS_ID: - depend_0 = f"Temperature{temperature_index}Times" + # elif channel in REAL_TEMPERATURES: + # depend_0 = f"Temperature{temperature_index}Times" var_attrs = { "FIELDNAM": fieldnam, @@ -688,10 +706,10 @@ class ImagCDFFactory(TimeseriesFactory): "FILLVAL": 99999.0, "VALIDMIN": validmin, "VALIDMAX": validmax, - "DEPEND_0": depend_0 if 
isUniqueTimes else "DataTimes", + "DEPEND_0": depend_0, "DISPLAY_TYPE": "time_series", "LABLAXIS": channel, - "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, + "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, # optional } return var_attrs @@ -953,10 +971,11 @@ class ImagCDFFactory(TimeseriesFactory): # If the urlTemplate doesn't support placeholders, assume 'file://' scheme if self.urlTemplate.startswith("file://"): - base_path = self.urlTemplate[7:] # Strip "file://" + base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path, filename) + return os.path.join(base_path, filename) + return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme raise TimeseriesFactoryException( -- GitLab From 066e59b4eca583d786acb5d488324d1a6a5e52ce Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 13 Dec 2024 09:13:41 -0800 Subject: [PATCH 13/30] Nonstand elements implemented. Temperatures un-implemented. no longer extends streamfactory on imagcdf input, parse string removed. basic is valid imagcdf logic. 
--- geomagio/imagcdf/ImagCDFFactory.py | 256 +++++++++++++++++------------ 1 file changed, 148 insertions(+), 108 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index a7838f14..0c63bb70 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -101,7 +101,7 @@ class ImagCDFFactory(TimeseriesFactory): channels: List[str] = ("H", "D", "Z", "F"), type: DataType = "variation", interval: DataInterval = "minute", - urlTemplate="file://{obs}_{dt}_{t}.cdf", + urlTemplate="file://etc/imagcdf/{obs}_{dt}_{t}.cdf", urlInterval: int = -1, ): """ @@ -134,7 +134,7 @@ class ImagCDFFactory(TimeseriesFactory): cdf_spec = { "Compressed": 9, # Enable compression (0-9) "Majority": CDFWriter.ROW_MAJOR, # Data layout - gets set automatically - "Encoding": CDFWriter.IBMPC_ENCODING, # gets set automatically + "Encoding": CDFWriter.HOST_ENCODING, # gets set automatically "Checksum": True, # Disable checksum for faster writes (optional) "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } @@ -259,8 +259,9 @@ class ImagCDFFactory(TimeseriesFactory): for urlInterval in urlIntervals: interval_start = urlInterval["start"] interval_end = urlInterval["end"] - if interval_start != interval_end: - interval_end = interval_end - delta + # Removes last data point ex: if endtime = 02:00:00, this could return 01:59:00 as last data point. + # if interval_start != interval_end: + # interval_end = interval_end - delta url = self._get_url( observatory=observatory, date=interval_start, @@ -290,38 +291,16 @@ class ImagCDFFactory(TimeseriesFactory): ) # Check if the file already exists to merge data if os.path.isfile(url_file): - try: - # Read existing data to merge with new data - existing_cdf = CDFReader(url_file) - existing_stream = self._read_cdf(existing_cdf) - # existing_cdf.close() #no close method? 
- existing_data = existing_stream - - # Merge existing data with new data - for trace in existing_data: - new_trace = url_data.select( - network=trace.stats.network, - station=trace.stats.station, - channel=trace.stats.channel, - ) - if new_trace: - trace.data = np.concatenate( - (trace.data, new_trace[0].data) - ) - url_data = existing_data + url_data - except Exception as e: - # Log the exception if needed - print( - f"Warning: Could not read existing CDF file '{url_file}': {e}", - file=sys.stderr, - ) - # Proceed with new data - + os.remove(url_file) + print(f"Naming conflict. Deleting exisiting file '{url_file}'.") + # raise TimeseriesFactoryException( + # f"Error: File '{url_file}' already exists." + # ) # Pad the data to ensure it fits the interval url_data.trim( starttime=interval_start, endtime=interval_end, - nearest_sample=False, + nearest_sample=True, pad=True, fill_value=99_999, # FILLVAL ) @@ -370,7 +349,6 @@ class ImagCDFFactory(TimeseriesFactory): raise TimeseriesFactoryException( "Only file urls are supported for reading ImagCDF" ) - url_file = Util.get_file_from_url(url, createParentDirectory=False) if not os.path.isfile(url_file): # If file doesn't exist, skip @@ -379,23 +357,24 @@ class ImagCDFFactory(TimeseriesFactory): try: # Read CDF data and merge cdf = CDFReader(url_file) - file_stream = self._read_cdf(cdf) - # Attempt to select only requested channels - selected = Stream() - for ch in channels: - selected += file_stream.select(channel=ch) - timeseries += selected + # file_stream = self._read_cdf(cdf, channels) + timeseries = self._read_cdf(cdf, channels) + # Attempt to select only requested channelws (redundant as read_cdf can more efficiently filter) + # selected = Stream() + # for ch in channels: + # selected += file_stream.select(channel=ch) + # timeseries += selected except Exception as e: print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) # After reading all intervals, merge and trim - 
timeseries.merge(fill_value=np.nan) + timeseries.merge(fill_value=99_999) timeseries.trim( starttime=starttime, endtime=endtime, - nearest_sample=False, + nearest_sample=True, pad=True, - fill_value=np.nan, + fill_value=99_999, ) # If requested, add empty channels not present in data @@ -416,46 +395,47 @@ class ImagCDFFactory(TimeseriesFactory): timeseries.sort() return timeseries - def parse_string(self, data: str, **kwargs): - """ - Parse ImagCDF binary data into an ObsPy Stream. - - This method writes the provided binary data to a temporary file, - reads the file using `cdflib`, and converts the data into an ObsPy - Stream. - - Parameters - ---------- - data : bytes - Binary data containing ImagCDF content. - - Returns - ------- - Stream - An ObsPy Stream object with the parsed geomagnetic time series data. - - Raises - ------ - TimeseriesFactoryException - If an error occurs while parsing the ImagCDF data. - """ - # Create a temporary file to store the CDF data - with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: - tmp_file_name = tmp_file.name - tmp_file.write(data) - - try: - # Read the CDF from the temporary file - cdf = CDFReader(tmp_file_name) - stream = self._read_cdf(cdf) - # no cdf.close() method required - except Exception as e: - raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}") - finally: - # Clean up the temporary file - os.remove(tmp_file_name) - - return stream + # Removed - cdflib takes a file path as an input more efficiently than taking in byte data. + # def parse_string(self, data: str, **kwargs): + # """ + # Parse ImagCDF binary data into an ObsPy Stream. + + # This method writes the provided binary data to a temporary file, + # reads the file using `cdflib`, and converts the data into an ObsPy + # Stream. + + # Parameters + # ---------- + # data : bytes + # Binary data containing ImagCDF content. 
+ + # Returns + # ------- + # Stream + # An ObsPy Stream object with the parsed geomagnetic time series data. + + # Raises + # ------ + # TimeseriesFactoryException + # If an error occurs while parsing the ImagCDF data. + # """ + # # Create a temporary file to store the CDF data + # with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: + # tmp_file_name = tmp_file.name + # tmp_file.write(data) + # channels = kwargs.get('channels', []) + # try: + # # Read the CDF from the temporary file + # cdf = CDFReader(tmp_file_name) + # stream = self._read_cdf(cdf, channels) + # # no cdf.close() method required + # except Exception as e: + # raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}") + # finally: + # # Clean up the temporary file + # os.remove(tmp_file_name) + + # return stream def _create_global_attributes( self, timeseries: Stream, channels: List[str] @@ -674,6 +654,7 @@ class ImagCDFFactory(TimeseriesFactory): units = "Celsius" validmin = -273.15 # absolute zero validmax = 79_999 + depend_0 = "DataTimes" #can be used for nonstandard element # elif channel in [REAL_TEMPERATURES]: # units = "Celsius" # fieldnam = f"Temperature {temperature_index} {trace.stats.location}" @@ -753,7 +734,7 @@ class ImagCDFFactory(TimeseriesFactory): # Default to double precision float return CDFWriter.CDF_DOUBLE - def _read_cdf(self, cdf: CDFReader) -> Stream: + def _read_cdf(self, cdf: CDFReader, channels: Optional[List[str]]) -> Stream: """ Read CDF data into an ObsPy Stream. 
@@ -771,6 +752,37 @@ class ImagCDFFactory(TimeseriesFactory): # Extract global attributes global_attrs = cdf.globalattsget() + # Define required global attributes based on ImagCDF standards + required_global_attrs = [ + "FormatDescription", + "FormatVersion", + "Title", + "IagaCode", + "ElementsRecorded", + "PublicationLevel", + "PublicationDate", + "ObservatoryName", + "Latitude", + "Longitude", + "Elevation", + "Institution", + "StandardLevel", + "Source", + ] + + # Check for missing required global attributes per ImagCDF techincal document + missing_global_attrs = [] + for attr in required_global_attrs: + if attr not in global_attrs: + missing_global_attrs.append(attr) + + if missing_global_attrs: + error_message = ( + f"The following required global attributes are missing from the CDF: " + f"{', '.join(missing_global_attrs)}" + ) + raise TimeseriesFactoryException(error_message) + # Map global attributes to Stream-level metadata observatory = global_attrs.get("IagaCode", [""])[0] station_name = global_attrs.get("ObservatoryName", [""])[0] @@ -791,7 +803,7 @@ class ImagCDFFactory(TimeseriesFactory): # Identify all time variables time_vars = {} for var in cdf.cdf_info().zVariables: - if var.lower().endswith("times"): + if var.endswith("Times"): time_data = cdf.varget(var) unix_times = cdflib.cdfepoch.unixtime(time_data) utc_times = [UTCDateTime(t) for t in unix_times] @@ -799,18 +811,34 @@ class ImagCDFFactory(TimeseriesFactory): # Read data variables and associate them with time variables for var in cdf.cdf_info().zVariables: + # Skip time variables - if var.lower().endswith("times"): + if var.endswith("Times"): continue + # Map the variable name back to a standard channel code + # Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. + # Temperatures are named like Temperature1, Temperature2, ... 
+ # Extract channel name by removing known prefixes + if var.startswith("GeomagneticField"): + channel = var.replace("GeomagneticField", "") + elif var.startswith("Temperature"): + # Temperature variables may not map directly to a geomagnetic channel + # but to temperature sensors. We can just use the label from LABLAXIS if needed + channel = attrs.get("LABLAXIS", var) + else: + # fallback if naming doesn't match expected patterns + channel = var + + if channels and channel not in channels: continue data = cdf.varget(var) attrs = cdf.varattsget(var) # Determine DEPEND_0 (the time variable name) and validate ts_name = attrs.get("DEPEND_0") - if not ts_name: - # If no DEPEND_0, skip this variable as we cannot map times - continue + # if not ts_name: + # # If no DEPEND_0, skip this variable as we cannot map times + # continue # The ImagCDF can have DataTimes, GeomagneticVectorTimes, GeomagneticScalarTimes, TemperatureNTimes (for N > 0), etc. matched_time_key = None @@ -819,11 +847,12 @@ class ImagCDFFactory(TimeseriesFactory): matched_time_key = tkey break - if matched_time_key not in time_vars: - # If we cannot find the matching time variable, skip this variable - continue - - times = time_vars[matched_time_key] + # if matched_time_key not in time_vars: + # # If we cannot find the matching time variable, skip this variable + # continue + times = [] + if matched_time_key in time_vars: + times = time_vars[matched_time_key] # Determine delta (sample interval) if len(times) > 1: @@ -842,22 +871,33 @@ class ImagCDFFactory(TimeseriesFactory): # fallback, set delta to 60 delta = 60.0 - # Map the variable name back to a standard channel code - # Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. - # Temperatures are named like Temperature1, Temperature2, ... 
- # Extract channel name by removing known prefixes - if var.startswith("GeomagneticField"): - channel = var.replace("GeomagneticField", "") - elif var.startswith("Temperature"): - # Temperature variables may not map directly to a geomagnetic channel - # but to temperature sensors. We can just use the label from LABLAXIS if needed - channel = attrs.get("LABLAXIS", var) - else: - # fallback if naming doesn't match expected patterns - channel = var - time_attrs = cdf.varattsget(var) data_interval = time_attrs.get("DATA_INTERVAL_TYPE", [""]) + + # Required variable attributes based on ImagCDF standards + required_variable_attrs = [ + "FIELDNAM", + "UNITS", + "FILLVAL", + "VALIDMIN", + "VALIDMAX", + "DISPLAY_TYPE", + "LABLAXIS", + "DEPEND_0" + ] + # Validate presence of required variable attributes + missing_var_attrs = [] + for var_attr in required_variable_attrs: + if var_attr not in attrs: + missing_var_attrs.append(var_attr) + + if missing_var_attrs or missing_global_attrs: + error_message = ( + f"The variable '{var}' is missing required attributes: " + f"{', '.join(missing_var_attrs)}" + ) + raise TimeseriesFactoryException(error_message) + # Create a trace trace = Trace( data=data, @@ -974,7 +1014,7 @@ class ImagCDFFactory(TimeseriesFactory): base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path, filename) + return os.path.join(base_path, "etc","imagcdf", filename) return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme -- GitLab From b221ec1db777d993dead9def3dfca3f93b9d596a Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 13 Dec 2024 09:28:18 -0800 Subject: [PATCH 14/30] network encoding for maximum portability. 
code cleanup --- geomagio/imagcdf/ImagCDFFactory.py | 86 ++++++------------------------ 1 file changed, 17 insertions(+), 69 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 0c63bb70..89c1f6a6 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory): try: # Initialize the CDF writer cdf_spec = { - "Compressed": 9, # Enable compression (0-9) - "Majority": CDFWriter.ROW_MAJOR, # Data layout - gets set automatically - "Encoding": CDFWriter.HOST_ENCODING, # gets set automatically + "Compressed": 9, # Enable compression (1-9) + "Majority": CDFWriter.ROW_MAJOR, + "Encoding": CDFWriter.NETWORK_ENCODING, # XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded. "Checksum": True, # Disable checksum for faster writes (optional) "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } @@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory): for urlInterval in urlIntervals: interval_start = urlInterval["start"] interval_end = urlInterval["end"] - # Removes last data point ex: if endtime = 02:00:00, this could return 01:59:00 as last data point. 
- # if interval_start != interval_end: - # interval_end = interval_end - delta url = self._get_url( observatory=observatory, date=interval_start, @@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory): try: # Read CDF data and merge cdf = CDFReader(url_file) - # file_stream = self._read_cdf(cdf, channels) timeseries = self._read_cdf(cdf, channels) - # Attempt to select only requested channelws (redundant as read_cdf can more efficiently filter) - # selected = Stream() - # for ch in channels: - # selected += file_stream.select(channel=ch) - # timeseries += selected except Exception as e: print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) @@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory): timeseries.sort() return timeseries - # Removed - cdflib takes a file path as an input more efficiently than taking in byte data. - # def parse_string(self, data: str, **kwargs): - # """ - # Parse ImagCDF binary data into an ObsPy Stream. - - # This method writes the provided binary data to a temporary file, - # reads the file using `cdflib`, and converts the data into an ObsPy - # Stream. - - # Parameters - # ---------- - # data : bytes - # Binary data containing ImagCDF content. - - # Returns - # ------- - # Stream - # An ObsPy Stream object with the parsed geomagnetic time series data. - - # Raises - # ------ - # TimeseriesFactoryException - # If an error occurs while parsing the ImagCDF data. 
- # """ - # # Create a temporary file to store the CDF data - # with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file: - # tmp_file_name = tmp_file.name - # tmp_file.write(data) - # channels = kwargs.get('channels', []) - # try: - # # Read the CDF from the temporary file - # cdf = CDFReader(tmp_file_name) - # stream = self._read_cdf(cdf, channels) - # # no cdf.close() method required - # except Exception as e: - # raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}") - # finally: - # # Clean up the temporary file - # os.remove(tmp_file_name) - - # return stream - def _create_global_attributes( self, timeseries: Stream, channels: List[str] ) -> dict: @@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory): units = "Celsius" validmin = -273.15 # absolute zero validmax = 79_999 - depend_0 = "DataTimes" #can be used for nonstandard element + depend_0 = "DataTimes" # can be used for nonstandard element # elif channel in [REAL_TEMPERATURES]: # units = "Celsius" # fieldnam = f"Temperature {temperature_index} {trace.stats.location}" @@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory): f"{', '.join(missing_global_attrs)}" ) raise TimeseriesFactoryException(error_message) - + # Map global attributes to Stream-level metadata observatory = global_attrs.get("IagaCode", [""])[0] station_name = global_attrs.get("ObservatoryName", [""])[0] @@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory): # Read data variables and associate them with time variables for var in cdf.cdf_info().zVariables: - + # Skip time variables if var.endswith("Times"): continue - # Map the variable name back to a standard channel code - # Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc. - # Temperatures are named like Temperature1, Temperature2, ... 
- # Extract channel name by removing known prefixes + # Map the variable name back to a standard channel code by removing known prefixes + # Names are like GeomagneticFieldH, GeomagneticFieldD, Temperature1, Temperature2, ... if var.startswith("GeomagneticField"): channel = var.replace("GeomagneticField", "") - elif var.startswith("Temperature"): - # Temperature variables may not map directly to a geomagnetic channel - # but to temperature sensors. We can just use the label from LABLAXIS if needed - channel = attrs.get("LABLAXIS", var) + # elif var.startswith("Temperature"): + # # Temperature variables may not map directly to a geomagnetic channel + # # but to temperature sensors. We can just use the label from LABLAXIS if needed + # channel = attrs.get("LABLAXIS", var) else: # fallback if naming doesn't match expected patterns channel = var - if channels and channel not in channels: continue + if channels and channel not in channels: + continue data = cdf.varget(var) attrs = cdf.varattsget(var) @@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory): # continue times = [] if matched_time_key in time_vars: - times = time_vars[matched_time_key] + times = time_vars[matched_time_key] # Determine delta (sample interval) if len(times) > 1: @@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory): "VALIDMAX", "DISPLAY_TYPE", "LABLAXIS", - "DEPEND_0" + "DEPEND_0", ] # Validate presence of required variable attributes missing_var_attrs = [] @@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory): base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path, "etc","imagcdf", filename) + return os.path.join(base_path, "etc", "imagcdf", filename) return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme -- GitLab From ba081cc4dffa98e52d6d076e56071ecf1e853844 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers 
<nshavers@contractor.usgs.gov> Date: Fri, 13 Dec 2024 12:43:14 -0800 Subject: [PATCH 15/30] accept and use file path if provided via --input-file argument, else try urlTemplates path. Network as a custom global attribute --- geomagio/imagcdf/ImagCDFFactory.py | 58 ++++++++++++++++++------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 89c1f6a6..c2855045 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -103,6 +103,7 @@ class ImagCDFFactory(TimeseriesFactory): interval: DataInterval = "minute", urlTemplate="file://etc/imagcdf/{obs}_{dt}_{t}.cdf", urlInterval: int = -1, + inputFile: Optional[str] = None, ): """ Initialize the ImagCDFFactory with default parameters. @@ -114,6 +115,7 @@ class ImagCDFFactory(TimeseriesFactory): - interval: Data interval (e.g., 'minute', 'second'). - urlTemplate: Template for generating file URLs or paths. + - inputFile: An ImagCDF file to read data from. If not provided urlTemplate is assumed path for reads.
""" super().__init__( observatory=observatory, @@ -123,6 +125,7 @@ class ImagCDFFactory(TimeseriesFactory): urlTemplate=urlTemplate, urlInterval=urlInterval, ) + self.inputFile = inputFile def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data @@ -333,24 +336,26 @@ class ImagCDFFactory(TimeseriesFactory): ) for urlInterval in urlIntervals: - url = self._get_url( - observatory=observatory, - date=urlInterval["start"], - type=type, - interval=interval, - channels=channels, - ) - if url == "stdout": - continue # stdout is not a valid input source - if not url.startswith("file://"): - raise TimeseriesFactoryException( - "Only file urls are supported for reading ImagCDF" + if self.inputFile is None: + url = self._get_url( + observatory=observatory, + date=urlInterval["start"], + type=type, + interval=interval, + channels=channels, ) - url_file = Util.get_file_from_url(url, createParentDirectory=False) - if not os.path.isfile(url_file): - # If file doesn't exist, skip - continue - + if url == "stdout": + continue # stdout is not a valid input source + if not url.startswith("file://"): + raise TimeseriesFactoryException( + "Only file urls are supported for reading ImagCDF" + ) + url_file = Util.get_file_from_url(url, createParentDirectory=False) + if not os.path.isfile(url_file): + # If file doesn't exist, skip + continue + else: + url_file = self.inputFile try: # Read CDF data and merge cdf = CDFReader(url_file) @@ -406,6 +411,7 @@ class ImagCDFFactory(TimeseriesFactory): getattr(stats, "station_name", None) or self.observatory or "" ) station = getattr(stats, "station", None) or "" + network = getattr(stats, "network", None) or "" institution = getattr(stats, "agency_name", None) or "" latitude = getattr(stats, "geodetic_latitude", None) or 0.0 longitude = getattr(stats, "geodetic_longitude", None) or 0.0 @@ -450,10 +456,12 @@ class ImagCDFFactory(TimeseriesFactory): # 'UniqueIdentifier': {0: ''}, # 
'ParentIdentifiers': {0: ''}, # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov - "SensorSamplingRate": {0: sensor_sampling_rate}, # Optional - "DataType": {0: data_type}, # Optional - "Comments": {0: comments}, # Optional - "DeclinationBase": {0: declination_base}, # Optional + # Custom Attributes Below + "SensorSamplingRate": {0: sensor_sampling_rate}, + "DataType": {0: data_type}, + "Comments": {0: comments}, + "DeclinationBase": {0: declination_base}, + "Network": {0: network}, } return global_attrs @@ -639,7 +647,8 @@ class ImagCDFFactory(TimeseriesFactory): "DEPEND_0": depend_0, "DISPLAY_TYPE": "time_series", "LABLAXIS": channel, - "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, # optional + # custom ImagCDF variable attributes below + "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, } return var_attrs @@ -745,9 +754,10 @@ class ImagCDFFactory(TimeseriesFactory): sensor_orientation = global_attrs.get("VectorSensOrient", [""])[0] data_type = global_attrs.get("DataType", ["variation"])[0] publication_level = global_attrs.get("PublicationLevel", ["1"])[0] - comments = global_attrs.get("Comments", [""]) + comments = global_attrs.get("Comments", [""]) #keep comments as an array terms_of_use = global_attrs.get("TermsOfUse", [""])[0] declination_base = global_attrs.get("DeclinationBase", [0.0])[0] + network = global_attrs.get("Network", [""])[0] # Identify all time variables time_vars = {} @@ -862,10 +872,12 @@ class ImagCDFFactory(TimeseriesFactory): "station_name": station_name, "agency_name": institution, "conditions_of_use": terms_of_use, + # data points not in a traditional ImagCDF "sensor_sampling_rate": sensor_sampling_rate, "data_interval_type": data_interval, "declination_base": declination_base, "filter_comments": comments, + "network": network }, ) stream += trace -- GitLab From 44795c5a7ba7786ab910f8307eef24deb3f67440 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 16 Dec 2024 13:19:28 -0800 
Subject: [PATCH 16/30] Better mapping of publication level to imag or imf. Compression and Fill Value constants. Code simplified. --- geomagio/Controller.py | 1 + geomagio/imagcdf/IMCDFPublication.py | 108 +++++++++ geomagio/imagcdf/ImagCDFFactory.py | 313 ++++++++------------------- 3 files changed, 205 insertions(+), 217 deletions(-) create mode 100644 geomagio/imagcdf/IMCDFPublication.py diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 4e212579..826683f1 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -501,6 +501,7 @@ def get_input_factory(args): input_factory_args["observatory"] = args.observatory input_factory_args["type"] = args.type input_type = args.input + input_factory_args["output"] = args.output # stream/url arguments if args.input_file is not None: if input_type in ["netcdf", "miniseed", "imagcdf"]: diff --git a/geomagio/imagcdf/IMCDFPublication.py b/geomagio/imagcdf/IMCDFPublication.py new file mode 100644 index 00000000..36f66be3 --- /dev/null +++ b/geomagio/imagcdf/IMCDFPublication.py @@ -0,0 +1,108 @@ +from typing import Optional, Union + + +class IMCDFPublicationLevel: + """Handles publication levels and mapping between data types and levels. + + The ImagCDF format uses publication levels to describe the processing + level of the data. This class maps data types (e.g., 'variation', 'definitive') + to their corresponding publication levels as defined in the ImagCDF documentation. + + Publication Levels: + 1: Raw data with no processing. + 2: Edited data with preliminary baselines. + 3: Data suitable for initial bulletins or quasi-definitive publication. + 4: Definitive data with no further changes expected. 
+ + Reference: + - ImagCDF Technical Documentation: Attributes that Uniquely Identify the Data + """ + + class PublicationLevel: + LEVEL_1 = "1" + LEVEL_2 = "2" + LEVEL_3 = "3" + LEVEL_4 = "4" + + TYPE_TO_LEVEL = { + "none": PublicationLevel.LEVEL_1, + "variation": PublicationLevel.LEVEL_1, + "reported": PublicationLevel.LEVEL_1, + "provisional": PublicationLevel.LEVEL_2, + "adjusted": PublicationLevel.LEVEL_2, + "quasi-definitive": PublicationLevel.LEVEL_3, + "quasidefinitive": PublicationLevel.LEVEL_3, + "definitive": PublicationLevel.LEVEL_4, + } + + def __init__(self, value: Union[str, int]): + """Initialize with a data type or publication level to determine the publication level. + + Args: + value (Union[str, int]): The data type (str) or publication level (int). + + Raises: + ValueError: If the value is not provided or is unsupported. + """ + if isinstance(value, int) or (isinstance(value, str) and value.isdigit()): + self.level = str(value) + if self.level not in self.PublicationLevel.__dict__.values(): + raise ValueError(f"Unsupported level: {value}") + elif isinstance(value, str): + self.level = self.TYPE_TO_LEVEL.get(value.lower()) + if not self.level: + raise ValueError(f"Unsupported data_type: {value}") + else: + raise ValueError( + "value must be a string or an integer representing data_type or level." + ) + + def get_level(self) -> str: + """Return the publication level as a string. + + Returns: + str: The publication level. + """ + return self.level + + def get_imf_data_type(self, long_form: Optional[bool] = True) -> str: + """Get the IMF data type based on the publication level. + + Args: + long_form (bool): If True, return the full description; otherwise, return the abbreviated form. + + Returns: + str: The IMF data type. 
+ + Reference: + https://tech-man.intermagnet.org/latest/appendices/dataformats.html#intermagnet-satellite-transmission-format-imfv2-83 + """ + if self.level == self.PublicationLevel.LEVEL_4: + return "Definitive" if long_form else "D" + elif self.level == self.PublicationLevel.LEVEL_3: + return "Quasi-definitive" if long_form else "Q" + elif self.level == self.PublicationLevel.LEVEL_2: + return "Adjusted" if long_form else "A" + else: + return "Reported" if long_form else "R" + + def get_iaga2002_data_type(self, long_form: Optional[bool] = True) -> str: + """Get the IAGA-2002 data type based on the publication level. + + Args: + long_form (bool): If True, return the full description; otherwise, return the abbreviated form. + + Returns: + str: The IAGA-2002 data type. + + Reference: + https://tech-man.intermagnet.org/latest/appendices/dataformats.html#iaga2002-intermagnet-exchange-format-spreadsheet-compatible + """ + if self.level == self.PublicationLevel.LEVEL_4: + return "Definitive" if long_form else "D" + elif self.level == self.PublicationLevel.LEVEL_3: + return "Quasi-definitive" if long_form else "Q" + elif self.level == self.PublicationLevel.LEVEL_2: + return "Provisional" if long_form else "P" + else: + return "Variation" if long_form else "V" diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index c2855045..de58ced1 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -12,77 +12,23 @@ References: - CDFLib Docs: [https://cdflib.readthedocs.io/en/latest/#, https://cdflib.readthedocs.io/en/stable/modules/index.html] """ -from __future__ import absolute_import, print_function -from io import BytesIO import os import sys -from typing import List, Optional, Union -from datetime import datetime, timezone, tzinfo +from typing import List, Optional +from datetime import datetime, timezone import numpy as np from obspy import Stream, Trace, UTCDateTime -from sqlalchemy import true from 
geomagio.TimeseriesFactory import TimeseriesFactory from geomagio.api.ws.Element import TEMPERATURE_ELEMENTS_ID - from ..geomag_types import DataInterval, DataType from ..TimeseriesFactoryException import TimeseriesFactoryException -from .. import TimeseriesUtility from .. import Util - -import cdflib +from cdflib import cdfepoch from cdflib.cdfwrite import CDF as CDFWriter from cdflib.cdfread import CDF as CDFReader import tempfile - - -class IMCDFPublicationLevel: - """Handles publication levels and mapping between data types and levels. - - The ImagCDF format uses publication levels to describe the processing - level of the data. This class maps data types (e.g., 'variation', 'definitive') - to their corresponding publication levels as defined in the ImagCDF documentation. - - Publication Levels: - 1: Raw data with no processing. - 2: Edited data with preliminary baselines. - 3: Data suitable for initial bulletins or quasi-definitive publication. - 4: Definitive data with no further changes expected. 
- - Reference: - - ImagCDF Technical Documentation: Attributes that Uniquely Identify the Data - """ - - class PublicationLevel: - LEVEL_1 = "1" - LEVEL_2 = "2" - LEVEL_3 = "3" - LEVEL_4 = "4" - - TYPE_TO_LEVEL = { - "none": PublicationLevel.LEVEL_1, - "variation": PublicationLevel.LEVEL_1, - "reported": PublicationLevel.LEVEL_1, - "provisional": PublicationLevel.LEVEL_2, - "adjusted": PublicationLevel.LEVEL_2, - "quasi-definitive": PublicationLevel.LEVEL_3, - "quasidefinitive": PublicationLevel.LEVEL_3, - "definitive": PublicationLevel.LEVEL_4, - } - - def __init__(self, data_type: Optional[str] = None): - """Initialize with a data type to determine the publication level.""" - if data_type: - self.level = self.TYPE_TO_LEVEL.get(data_type.lower()) - else: - raise ValueError("data_type must be provided.") - - if not self.level: - raise ValueError(f"Unsupported data_type: {data_type}") - - def to_string(self) -> str: - """Return the publication level as a string.""" - return self.level +from .IMCDFPublication import IMCDFPublicationLevel class ImagCDFFactory(TimeseriesFactory): @@ -93,7 +39,11 @@ class ImagCDFFactory(TimeseriesFactory): """ isUniqueTimes = True # used to determine depend_0 and CDF Time Variable Name - NONSTANDARD_ELEMENTS = TEMPERATURE_ELEMENTS_ID + NONSTANDARD_ELEMENTS = ( + TEMPERATURE_ELEMENTS_ID # for using non standard elements such as UK1 + ) + __FILL_VALUE = 99_999 + __MAX_COMPRESSION = 9 def __init__( self, @@ -103,7 +53,8 @@ class ImagCDFFactory(TimeseriesFactory): interval: DataInterval = "minute", urlTemplate="file://etc/imagcdf/{obs}_{dt}_{t}.cdf", urlInterval: int = -1, - inputFile: Optional[str] = None, + inputFile: Optional[str] = None, + output: Optional[str] = "iaga2002", ): """ Initialize the ImagCDFFactory with default parameters. @@ -115,7 +66,7 @@ class ImagCDFFactory(TimeseriesFactory): - interval: Data interval (e.g., 'minute', 'second'). - urlTemplate: Template for generating file URLs or paths. 
- urlInterval: Interval size for splitting data into multiple files. - - inputFile: An ImagCDF file to read data from. If not provided urlTemplate is assumed path for reads. + - inputFile: An ImagCDF file to read data from. If not provided urlTemplate is assumed path for reads. """ super().__init__( observatory=observatory, @@ -126,6 +77,7 @@ class ImagCDFFactory(TimeseriesFactory): urlInterval=urlInterval, ) self.inputFile = inputFile + self.output = output def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data @@ -135,10 +87,10 @@ class ImagCDFFactory(TimeseriesFactory): try: # Initialize the CDF writer cdf_spec = { - "Compressed": 9, # Enable compression (1-9) - "Majority": CDFWriter.ROW_MAJOR, + "Compressed": self.__MAX_COMPRESSION, # Max Gzip compression (1-9). Almost always the GZIP is the best choice for all data. (CDF User Guide p.23 1.4.3 ) + "Majority": CDFWriter.ROW_MAJOR, # The first dimension changes the slowest (CDF User Guide p.45 2.3.15 Majority) "Encoding": CDFWriter.NETWORK_ENCODING, # XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded. - "Checksum": True, # Disable checksum for faster writes (optional) + "Checksum": True, # True for Data Integrity. False for faster writes (optional) "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } @@ -162,11 +114,13 @@ class ImagCDFFactory(TimeseriesFactory): "Var_Type": "zVariable", "Dim_Sizes": [], "Sparse": "no_sparse", # no_sparse because there should not be time gaps. (Time stamps must represent a regular time series with no missing values in the series) - "Compress": 9, + "Compress": self.__MAX_COMPRESSION, "Pad": None, } # Define time variable attributes - var_attrs = self._create_time_var_attrs(ts_name) + var_attrs = ( + {} + ) # ImagCDF does not require or recommend any 'time' variable attributes. 
# Write time variable cdf_writer.write_var(var_spec, var_attrs, ts_data) @@ -179,23 +133,20 @@ class ImagCDFFactory(TimeseriesFactory): # if channel in REAL_TEMPERATURE: # temperature_index += 1 # MUST INCREMENT INDEX BEFORE USING # var_name = f"Temperature{temperature_index}" - data_type = self._get_cdf_data_type(trace) - num_elements = 1 - if data_type in [ - CDFWriter.CDF_CHAR, - CDFWriter.CDF_UCHAR, - ]: # Handle string types - num_elements = len(trace.data[0]) if len(trace.data) > 0 else 1 - + data_type = ( + CDFWriter.CDF_INT4 + if trace.data.dtype in [np.int32, np.int64] + else CDFWriter.CDF_DOUBLE + ) var_spec = { "Variable": var_name, "Data_Type": data_type, - "Num_Elements": num_elements, + "Num_Elements": 1, "Rec_Vary": True, "Var_Type": "zVariable", "Dim_Sizes": [], "Sparse": "no_sparse", - "Compress": 9, + "Compress": self.__MAX_COMPRESSION, "Pad": None, } var_attrs = self._create_var_attrs( @@ -250,7 +201,6 @@ class ImagCDFFactory(TimeseriesFactory): # Extract metadata from the first trace stats = timeseries[0].stats - delta = stats.delta # Sample rate observatory = stats.station starttime = starttime or stats.starttime endtime = endtime or stats.endtime @@ -291,18 +241,16 @@ class ImagCDFFactory(TimeseriesFactory): ) # Check if the file already exists to merge data if os.path.isfile(url_file): - os.remove(url_file) - print(f"Naming conflict. Deleting exisiting file '{url_file}'.") - # raise TimeseriesFactoryException( - # f"Error: File '{url_file}' already exists." - # ) + raise TimeseriesFactoryException( + f"Error: File '{url_file}' already exists." 
+ ) # Pad the data to ensure it fits the interval url_data.trim( starttime=interval_start, endtime=interval_end, nearest_sample=True, pad=True, - fill_value=99_999, # FILLVAL + fill_value=self.__FILL_VALUE, # FILLVAL ) # Write the data to the CDF file @@ -336,7 +284,7 @@ class ImagCDFFactory(TimeseriesFactory): ) for urlInterval in urlIntervals: - if self.inputFile is None: + if self.inputFile is None: url = self._get_url( observatory=observatory, date=urlInterval["start"], @@ -354,7 +302,7 @@ class ImagCDFFactory(TimeseriesFactory): if not os.path.isfile(url_file): # If file doesn't exist, skip continue - else: + else: url_file = self.inputFile try: # Read CDF data and merge @@ -364,13 +312,13 @@ class ImagCDFFactory(TimeseriesFactory): print(f"Error reading CDF file '{url_file}': {e}", file=sys.stderr) # After reading all intervals, merge and trim - timeseries.merge(fill_value=99_999) + timeseries.merge(fill_value=self.__FILL_VALUE) timeseries.trim( starttime=starttime, endtime=endtime, nearest_sample=True, pad=True, - fill_value=99_999, + fill_value=self.__FILL_VALUE, ) # If requested, add empty channels not present in data @@ -415,7 +363,7 @@ class ImagCDFFactory(TimeseriesFactory): institution = getattr(stats, "agency_name", None) or "" latitude = getattr(stats, "geodetic_latitude", None) or 0.0 longitude = getattr(stats, "geodetic_longitude", None) or 0.0 - elevation = getattr(stats, "elevation", None) or 99_999.0 + elevation = getattr(stats, "elevation", None) or self.__FILL_VALUE conditions_of_use = getattr(stats, "conditions_of_use", None) or "" vector_orientation = getattr(stats, "sensor_orientation", None) or "" data_interval_type = getattr(stats, "data_interval_type", None) or self.interval @@ -423,7 +371,7 @@ class ImagCDFFactory(TimeseriesFactory): sensor_sampling_rate = getattr(stats, "sensor_sampling_rate", None) or 0.0 comments = getattr(stats, "filter_comments", None) or [""] declination_base = getattr(stats, "declination_base", None) or 0.0 - 
publication_level = IMCDFPublicationLevel(data_type=self.type).to_string() + publication_level = IMCDFPublicationLevel(data_type=self.type).get_level() global_attrs = { "FormatDescription": {0: "INTERMAGNET CDF Format"}, "FormatVersion": {0: "1.2"}, @@ -433,7 +381,7 @@ class ImagCDFFactory(TimeseriesFactory): "PublicationLevel": {0: publication_level}, "PublicationDate": { 0: [ - cdflib.cdfepoch.timestamp_to_tt2000( + cdfepoch.timestamp_to_tt2000( datetime.timestamp(datetime.now(timezone.utc)) ), "cdf_time_tt2000", @@ -444,24 +392,23 @@ class ImagCDFFactory(TimeseriesFactory): "Longitude": {0: np.array([longitude], dtype=np.float64)}, "Elevation": {0: np.array([elevation], dtype=np.float64)}, "Institution": {0: institution}, - "VectorSensOrient": { - 0: vector_orientation - }, # remove F - because its a calculation, not an element? - "StandardLevel": {0: "None"}, # Set to 'None' - # Temporarily Omit 'StandardName', 'StandardVersion', 'PartialStandDesc' + "VectorSensOrient": {0: vector_orientation}, + "StandardLevel": {0: "None"}, + # Can Omit 'StandardName', 'StandardVersion', 'PartialStandDesc' when StandardLevel=None "Source": { 0: "institute" }, # "institute" - if the named institution provided the data, “INTERMAGNET†- if the data file has been created by INTERMAGNET from another data source, “WDC†- if the World Data Centre has created the file from another data source "TermsOfUse": {0: conditions_of_use}, + # [[Optional Attributes]] # 'UniqueIdentifier': {0: ''}, # 'ParentIdentifiers': {0: ''}, # 'ReferenceLinks': {0: ''}, #links to /ws, plots, USGS.gov - # Custom Attributes Below - "SensorSamplingRate": {0: sensor_sampling_rate}, - "DataType": {0: data_type}, - "Comments": {0: comments}, - "DeclinationBase": {0: declination_base}, - "Network": {0: network}, + # [[Custom Attributes]] + "SensorSamplingRate": {0: sensor_sampling_rate}, + "DataType": {0: data_type}, + "Comments": {0: comments}, + "DeclinationBase": {0: declination_base}, + "Network": {0: network}, 
} return global_attrs @@ -481,7 +428,7 @@ class ImagCDFFactory(TimeseriesFactory): for i in range(trace.stats.npts) ] # Convert timestamps to TT2000 format required by CDF - tt2000_times = cdflib.cdfepoch.timestamp_to_tt2000( + tt2000_times = cdfepoch.timestamp_to_tt2000( [time.timestamp() for time in times] ) @@ -547,49 +494,6 @@ class ImagCDFFactory(TimeseriesFactory): else time_vars ) - def _create_var_spec( - self, - var_name: str, - data_type: str, - num_elements: int, - var_type: str, - dim_sizes: List[int], - sparse: bool, - compress: int, - pad: Optional[Union[str, np.ndarray]], - ) -> dict: - """ - Create a variable specification dictionary for cdflib. - - This is used to define the properties of a variable when writing it to - the CDF file. - - Parameters: - - var_name: Name of the variable. - - data_type: CDF data type. - - num_elements: Number of elements per record. - - var_type: Variable type ('zVariable' or 'rVariable'). - - dim_sizes: Dimensions of the variable (empty list for 0D). - - sparse: Whether the variable uses sparse records. - - compress: Compression level. - - pad: Pad value for sparse records. 
- - Reference: - - CDF User's Guide: Variable Specification - """ - var_spec = { - "Variable": var_name, - "Data_Type": data_type, - "Num_Elements": num_elements, - "Rec_Vary": True, - "Var_Type": var_type, - "Dim_Sizes": dim_sizes, - "Sparse": "no_sparse" if not sparse else "pad_sparse", - "Compress": compress, - "Pad": pad, - } - return var_spec - def _create_var_attrs( self, trace: Trace, @@ -641,57 +545,17 @@ class ImagCDFFactory(TimeseriesFactory): var_attrs = { "FIELDNAM": fieldnam, "UNITS": units, - "FILLVAL": 99999.0, + "FILLVAL": self.__FILL_VALUE, "VALIDMIN": validmin, "VALIDMAX": validmax, "DEPEND_0": depend_0, "DISPLAY_TYPE": "time_series", "LABLAXIS": channel, - # custom ImagCDF variable attributes below + # [[Custom Attributes]] "DATA_INTERVAL_TYPE": trace.stats.data_interval_type, } return var_attrs - def _create_time_var_attrs(self, ts_name: str) -> dict: - """ - Create a dictionary of time variable attributes. - - These attributes provide metadata for time variables. - Note: None of these attributes are required for the time stamp variables. - Reference: - - ImagCDF Technical Documentation: ImagCDF Data - """ - # var_attrs = { - # 'UNITS': 'TT2000', - # 'DISPLAY_TYPE': 'time_series', - # 'LABLAXIS': 'Time', - # } - # return var_attrs - return {} - - def _get_cdf_data_type(self, trace: Trace) -> int: - """ - Map ObsPy trace data type to CDF data type. - - Determines the appropriate CDF data type based on the NumPy data type - of the trace data. - - Returns: - - CDF_DOUBLE (45) for floating-point data. - - CDF_INT4 (41) for integer data. - - Reference: - - See CDF for more data types - """ - - if trace.data.dtype in [np.float32, np.float64]: - return CDFWriter.CDF_DOUBLE - elif trace.data.dtype in [np.int32, np.int64]: - return CDFWriter.CDF_INT4 - else: - # Default to double precision float - return CDFWriter.CDF_DOUBLE - def _read_cdf(self, cdf: CDFReader, channels: Optional[List[str]]) -> Stream: """ Read CDF data into an ObsPy Stream. 
@@ -747,24 +611,32 @@ class ImagCDFFactory(TimeseriesFactory): institution = global_attrs.get("Institution", [""])[0] latitude = global_attrs.get("Latitude", [0.0])[0] longitude = global_attrs.get("Longitude", [0.0])[0] - elevation = global_attrs.get("Elevation", [99_999.0])[ + elevation = global_attrs.get("Elevation", [self.__FILL_VALUE])[ 0 ] # default to 99_999 per technical documents. - sensor_sampling_rate = global_attrs.get("SensorSamplingRate", [0.0])[0] sensor_orientation = global_attrs.get("VectorSensOrient", [""])[0] - data_type = global_attrs.get("DataType", ["variation"])[0] - publication_level = global_attrs.get("PublicationLevel", ["1"])[0] - comments = global_attrs.get("Comments", [""]) #keep comments as an array + publication_level = global_attrs.get("PublicationLevel", ["0"])[0] + publication = IMCDFPublicationLevel(publication_level) terms_of_use = global_attrs.get("TermsOfUse", [""])[0] - declination_base = global_attrs.get("DeclinationBase", [0.0])[0] - network = global_attrs.get("Network", [""])[0] + + # Not expected in ImagCDF + data_type = ( + publication.get_imf_data_type() + if "imf" in self.output + else publication.get_iaga2002_data_type() + ) + + sensor_sampling_rate = global_attrs.get("SensorSamplingRate", [None])[0] + comments = global_attrs.get("Comments", [None]) # keep comments as an array + declination_base = global_attrs.get("DeclinationBase", [None])[0] + network = global_attrs.get("Network", [None])[0] # Identify all time variables time_vars = {} for var in cdf.cdf_info().zVariables: if var.endswith("Times"): time_data = cdf.varget(var) - unix_times = cdflib.cdfepoch.unixtime(time_data) + unix_times = cdfepoch.unixtime(time_data) utc_times = [UTCDateTime(t) for t in unix_times] time_vars[var] = utc_times @@ -830,7 +702,7 @@ class ImagCDFFactory(TimeseriesFactory): delta = 60.0 time_attrs = cdf.varattsget(var) - data_interval = time_attrs.get("DATA_INTERVAL_TYPE", [""]) + data_interval = time_attrs.get("DATA_INTERVAL_TYPE", None) 
# Required variable attributes based on ImagCDF standards required_variable_attrs = [ @@ -856,29 +728,36 @@ class ImagCDFFactory(TimeseriesFactory): ) raise TimeseriesFactoryException(error_message) + header = { + "station": observatory, + "channel": channel, + "starttime": times[0], + "delta": delta, + "geodetic_latitude": latitude, + "geodetic_longitude": longitude, + "elevation": elevation, + "sensor_orientation": "".join(sensor_orientation), + "data_type": data_type, + "station_name": station_name, + "agency_name": institution, + "conditions_of_use": terms_of_use, + } + # data points not expected in ImagCDF specs + if sensor_sampling_rate is not None: + header.update({"sensor_sampling_rate": sensor_sampling_rate}) + if data_interval is not None: + header.update({"data_interval_type": data_interval}) + if declination_base is not None: + header.update({"declination_base": declination_base}) + if comments is not None: + header.update({"filter_comments": comments}) + if network is not None: + header.update({"network": network}) + # Create a trace trace = Trace( data=data, - header={ - "station": observatory, - "channel": channel, - "starttime": times[0], - "delta": delta, - "geodetic_latitude": latitude, - "geodetic_longitude": longitude, - "elevation": elevation, - "sensor_orientation": "".join(sensor_orientation), - "data_type": data_type, - "station_name": station_name, - "agency_name": institution, - "conditions_of_use": terms_of_use, - # data points not in a traditional ImagCDF - "sensor_sampling_rate": sensor_sampling_rate, - "data_interval_type": data_interval, - "declination_base": declination_base, - "filter_comments": comments, - "network": network - }, + header=header, ) stream += trace @@ -915,7 +794,7 @@ class ImagCDFFactory(TimeseriesFactory): - ImagCDF Technical Documentation: ImagCDF File Names """ # Get the publication level for the type - publication_level = IMCDFPublicationLevel(data_type=type).to_string() + publication_level = 
IMCDFPublicationLevel(data_type=type).get_level() # Format of Date/Time Portion of Filename based on interval see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#example-data-file:~:text=Format%20of%20Date,%EF%83%81 if interval == "year": -- GitLab From 3af41f5ce83a46b629d69c1f1d0fb1b0d25254d9 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 16 Dec 2024 15:21:29 -0800 Subject: [PATCH 17/30] final tweaks --- geomagio/Controller.py | 1 + geomagio/imagcdf/ImagCDFFactory.py | 32 +++++++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 826683f1..199befdd 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -573,6 +573,7 @@ def get_input_factory(args): elif input_type == "covjson": input_factory = covjson.CovJSONFactory(**input_factory_args) elif input_type == "imagcdf": + input_factory_args["output"] = args.output input_factory = ImagCDFFactory(**input_factory_args) # wrap stream if input_stream is not None: diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index de58ced1..f25d0c27 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -77,7 +77,9 @@ class ImagCDFFactory(TimeseriesFactory): urlInterval=urlInterval, ) self.inputFile = inputFile - self.output = output + self.output = ( + output if output in ["iaga2002", "imfjson"] else "iaga2002" + ) # determines mapping of publication level to data type def write_file(self, fh, timeseries: Stream, channels: List[str]): # Create a temporary file to write the CDF data @@ -87,9 +89,9 @@ class ImagCDFFactory(TimeseriesFactory): try: # Initialize the CDF writer cdf_spec = { - "Compressed": self.__MAX_COMPRESSION, # Max Gzip compression (1-9). Almost always the GZIP is the best choice for all data. 
(CDF User Guide p.23 1.4.3 ) + "Compressed": self.__MAX_COMPRESSION, # Max Gzip compression (1-9). Almost always the GZIP is the best choice for all data. (CDF User Guide p.23 1.4.3) "Majority": CDFWriter.ROW_MAJOR, # The first dimension changes the slowest (CDF User Guide p.45 2.3.15 Majority) - "Encoding": CDFWriter.NETWORK_ENCODING, # XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded. + "Encoding": CDFWriter.NETWORK_ENCODING, # XDR Encoding -Portable between two or more different types of computers. "Checksum": True, # True for Data Integrity. False for faster writes (optional) "rDim_sizes": [], # Applicable only if using rVariables - CDF protocol recommends only using zVariables. } @@ -239,7 +241,7 @@ class ImagCDFFactory(TimeseriesFactory): starttime=interval_start, endtime=interval_end, ) - # Check if the file already exists to merge data + # Check if the file already exists if os.path.isfile(url_file): raise TimeseriesFactoryException( f"Error: File '{url_file}' already exists." 
@@ -250,7 +252,7 @@ class ImagCDFFactory(TimeseriesFactory): endtime=interval_end, nearest_sample=True, pad=True, - fill_value=self.__FILL_VALUE, # FILLVAL + fill_value=self.__FILL_VALUE, ) # Write the data to the CDF file @@ -366,12 +368,11 @@ class ImagCDFFactory(TimeseriesFactory): elevation = getattr(stats, "elevation", None) or self.__FILL_VALUE conditions_of_use = getattr(stats, "conditions_of_use", None) or "" vector_orientation = getattr(stats, "sensor_orientation", None) or "" - data_interval_type = getattr(stats, "data_interval_type", None) or self.interval data_type = getattr(stats, "data_type", None) or "variation" sensor_sampling_rate = getattr(stats, "sensor_sampling_rate", None) or 0.0 comments = getattr(stats, "filter_comments", None) or [""] declination_base = getattr(stats, "declination_base", None) or 0.0 - publication_level = IMCDFPublicationLevel(data_type=self.type).get_level() + publication_level = IMCDFPublicationLevel(value=self.type).get_level() global_attrs = { "FormatDescription": {0: "INTERMAGNET CDF Format"}, "FormatVersion": {0: "1.2"}, @@ -627,7 +628,7 @@ class ImagCDFFactory(TimeseriesFactory): ) sensor_sampling_rate = global_attrs.get("SensorSamplingRate", [None])[0] - comments = global_attrs.get("Comments", [None]) # keep comments as an array + comments = global_attrs.get("Comments", []) # keep comments as an array declination_base = global_attrs.get("DeclinationBase", [None])[0] network = global_attrs.get("Network", [None])[0] @@ -666,9 +667,6 @@ class ImagCDFFactory(TimeseriesFactory): # Determine DEPEND_0 (the time variable name) and validate ts_name = attrs.get("DEPEND_0") - # if not ts_name: - # # If no DEPEND_0, skip this variable as we cannot map times - # continue # The ImagCDF can have DataTimes, GeomagneticVectorTimes, GeomagneticScalarTimes, TemperatureNTimes (for N > 0), etc. 
matched_time_key = None @@ -677,9 +675,6 @@ class ImagCDFFactory(TimeseriesFactory): matched_time_key = tkey break - # if matched_time_key not in time_vars: - # # If we cannot find the matching time variable, skip this variable - # continue times = [] if matched_time_key in time_vars: times = time_vars[matched_time_key] @@ -749,7 +744,7 @@ class ImagCDFFactory(TimeseriesFactory): header.update({"data_interval_type": data_interval}) if declination_base is not None: header.update({"declination_base": declination_base}) - if comments is not None: + if len(comments) > 0: header.update({"filter_comments": comments}) if network is not None: header.update({"network": network}) @@ -775,8 +770,7 @@ class ImagCDFFactory(TimeseriesFactory): Generate the file URL specific to ImagCDF conventions. This method constructs the filename based on the ImagCDF naming - conventions, which include the observatory code, date-time formatted - according to the data interval, and the publication level. + conventions. 
[iaga-code]_[date-time]_[publication-level].cdf @@ -794,9 +788,8 @@ class ImagCDFFactory(TimeseriesFactory): - ImagCDF Technical Documentation: ImagCDF File Names """ # Get the publication level for the type - publication_level = IMCDFPublicationLevel(data_type=type).get_level() + publication_level = IMCDFPublicationLevel(value=type).get_level() - # Format of Date/Time Portion of Filename based on interval see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#example-data-file:~:text=Format%20of%20Date,%EF%83%81 if interval == "year": date_format = date.strftime("%Y") elif interval == "month": @@ -814,7 +807,6 @@ class ImagCDFFactory(TimeseriesFactory): f"Unsupported interval: {interval}" ) # tenhertz currently not supported - # Filename following ImagCDF convention, see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdf-file-names filename = f"{observatory.lower()}_{date_format}_{publication_level}.cdf" # If the urlTemplate explicitly specifies 'stdout', return 'stdout' -- GitLab From 2a6ccab6c4f94a5180db90a39798880537b77a34 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 19 Dec 2024 09:21:11 -0800 Subject: [PATCH 18/30] ImagCDF Factory moved into Stream compatible block --- geomagio/Controller.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 199befdd..a5777a2f 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -636,8 +636,6 @@ def get_output_factory(args): ) elif output_type == "plot": output_factory = PlotTimeseriesFactory() - elif output_type == "imagcdf": - output_factory = ImagCDFFactory(**output_factory_args) else: # stream compatible factories if output_type == "binlog": @@ -668,6 +666,8 @@ def get_output_factory(args): ) elif output_type == "xml": output_factory = xml.XMLFactory(**output_factory_args) + elif output_type == "imagcdf": + output_factory = 
ImagCDFFactory(**output_factory_args) # wrap stream if output_stream is not None: output_factory = StreamTimeseriesFactory( -- GitLab From b4123eb149105862aeb6867866d7497a8164b402 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 19 Dec 2024 09:45:43 -0800 Subject: [PATCH 19/30] remove /etc/imagcdf path from urlTemplate, urlInterval removed from constructor --- geomagio/imagcdf/ImagCDFFactory.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index f25d0c27..fda30542 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -51,8 +51,7 @@ class ImagCDFFactory(TimeseriesFactory): channels: List[str] = ("H", "D", "Z", "F"), type: DataType = "variation", interval: DataInterval = "minute", - urlTemplate="file://etc/imagcdf/{obs}_{dt}_{t}.cdf", - urlInterval: int = -1, + urlTemplate="file://{obs}_{dt}_{t}.cdf", inputFile: Optional[str] = None, output: Optional[str] = "iaga2002", ): @@ -74,7 +73,6 @@ class ImagCDFFactory(TimeseriesFactory): type=type, interval=interval, urlTemplate=urlTemplate, - urlInterval=urlInterval, ) self.inputFile = inputFile self.output = ( @@ -419,9 +417,10 @@ class ImagCDFFactory(TimeseriesFactory): nonstandard_times = None temperature_times = {} temperature_index = 1 - + channels = [] for trace in timeseries: channel = trace.stats.channel + channels.append(channel) times = [ (trace.stats.starttime + trace.stats.delta * i).datetime.replace( tzinfo=timezone.utc @@ -479,8 +478,8 @@ class ImagCDFFactory(TimeseriesFactory): last_times = [] self.isUniqueTimes = ( - len(time_vars) == 1 - ) # (true if is a single time stamp variable in the file) + len(channels) == 1 + ) # (If a single channel, then use specific naming i.e GeomagneticVectorTimes) for index, times in enumerate(time_vars.values()): if index > 0: self.isUniqueTimes = not np.array_equal(last_times, times) @@ -845,7 
+844,7 @@ class ImagCDFFactory(TimeseriesFactory): base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path, "etc", "imagcdf", filename) + return os.path.join(base_path,filename) return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme -- GitLab From 8a9da540d0a47cf0f9413bbca854feeed48e3a39 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 19 Dec 2024 11:20:00 -0800 Subject: [PATCH 20/30] necessary tweaks: urlInterval brought back, remove need for cdf extension on input-file --- geomagio/Controller.py | 6 +++--- geomagio/imagcdf/ImagCDFFactory.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index a5777a2f..c79cdd16 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -7,8 +7,6 @@ from typing import List, Optional, Tuple, Union from obspy.core import Stream, UTCDateTime -from geomagio.imagcdf.ImagCDFFactory import ImagCDFFactory - from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm from .DerivedTimeseriesFactory import DerivedTimeseriesFactory from .PlotTimeseriesFactory import PlotTimeseriesFactory @@ -504,8 +502,10 @@ def get_input_factory(args): input_factory_args["output"] = args.output # stream/url arguments if args.input_file is not None: - if input_type in ["netcdf", "miniseed", "imagcdf"]: + if input_type in ["netcdf", "miniseed"]: input_stream = open(args.input_file, "rb") + elif input_type in ["imagcdf"]: + input_factory_args["inputFile"] = args.input_file #imagcdf file is binary but lib used accepts a file path else: input_stream = open(args.input_file, "r") elif args.input_stdin: diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index fda30542..643238bc 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ 
b/geomagio/imagcdf/ImagCDFFactory.py @@ -54,6 +54,7 @@ class ImagCDFFactory(TimeseriesFactory): urlTemplate="file://{obs}_{dt}_{t}.cdf", inputFile: Optional[str] = None, output: Optional[str] = "iaga2002", + urlInterval: int= -1 ): """ Initialize the ImagCDFFactory with default parameters. @@ -73,6 +74,7 @@ class ImagCDFFactory(TimeseriesFactory): type=type, interval=interval, urlTemplate=urlTemplate, + urlInterval=urlInterval ) self.inputFile = inputFile self.output = ( -- GitLab From e2f206bca1267db883d13057f704984496c85473 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 19 Dec 2024 11:35:42 -0800 Subject: [PATCH 21/30] import aggregation --- geomagio/Controller.py | 6 +++--- geomagio/imagcdf/ImagCDFFactory.py | 6 +++--- geomagio/imagcdf/__init__.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) create mode 100644 geomagio/imagcdf/__init__.py diff --git a/geomagio/Controller.py b/geomagio/Controller.py index c79cdd16..d9b73750 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -26,7 +26,7 @@ from . import vbf from . import xml from . import covjson from . import netcdf -from geomagio.imagcdf.ImagCDFFactory import ImagCDFFactory +from . 
import imagcdf class Controller(object): @@ -574,7 +574,7 @@ def get_input_factory(args): input_factory = covjson.CovJSONFactory(**input_factory_args) elif input_type == "imagcdf": input_factory_args["output"] = args.output - input_factory = ImagCDFFactory(**input_factory_args) + input_factory = imagcdf.ImagCDFFactory(**input_factory_args) # wrap stream if input_stream is not None: input_factory = StreamTimeseriesFactory( @@ -667,7 +667,7 @@ def get_output_factory(args): elif output_type == "xml": output_factory = xml.XMLFactory(**output_factory_args) elif output_type == "imagcdf": - output_factory = ImagCDFFactory(**output_factory_args) + output_factory = imagcdf.ImagCDFFactory(**output_factory_args) # wrap stream if output_stream is not None: output_factory = StreamTimeseriesFactory( diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 643238bc..72a0dc19 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -54,7 +54,7 @@ class ImagCDFFactory(TimeseriesFactory): urlTemplate="file://{obs}_{dt}_{t}.cdf", inputFile: Optional[str] = None, output: Optional[str] = "iaga2002", - urlInterval: int= -1 + urlInterval: int = -1, ): """ Initialize the ImagCDFFactory with default parameters. 
@@ -74,7 +74,7 @@ class ImagCDFFactory(TimeseriesFactory): type=type, interval=interval, urlTemplate=urlTemplate, - urlInterval=urlInterval + urlInterval=urlInterval, ) self.inputFile = inputFile self.output = ( @@ -846,7 +846,7 @@ class ImagCDFFactory(TimeseriesFactory): base_path = self.urlTemplate[7:] if not base_path or base_path == "{obs}_{dt}_{t}.cdf": base_path = os.getcwd() # Default to current working directory - return os.path.join(base_path,filename) + return os.path.join(base_path, filename) return os.path.join(self.urlTemplate, filename) # Unsupported URL scheme diff --git a/geomagio/imagcdf/__init__.py b/geomagio/imagcdf/__init__.py new file mode 100644 index 00000000..e7611ab4 --- /dev/null +++ b/geomagio/imagcdf/__init__.py @@ -0,0 +1,10 @@ +"""IO Module for ImagCDF Format +""" + +from __future__ import absolute_import + +from .ImagCDFFactory import ImagCDFFactory +from .IMCDFPublication import IMCDFPublicationLevel + + +__all__ = ["ImagCDFFactory", "IMCDFPublicationLevel"] -- GitLab From bdc4f7850995bf300cff9c76ef844c5978828368 Mon Sep 17 00:00:00 2001 From: "E. Joshua Rigler" <erigler@usgs.gov> Date: Mon, 13 Jan 2025 13:59:11 -0700 Subject: [PATCH 22/30] Testing modify MR while not owner --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9de81eed..d06d0724 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,8 @@ Geomag Algorithms is an open source library for processing Geomagnetic timeseries data. It includes algorithms and input/output factories used by the [USGS Geomagnetism Program](http://geomag.usgs.gov) to -translate between data formats, -generate derived data and indices in near-realtime, -and research and develop new algorithms. +translate between data formats, generate derived data and indices in +near-realtime, and research and develop new algorithms. - Accesses USGS data services. 
- Built using established open source python libraries -- GitLab From 381c535d74088c29d6230dd2de49eb9e84dd8d5c Mon Sep 17 00:00:00 2001 From: "Erin (Josh) Rigler" <erigler@usgs.gov> Date: Mon, 13 Jan 2025 21:09:42 +0000 Subject: [PATCH 23/30] Testing modify MR while not owner - from Gitlab --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d06d0724..aef477d8 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ [](https://code.usgs.gov/ghsc/geomag/geomag-algorithms/-/commits/master) Geomag Algorithms is an open source library for processing -Geomagnetic timeseries data. It includes algorithms and input/output factories -used by the [USGS Geomagnetism Program](http://geomag.usgs.gov) to +Geomagnetic timeseries data. It includes algorithms and input/output +factories used by the [USGS Geomagnetism Program](http://geomag.usgs.gov) to translate between data formats, generate derived data and indices in near-realtime, and research and develop new algorithms. 
-- GitLab From 39f0e813366232c2c7e3cff84d804a06b7c45ea0 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 15 Jan 2025 10:09:01 -0800 Subject: [PATCH 24/30] skip writing a timeseries if channel not in channels requested --- geomagio/imagcdf/ImagCDFFactory.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 72a0dc19..414aaf99 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -131,6 +131,8 @@ class ImagCDFFactory(TimeseriesFactory): temperature_index = 0 for trace in timeseries: channel = trace.stats.channel + if channels and channel not in channels: + continue var_name = f"GeomagneticField{channel}" # if channel in REAL_TEMPERATURE: # temperature_index += 1 # MUST INCREMENT INDEX BEFORE USING -- GitLab From 40f43349fb4de94b82b0989bb10558244f06f5ee Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 15 Jan 2025 13:08:31 -0800 Subject: [PATCH 25/30] poetry lock --- poetry.lock | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/poetry.lock b/poetry.lock index a4aac078..3a6030c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,13 +38,13 @@ docs = ["sphinx (==7.2.6)", "sphinx-mdinclude (==0.5.3)"] [[package]] name = "alembic" -version = "1.13.3" +version = "1.14.0" description = "A database migration tool for SQLAlchemy." 
optional = false python-versions = ">=3.8" files = [ - {file = "alembic-1.13.3-py3-none-any.whl", hash = "sha256:908e905976d15235fae59c9ac42c4c5b75cfcefe3d27c0fbf7ae15a37715d80e"}, - {file = "alembic-1.13.3.tar.gz", hash = "sha256:203503117415561e203aa14541740643a611f641517f0209fcae63e9fa09f1a2"}, + {file = "alembic-1.14.0-py3-none-any.whl", hash = "sha256:99bd884ca390466db5e27ffccff1d179ec5c05c965cfefc0607e69f9e411cb25"}, + {file = "alembic-1.14.0.tar.gz", hash = "sha256:b00892b53b3642d0b8dbedba234dbf1924b69be83a9a769d5a624b01094e304b"}, ] [package.dependencies] @@ -711,13 +711,13 @@ files = [ [[package]] name = "dparse" -version = "0.6.3" +version = "0.6.4" description = "A parser for Python dependency files" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "dparse-0.6.3-py3-none-any.whl", hash = "sha256:0d8fe18714056ca632d98b24fbfc4e9791d4e47065285ab486182288813a5318"}, - {file = "dparse-0.6.3.tar.gz", hash = "sha256:27bb8b4bcaefec3997697ba3f6e06b2447200ba273c0b085c3d012a04571b528"}, + {file = "dparse-0.6.4-py3-none-any.whl", hash = "sha256:fbab4d50d54d0e739fbb4dedfc3d92771003a5b9aa8545ca7a7045e3b174af57"}, + {file = "dparse-0.6.4.tar.gz", hash = "sha256:90b29c39e3edc36c6284c82c4132648eaf28a01863eb3c231c2512196132201a"}, ] [package.dependencies] @@ -725,18 +725,20 @@ packaging = "*" tomli = {version = "*", markers = "python_version < \"3.11\""} [package.extras] +all = ["pipenv", "poetry", "pyyaml"] conda = ["pyyaml"] -pipenv = ["pipenv (<=2022.12.19)"] +pipenv = ["pipenv"] +poetry = ["poetry"] [[package]] name = "et-xmlfile" -version = "1.1.0" +version = "2.0.0" description = "An implementation of lxml.xmlfile for the standard library" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada"}, - {file = "et_xmlfile-1.1.0.tar.gz", hash = 
"sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c"}, + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, ] [[package]] @@ -1793,13 +1795,13 @@ typing-extensions = ">=3.7.4" [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -2497,23 +2499,23 @@ test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeo [[package]] name = "setuptools" -version = "75.2.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, - {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = 
["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy 
(==1.12.*)", "pytest-mypy"] [[package]] name = "six" @@ -2643,13 +2645,13 @@ SQLAlchemy = ">=0.9.0" [[package]] name = "starlette" -version = "0.41.0" +version = "0.41.3" description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.41.0-py3-none-any.whl", hash = "sha256:a0193a3c413ebc9c78bff1c3546a45bb8c8bcb4a84cae8747d650a65bd37210a"}, - {file = "starlette-0.41.0.tar.gz", hash = "sha256:39cbd8768b107d68bfe1ff1672b38a2c38b49777de46d2a592841d58e3bf7c2a"}, + {file = "starlette-0.41.3-py3-none-any.whl", hash = "sha256:44cedb2b7c77a9de33a8b74b2b90e9f50d11fcf25d8270ea525ad71a25374ff7"}, + {file = "starlette-0.41.3.tar.gz", hash = "sha256:0e4ab3d16522a255be6b28260b938eae2482f98ce5cc934cb08dce8dc3ba5835"}, ] [package.dependencies] @@ -3064,4 +3066,4 @@ pycurl = ["pycurl"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "de2ec42ee0160626455b58e0b8f67e2669ad3ef5e13d58bc4500d0b8a1643cb8" +content-hash = "326ddbf7328f54b06ea3cd323053134836fb707d18599929a23a3bd2abf06ade" -- GitLab From ad5f8ed38e4dfd4b62ecc0702313e677fcf22fa5 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 16 Jan 2025 13:54:35 -0800 Subject: [PATCH 26/30] needs createParentDir set to true --- geomagio/imagcdf/ImagCDFFactory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 414aaf99..914bba78 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -238,7 +238,7 @@ class ImagCDFFactory(TimeseriesFactory): # Handle 'file://' output elif url.startswith("file://"): # Get the file path from the URL - url_file = Util.get_file_from_url(url, createParentDirectory=False) + url_file = Util.get_file_from_url(url, createParentDirectory=True) url_data = timeseries.slice( starttime=interval_start, endtime=interval_end, -- GitLab From 
351333dd24dd1e6a3afa5ed7c8b054c23998c206 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 16 Jan 2025 14:31:48 -0800 Subject: [PATCH 27/30] expand allowed elements - to any. units set to known, validmin/max set to default. --- geomagio/imagcdf/ImagCDFFactory.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py index 914bba78..b68f03d5 100644 --- a/geomagio/imagcdf/ImagCDFFactory.py +++ b/geomagio/imagcdf/ImagCDFFactory.py @@ -506,7 +506,8 @@ class ImagCDFFactory(TimeseriesFactory): ) -> dict: channel = trace.stats.channel.upper() fieldnam = f"Geomagnetic Field Element {channel}" # “Geomagnetic Field Element †+ the element code or “Temperature †+ the name of the location where the temperature was recorded. - units = "" # Must be one of “nTâ€, “Degrees of arc†or “Celsius†+ units = "" # Should be one of “nTâ€, “Degrees of arc†or “Celsius†to adhere to standard. 
+ depend_0 = "" if channel == "D": units = "Degrees of arc" validmin = -360.0 @@ -535,16 +536,22 @@ class ImagCDFFactory(TimeseriesFactory): units = "nT" validmin = -79_999.0 validmax = 79_999.0 + else: + units = "Unknown" # unknown or possibly assume to be nT + validmin = -79_999 # largest valid min for all other elements + validmax = 79_999 # largest valid max for all other elements + depend_0 = "DataTimes" # can be used for nonstandard element - # Determine DEPEND_0 based on channel type - if not isUniqueTimes: - depend_0 = "DataTimes" - elif channel in self._get_vector_elements(): - depend_0 = "GeomagneticVectorTimes" - elif channel in self._get_scalar_elements(): - depend_0 = "GeomagneticScalarTimes" - # elif channel in REAL_TEMPERATURES: - # depend_0 = f"Temperature{temperature_index}Times" + # Determine DEPEND_0 based on channel type if necessary + if not depend_0: + if not isUniqueTimes: + depend_0 = "DataTimes" + elif channel in self._get_vector_elements(): + depend_0 = "GeomagneticVectorTimes" + elif channel in self._get_scalar_elements(): + depend_0 = "GeomagneticScalarTimes" + # elif channel in REAL_TEMPERATURES: + # depend_0 = f"Temperature{temperature_index}Times" var_attrs = { "FIELDNAM": fieldnam, -- GitLab From 61f1cac360d3aea55f4720f8d180f0385ac912b4 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 17 Jan 2025 11:26:03 -0800 Subject: [PATCH 28/30] lint + rebasing --- geomagio/Controller.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index d9b73750..97d46793 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -505,7 +505,9 @@ def get_input_factory(args): if input_type in ["netcdf", "miniseed"]: input_stream = open(args.input_file, "rb") elif input_type in ["imagcdf"]: - input_factory_args["inputFile"] = args.input_file #imagcdf file is binary but lib used accepts a file path + input_factory_args["inputFile"] = ( + 
args.input_file + ) # imagcdf file is binary but lib used accepts a file path else: input_stream = open(args.input_file, "r") elif args.input_stdin: @@ -841,7 +843,7 @@ def parse_args(args): "xml", "covjson", "netcdf", - "imagcdf" + "imagcdf", ), default="edge", help='Input format (Default "edge")', -- GitLab From f086538c963096ea320edb8dae7cc168197de4fa Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 17 Jan 2025 11:28:37 -0800 Subject: [PATCH 29/30] poetry lock --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 3a6030c1..05a9e1d1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3066,4 +3066,4 @@ pycurl = ["pycurl"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "326ddbf7328f54b06ea3cd323053134836fb707d18599929a23a3bd2abf06ade" +content-hash = "517a488d608ab52b45699d4605d4f2c4f9dd57533102a42e026049be71b9a3ce" -- GitLab From 0a91ddd97d51e92b9eac6c8ab1cc8b7069830ff5 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 17 Jan 2025 13:25:31 -0800 Subject: [PATCH 30/30] small bug fixes: dupe code line + import of DerivedTimeseriesFactory --- geomagio/Controller.py | 1 - geomagio/api/ws/data.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 97d46793..23661817 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -499,7 +499,6 @@ def get_input_factory(args): input_factory_args["observatory"] = args.observatory input_factory_args["type"] = args.type input_type = args.input - input_factory_args["output"] = args.output # stream/url arguments if args.input_file is not None: if input_type in ["netcdf", "miniseed"]: diff --git a/geomagio/api/ws/data.py b/geomagio/api/ws/data.py index 1eb735fd..c2bc9227 100644 --- a/geomagio/api/ws/data.py +++ b/geomagio/api/ws/data.py @@ -5,7 +5,8 @@ from fastapi import APIRouter, 
Depends, Query, Request from obspy import UTCDateTime, Stream from starlette.responses import Response -from ... import DerivedTimeseriesFactory, TimeseriesFactory, TimeseriesUtility +from ... import TimeseriesFactory, TimeseriesUtility +from ...DerivedTimeseriesFactory import DerivedTimeseriesFactory from ...edge import EdgeFactory, FDSNFactory, MiniSeedFactory from ...iaga2002 import IAGA2002Writer from ...imfjson import IMFJSONWriter -- GitLab