From 7d8c33375fe947b811302cb9ed3176e313af0053 Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Mon, 30 Dec 2024 17:30:14 -0800
Subject: [PATCH 1/8] netcdf poc, works

---
 geomagio/Controller.py           |   8 +
 geomagio/netcdf/NetCDFFactory.py | 259 +++++++++++++++++++++++++++++++
 2 files changed, 267 insertions(+)
 create mode 100644 geomagio/netcdf/NetCDFFactory.py

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index dce6b919..6b7c336f 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -7,6 +7,8 @@ from typing import List, Optional, Tuple, Union
 
 from obspy.core import Stream, UTCDateTime
 
+from geomagio.netcdf.NetCDFFactory import NetCDFFactory
+
 from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm
 from .DerivedTimeseriesFactory import DerivedTimeseriesFactory
 from .PlotTimeseriesFactory import PlotTimeseriesFactory
@@ -547,6 +549,8 @@ def get_input_factory(args):
         # stream compatible factories
         if input_type == "iaga2002":
             input_factory = iaga2002.IAGA2002Factory(**input_factory_args)
+        elif input_type == "netcdf":
+            input_factory = NetCDFFactory(**input_factory_args)
         elif input_type == "imfv122":
             input_factory = imfv122.IMFV122Factory(**input_factory_args)
         elif input_type == "imfv283":
@@ -632,6 +636,8 @@ def get_output_factory(args):
             output_factory = binlog.BinLogFactory(**output_factory_args)
         elif output_type == "iaga2002":
             output_factory = iaga2002.IAGA2002Factory(**output_factory_args)
+        elif output_type == "netcdf":
+            output_factory = NetCDFFactory(**output_factory_args)
         elif output_type == "imfjson":
             output_factory = imfjson.IMFJSONFactory(**output_factory_args)
         elif output_type == "covjson":
@@ -826,6 +832,7 @@ def parse_args(args):
             "pcdcp",
             "xml",
             "covjson",
+            "netcdf",
         ),
         default="edge",
         help='Input format (Default "edge")',
@@ -1032,6 +1039,7 @@ def parse_args(args):
             "vbf",
             "xml",
             "covjson",
+            "netcdf",
         ),
         # TODO: set default to 'iaga2002'
         help="Output format",
diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py
new file mode 100644
index 00000000..4c877e7b
--- /dev/null
+++ b/geomagio/netcdf/NetCDFFactory.py
@@ -0,0 +1,259 @@
+import netCDF4
+import numpy as np
+from obspy import Stream, Trace, UTCDateTime
+from datetime import datetime, timezone
+from io import BytesIO
+
+from geomagio.TimeseriesFactory import TimeseriesFactory
+
+
+class NetCDFFactory(TimeseriesFactory):
+    """Factory for reading/writing NetCDF format Geomagnetic Data using numeric epoch times."""
+
+    def __init__(self, **kwargs):
+        """
+        Initializes the NetCDFFactory.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments for the base TimeseriesFactory.
+        """
+        super().__init__(**kwargs)
+        self.time_format = "numeric"  # Fixed to numeric epoch times
+
+    def parse_string(self, data: bytes, **kwargs):
+        """
+        Parse a NetCDF byte string into an ObsPy Stream.
+
+        The NetCDF file should follow a structure with global attributes for metadata
+        and variables for each geomagnetic component, including a 'time' variable
+        representing epoch times in seconds.
+
+        Parameters
+        ----------
+        data : bytes
+            Byte content of the NetCDF file.
+
+        Returns
+        -------
+        Stream
+            An ObsPy Stream object containing the parsed data. Returns an empty Stream if parsing fails.
+        """
+        try:
+            # Create a NetCDF dataset from the byte data
+            nc_dataset = netCDF4.Dataset("inmemory", mode="r", memory=data)
+        except Exception as e:
+            print(f"Failed to parse NetCDF data: {e}")
+            return Stream()
+
+        try:
+            # Extract global attributes
+            institute = getattr(nc_dataset, "institute_name", "")
+            observatory = getattr(nc_dataset, "observatory_name", "")
+            observatory_code = getattr(nc_dataset, "observatory_code", "")
+            latitude = getattr(nc_dataset, "sensor_latitude", 0.0)
+            longitude = getattr(nc_dataset, "sensor_longitude", 0.0)
+            elevation = getattr(nc_dataset, "sensor_elevation", 0.0)
+            sensor_orientation = getattr(nc_dataset, "sensor_orientation", "")
+            original_sample_rate = getattr(nc_dataset, "original_sample_rate", "")
+            data_type = getattr(nc_dataset, "data_type", "")
+            start_time_num = getattr(
+                nc_dataset, "start_time", 0.0
+            )  # Seconds since epoch
+            sample_period = getattr(nc_dataset, "sample_period", 1.0)  # In milliseconds
+
+            # Convert numeric start time to UTCDateTime
+            try:
+                starttime = UTCDateTime(start_time_num)
+            except Exception:
+                starttime = UTCDateTime()
+
+            # Extract time variable
+            if "time" not in nc_dataset.variables:
+                print("No 'time' variable found in NetCDF data.")
+                return Stream()
+
+            time_var = nc_dataset.variables["time"]
+            times_epoch = time_var[:]  # Numeric epoch times in seconds
+
+            # Extract channel data
+            channel_names = [var for var in nc_dataset.variables if var != "time"]
+            channel_data = {}
+            for ch in channel_names:
+                data_array = nc_dataset.variables[ch][:]
+                # Ensure data is a 1D array
+                channel_data[ch] = data_array.flatten()
+
+            nc_dataset.close()
+        except Exception as e:
+            print(f"Error while extracting data from NetCDF: {e}")
+            return Stream()
+
+        # Create Stream and Traces
+        stream = Stream()
+        npts = len(times_epoch)
+        if npts == 0:
+            print("No time points found in NetCDF data.")
+            return stream
+
+        # Calculate delta as the average sampling interval
+        if npts > 1:
+            delta = (times_epoch[-1] - times_epoch[0]) / (npts - 1)
+        else:
+            delta = 1.0  # Default to 1 second if only one point
+
+        for ch_name, values in channel_data.items():
+            if len(values) != npts:
+                print(
+                    f"Channel '{ch_name}' has mismatched data length. Expected {npts}, got {len(values)}. Skipping."
+                )
+                continue
+            data_array = np.array(values, dtype=float)
+            try:
+                trace_starttime = UTCDateTime(times_epoch[0])
+            except Exception:
+                trace_starttime = UTCDateTime()
+
+            stats = {
+                "network": "",
+                "station": observatory_code,
+                "channel": ch_name,
+                "starttime": trace_starttime,
+                "npts": npts,
+                "sampling_rate": 1.0 / delta if delta != 0 else 1.0,
+                "geodetic_longitude": longitude,
+                "geodetic_latitude": latitude,
+                "elevation": elevation,
+                "station_name": observatory,
+                "data_type": data_type,
+                "data_interval_type": original_sample_rate,
+            }
+
+            trace = Trace(data=data_array, header=stats)
+            stream += trace
+
+        return stream
+
+    def write_file_from_buffer(self, fh, timeseries: Stream, channels=None):
+        """
+        Write an ObsPy Stream to a NetCDF file.
+
+        The NetCDF file will contain global attributes for metadata and variables
+        for each geomagnetic component, including a 'time' variable representing
+        epoch times in seconds.
+
+        Parameters
+        ----------
+        fh : file-like object
+            File handle to write the NetCDF data.
+        timeseries : Stream
+            ObsPy Stream object containing the data to write.
+        channels : list, optional
+            List of channel names to include. If None, all channels are included.
+        """
+        if not timeseries or len(timeseries) == 0:
+            # Create an empty NetCDF structure
+            with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+                nc_dataset.description = "Empty Geomagnetic Time Series Data"
+            return
+
+        timeseries.merge()
+
+        # Filter channels if specified
+        if channels:
+            # Ensure channel names are uppercase for consistency
+            channels = [ch.upper() for ch in channels]
+            # Manually filter the Stream to include only desired channels
+            timeseries = Stream(
+                [tr for tr in timeseries if tr.stats.channel.upper() in channels]
+            )
+
+        if len(timeseries) == 0:
+            print("No matching channels found after filtering.")
+            with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+                nc_dataset.description = (
+                    "Empty Geomagnetic Time Series Data after channel filtering"
+                )
+            return
+
+        timeseries.sort(keys=["starttime"])
+        tr = timeseries[0]
+        stats = tr.stats
+
+        station = stats.station or ""
+        lat = float(getattr(stats, "geodetic_latitude", 0.0))
+        lon = float(getattr(stats, "geodetic_longitude", 0.0))
+        alt = float(getattr(stats, "elevation", 0.0))
+        data_type = getattr(stats, "data_type", "unknown")
+        data_interval_type = getattr(stats, "data_interval_type", "unknown")
+        station_name = getattr(stats, "station_name", station)
+        sensor_orientation = getattr(stats, "sensor_orientation", "")
+        original_sample_rate = getattr(stats, "original_sample_rate", "")
+        sample_period = getattr(stats, "sampling_period", 1.0)  # In milliseconds
+
+        npts = tr.stats.npts
+        delta = tr.stats.delta
+        starttime = tr.stats.starttime
+
+        # Generate time values as epoch seconds
+        times_epoch = np.array([starttime.timestamp + i * delta for i in range(npts)])
+
+        # Create NetCDF dataset
+        with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+            # Define the time dimension
+            nc_dataset.createDimension("time", npts)
+
+            # Create the time variable
+            time_var = nc_dataset.createVariable("time", "f8", ("time",))
+            time_var.units = "seconds since 1970-01-01T00:00:00Z"
+            time_var.calendar = "standard"
+            time_var[:] = times_epoch
+
+            # Create channel variables
+            for trace in timeseries:
+                ch_name = trace.stats.channel.upper()
+                # Ensure channel name is a valid NetCDF variable name
+                ch_name_nc = ch_name.replace("-", "_").replace(" ", "_")
+                var = nc_dataset.createVariable(ch_name_nc, "f8", ("time",))
+                var.units = (
+                    "nT"  # non standard elements such as temperature not considered.
+                )
+                var.long_name = f"Geomagnetic component {ch_name}"
+                var[:] = trace.data
+
+            # Set global attributes
+            nc_dataset.institute_name = getattr(stats, "agency_name", "")
+            nc_dataset.observatory_name = station_name
+            nc_dataset.observatory_code = station
+            nc_dataset.sensor_latitude = lat
+            nc_dataset.sensor_longitude = lon
+            nc_dataset.sensor_elevation = alt
+            nc_dataset.sensor_orientation = sensor_orientation
+            nc_dataset.original_sample_rate = original_sample_rate
+            nc_dataset.data_type = data_type
+            nc_dataset.start_time = starttime.timestamp  # Numeric epoch time
+            nc_dataset.sample_period = sample_period  # In milliseconds
+
+            # Optional global attributes
+            nc_dataset.history = f"Created on {datetime.now(timezone.utc).isoformat()}"
+
+    def write_file(self, fh, timeseries: Stream, channels=None):
+        import tempfile
+        import shutil
+        import os
+
+        # Create a temporary file
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            tmp_filepath = tmp.name
+
+        try:
+            # Write to the temporary file using the existing write_file method
+            self.write_file_from_buffer(tmp_filepath, timeseries, channels)
+
+            # Read the temporary file content
+            with open(tmp_filepath, "rb") as tmp:
+                shutil.copyfileobj(tmp, fh)
+        finally:
+            # Clean up the temporary file
+            os.remove(tmp_filepath)
-- 
GitLab


From e0ee44a5772d6aa58e9cc4030d556498a5e429a1 Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Mon, 30 Dec 2024 17:35:51 -0800
Subject: [PATCH 2/8] netcdf poc poetry.lock files

---
 poetry.lock    | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml |  1 +
 2 files changed, 96 insertions(+)

diff --git a/poetry.lock b/poetry.lock
index 5946b52d..ad3771c9 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -243,6 +243,54 @@ files = [
 [package.dependencies]
 pycparser = "*"
 
+[[package]]
+name = "cftime"
+version = "1.6.4.post1"
+description = "Time-handling functionality from netcdf4-python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cftime-1.6.4.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0baa9bc4850929da9f92c25329aa1f651e2d6f23e237504f337ee9e12a769f5d"},
+    {file = "cftime-1.6.4.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6bb6b087f4b2513c37670bccd457e2a666ca489c5f2aad6e2c0e94604dc1b5b9"},
+    {file = "cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d9bdeb9174962c9ca00015190bfd693de6b0ec3ec0b3dbc35c693a4f48efdcc"},
+    {file = "cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e735cfd544878eb94d0108ff5a093bd1a332dba90f979a31a357756d609a90d5"},
+    {file = "cftime-1.6.4.post1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1dcd1b140bf50da6775c56bd7ca179e84bd258b2f159b53eefd5c514b341f2bf"},
+    {file = "cftime-1.6.4.post1-cp310-cp310-win_amd64.whl", hash = "sha256:e60b8f24b20753f7548f410f7510e28b941f336f84bd34e3cfd7874af6e70281"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1bf7be0a0afc87628cb8c8483412aac6e48e83877004faa0936afb5bf8a877ba"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0f64ca83acc4e3029f737bf3a32530ffa1fbf53124f5bee70b47548bc58671a7"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7ebdfd81726b0cfb8b524309224fa952898dfa177c13d5f6af5b18cefbf497d"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9ea0965a4c87739aebd84fe8eed966e5809d10065eeffd35c99c274b6f8da15"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:800a18aea4e8cb2b206450397cb8a53b154798738af3cdd3c922ce1ca198b0e6"},
+    {file = "cftime-1.6.4.post1-cp311-cp311-win_amd64.whl", hash = "sha256:5dcfc872f455db1f12eabe3c3ba98e93757cd60ed3526a53246e966ccde46c8a"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a590f73506f4704ba5e154ef55bfbaed5e1b4ac170f3caeb8c58e4f2c619ee4e"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:933cb10e1af4e362e77f513e3eb92b34a688729ddbf938bbdfa5ac20a7f44ba0"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf17a1b36f62e9e73c4c9363dd811e1bbf1170f5ac26d343fb26012ccf482908"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e18021f421aa26527bad8688c1acf0c85fa72730beb6efce969c316743294f2"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5835b9d622f9304d1c23a35603a0f068739f428d902860f25e6e7e5a1b7cd8ea"},
+    {file = "cftime-1.6.4.post1-cp312-cp312-win_amd64.whl", hash = "sha256:7f50bf0d1b664924aaee636eb2933746b942417d1f8b82ab6c1f6e8ba0da6885"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5c89766ebf088c097832ea618c24ed5075331f0b7bf8e9c2d4144aefbf2f1850"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7f27113f7ccd1ca32881fdcb9a4bec806a5f54ae621fc1c374f1171f3ed98ef2"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da367b23eea7cf4df071c88e014a1600d6c5bbf22e3393a4af409903fa397e28"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6579c5c83cdf09d73aa94c7bc34925edd93c5f2c7dd28e074f568f7e376271a0"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6b731c7133d17b479ca0c3c46a7a04f96197f0a4d753f4c2284c3ff0447279b4"},
+    {file = "cftime-1.6.4.post1-cp313-cp313-win_amd64.whl", hash = "sha256:d2a8c223faea7f1248ab469cc0d7795dd46f2a423789038f439fee7190bae259"},
+    {file = "cftime-1.6.4.post1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9df3e2d49e548c62d1939e923800b08d2ab732d3ac8d75b857edd7982c878552"},
+    {file = "cftime-1.6.4.post1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2892b7e7654142d825655f60eb66c3e1af745901890316907071d44cf9a18d8a"},
+    {file = "cftime-1.6.4.post1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4ab54e6c04e68395d454cd4001188fc4ade2fe48035589ed65af80c4527ef08"},
+    {file = "cftime-1.6.4.post1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:568b69fc52f406e361db62a4d7a219c6fb0ced138937144c3b3a511648dd6c50"},
+    {file = "cftime-1.6.4.post1-cp38-cp38-win_amd64.whl", hash = "sha256:640911d2629f4a8f81f6bc0163a983b6b94f86d1007449b8cbfd926136cda253"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44e9f8052600803b55f8cb6bcac2be49405c21efa900ec77a9fb7f692db2f7a6"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a90b6ef4a3fc65322c212a2c99cec75d1886f1ebaf0ff6189f7b327566762222"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652700130dbcca3ae36dbb5b61ff360e62aa09fabcabc42ec521091a14389901"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24a7fb6cc541a027dab37fdeb695f8a2b21cd7d200be606f81b5abc38f2391e2"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fc2c0abe2dbd147e1b1e6d0f3de19a5ea8b04956acc204830fd8418066090989"},
+    {file = "cftime-1.6.4.post1-cp39-cp39-win_amd64.whl", hash = "sha256:0ee2f5af8643aa1b47b7e388763a1a6e0dc05558cd2902cffb9cbcf954397648"},
+    {file = "cftime-1.6.4.post1.tar.gz", hash = "sha256:50ac76cc9f10ab7bd46e44a71c51a6927051b499b4407df4f29ab13d741b942f"},
+]
+
+[package.dependencies]
+numpy = {version = ">1.13.3", markers = "python_version < \"3.12.0.rc1\""}
+
 [[package]]
 name = "charset-normalizer"
 version = "3.4.1"
@@ -1570,6 +1618,53 @@ files = [
     {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
 ]
 
+[[package]]
+name = "netcdf4"
+version = "1.7.2"
+description = "Provides an object-oriented python interface to the netCDF version 4 library"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "netCDF4-1.7.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:5e9b485e3bd9294d25ff7dc9addefce42b3d23c1ee7e3627605277d159819392"},
+    {file = "netCDF4-1.7.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:118b476fd00d7e3ab9aa7771186d547da645ae3b49c0c7bdab866793ebf22f07"},
+    {file = "netCDF4-1.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe5b1837ff209185ecfe50bd71884c866b3ee69691051833e410e57f177e059"},
+    {file = "netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28021c7e886e5bccf9a8ce504c032d1d7f98d86f67495fb7cf2c9564eba04510"},
+    {file = "netCDF4-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:7460b638e41c8ce4179d082a81cb6456f0ce083d4d959f4d9e87a95cd86f64cb"},
+    {file = "netCDF4-1.7.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:09d61c2ddb6011afb51e77ea0f25cd0bdc28887fb426ffbbc661d920f20c9749"},
+    {file = "netCDF4-1.7.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:fd2a16dbddeb8fa7cf48c37bfc1967290332f2862bb82f984eec2007bb120aeb"},
+    {file = "netCDF4-1.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f54f5d39ffbcf1726a1e6fd90cb5fa74277ecea739a5fa0f424636d71beafe24"},
+    {file = "netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:902aa50d70f49d002d896212a171d344c38f7b8ca520837c56c922ac1535c4a3"},
+    {file = "netCDF4-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:3291f9ad0c98c49a4dd16aefad1a9abd3a1b884171db6c81bdcee94671cfabe3"},
+    {file = "netCDF4-1.7.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:e73e3baa0b74afc414e53ff5095748fdbec7fb346eda351e567c23f2f0d247f1"},
+    {file = "netCDF4-1.7.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a51da09258b31776f474c1d47e484fc7214914cdc59edf4cee789ba632184591"},
+    {file = "netCDF4-1.7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb95b11804fe051897d1f2044b05d82a1847bc2549631cdd2f655dde7de77a9c"},
+    {file = "netCDF4-1.7.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d8a848373723f41ef662590b4f5e1832227501c9fd4513e8ad8da58c269977"},
+    {file = "netCDF4-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:568ea369e00b581302d77fc5fd0b8f78e520c7e08d0b5af5219ba51f3f1cd694"},
+    {file = "netCDF4-1.7.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:205a5f1de3ddb993c7c97fb204a923a22408cc2e5facf08d75a8eb89b3e7e1a8"},
+    {file = "netCDF4-1.7.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:96653fc75057df196010818367c63ba6d7e9af603df0a7fe43fcdad3fe0e9e56"},
+    {file = "netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f"},
+    {file = "netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80"},
+    {file = "netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5"},
+    {file = "netCDF4-1.7.2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:6bf402c2c7c063474576e5cf89af877d0b0cd097d9316d5bc4fcb22b62f12567"},
+    {file = "netCDF4-1.7.2-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:5bdf3b34e6fd4210e34fdc5d1a669a22c4863d96f8a20a3928366acae7b3cbbb"},
+    {file = "netCDF4-1.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657774404b9f78a5e4d26506ac9bfe106e4a37238282a70803cc7ce679c5a6cc"},
+    {file = "netCDF4-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e896d92f01fbf365e33e2513d5a8c4cfe16ff406aae9b6034e5ba1538c8c7a8"},
+    {file = "netCDF4-1.7.2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:eb87c08d1700fe67c301898cf5ba3a3e1f8f2fbb417fcd0e2ac784846b60b058"},
+    {file = "netCDF4-1.7.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:59b403032774c723ee749d7f2135be311bad7d00d1db284bebfab58b9d5cdb92"},
+    {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:572f71459ef4b30e8554dcc4e1e6f55de515acc82a50968b48fe622244a64548"},
+    {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f77e72281acc5f331f82271e5f7f014d46f5ca9bcaa5aafe3e46d66cee21320"},
+    {file = "netCDF4-1.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:d0fa7a9674fae8ae4877e813173c3ff7a6beee166b8730bdc847f517b282ed31"},
+    {file = "netcdf4-1.7.2.tar.gz", hash = "sha256:a4c6375540b19989896136943abb6d44850ff6f1fa7d3f063253b1ad3f8b7fce"},
+]
+
+[package.dependencies]
+certifi = "*"
+cftime = "*"
+numpy = "*"
+
+[package.extras]
+tests = ["Cython", "packaging", "pytest"]
+
 [[package]]
 name = "numpy"
 version = "1.24.4"
diff --git a/pyproject.toml b/pyproject.toml
index ec3207c2..c163d9b0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ httpx = "0.28.1"
 SQLAlchemy = "1.4.41"
 SQLAlchemy-Utc = "^0.14.0"
 uvicorn = {extras = ["standard"], version = "^0.22.0"}
+netcdf4 = "^1.7.2"
 
 
 [tool.poetry.dev-dependencies]
-- 
GitLab


From 87823bcdf72f81b9f12c8221393940cda50c929d Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Thu, 2 Jan 2025 16:20:35 -0800
Subject: [PATCH 3/8] updates for CF compliance. more metadata extracted

---
 geomagio/Controller.py           |   2 +-
 geomagio/netcdf/NetCDFFactory.py | 376 +++++++++++++++++++------------
 2 files changed, 235 insertions(+), 143 deletions(-)

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index 6b7c336f..fae8902b 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -501,7 +501,7 @@ def get_input_factory(args):
     input_type = args.input
     # stream/url arguments
     if args.input_file is not None:
-        if input_type == "miniseed":
+        if input_type in ["netcdf", "miniseed"]:
             input_stream = open(args.input_file, "rb")
         else:
             input_stream = open(args.input_file, "r")
diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py
index 4c877e7b..920c9d8c 100644
--- a/geomagio/netcdf/NetCDFFactory.py
+++ b/geomagio/netcdf/NetCDFFactory.py
@@ -2,34 +2,84 @@ import netCDF4
 import numpy as np
 from obspy import Stream, Trace, UTCDateTime
 from datetime import datetime, timezone
-from io import BytesIO
+import tempfile
+import shutil
+import os
 
 from geomagio.TimeseriesFactory import TimeseriesFactory
+from geomagio.api.ws.Element import ELEMENT_INDEX
 
 
 class NetCDFFactory(TimeseriesFactory):
     """Factory for reading/writing NetCDF format Geomagnetic Data using numeric epoch times."""
 
+    # Define temperature element IDs
+    TEMPERATURE_ELEMENTS_ID = {"UK1", "UK2", "UK3"}
+
+    # Define the list of optional global attributes to handle
+    OPTIONAL_GLOBAL_ATTRS = [
+        "data_interval_type",
+        "data_type",
+        "station_name",
+        "station",
+        "network",
+        "location",
+        "declination_base",
+        "sensor_orientation",
+        "sensor_sampling_rate",
+        "sample_period",
+        "conditions_of_use",
+    ]
+
     def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.time_format = "numeric"  # Fixed to numeric epoch times
+
+    def _get_var_units_validmin_validmax(self, channel: str):
         """
-        Initializes the NetCDFFactory.
+        Determine units, valid_min, and valid_max based on channel name.
 
         Parameters
         ----------
-        **kwargs : dict
-            Additional keyword arguments for the base TimeseriesFactory.
+        channel : str
+            The channel name.
+
+        Returns
+        -------
+        tuple
+            A tuple containing (units, valid_min, valid_max).
         """
-        super().__init__(**kwargs)
-        self.time_format = "numeric"  # Fixed to numeric epoch times
+        channel = channel.upper()
+        if channel == "D":
+            units = "degree"
+            valid_min = -360.0
+            valid_max = 360.0
+        elif channel == "I":
+            units = "degree"
+            valid_min = -90.0
+            valid_max = 90.0
+        # elif channel in self.TEMPERATURE_ELEMENTS_ID:
+        #     units = "degree_Celsius"
+        #     valid_min = -273.15
+        #     valid_max = 79999.0
+        elif channel in ["F", "S"]:
+            units = "nT"
+            valid_min = 0.0
+            valid_max = 79999.0
+        elif channel in ["X", "Y", "Z", "H", "E", "V", "G"]:
+            units = "nT"
+            valid_min = -79999.0
+            valid_max = 79999.0
+        else:
+            units = ""
+            valid_min = np.nan
+            valid_max = np.nan
+        return units, valid_min, valid_max
 
     def parse_string(self, data: bytes, **kwargs):
         """
         Parse a NetCDF byte string into an ObsPy Stream.
 
-        The NetCDF file should follow a structure with global attributes for metadata
-        and variables for each geomagnetic component, including a 'time' variable
-        representing epoch times in seconds.
-
         Parameters
         ----------
         data : bytes
@@ -38,133 +88,124 @@ class NetCDFFactory(TimeseriesFactory):
         Returns
         -------
         Stream
-            An ObsPy Stream object containing the parsed data. Returns an empty Stream if parsing fails.
+            An ObsPy Stream object containing the parsed data.
         """
         try:
-            # Create a NetCDF dataset from the byte data
             nc_dataset = netCDF4.Dataset("inmemory", mode="r", memory=data)
         except Exception as e:
-            print(f"Failed to parse NetCDF data: {e}")
+            print(f"Error loading NetCDF data: {e}")
             return Stream()
 
-        try:
-            # Extract global attributes
-            institute = getattr(nc_dataset, "institute_name", "")
-            observatory = getattr(nc_dataset, "observatory_name", "")
-            observatory_code = getattr(nc_dataset, "observatory_code", "")
-            latitude = getattr(nc_dataset, "sensor_latitude", 0.0)
-            longitude = getattr(nc_dataset, "sensor_longitude", 0.0)
-            elevation = getattr(nc_dataset, "sensor_elevation", 0.0)
-            sensor_orientation = getattr(nc_dataset, "sensor_orientation", "")
-            original_sample_rate = getattr(nc_dataset, "original_sample_rate", "")
-            data_type = getattr(nc_dataset, "data_type", "")
-            start_time_num = getattr(
-                nc_dataset, "start_time", 0.0
-            )  # Seconds since epoch
-            sample_period = getattr(nc_dataset, "sample_period", 1.0)  # In milliseconds
-
-            # Convert numeric start time to UTCDateTime
-            try:
-                starttime = UTCDateTime(start_time_num)
-            except Exception:
-                starttime = UTCDateTime()
-
-            # Extract time variable
-            if "time" not in nc_dataset.variables:
-                print("No 'time' variable found in NetCDF data.")
-                return Stream()
-
-            time_var = nc_dataset.variables["time"]
-            times_epoch = time_var[:]  # Numeric epoch times in seconds
-
-            # Extract channel data
-            channel_names = [var for var in nc_dataset.variables if var != "time"]
-            channel_data = {}
-            for ch in channel_names:
-                data_array = nc_dataset.variables[ch][:]
-                # Ensure data is a 1D array
-                channel_data[ch] = data_array.flatten()
-
+        # Map from known NetCDF attributes to the stats keys we actually use
+        global_attrs_map = {
+            "institution": "agency_name",
+            "conditions_of_use": "conditions_of_use",
+            "data_interval": "data_interval",
+            "data_interval_type": "data_interval_type",
+            "data_type": "data_type",
+            "station_name": "station_name",
+            "station": "station",
+            "network": "network",
+            "location": "location",
+            "declination_base": "declination_base",
+            "sensor_orientation": "sensor_orientation",
+            "sensor_sampling_rate": "sensor_sampling_rate",
+            "latitude": "geodetic_latitude",
+            "longitude": "geodetic_longitude",
+            "elevation": "elevation",
+        }
+
+        # Extract recognized global attributes
+        global_attrs = {}
+        for nc_attr_name, stats_attr_name in global_attrs_map.items():
+            val = getattr(nc_dataset, nc_attr_name, None)
+            if val is not None:
+                global_attrs[stats_attr_name] = val
+
+        # Convert "comment" to filter_comments array if present
+        comment = getattr(nc_dataset, "comment", None)
+        if comment:
+            global_attrs["filter_comments"] = [comment]
+
+        # Check for time variable
+        if "time" not in nc_dataset.variables:
+            print("No 'time' variable found in NetCDF data.")
             nc_dataset.close()
-        except Exception as e:
-            print(f"Error while extracting data from NetCDF: {e}")
             return Stream()
 
-        # Create Stream and Traces
-        stream = Stream()
-        npts = len(times_epoch)
-        if npts == 0:
-            print("No time points found in NetCDF data.")
-            return stream
-
-        # Calculate delta as the average sampling interval
-        if npts > 1:
-            delta = (times_epoch[-1] - times_epoch[0]) / (npts - 1)
-        else:
-            delta = 1.0  # Default to 1 second if only one point
+        time_var = nc_dataset.variables["time"]
+        times_epoch = time_var[:]
 
-        for ch_name, values in channel_data.items():
-            if len(values) != npts:
-                print(
-                    f"Channel '{ch_name}' has mismatched data length. Expected {npts}, got {len(values)}. Skipping."
-                )
+        # Collect the channels (all variables except 'time')
+        channels = [v for v in nc_dataset.variables if v != "time"]
+        channel_data = {}
+        for ch in channels:
+            channel_data[ch] = nc_dataset.variables[ch][:]
+
+        nc_dataset.close()
+
+        # If no time points, return empty
+        if len(times_epoch) == 0:
+            print("No time points found in the NetCDF data.")
+            return Stream()
+
+        # Calculate sampling interval
+        delta = (
+            (times_epoch[-1] - times_epoch[0]) / (len(times_epoch) - 1)
+            if len(times_epoch) > 1
+            else 1.0
+        )
+        starttime = UTCDateTime(times_epoch[0])
+        sampling_rate = 1.0 / delta if delta else 0
+
+        stream = Stream()
+        for channel_name, data_array in channel_data.items():
+            # Data length must match the number of time samples
+            if len(data_array) != len(times_epoch):
+                print(f"Channel '{channel_name}' has mismatched length. Skipping.")
                 continue
-            data_array = np.array(values, dtype=float)
-            try:
-                trace_starttime = UTCDateTime(times_epoch[0])
-            except Exception:
-                trace_starttime = UTCDateTime()
-
-            stats = {
-                "network": "",
-                "station": observatory_code,
-                "channel": ch_name,
-                "starttime": trace_starttime,
-                "npts": npts,
-                "sampling_rate": 1.0 / delta if delta != 0 else 1.0,
-                "geodetic_longitude": longitude,
-                "geodetic_latitude": latitude,
-                "elevation": elevation,
-                "station_name": observatory,
-                "data_type": data_type,
-                "data_interval_type": original_sample_rate,
-            }
-
-            trace = Trace(data=data_array, header=stats)
-            stream += trace
+
+            # Build stats from recognized attributes + basic fields
+            stats = {}
+            stats.update(global_attrs)  # put in all recognized global attrs
+
+            # Basic per-trace stats
+            stats["channel"] = channel_name
+            stats["starttime"] = starttime
+            stats["sampling_rate"] = sampling_rate
+            # endtime, delta, npts, etc. will be handled automatically by ObsPy
+
+            # Create Trace
+            trace = Trace(data=np.array(data_array, dtype=float), header=stats)
+            stream.append(trace)
 
         return stream
 
     def write_file_from_buffer(self, fh, timeseries: Stream, channels=None):
         """
-        Write an ObsPy Stream to a NetCDF file.
-
-        The NetCDF file will contain global attributes for metadata and variables
-        for each geomagnetic component, including a 'time' variable representing
-        epoch times in seconds.
+        Write an ObsPy Stream to a NetCDF file, including optional attributes.
 
         Parameters
         ----------
-        fh : file-like object
-            File handle to write the NetCDF data.
+        fh : str or file-like object
+            File path or file handle to write the NetCDF data.
         timeseries : Stream
             ObsPy Stream object containing the data to write.
         channels : list, optional
             List of channel names to include. If None, all channels are included.
         """
         if not timeseries or len(timeseries) == 0:
-            # Create an empty NetCDF structure
+            # Create an empty NetCDF structure with minimal CF compliance
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
-                nc_dataset.description = "Empty Geomagnetic Time Series Data"
+                nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.Conventions = "CF-1.4"
             return
 
         timeseries.merge()
 
         # Filter channels if specified
         if channels:
-            # Ensure channel names are uppercase for consistency
             channels = [ch.upper() for ch in channels]
-            # Manually filter the Stream to include only desired channels
             timeseries = Stream(
                 [tr for tr in timeseries if tr.stats.channel.upper() in channels]
             )
@@ -172,25 +213,19 @@ class NetCDFFactory(TimeseriesFactory):
         if len(timeseries) == 0:
             print("No matching channels found after filtering.")
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
-                nc_dataset.description = (
-                    "Empty Geomagnetic Time Series Data after channel filtering"
-                )
+                nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.Conventions = "CF-1.4"
             return
 
         timeseries.sort(keys=["starttime"])
         tr = timeseries[0]
         stats = tr.stats
 
-        station = stats.station or ""
+        # Extract necessary attributes
         lat = float(getattr(stats, "geodetic_latitude", 0.0))
         lon = float(getattr(stats, "geodetic_longitude", 0.0))
         alt = float(getattr(stats, "elevation", 0.0))
-        data_type = getattr(stats, "data_type", "unknown")
-        data_interval_type = getattr(stats, "data_interval_type", "unknown")
-        station_name = getattr(stats, "station_name", station)
-        sensor_orientation = getattr(stats, "sensor_orientation", "")
-        original_sample_rate = getattr(stats, "original_sample_rate", "")
-        sample_period = getattr(stats, "sampling_period", 1.0)  # In milliseconds
+        reported_orientation = "".join(channels) if channels else ""
 
         npts = tr.stats.npts
         delta = tr.stats.delta
@@ -199,59 +234,116 @@ class NetCDFFactory(TimeseriesFactory):
         # Generate time values as epoch seconds
         times_epoch = np.array([starttime.timestamp + i * delta for i in range(npts)])
 
-        # Create NetCDF dataset
+        # Create NetCDF dataset with CF conventions
         with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+            # Define global attributes with CF compliance
+            nc_dataset.Conventions = "CF-1.4"
+            nc_dataset.title = "Geomagnetic Time Series Data"
+            nc_dataset.institution = getattr(stats, "agency_name", "")
+            # nc_dataset.source = getattr(stats, "source", "")
+            history_entry = f"Created on {datetime.now(timezone.utc).isoformat()}"
+            existing_history = getattr(nc_dataset, "history", "")
+            if existing_history:
+                nc_dataset.history = existing_history + "\n" + history_entry
+            else:
+                nc_dataset.history = history_entry
+            nc_dataset.references = getattr(stats, "references", "")
+            nc_dataset.comment = getattr(stats, "filter_comments", "")
+
+            # Add optional global attributes if they exist in stats and are not empty
+            for attr in self.OPTIONAL_GLOBAL_ATTRS:
+                val = getattr(stats, attr, None)
+                if val not in [None, ""]:
+                    setattr(nc_dataset, attr, val)
+
+            if reported_orientation:
+                setattr(nc_dataset, "reported_orientation", reported_orientation)
+
             # Define the time dimension
             nc_dataset.createDimension("time", npts)
 
             # Create the time variable
             time_var = nc_dataset.createVariable("time", "f8", ("time",))
-            time_var.units = "seconds since 1970-01-01T00:00:00Z"
-            time_var.calendar = "standard"
+            time_var.units = "seconds since 1970-01-01T00:00:00Z"  # CF-compliant units
+            time_var.calendar = getattr(stats, "calendar", "standard")
+            time_var.standard_name = "time"
+            time_var.long_name = "Time"
             time_var[:] = times_epoch
 
-            # Create channel variables
+            # Assign valid range if available
+            if "valid_min" in stats and "valid_max" in stats:
+                time_var.valid_min = stats["valid_min"]
+                time_var.valid_max = stats["valid_max"]
+
+            # Create channel variables with CF attributes
             for trace in timeseries:
                 ch_name = trace.stats.channel.upper()
+                if len(ch_name) > 63:
+                    raise ValueError(
+                        f"Channel name '{ch_name}' exceeds NetCDF variable name length limit."
+                    )
+
                 # Ensure channel name is a valid NetCDF variable name
                 ch_name_nc = ch_name.replace("-", "_").replace(" ", "_")
-                var = nc_dataset.createVariable(ch_name_nc, "f8", ("time",))
-                var.units = (
-                    "nT"  # non standard elements such as temperature not considered.
+
+                # Determine units, valid_min, and valid_max based on channel
+                units, valid_min, valid_max = self._get_var_units_validmin_validmax(
+                    ch_name
+                )
+
+                var = nc_dataset.createVariable(
+                    ch_name_nc,
+                    "f8",
+                    ("time",),
+                    fill_value=getattr(trace.stats, "_FillValue", 99_999),
+                )
+
+                # Set CF attributes
+                var.units = units
+                var.long_name = getattr(
+                    trace.stats, "long_name", ELEMENT_INDEX.get(ch_name).name
+                )
+                var.standard_name = getattr(
+                    trace.stats, "standard_name", f"geomagnetic_field_{ch_name.lower()}"
                 )
-                var.long_name = f"Geomagnetic component {ch_name}"
+
+                # Set data validity attributes
+                var.valid_min = valid_min
+                var.valid_max = valid_max
+
+                # Assign data
                 var[:] = trace.data
 
-            # Set global attributes
-            nc_dataset.institute_name = getattr(stats, "agency_name", "")
-            nc_dataset.observatory_name = station_name
-            nc_dataset.observatory_code = station
-            nc_dataset.sensor_latitude = lat
-            nc_dataset.sensor_longitude = lon
-            nc_dataset.sensor_elevation = alt
-            nc_dataset.sensor_orientation = sensor_orientation
-            nc_dataset.original_sample_rate = original_sample_rate
-            nc_dataset.data_type = data_type
-            nc_dataset.start_time = starttime.timestamp  # Numeric epoch time
-            nc_dataset.sample_period = sample_period  # In milliseconds
-
-            # Optional global attributes
-            nc_dataset.history = f"Created on {datetime.now(timezone.utc).isoformat()}"
+            # Define geospatial coordinates
+            nc_dataset.latitude = lat
+            # nc_dataset.lat_units = "degrees_north"
+            nc_dataset.longitude = lon
+            # nc_dataset.geospatial_lon_units = "degrees_east"
+            nc_dataset.elevation = alt
+            # nc_dataset.elevation_units = "meters"
 
     def write_file(self, fh, timeseries: Stream, channels=None):
-        import tempfile
-        import shutil
-        import os
+        """
+        Write an ObsPy Stream to a NetCDF file.
 
+        Parameters
+        ----------
+        fh : file-like object
+            File handle to write the NetCDF data.
+        timeseries : Stream
+            ObsPy Stream object containing the data to write.
+        channels : list, optional
+            List of channel names to include. If None, all channels are included.
+        """
         # Create a temporary file
         with tempfile.NamedTemporaryFile(delete=False) as tmp:
             tmp_filepath = tmp.name
 
         try:
-            # Write to the temporary file using the existing write_file method
+            # Write to the temporary file
             self.write_file_from_buffer(tmp_filepath, timeseries, channels)
 
-            # Read the temporary file content
+            # Read the temporary file content into destination file
             with open(tmp_filepath, "rb") as tmp:
                 shutil.copyfileobj(tmp, fh)
         finally:
-- 
GitLab


From ff57e71fd7bfcc3ef9b7dc580d1f5ad97e9d589e Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Mon, 6 Jan 2025 09:24:23 -0800
Subject: [PATCH 4/8] cf convention updated from 1.4 to 1.6

---
 geomagio/netcdf/NetCDFFactory.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py
index 920c9d8c..590e1ead 100644
--- a/geomagio/netcdf/NetCDFFactory.py
+++ b/geomagio/netcdf/NetCDFFactory.py
@@ -198,7 +198,7 @@ class NetCDFFactory(TimeseriesFactory):
             # Create an empty NetCDF structure with minimal CF compliance
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
-                nc_dataset.Conventions = "CF-1.4"
+                nc_dataset.Conventions = "CF-1.6"
             return
 
         timeseries.merge()
@@ -214,7 +214,7 @@ class NetCDFFactory(TimeseriesFactory):
             print("No matching channels found after filtering.")
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
-                nc_dataset.Conventions = "CF-1.4"
+                nc_dataset.Conventions = "CF-1.6"
             return
 
         timeseries.sort(keys=["starttime"])
@@ -237,7 +237,7 @@ class NetCDFFactory(TimeseriesFactory):
         # Create NetCDF dataset with CF conventions
         with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
             # Define global attributes with CF compliance
-            nc_dataset.Conventions = "CF-1.4"
+            nc_dataset.Conventions = "CF-1.6"
             nc_dataset.title = "Geomagnetic Time Series Data"
             nc_dataset.institution = getattr(stats, "agency_name", "")
             # nc_dataset.source = getattr(stats, "source", "")
-- 
GitLab


From 8b9a79fc3ea7cf84595e95d68512779c5da2c425 Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Mon, 6 Jan 2025 10:52:26 -0800
Subject: [PATCH 5/8] arcmin to degrees per cf convention. other minor updates
 per cf convention.

---
 geomagio/ChannelConverter.py     | 38 ++++++++++++++++++++++++++++++--
 geomagio/netcdf/NetCDFFactory.py | 32 ++++++++++++++++++++++-----
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/geomagio/ChannelConverter.py b/geomagio/ChannelConverter.py
index 0ca91bfd..3fc705ff 100644
--- a/geomagio/ChannelConverter.py
+++ b/geomagio/ChannelConverter.py
@@ -21,8 +21,8 @@ import numpy
 
 M2R = numpy.pi / 180 / 60  # Minutes to Radians
 R2M = 180.0 / numpy.pi * 60  # Radians to Minutes
-
-
+M2D = 1.0 / 60.0  # Minutes to Degrees
+D2M = 60.0  # Degrees to Minutes
 # ###
 # get geographic coordinates from....
 # ###
@@ -434,3 +434,37 @@ def get_minutes_from_radians(r):
         the radian value to be converted
     """
     return numpy.multiply(r, R2M)
+
+
+def get_degrees_from_minutes(m):
+    """
+    Converts angular minutes to degrees.
+
+    Parameters
+    ----------
+    m : float or array_like
+        The angular minutes to be converted.
+
+    Returns
+    -------
+    float or array_like
+        The corresponding degrees.
+    """
+    return numpy.multiply(m, M2D)
+
+
+def get_minutes_from_degrees(d):
+    """
+    Converts degrees to angular minutes.
+
+    Parameters
+    ----------
+    d : float or array_like
+        The degrees to be converted.
+
+    Returns
+    -------
+    float or array_like
+        The corresponding angular minutes.
+    """
+    return numpy.multiply(d, D2M)
diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py
index 590e1ead..3a92d580 100644
--- a/geomagio/netcdf/NetCDFFactory.py
+++ b/geomagio/netcdf/NetCDFFactory.py
@@ -6,6 +6,7 @@ import tempfile
 import shutil
 import os
 
+from geomagio import ChannelConverter
 from geomagio.TimeseriesFactory import TimeseriesFactory
 from geomagio.api.ws.Element import ELEMENT_INDEX
 
@@ -142,8 +143,6 @@ class NetCDFFactory(TimeseriesFactory):
         for ch in channels:
             channel_data[ch] = nc_dataset.variables[ch][:]
 
-        nc_dataset.close()
-
         # If no time points, return empty
         if len(times_epoch) == 0:
             print("No time points found in the NetCDF data.")
@@ -177,8 +176,19 @@ class NetCDFFactory(TimeseriesFactory):
 
             # Create Trace
             trace = Trace(data=np.array(data_array, dtype=float), header=stats)
+
+            # Normalize reported angular units (degrees or radians) to arcminutes
+            if channel_name in ["D", "I"]:
+                if nc_dataset.variables[channel_name].units.startswith("deg"):
+                    trace.data = ChannelConverter.get_minutes_from_degrees(trace.data)
+                elif nc_dataset.variables[channel_name].units.startswith("rad"):
+                    trace.data = ChannelConverter.get_minutes_from_radians(trace.data)
+                elif nc_dataset.variables[channel_name].units.startswith("arc"):
+                    pass  # nothing to do.
+
             stream.append(trace)
 
+        nc_dataset.close()
         return stream
 
     def write_file_from_buffer(self, fh, timeseries: Stream, channels=None):
@@ -198,6 +208,7 @@ class NetCDFFactory(TimeseriesFactory):
             # Create an empty NetCDF structure with minimal CF compliance
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.featureType = "timeSeries"  # per cf conventions
                 nc_dataset.Conventions = "CF-1.6"
             return
 
@@ -214,6 +225,7 @@ class NetCDFFactory(TimeseriesFactory):
             print("No matching channels found after filtering.")
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.featureType = "timeSeries"  # per cf conventions
                 nc_dataset.Conventions = "CF-1.6"
             return
 
@@ -239,6 +251,7 @@ class NetCDFFactory(TimeseriesFactory):
             # Define global attributes with CF compliance
             nc_dataset.Conventions = "CF-1.6"
             nc_dataset.title = "Geomagnetic Time Series Data"
+            nc_dataset.featureType = "timeSeries"  # per cf conventions
             nc_dataset.institution = getattr(stats, "agency_name", "")
             # nc_dataset.source = getattr(stats, "source", "")
             history_entry = f"Created on {datetime.now(timezone.utc).isoformat()}"
@@ -268,6 +281,7 @@ class NetCDFFactory(TimeseriesFactory):
             time_var.calendar = getattr(stats, "calendar", "standard")
             time_var.standard_name = "time"
             time_var.long_name = "Time"
+            time_var.axis = "T"
             time_var[:] = times_epoch
 
             # Assign valid range if available
@@ -278,6 +292,10 @@ class NetCDFFactory(TimeseriesFactory):
             # Create channel variables with CF attributes
             for trace in timeseries:
                 ch_name = trace.stats.channel.upper()
+                # Convert D from arcminutes to degrees for CF-compliant output
+                if "D" == ch_name:
+                    trace.data = ChannelConverter.get_degrees_from_minutes(trace.data)
+
                 if len(ch_name) > 63:
                     raise ValueError(
                         f"Channel name '{ch_name}' exceeds NetCDF variable name length limit."
@@ -300,9 +318,13 @@ class NetCDFFactory(TimeseriesFactory):
 
                 # Set CF attributes
                 var.units = units
-                var.long_name = getattr(
-                    trace.stats, "long_name", ELEMENT_INDEX.get(ch_name).name
-                )
+                if ELEMENT_INDEX.get(ch_name):
+                    # remove arcminutes, other things
+                    long_name = ELEMENT_INDEX.get(ch_name).name.replace(
+                        "(arcminutes)", ""
+                    )
+                    long_name = long_name.strip()
+                    var.long_name = getattr(trace.stats, "long_name", long_name)
                 var.standard_name = getattr(
                     trace.stats, "standard_name", f"geomagnetic_field_{ch_name.lower()}"
                 )
-- 
GitLab


From 158e289bafa87326d165ffc2cda9f3b1afe99bfe Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Wed, 15 Jan 2025 13:31:41 -0800
Subject: [PATCH 6/8] use package-level module import syntax via __init__.py

---
 geomagio/Controller.py      | 6 +++---
 geomagio/netcdf/__init__.py | 8 ++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 geomagio/netcdf/__init__.py

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index fae8902b..fab26a3f 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -7,7 +7,6 @@ from typing import List, Optional, Tuple, Union
 
 from obspy.core import Stream, UTCDateTime
 
-from geomagio.netcdf.NetCDFFactory import NetCDFFactory
 
 from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm
 from .DerivedTimeseriesFactory import DerivedTimeseriesFactory
@@ -27,6 +26,7 @@ from . import temperature
 from . import vbf
 from . import xml
 from . import covjson
+from . import netcdf
 
 
 class Controller(object):
@@ -550,7 +550,7 @@ def get_input_factory(args):
         if input_type == "iaga2002":
             input_factory = iaga2002.IAGA2002Factory(**input_factory_args)
         if input_type == "netcdf":
-            input_factory = NetCDFFactory(**input_factory_args)
+            input_factory = netcdf.NetCDFFactory(**input_factory_args)
         elif input_type == "imfv122":
             input_factory = imfv122.IMFV122Factory(**input_factory_args)
         elif input_type == "imfv283":
@@ -637,7 +637,7 @@ def get_output_factory(args):
         elif output_type == "iaga2002":
             output_factory = iaga2002.IAGA2002Factory(**output_factory_args)
         elif output_type == "netcdf":
-            output_factory = NetCDFFactory(**output_factory_args)
+            output_factory = netcdf.NetCDFFactory(**output_factory_args)
         elif output_type == "imfjson":
             output_factory = imfjson.IMFJSONFactory(**output_factory_args)
         elif output_type == "covjson":
diff --git a/geomagio/netcdf/__init__.py b/geomagio/netcdf/__init__.py
new file mode 100644
index 00000000..f2ccd864
--- /dev/null
+++ b/geomagio/netcdf/__init__.py
@@ -0,0 +1,8 @@
+"""IO Module for NetCDF Format
+"""
+
+from __future__ import absolute_import
+
+from .NetCDFFactory import NetCDFFactory
+
+__all__ = ["NetCDFFactory"]
-- 
GitLab


From f4416c8e97d56497f41281238cb78971d04ef6bc Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Wed, 15 Jan 2025 13:33:39 -0800
Subject: [PATCH 7/8] rebase + poetry lock

---
 poetry.lock | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index ad3771c9..3d7add0e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiomysql"
@@ -3047,4 +3047,4 @@ pycurl = ["pycurl"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8,<3.12"
-content-hash = "e74f5314dc78dc4ab24fd3afab056baa724eb022c689df183a91854cc9463ad8"
+content-hash = "de2ec42ee0160626455b58e0b8f67e2669ad3ef5e13d58bc4500d0b8a1643cb8"
-- 
GitLab


From 38548c4cda8d351a6422fddb68d5aa1036272148 Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Fri, 17 Jan 2025 10:38:02 -0800
Subject: [PATCH 8/8] lint + rebasing

---
 geomagio/Controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index fab26a3f..564b54c9 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -501,7 +501,7 @@ def get_input_factory(args):
     input_type = args.input
     # stream/url arguments
     if args.input_file is not None:
-        if input_type in ["netcdf", "miniseed"] :
+        if input_type in ["netcdf", "miniseed"]:
             input_stream = open(args.input_file, "rb")
         else:
             input_stream = open(args.input_file, "r")
-- 
GitLab