From 3af41f5ce83a46b629d69c1f1d0fb1b0d25254d9 Mon Sep 17 00:00:00 2001
From: Nicholas Shavers <nshavers@contractor.usgs.gov>
Date: Mon, 16 Dec 2024 15:21:29 -0800
Subject: [PATCH] final tweaks

---
 geomagio/Controller.py             |  1 +
 geomagio/imagcdf/ImagCDFFactory.py | 32 +++++++++++-------------------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/geomagio/Controller.py b/geomagio/Controller.py
index 826683f1..199befdd 100644
--- a/geomagio/Controller.py
+++ b/geomagio/Controller.py
@@ -573,6 +573,7 @@ def get_input_factory(args):
         elif input_type == "covjson":
             input_factory = covjson.CovJSONFactory(**input_factory_args)
         elif input_type == "imagcdf":
+            input_factory_args["output"] = args.output
             input_factory = ImagCDFFactory(**input_factory_args)
         # wrap stream
         if input_stream is not None:
diff --git a/geomagio/imagcdf/ImagCDFFactory.py b/geomagio/imagcdf/ImagCDFFactory.py
index de58ced1..f25d0c27 100644
--- a/geomagio/imagcdf/ImagCDFFactory.py
+++ b/geomagio/imagcdf/ImagCDFFactory.py
@@ -77,7 +77,9 @@ class ImagCDFFactory(TimeseriesFactory):
             urlInterval=urlInterval,
         )
         self.inputFile = inputFile
-        self.output = output
+        self.output = (
+            output if output in ["iaga2002", "imfjson"] else "iaga2002"
+        )  # determines mapping of publication level to data type
 
     def write_file(self, fh, timeseries: Stream, channels: List[str]):
         # Create a temporary file to write the CDF data
@@ -87,9 +89,9 @@ class ImagCDFFactory(TimeseriesFactory):
         try:
             # Initialize the CDF writer
             cdf_spec = {
-                "Compressed": self.__MAX_COMPRESSION,  # Max Gzip compression (1-9). Almost always the GZIP is the best choice for all data. (CDF User Guide p.23 1.4.3 )
+                "Compressed": self.__MAX_COMPRESSION,  # Max Gzip compression (1-9). Almost always the GZIP is the best choice for all data. (CDF User Guide p.23 1.4.3)
                 "Majority": CDFWriter.ROW_MAJOR,  # The first dimension changes the slowest (CDF User Guide p.45 2.3.15 Majority)
-                "Encoding": CDFWriter.NETWORK_ENCODING,  #  XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded.
+                "Encoding": CDFWriter.NETWORK_ENCODING,  #  XDR Encoding - portable between two or more different types of computers.
                 "Checksum": True,  # True for Data Integrity. False for faster writes (optional)
                 "rDim_sizes": [],  # Applicable only if using rVariables - CDF protocol recommends only using zVariables.
             }
@@ -239,7 +241,7 @@ class ImagCDFFactory(TimeseriesFactory):
                     starttime=interval_start,
                     endtime=interval_end,
                 )
-                # Check if the file already exists to merge data
+                # Refuse to overwrite: raise if the file already exists
                 if os.path.isfile(url_file):
                     raise TimeseriesFactoryException(
                         f"Error: File '{url_file}' already exists."
@@ -250,7 +252,7 @@ class ImagCDFFactory(TimeseriesFactory):
                     endtime=interval_end,
                     nearest_sample=True,
                     pad=True,
-                    fill_value=self.__FILL_VALUE,  # FILLVAL
+                    fill_value=self.__FILL_VALUE,
                 )
 
                 # Write the data to the CDF file
@@ -366,12 +368,11 @@ class ImagCDFFactory(TimeseriesFactory):
         elevation = getattr(stats, "elevation", None) or self.__FILL_VALUE
         conditions_of_use = getattr(stats, "conditions_of_use", None) or ""
         vector_orientation = getattr(stats, "sensor_orientation", None) or ""
-        data_interval_type = getattr(stats, "data_interval_type", None) or self.interval
         data_type = getattr(stats, "data_type", None) or "variation"
         sensor_sampling_rate = getattr(stats, "sensor_sampling_rate", None) or 0.0
         comments = getattr(stats, "filter_comments", None) or [""]
         declination_base = getattr(stats, "declination_base", None) or 0.0
-        publication_level = IMCDFPublicationLevel(data_type=self.type).get_level()
+        publication_level = IMCDFPublicationLevel(value=self.type).get_level()
         global_attrs = {
             "FormatDescription": {0: "INTERMAGNET CDF Format"},
             "FormatVersion": {0: "1.2"},
@@ -627,7 +628,7 @@ class ImagCDFFactory(TimeseriesFactory):
         )
 
         sensor_sampling_rate = global_attrs.get("SensorSamplingRate", [None])[0]
-        comments = global_attrs.get("Comments", [None])  # keep comments as an array
+        comments = global_attrs.get("Comments", [])  # keep comments as an array
         declination_base = global_attrs.get("DeclinationBase", [None])[0]
         network = global_attrs.get("Network", [None])[0]
 
@@ -666,9 +667,6 @@ class ImagCDFFactory(TimeseriesFactory):
 
             # Determine DEPEND_0 (the time variable name) and validate
             ts_name = attrs.get("DEPEND_0")
-            # if not ts_name:
-            #     # If no DEPEND_0, skip this variable as we cannot map times
-            #     continue
 
             # The ImagCDF can have DataTimes, GeomagneticVectorTimes, GeomagneticScalarTimes, TemperatureNTimes (for N > 0), etc.
             matched_time_key = None
@@ -677,9 +675,6 @@ class ImagCDFFactory(TimeseriesFactory):
                     matched_time_key = tkey
                     break
 
-            # if matched_time_key not in time_vars:
-            #     # If we cannot find the matching time variable, skip this variable
-            #     continue
             times = []
             if matched_time_key in time_vars:
                 times = time_vars[matched_time_key]
@@ -749,7 +744,7 @@ class ImagCDFFactory(TimeseriesFactory):
                 header.update({"data_interval_type": data_interval})
             if declination_base is not None:
                 header.update({"declination_base": declination_base})
-            if comments is not None:
+            if len(comments) > 0:
                 header.update({"filter_comments": comments})
             if network is not None:
                 header.update({"network": network})
@@ -775,8 +770,7 @@ class ImagCDFFactory(TimeseriesFactory):
         Generate the file URL specific to ImagCDF conventions.
 
         This method constructs the filename based on the ImagCDF naming
-        conventions, which include the observatory code, date-time formatted
-        according to the data interval, and the publication level.
+        conventions.
 
         [iaga-code]_[date-time]_[publication-level].cdf
 
@@ -794,9 +788,8 @@ class ImagCDFFactory(TimeseriesFactory):
         - ImagCDF Technical Documentation: ImagCDF File Names
         """
         # Get the publication level for the type
-        publication_level = IMCDFPublicationLevel(data_type=type).get_level()
+        publication_level = IMCDFPublicationLevel(value=type).get_level()
 
-        # Format of Date/Time Portion of Filename based on interval see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#example-data-file:~:text=Format%20of%20Date,%EF%83%81
         if interval == "year":
             date_format = date.strftime("%Y")
         elif interval == "month":
@@ -814,7 +807,6 @@ class ImagCDFFactory(TimeseriesFactory):
                 f"Unsupported interval: {interval}"
             )  # tenhertz currently not supported
 
-        # Filename following ImagCDF convention, see reference: https://tech-man.intermagnet.org/latest/appendices/dataformats.html#imagcdf-file-names
         filename = f"{observatory.lower()}_{date_format}_{publication_level}.cdf"
 
         # If the urlTemplate explicitly specifies 'stdout', return 'stdout'
-- 
GitLab