From 7d8c33375fe947b811302cb9ed3176e313af0053 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 30 Dec 2024 17:30:14 -0800 Subject: [PATCH 1/8] netcdf poc, works --- geomagio/Controller.py | 8 + geomagio/netcdf/NetCDFFactory.py | 259 +++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 geomagio/netcdf/NetCDFFactory.py diff --git a/geomagio/Controller.py b/geomagio/Controller.py index dce6b919..6b7c336f 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -7,6 +7,8 @@ from typing import List, Optional, Tuple, Union from obspy.core import Stream, UTCDateTime +from geomagio.netcdf.NetCDFFactory import NetCDFFactory + from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm from .DerivedTimeseriesFactory import DerivedTimeseriesFactory from .PlotTimeseriesFactory import PlotTimeseriesFactory @@ -547,6 +549,8 @@ def get_input_factory(args): # stream compatible factories if input_type == "iaga2002": input_factory = iaga2002.IAGA2002Factory(**input_factory_args) + if input_type == "netcdf": + input_factory = NetCDFFactory(**input_factory_args) elif input_type == "imfv122": input_factory = imfv122.IMFV122Factory(**input_factory_args) elif input_type == "imfv283": @@ -632,6 +636,8 @@ def get_output_factory(args): output_factory = binlog.BinLogFactory(**output_factory_args) elif output_type == "iaga2002": output_factory = iaga2002.IAGA2002Factory(**output_factory_args) + elif output_type == "netcdf": + output_factory = NetCDFFactory(**output_factory_args) elif output_type == "imfjson": output_factory = imfjson.IMFJSONFactory(**output_factory_args) elif output_type == "covjson": @@ -826,6 +832,7 @@ def parse_args(args): "pcdcp", "xml", "covjson", + "netcdf", ), default="edge", help='Input format (Default "edge")', @@ -1032,6 +1039,7 @@ def parse_args(args): "vbf", "xml", "covjson", + "netcdf", ), # TODO: set default to 'iaga2002' help="Output format", diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py new file mode 100644 index 00000000..4c877e7b --- /dev/null +++ b/geomagio/netcdf/NetCDFFactory.py @@ -0,0 +1,259 @@ +import netCDF4 +import numpy as np +from obspy import Stream, Trace, UTCDateTime +from datetime import datetime, timezone +from io import BytesIO + +from geomagio.TimeseriesFactory import TimeseriesFactory + + +class NetCDFFactory(TimeseriesFactory): + """Factory for reading/writing NetCDF format Geomagnetic Data using numeric epoch times.""" + + def __init__(self, **kwargs): + """ + Initializes the NetCDFFactory. + + Parameters + ---------- + **kwargs : dict + Additional keyword arguments for the base TimeseriesFactory. + """ + super().__init__(**kwargs) + self.time_format = "numeric" # Fixed to numeric epoch times + + def parse_string(self, data: bytes, **kwargs): + """ + Parse a NetCDF byte string into an ObsPy Stream. + + The NetCDF file should follow a structure with global attributes for metadata + and variables for each geomagnetic component, including a 'time' variable + representing epoch times in seconds. + + Parameters + ---------- + data : bytes + Byte content of the NetCDF file. + + Returns + ------- + Stream + An ObsPy Stream object containing the parsed data. Returns an empty Stream if parsing fails. 
+ """ + try: + # Create a NetCDF dataset from the byte data + nc_dataset = netCDF4.Dataset("inmemory", mode="r", memory=data) + except Exception as e: + print(f"Failed to parse NetCDF data: {e}") + return Stream() + + try: + # Extract global attributes + institute = getattr(nc_dataset, "institute_name", "") + observatory = getattr(nc_dataset, "observatory_name", "") + observatory_code = getattr(nc_dataset, "observatory_code", "") + latitude = getattr(nc_dataset, "sensor_latitude", 0.0) + longitude = getattr(nc_dataset, "sensor_longitude", 0.0) + elevation = getattr(nc_dataset, "sensor_elevation", 0.0) + sensor_orientation = getattr(nc_dataset, "sensor_orientation", "") + original_sample_rate = getattr(nc_dataset, "original_sample_rate", "") + data_type = getattr(nc_dataset, "data_type", "") + start_time_num = getattr( + nc_dataset, "start_time", 0.0 + ) # Seconds since epoch + sample_period = getattr(nc_dataset, "sample_period", 1.0) # In milliseconds + + # Convert numeric start time to UTCDateTime + try: + starttime = UTCDateTime(start_time_num) + except Exception: + starttime = UTCDateTime() + + # Extract time variable + if "time" not in nc_dataset.variables: + print("No 'time' variable found in NetCDF data.") + return Stream() + + time_var = nc_dataset.variables["time"] + times_epoch = time_var[:] # Numeric epoch times in seconds + + # Extract channel data + channel_names = [var for var in nc_dataset.variables if var != "time"] + channel_data = {} + for ch in channel_names: + data_array = nc_dataset.variables[ch][:] + # Ensure data is a 1D array + channel_data[ch] = data_array.flatten() + + nc_dataset.close() + except Exception as e: + print(f"Error while extracting data from NetCDF: {e}") + return Stream() + + # Create Stream and Traces + stream = Stream() + npts = len(times_epoch) + if npts == 0: + print("No time points found in NetCDF data.") + return stream + + # Calculate delta as the average sampling interval + if npts > 1: + delta = (times_epoch[-1] - times_epoch[0]) / (npts - 1) + else: + delta = 1.0 # Default to 1 second if only one point + + for ch_name, values in channel_data.items(): + if len(values) != npts: + print( + f"Channel '{ch_name}' has mismatched data length. Expected {npts}, got {len(values)}. Skipping." + ) + continue + data_array = np.array(values, dtype=float) + try: + trace_starttime = UTCDateTime(times_epoch[0]) + except Exception: + trace_starttime = UTCDateTime() + + stats = { + "network": "", + "station": observatory_code, + "channel": ch_name, + "starttime": trace_starttime, + "npts": npts, + "sampling_rate": 1.0 / delta if delta != 0 else 1.0, + "geodetic_longitude": longitude, + "geodetic_latitude": latitude, + "elevation": elevation, + "station_name": observatory, + "data_type": data_type, + "data_interval_type": original_sample_rate, + } + + trace = Trace(data=data_array, header=stats) + stream += trace + + return stream + + def write_file_from_buffer(self, fh, timeseries: Stream, channels=None): + """ + Write an ObsPy Stream to a NetCDF file. + + The NetCDF file will contain global attributes for metadata and variables + for each geomagnetic component, including a 'time' variable representing + epoch times in seconds. + + Parameters + ---------- + fh : file-like object + File handle to write the NetCDF data. + timeseries : Stream + ObsPy Stream object containing the data to write. + channels : list, optional + List of channel names to include. If None, all channels are included. 
+        """
+        if not timeseries or len(timeseries) == 0:
+            # Create an empty NetCDF structure
+            with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+                nc_dataset.description = "Empty Geomagnetic Time Series Data"
+            return
+
+        timeseries.merge()
+
+        # Filter channels if specified
+        if channels:
+            # Ensure channel names are uppercase for consistency
+            channels = [ch.upper() for ch in channels]
+            # Manually filter the Stream to include only desired channels
+            timeseries = Stream(
+                [tr for tr in timeseries if tr.stats.channel.upper() in channels]
+            )
+
+        if len(timeseries) == 0:
+            print("No matching channels found after filtering.")
+            with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+                nc_dataset.description = (
+                    "Empty Geomagnetic Time Series Data after channel filtering"
+                )
+            return
+
+        timeseries.sort(keys=["starttime"])
+        tr = timeseries[0]
+        stats = tr.stats
+
+        station = stats.station or ""
+        lat = float(getattr(stats, "geodetic_latitude", 0.0))
+        lon = float(getattr(stats, "geodetic_longitude", 0.0))
+        alt = float(getattr(stats, "elevation", 0.0))
+        data_type = getattr(stats, "data_type", "unknown")
+        data_interval_type = getattr(stats, "data_interval_type", "unknown")
+        station_name = getattr(stats, "station_name", station)
+        sensor_orientation = getattr(stats, "sensor_orientation", "")
+        original_sample_rate = getattr(stats, "original_sample_rate", "")
+        sample_period = getattr(stats, "sampling_period", 1.0)  # In milliseconds
+
+        npts = tr.stats.npts
+        delta = tr.stats.delta
+        starttime = tr.stats.starttime
+
+        # Generate time values as epoch seconds
+        times_epoch = np.array([starttime.timestamp + i * delta for i in range(npts)])
+
+        # Create NetCDF dataset
+        with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
+            # Define the time dimension
+            nc_dataset.createDimension("time", npts)
+
+            # Create the time variable
+            time_var = nc_dataset.createVariable("time", "f8", ("time",))
+            time_var.units = "seconds since 1970-01-01T00:00:00Z"
+            time_var.calendar = "standard"
+            time_var[:] = times_epoch
+
+            # Create channel variables
+            for trace in timeseries:
+                ch_name = trace.stats.channel.upper()
+                # Ensure channel name is a valid NetCDF variable name
+                ch_name_nc = ch_name.replace("-", "_").replace(" ", "_")
+                var = nc_dataset.createVariable(ch_name_nc, "f8", ("time",))
+                var.units = (
+                    "nT"  # non-standard elements such as temperature are not considered.
+ ) + var.long_name = f"Geomagnetic component {ch_name}" + var[:] = trace.data + + # Set global attributes + nc_dataset.institute_name = getattr(stats, "agency_name", "") + nc_dataset.observatory_name = station_name + nc_dataset.observatory_code = station + nc_dataset.sensor_latitude = lat + nc_dataset.sensor_longitude = lon + nc_dataset.sensor_elevation = alt + nc_dataset.sensor_orientation = sensor_orientation + nc_dataset.original_sample_rate = original_sample_rate + nc_dataset.data_type = data_type + nc_dataset.start_time = starttime.timestamp # Numeric epoch time + nc_dataset.sample_period = sample_period # In milliseconds + + # Optional global attributes + nc_dataset.history = f"Created on {datetime.now(timezone.utc).isoformat()}" + + def write_file(self, fh, timeseries: Stream, channels=None): + import tempfile + import shutil + import os + + # Create a temporary file + with tempfile.NamedTemporaryFile(delete=False) as tmp: + tmp_filepath = tmp.name + + try: + # Write to the temporary file using the existing write_file method + self.write_file_from_buffer(tmp_filepath, timeseries, channels) + + # Read the temporary file content + with open(tmp_filepath, "rb") as tmp: + shutil.copyfileobj(tmp, fh) + finally: + # Clean up the temporary file + os.remove(tmp_filepath) -- GitLab From e0ee44a5772d6aa58e9cc4030d556498a5e429a1 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 30 Dec 2024 17:35:51 -0800 Subject: [PATCH 2/8] netcdf poc poetry.lock files --- poetry.lock | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 2 files changed, 96 insertions(+) diff --git a/poetry.lock b/poetry.lock index 5946b52d..ad3771c9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -243,6 +243,54 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "cftime" +version = "1.6.4.post1" +description = "Time-handling functionality from netcdf4-python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "cftime-1.6.4.post1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0baa9bc4850929da9f92c25329aa1f651e2d6f23e237504f337ee9e12a769f5d"}, + {file = "cftime-1.6.4.post1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6bb6b087f4b2513c37670bccd457e2a666ca489c5f2aad6e2c0e94604dc1b5b9"}, + {file = "cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d9bdeb9174962c9ca00015190bfd693de6b0ec3ec0b3dbc35c693a4f48efdcc"}, + {file = "cftime-1.6.4.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e735cfd544878eb94d0108ff5a093bd1a332dba90f979a31a357756d609a90d5"}, + {file = "cftime-1.6.4.post1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1dcd1b140bf50da6775c56bd7ca179e84bd258b2f159b53eefd5c514b341f2bf"}, + {file = "cftime-1.6.4.post1-cp310-cp310-win_amd64.whl", hash = "sha256:e60b8f24b20753f7548f410f7510e28b941f336f84bd34e3cfd7874af6e70281"}, + {file = "cftime-1.6.4.post1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1bf7be0a0afc87628cb8c8483412aac6e48e83877004faa0936afb5bf8a877ba"}, + {file = "cftime-1.6.4.post1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0f64ca83acc4e3029f737bf3a32530ffa1fbf53124f5bee70b47548bc58671a7"}, + {file = "cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7ebdfd81726b0cfb8b524309224fa952898dfa177c13d5f6af5b18cefbf497d"}, + {file = "cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c9ea0965a4c87739aebd84fe8eed966e5809d10065eeffd35c99c274b6f8da15"}, + {file = "cftime-1.6.4.post1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:800a18aea4e8cb2b206450397cb8a53b154798738af3cdd3c922ce1ca198b0e6"}, + {file = "cftime-1.6.4.post1-cp311-cp311-win_amd64.whl", hash = "sha256:5dcfc872f455db1f12eabe3c3ba98e93757cd60ed3526a53246e966ccde46c8a"}, + {file = "cftime-1.6.4.post1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a590f73506f4704ba5e154ef55bfbaed5e1b4ac170f3caeb8c58e4f2c619ee4e"}, + {file = "cftime-1.6.4.post1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:933cb10e1af4e362e77f513e3eb92b34a688729ddbf938bbdfa5ac20a7f44ba0"}, + {file = "cftime-1.6.4.post1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf17a1b36f62e9e73c4c9363dd811e1bbf1170f5ac26d343fb26012ccf482908"}, + {file = "cftime-1.6.4.post1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e18021f421aa26527bad8688c1acf0c85fa72730beb6efce969c316743294f2"}, + {file = "cftime-1.6.4.post1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5835b9d622f9304d1c23a35603a0f068739f428d902860f25e6e7e5a1b7cd8ea"}, + {file = "cftime-1.6.4.post1-cp312-cp312-win_amd64.whl", hash = "sha256:7f50bf0d1b664924aaee636eb2933746b942417d1f8b82ab6c1f6e8ba0da6885"}, + {file = "cftime-1.6.4.post1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5c89766ebf088c097832ea618c24ed5075331f0b7bf8e9c2d4144aefbf2f1850"}, + {file = "cftime-1.6.4.post1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7f27113f7ccd1ca32881fdcb9a4bec806a5f54ae621fc1c374f1171f3ed98ef2"}, + {file = "cftime-1.6.4.post1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da367b23eea7cf4df071c88e014a1600d6c5bbf22e3393a4af409903fa397e28"}, + {file = "cftime-1.6.4.post1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6579c5c83cdf09d73aa94c7bc34925edd93c5f2c7dd28e074f568f7e376271a0"}, + {file = "cftime-1.6.4.post1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6b731c7133d17b479ca0c3c46a7a04f96197f0a4d753f4c2284c3ff0447279b4"}, + {file = "cftime-1.6.4.post1-cp313-cp313-win_amd64.whl", hash = "sha256:d2a8c223faea7f1248ab469cc0d7795dd46f2a423789038f439fee7190bae259"}, + {file = "cftime-1.6.4.post1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9df3e2d49e548c62d1939e923800b08d2ab732d3ac8d75b857edd7982c878552"}, + {file = "cftime-1.6.4.post1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2892b7e7654142d825655f60eb66c3e1af745901890316907071d44cf9a18d8a"}, + {file = "cftime-1.6.4.post1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4ab54e6c04e68395d454cd4001188fc4ade2fe48035589ed65af80c4527ef08"}, + {file = "cftime-1.6.4.post1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:568b69fc52f406e361db62a4d7a219c6fb0ced138937144c3b3a511648dd6c50"}, + {file = "cftime-1.6.4.post1-cp38-cp38-win_amd64.whl", hash = "sha256:640911d2629f4a8f81f6bc0163a983b6b94f86d1007449b8cbfd926136cda253"}, + {file = "cftime-1.6.4.post1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44e9f8052600803b55f8cb6bcac2be49405c21efa900ec77a9fb7f692db2f7a6"}, + {file = "cftime-1.6.4.post1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a90b6ef4a3fc65322c212a2c99cec75d1886f1ebaf0ff6189f7b327566762222"}, + {file = "cftime-1.6.4.post1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652700130dbcca3ae36dbb5b61ff360e62aa09fabcabc42ec521091a14389901"}, + {file = 
"cftime-1.6.4.post1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24a7fb6cc541a027dab37fdeb695f8a2b21cd7d200be606f81b5abc38f2391e2"}, + {file = "cftime-1.6.4.post1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fc2c0abe2dbd147e1b1e6d0f3de19a5ea8b04956acc204830fd8418066090989"}, + {file = "cftime-1.6.4.post1-cp39-cp39-win_amd64.whl", hash = "sha256:0ee2f5af8643aa1b47b7e388763a1a6e0dc05558cd2902cffb9cbcf954397648"}, + {file = "cftime-1.6.4.post1.tar.gz", hash = "sha256:50ac76cc9f10ab7bd46e44a71c51a6927051b499b4407df4f29ab13d741b942f"}, +] + +[package.dependencies] +numpy = {version = ">1.13.3", markers = "python_version < \"3.12.0.rc1\""} + [[package]] name = "charset-normalizer" version = "3.4.1" @@ -1570,6 +1618,53 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "netcdf4" +version = "1.7.2" +description = "Provides an object-oriented python interface to the netCDF version 4 library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "netCDF4-1.7.2-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:5e9b485e3bd9294d25ff7dc9addefce42b3d23c1ee7e3627605277d159819392"}, + {file = "netCDF4-1.7.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:118b476fd00d7e3ab9aa7771186d547da645ae3b49c0c7bdab866793ebf22f07"}, + {file = "netCDF4-1.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe5b1837ff209185ecfe50bd71884c866b3ee69691051833e410e57f177e059"}, + {file = "netCDF4-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28021c7e886e5bccf9a8ce504c032d1d7f98d86f67495fb7cf2c9564eba04510"}, + {file = "netCDF4-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:7460b638e41c8ce4179d082a81cb6456f0ce083d4d959f4d9e87a95cd86f64cb"}, + {file = "netCDF4-1.7.2-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:09d61c2ddb6011afb51e77ea0f25cd0bdc28887fb426ffbbc661d920f20c9749"}, + {file = "netCDF4-1.7.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:fd2a16dbddeb8fa7cf48c37bfc1967290332f2862bb82f984eec2007bb120aeb"}, + {file = "netCDF4-1.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f54f5d39ffbcf1726a1e6fd90cb5fa74277ecea739a5fa0f424636d71beafe24"}, + {file = "netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:902aa50d70f49d002d896212a171d344c38f7b8ca520837c56c922ac1535c4a3"}, + {file = "netCDF4-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:3291f9ad0c98c49a4dd16aefad1a9abd3a1b884171db6c81bdcee94671cfabe3"}, + {file = "netCDF4-1.7.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:e73e3baa0b74afc414e53ff5095748fdbec7fb346eda351e567c23f2f0d247f1"}, + {file = "netCDF4-1.7.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a51da09258b31776f474c1d47e484fc7214914cdc59edf4cee789ba632184591"}, + {file = "netCDF4-1.7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb95b11804fe051897d1f2044b05d82a1847bc2549631cdd2f655dde7de77a9c"}, + {file = "netCDF4-1.7.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d8a848373723f41ef662590b4f5e1832227501c9fd4513e8ad8da58c269977"}, + {file = "netCDF4-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:568ea369e00b581302d77fc5fd0b8f78e520c7e08d0b5af5219ba51f3f1cd694"}, + {file = "netCDF4-1.7.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:205a5f1de3ddb993c7c97fb204a923a22408cc2e5facf08d75a8eb89b3e7e1a8"}, + {file = 
"netCDF4-1.7.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:96653fc75057df196010818367c63ba6d7e9af603df0a7fe43fcdad3fe0e9e56"}, + {file = "netCDF4-1.7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30d20e56b9ba2c48884eb89c91b63e6c0612b4927881707e34402719153ef17f"}, + {file = "netCDF4-1.7.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d6bfd38ba0bde04d56f06c1554714a2ea9dab75811c89450dc3ec57a9d36b80"}, + {file = "netCDF4-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:5c5fbee6134ee1246c397e1508e5297d825aa19221fdf3fa8dc9727ad824d7a5"}, + {file = "netCDF4-1.7.2-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:6bf402c2c7c063474576e5cf89af877d0b0cd097d9316d5bc4fcb22b62f12567"}, + {file = "netCDF4-1.7.2-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:5bdf3b34e6fd4210e34fdc5d1a669a22c4863d96f8a20a3928366acae7b3cbbb"}, + {file = "netCDF4-1.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:657774404b9f78a5e4d26506ac9bfe106e4a37238282a70803cc7ce679c5a6cc"}, + {file = "netCDF4-1.7.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e896d92f01fbf365e33e2513d5a8c4cfe16ff406aae9b6034e5ba1538c8c7a8"}, + {file = "netCDF4-1.7.2-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:eb87c08d1700fe67c301898cf5ba3a3e1f8f2fbb417fcd0e2ac784846b60b058"}, + {file = "netCDF4-1.7.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:59b403032774c723ee749d7f2135be311bad7d00d1db284bebfab58b9d5cdb92"}, + {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:572f71459ef4b30e8554dcc4e1e6f55de515acc82a50968b48fe622244a64548"}, + {file = "netCDF4-1.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f77e72281acc5f331f82271e5f7f014d46f5ca9bcaa5aafe3e46d66cee21320"}, + {file = "netCDF4-1.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:d0fa7a9674fae8ae4877e813173c3ff7a6beee166b8730bdc847f517b282ed31"}, + {file = "netcdf4-1.7.2.tar.gz", hash = "sha256:a4c6375540b19989896136943abb6d44850ff6f1fa7d3f063253b1ad3f8b7fce"}, +] + +[package.dependencies] +certifi = "*" +cftime = "*" +numpy = "*" + +[package.extras] +tests = ["Cython", "packaging", "pytest"] + [[package]] name = "numpy" version = "1.24.4" diff --git a/pyproject.toml b/pyproject.toml index ec3207c2..c163d9b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ httpx = "0.28.1" SQLAlchemy = "1.4.41" SQLAlchemy-Utc = "^0.14.0" uvicorn = {extras = ["standard"], version = "^0.22.0"} +netcdf4 = "^1.7.2" [tool.poetry.dev-dependencies] -- GitLab From 87823bcdf72f81b9f12c8221393940cda50c929d Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Thu, 2 Jan 2025 16:20:35 -0800 Subject: [PATCH 3/8] updates for CF compliance. 
more metadata extracted --- geomagio/Controller.py | 2 +- geomagio/netcdf/NetCDFFactory.py | 376 +++++++++++++++++++------------ 2 files changed, 235 insertions(+), 143 deletions(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index 6b7c336f..fae8902b 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -501,7 +501,7 @@ def get_input_factory(args): input_type = args.input # stream/url arguments if args.input_file is not None: - if input_type == "miniseed": + if input_type in ["netcdf", "miniseed"] : input_stream = open(args.input_file, "rb") else: input_stream = open(args.input_file, "r") diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py index 4c877e7b..920c9d8c 100644 --- a/geomagio/netcdf/NetCDFFactory.py +++ b/geomagio/netcdf/NetCDFFactory.py @@ -2,34 +2,84 @@ import netCDF4 import numpy as np from obspy import Stream, Trace, UTCDateTime from datetime import datetime, timezone -from io import BytesIO +import tempfile +import shutil +import os from geomagio.TimeseriesFactory import TimeseriesFactory +from geomagio.api.ws.Element import ELEMENT_INDEX class NetCDFFactory(TimeseriesFactory): """Factory for reading/writing NetCDF format Geomagnetic Data using numeric epoch times.""" + # Define temperature element IDs + TEMPERATURE_ELEMENTS_ID = {"UK1", "UK2", "UK3"} + + # Define the list of optional global attributes to handle + OPTIONAL_GLOBAL_ATTRS = [ + "data_interval_type", + "data_type", + "station_name", + "station", + "network", + "location", + "declination_base", + "sensor_orientation", + "sensor_sampling_rate", + "sample_period", + "conditions_of_use", + ] + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.time_format = "numeric" # Fixed to numeric epoch times + + def _get_var_units_validmin_validmax(self, channel: str): """ - Initializes the NetCDFFactory. + Determine units, valid_min, and valid_max based on channel name. Parameters ---------- - **kwargs : dict - Additional keyword arguments for the base TimeseriesFactory. + channel : str + The channel name. + + Returns + ------- + tuple + A tuple containing (units, valid_min, valid_max). """ - super().__init__(**kwargs) - self.time_format = "numeric" # Fixed to numeric epoch times + channel = channel.upper() + if channel == "D": + units = "degree" + valid_min = -360.0 + valid_max = 360.0 + elif channel == "I": + units = "degree" + valid_min = -90.0 + valid_max = 90.0 + # elif channel in self.TEMPERATURE_ELEMENTS_ID: + # units = "degree_Celsius" + # valid_min = -273.15 + # valid_max = 79999.0 + elif channel in ["F", "S"]: + units = "nT" + valid_min = 0.0 + valid_max = 79999.0 + elif channel in ["X", "Y", "Z", "H", "E", "V", "G"]: + units = "nT" + valid_min = -79999.0 + valid_max = 79999.0 + else: + units = getattr(channel, "units", "") + valid_min = getattr(channel, "valid_min", np.nan) + valid_max = getattr(channel, "valid_max", np.nan) + return units, valid_min, valid_max def parse_string(self, data: bytes, **kwargs): """ Parse a NetCDF byte string into an ObsPy Stream. - The NetCDF file should follow a structure with global attributes for metadata - and variables for each geomagnetic component, including a 'time' variable - representing epoch times in seconds. - Parameters ---------- data : bytes @@ -38,133 +88,124 @@ class NetCDFFactory(TimeseriesFactory): Returns ------- Stream - An ObsPy Stream object containing the parsed data. Returns an empty Stream if parsing fails. + An ObsPy Stream object containing the parsed data. 
""" try: - # Create a NetCDF dataset from the byte data nc_dataset = netCDF4.Dataset("inmemory", mode="r", memory=data) except Exception as e: - print(f"Failed to parse NetCDF data: {e}") + print(f"Error loading NetCDF data: {e}") return Stream() - try: - # Extract global attributes - institute = getattr(nc_dataset, "institute_name", "") - observatory = getattr(nc_dataset, "observatory_name", "") - observatory_code = getattr(nc_dataset, "observatory_code", "") - latitude = getattr(nc_dataset, "sensor_latitude", 0.0) - longitude = getattr(nc_dataset, "sensor_longitude", 0.0) - elevation = getattr(nc_dataset, "sensor_elevation", 0.0) - sensor_orientation = getattr(nc_dataset, "sensor_orientation", "") - original_sample_rate = getattr(nc_dataset, "original_sample_rate", "") - data_type = getattr(nc_dataset, "data_type", "") - start_time_num = getattr( - nc_dataset, "start_time", 0.0 - ) # Seconds since epoch - sample_period = getattr(nc_dataset, "sample_period", 1.0) # In milliseconds - - # Convert numeric start time to UTCDateTime - try: - starttime = UTCDateTime(start_time_num) - except Exception: - starttime = UTCDateTime() - - # Extract time variable - if "time" not in nc_dataset.variables: - print("No 'time' variable found in NetCDF data.") - return Stream() - - time_var = nc_dataset.variables["time"] - times_epoch = time_var[:] # Numeric epoch times in seconds - - # Extract channel data - channel_names = [var for var in nc_dataset.variables if var != "time"] - channel_data = {} - for ch in channel_names: - data_array = nc_dataset.variables[ch][:] - # Ensure data is a 1D array - channel_data[ch] = data_array.flatten() - + # Map from known NetCDF attributes to the stats keys we actually use + global_attrs_map = { + "institution": "agency_name", + "conditions_of_use": "conditions_of_use", + "data_interval": "data_interval", + "data_interval_type": "data_interval_type", + "data_type": "data_type", + "station_name": "station_name", + "station": "station", + "network": "network", + "location": "location", + "declination_base": "declination_base", + "sensor_orientation": "sensor_orientation", + "sensor_sampling_rate": "sensor_sampling_rate", + "latitude": "geodetic_latitude", + "longitude": "geodetic_longitude", + "elevation": "elevation", + } + + # Extract recognized global attributes + global_attrs = {} + for nc_attr_name, stats_attr_name in global_attrs_map.items(): + val = getattr(nc_dataset, nc_attr_name, None) + if val is not None: + global_attrs[stats_attr_name] = val + + # Convert "comment" to filter_comments array if present + comment = getattr(nc_dataset, "comment", None) + if comment: + global_attrs["filter_comments"] = [comment] + + # Check for time variable + if "time" not in nc_dataset.variables: + print("No 'time' variable found in NetCDF data.") nc_dataset.close() - except Exception as e: - print(f"Error while extracting data from NetCDF: {e}") return Stream() - # Create Stream and Traces - stream = Stream() - npts = len(times_epoch) - if npts == 0: - print("No time points found in NetCDF data.") - return stream - - # Calculate delta as the average sampling interval - if npts > 1: - delta = (times_epoch[-1] - times_epoch[0]) / (npts - 1) - else: - delta = 1.0 # Default to 1 second if only one point + time_var = nc_dataset.variables["time"] + times_epoch = time_var[:] - for ch_name, values in channel_data.items(): - if len(values) != npts: - print( - f"Channel '{ch_name}' has mismatched data length. Expected {npts}, got {len(values)}. Skipping." 
-                )
+        # Collect the channels (all variables except 'time')
+        channels = [v for v in nc_dataset.variables if v != "time"]
+        channel_data = {}
+        for ch in channels:
+            channel_data[ch] = nc_dataset.variables[ch][:]
+
+        nc_dataset.close()
+
+        # If no time points, return empty
+        if len(times_epoch) == 0:
+            print("No time points found in the NetCDF data.")
+            return Stream()
+
+        # Calculate sampling interval
+        delta = (
+            (times_epoch[-1] - times_epoch[0]) / (len(times_epoch) - 1)
+            if len(times_epoch) > 1
+            else 1.0
+        )
+        starttime = UTCDateTime(times_epoch[0])
+        sampling_rate = 1.0 / delta if delta else 0
+
+        stream = Stream()
+        for channel_name, data_array in channel_data.items():
+            # Data length should match the number of time points
+            if len(data_array) != len(times_epoch):
+                print(f"Channel '{channel_name}' has mismatched length. Skipping.")
                 continue
-            data_array = np.array(values, dtype=float)
-            try:
-                trace_starttime = UTCDateTime(times_epoch[0])
-            except Exception:
-                trace_starttime = UTCDateTime()
-
-            stats = {
-                "network": "",
-                "station": observatory_code,
-                "channel": ch_name,
-                "starttime": trace_starttime,
-                "npts": npts,
-                "sampling_rate": 1.0 / delta if delta != 0 else 1.0,
-                "geodetic_longitude": longitude,
-                "geodetic_latitude": latitude,
-                "elevation": elevation,
-                "station_name": observatory,
-                "data_type": data_type,
-                "data_interval_type": original_sample_rate,
-            }
-
-            trace = Trace(data=data_array, header=stats)
-            stream += trace
+
+            # Build stats from recognized attributes + basic fields
+            stats = {}
+            stats.update(global_attrs)  # apply all recognized global attributes
+
+            # Basic per-trace stats
+            stats["channel"] = channel_name
+            stats["starttime"] = starttime
+            stats["sampling_rate"] = sampling_rate
+            # endtime, delta, npts, etc. will be handled automatically by ObsPy
+
+            # Create Trace
+            trace = Trace(data=np.array(data_array, dtype=float), header=stats)
+            stream.append(trace)
 
         return stream
 
     def write_file_from_buffer(self, fh, timeseries: Stream, channels=None):
         """
-        Write an ObsPy Stream to a NetCDF file.
-
-        The NetCDF file will contain global attributes for metadata and variables
-        for each geomagnetic component, including a 'time' variable representing
-        epoch times in seconds.
+        Write an ObsPy Stream to a NetCDF file, including optional attributes.
 
         Parameters
         ----------
-        fh : file-like object
-            File handle to write the NetCDF data.
+        fh : str or file-like object
+            File path or file handle to write the NetCDF data.
         timeseries : Stream
             ObsPy Stream object containing the data to write.
         channels : list, optional
            List of channel names to include. If None, all channels are included.
""" if not timeseries or len(timeseries) == 0: - # Create an empty NetCDF structure + # Create an empty NetCDF structure with minimal CF compliance with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: - nc_dataset.description = "Empty Geomagnetic Time Series Data" + nc_dataset.title = "Geomagnetic Time Series Data" + nc_dataset.Conventions = "CF-1.4" return timeseries.merge() # Filter channels if specified if channels: - # Ensure channel names are uppercase for consistency channels = [ch.upper() for ch in channels] - # Manually filter the Stream to include only desired channels timeseries = Stream( [tr for tr in timeseries if tr.stats.channel.upper() in channels] ) @@ -172,25 +213,19 @@ class NetCDFFactory(TimeseriesFactory): if len(timeseries) == 0: print("No matching channels found after filtering.") with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: - nc_dataset.description = ( - "Empty Geomagnetic Time Series Data after channel filtering" - ) + nc_dataset.title = "Geomagnetic Time Series Data" + nc_dataset.Conventions = "CF-1.4" return timeseries.sort(keys=["starttime"]) tr = timeseries[0] stats = tr.stats - station = stats.station or "" + # Extract necessary attributes lat = float(getattr(stats, "geodetic_latitude", 0.0)) lon = float(getattr(stats, "geodetic_longitude", 0.0)) alt = float(getattr(stats, "elevation", 0.0)) - data_type = getattr(stats, "data_type", "unknown") - data_interval_type = getattr(stats, "data_interval_type", "unknown") - station_name = getattr(stats, "station_name", station) - sensor_orientation = getattr(stats, "sensor_orientation", "") - original_sample_rate = getattr(stats, "original_sample_rate", "") - sample_period = getattr(stats, "sampling_period", 1.0) # In milliseconds + reported_orientation = "".join(channels) if channels else "" npts = tr.stats.npts delta = tr.stats.delta @@ -199,59 +234,116 @@ class NetCDFFactory(TimeseriesFactory): # Generate time values as epoch seconds times_epoch = np.array([starttime.timestamp + i * delta for i in range(npts)]) - # Create NetCDF dataset + # Create NetCDF dataset with CF conventions with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: + # Define global attributes with CF compliance + nc_dataset.Conventions = "CF-1.4" + nc_dataset.title = "Geomagnetic Time Series Data" + nc_dataset.institution = getattr(stats, "agency_name", "") + # nc_dataset.source = getattr(stats, "source", "") + history_entry = f"Created on {datetime.now(timezone.utc).isoformat()}" + existing_history = getattr(nc_dataset, "history", "") + if existing_history: + nc_dataset.history = existing_history + "\n" + history_entry + else: + nc_dataset.history = history_entry + nc_dataset.references = getattr(stats, "references", "") + nc_dataset.comment = getattr(stats, "filter_comments", "") + + # Add optional global attributes if they exist in stats and are not empty + for attr in self.OPTIONAL_GLOBAL_ATTRS: + val = getattr(stats, attr, None) + if val not in [None, ""]: + setattr(nc_dataset, attr, val) + + if reported_orientation: + setattr(nc_dataset, "reported_orientation", reported_orientation) + # Define the time dimension nc_dataset.createDimension("time", npts) # Create the time variable time_var = nc_dataset.createVariable("time", "f8", ("time",)) - time_var.units = "seconds since 1970-01-01T00:00:00Z" - time_var.calendar = "standard" + time_var.units = "seconds since 1970-01-01T00:00:00Z" # CF-compliant units + time_var.calendar = getattr(stats, "calendar", "standard") + time_var.standard_name = "time" + 
time_var.long_name = "Time" time_var[:] = times_epoch - # Create channel variables + # Assign valid range if available + if "valid_min" in stats and "valid_max" in stats: + time_var.valid_min = stats["valid_min"] + time_var.valid_max = stats["valid_max"] + + # Create channel variables with CF attributes for trace in timeseries: ch_name = trace.stats.channel.upper() + if len(ch_name) > 63: + raise ValueError( + f"Channel name '{ch_name}' exceeds NetCDF variable name length limit." + ) + # Ensure channel name is a valid NetCDF variable name ch_name_nc = ch_name.replace("-", "_").replace(" ", "_") - var = nc_dataset.createVariable(ch_name_nc, "f8", ("time",)) - var.units = ( - "nT" # non standard elements such as temperature not considered. + + # Determine units, valid_min, and valid_max based on channel + units, valid_min, valid_max = self._get_var_units_validmin_validmax( + ch_name + ) + + var = nc_dataset.createVariable( + ch_name_nc, + "f8", + ("time",), + fill_value=getattr(trace.stats, "_FillValue", 99_999), + ) + + # Set CF attributes + var.units = units + var.long_name = getattr( + trace.stats, "long_name", ELEMENT_INDEX.get(ch_name).name + ) + var.standard_name = getattr( + trace.stats, "standard_name", f"geomagnetic_field_{ch_name.lower()}" ) - var.long_name = f"Geomagnetic component {ch_name}" + + # Set data validity attributes + var.valid_min = valid_min + var.valid_max = valid_max + + # Assign data var[:] = trace.data - # Set global attributes - nc_dataset.institute_name = getattr(stats, "agency_name", "") - nc_dataset.observatory_name = station_name - nc_dataset.observatory_code = station - nc_dataset.sensor_latitude = lat - nc_dataset.sensor_longitude = lon - nc_dataset.sensor_elevation = alt - nc_dataset.sensor_orientation = sensor_orientation - nc_dataset.original_sample_rate = original_sample_rate - nc_dataset.data_type = data_type - nc_dataset.start_time = starttime.timestamp # Numeric epoch time - nc_dataset.sample_period = sample_period # In milliseconds - - # Optional global attributes - nc_dataset.history = f"Created on {datetime.now(timezone.utc).isoformat()}" + # Define geospatial coordinates + nc_dataset.latitude = lat + # nc_dataset.lat_units = "degrees_north" + nc_dataset.longitude = lon + # nc_dataset.geospatial_lon_units = "degrees_east" + nc_dataset.elevation = alt + # nc_dataset.elevation_units = "meters" def write_file(self, fh, timeseries: Stream, channels=None): - import tempfile - import shutil - import os + """ + Write an ObsPy Stream to a NetCDF file. + Parameters + ---------- + fh : file-like object + File handle to write the NetCDF data. + timeseries : Stream + ObsPy Stream object containing the data to write. + channels : list, optional + List of channel names to include. If None, all channels are included. 
+ """ # Create a temporary file with tempfile.NamedTemporaryFile(delete=False) as tmp: tmp_filepath = tmp.name try: - # Write to the temporary file using the existing write_file method + # Write to the temporary file self.write_file_from_buffer(tmp_filepath, timeseries, channels) - # Read the temporary file content + # Read the temporary file content into destination file with open(tmp_filepath, "rb") as tmp: shutil.copyfileobj(tmp, fh) finally: -- GitLab From ff57e71fd7bfcc3ef9b7dc580d1f5ad97e9d589e Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 6 Jan 2025 09:24:23 -0800 Subject: [PATCH 4/8] cf convention updated from 1.4 to 1.6 --- geomagio/netcdf/NetCDFFactory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py index 920c9d8c..590e1ead 100644 --- a/geomagio/netcdf/NetCDFFactory.py +++ b/geomagio/netcdf/NetCDFFactory.py @@ -198,7 +198,7 @@ class NetCDFFactory(TimeseriesFactory): # Create an empty NetCDF structure with minimal CF compliance with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: nc_dataset.title = "Geomagnetic Time Series Data" - nc_dataset.Conventions = "CF-1.4" + nc_dataset.Conventions = "CF-1.6" return timeseries.merge() @@ -214,7 +214,7 @@ class NetCDFFactory(TimeseriesFactory): print("No matching channels found after filtering.") with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: nc_dataset.title = "Geomagnetic Time Series Data" - nc_dataset.Conventions = "CF-1.4" + nc_dataset.Conventions = "CF-1.6" return timeseries.sort(keys=["starttime"]) @@ -237,7 +237,7 @@ class NetCDFFactory(TimeseriesFactory): # Create NetCDF dataset with CF conventions with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset: # Define global attributes with CF compliance - nc_dataset.Conventions = "CF-1.4" + nc_dataset.Conventions = "CF-1.6" nc_dataset.title = "Geomagnetic Time Series Data" nc_dataset.institution = getattr(stats, "agency_name", "") # nc_dataset.source = getattr(stats, "source", "") -- GitLab From 8b9a79fc3ea7cf84595e95d68512779c5da2c425 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Mon, 6 Jan 2025 10:52:26 -0800 Subject: [PATCH 5/8] arcmin to degrees per cf convention. other minor updates per cf convention. --- geomagio/ChannelConverter.py | 38 ++++++++++++++++++++++++++++++-- geomagio/netcdf/NetCDFFactory.py | 32 ++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/geomagio/ChannelConverter.py b/geomagio/ChannelConverter.py index 0ca91bfd..3fc705ff 100644 --- a/geomagio/ChannelConverter.py +++ b/geomagio/ChannelConverter.py @@ -21,8 +21,8 @@ import numpy M2R = numpy.pi / 180 / 60 # Minutes to Radians R2M = 180.0 / numpy.pi * 60 # Radians to Minutes - - +M2D = 1.0 / 60.0 # Minutes to Degrees +D2M = 60.0 # Degrees to Minutes # ### # get geographic coordinates from.... # ### @@ -434,3 +434,37 @@ def get_minutes_from_radians(r): the radian value to be converted """ return numpy.multiply(r, R2M) + + +def get_degrees_from_minutes(m): + """ + Converts angular minutes to degrees. + + Parameters + ---------- + m : float or array_like + The angular minutes to be converted. + + Returns + ------- + float or array_like + The corresponding degrees. + """ + return numpy.multiply(m, M2D) + + +def get_minutes_from_degrees(d): + """ + Converts degrees to angular minutes. + + Parameters + ---------- + d : float or array_like + The degrees to be converted. 
+
+    Returns
+    -------
+    float or array_like
+        The corresponding angular minutes.
+    """
+    return numpy.multiply(d, D2M)
diff --git a/geomagio/netcdf/NetCDFFactory.py b/geomagio/netcdf/NetCDFFactory.py
index 590e1ead..3a92d580 100644
--- a/geomagio/netcdf/NetCDFFactory.py
+++ b/geomagio/netcdf/NetCDFFactory.py
@@ -6,6 +6,7 @@ import tempfile
 import shutil
 import os
 
+from geomagio import ChannelConverter
 from geomagio.TimeseriesFactory import TimeseriesFactory
 from geomagio.api.ws.Element import ELEMENT_INDEX
 
@@ -142,8 +143,6 @@ class NetCDFFactory(TimeseriesFactory):
         for ch in channels:
             channel_data[ch] = nc_dataset.variables[ch][:]
 
-        nc_dataset.close()
-
         # If no time points, return empty
         if len(times_epoch) == 0:
             print("No time points found in the NetCDF data.")
@@ -177,8 +176,19 @@ class NetCDFFactory(TimeseriesFactory):
 
             # Create Trace
             trace = Trace(data=np.array(data_array, dtype=float), header=stats)
+
+            # Normalize D/I angles to arcminutes (from degrees or radians)
+            if channel_name in ["D", "I"]:
+                if nc_dataset.variables[channel_name].units.startswith("deg"):
+                    trace.data = ChannelConverter.get_minutes_from_degrees(trace.data)
+                elif nc_dataset.variables[channel_name].units.startswith("rad"):
+                    trace.data = ChannelConverter.get_minutes_from_radians(trace.data)
+                elif nc_dataset.variables[channel_name].units.startswith("arc"):
+                    pass  # already in arcminutes; nothing to do.
+
             stream.append(trace)
 
+        nc_dataset.close()
         return stream
 
     def write_file_from_buffer(self, fh, timeseries: Stream, channels=None):
@@ -198,6 +208,7 @@
             # Create an empty NetCDF structure with minimal CF compliance
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.featureType = "timeSeries"  # per CF conventions
                 nc_dataset.Conventions = "CF-1.6"
             return
@@ -214,6 +225,7 @@
             print("No matching channels found after filtering.")
             with netCDF4.Dataset(fh, "w", format="NETCDF4") as nc_dataset:
                 nc_dataset.title = "Geomagnetic Time Series Data"
+                nc_dataset.featureType = "timeSeries"  # per CF conventions
                 nc_dataset.Conventions = "CF-1.6"
             return
@@ -239,6 +251,7 @@
             # Define global attributes with CF compliance
             nc_dataset.Conventions = "CF-1.6"
             nc_dataset.title = "Geomagnetic Time Series Data"
+            nc_dataset.featureType = "timeSeries"  # per CF conventions
             nc_dataset.institution = getattr(stats, "agency_name", "")
             # nc_dataset.source = getattr(stats, "source", "")
             history_entry = f"Created on {datetime.now(timezone.utc).isoformat()}"
@@ -268,6 +281,7 @@
             time_var.calendar = getattr(stats, "calendar", "standard")
             time_var.standard_name = "time"
             time_var.long_name = "Time"
+            time_var.axis = "T"
             time_var[:] = times_epoch
 
             # Assign valid range if available
@@ -278,6 +292,10 @@
             # Create channel variables with CF attributes
             for trace in timeseries:
                 ch_name = trace.stats.channel.upper()
+                # Convert declination from arcminutes to degrees, per CF convention
+                if "D" == ch_name:
+                    trace.data = ChannelConverter.get_degrees_from_minutes(trace.data)
+
                 if len(ch_name) > 63:
                     raise ValueError(
                         f"Channel name '{ch_name}' exceeds NetCDF variable name length limit."
@@ -300,9 +318,13 @@ class NetCDFFactory(TimeseriesFactory): # Set CF attributes var.units = units - var.long_name = getattr( - trace.stats, "long_name", ELEMENT_INDEX.get(ch_name).name - ) + if ELEMENT_INDEX.get(ch_name): + # remove arcminutes, other things + long_name = ELEMENT_INDEX.get(ch_name).name.replace( + "(arcminutes)", "" + ) + long_name = long_name.strip() + var.long_name = getattr(trace.stats, "long_name", long_name) var.standard_name = getattr( trace.stats, "standard_name", f"geomagnetic_field_{ch_name.lower()}" ) -- GitLab From 158e289bafa87326d165ffc2cda9f3b1afe99bfe Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 15 Jan 2025 13:31:41 -0800 Subject: [PATCH 6/8] import module syntax used i.e __init__.py --- geomagio/Controller.py | 6 +++--- geomagio/netcdf/__init__.py | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 geomagio/netcdf/__init__.py diff --git a/geomagio/Controller.py b/geomagio/Controller.py index fae8902b..fab26a3f 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -7,7 +7,6 @@ from typing import List, Optional, Tuple, Union from obspy.core import Stream, UTCDateTime -from geomagio.netcdf.NetCDFFactory import NetCDFFactory from .algorithm import Algorithm, algorithms, AlgorithmException, FilterAlgorithm from .DerivedTimeseriesFactory import DerivedTimeseriesFactory @@ -27,6 +26,7 @@ from . import temperature from . import vbf from . import xml from . import covjson +from . import netcdf class Controller(object): @@ -550,7 +550,7 @@ def get_input_factory(args): if input_type == "iaga2002": input_factory = iaga2002.IAGA2002Factory(**input_factory_args) if input_type == "netcdf": - input_factory = NetCDFFactory(**input_factory_args) + input_factory = netcdf.NetCDFFactory(**input_factory_args) elif input_type == "imfv122": input_factory = imfv122.IMFV122Factory(**input_factory_args) elif input_type == "imfv283": @@ -637,7 +637,7 @@ def get_output_factory(args): elif output_type == "iaga2002": output_factory = iaga2002.IAGA2002Factory(**output_factory_args) elif output_type == "netcdf": - output_factory = NetCDFFactory(**output_factory_args) + output_factory = netcdf.NetCDFFactory(**output_factory_args) elif output_type == "imfjson": output_factory = imfjson.IMFJSONFactory(**output_factory_args) elif output_type == "covjson": diff --git a/geomagio/netcdf/__init__.py b/geomagio/netcdf/__init__.py new file mode 100644 index 00000000..f2ccd864 --- /dev/null +++ b/geomagio/netcdf/__init__.py @@ -0,0 +1,8 @@ +"""IO Module for NetCDF Format +""" + +from __future__ import absolute_import + +from .NetCDFFactory import NetCDFFactory + +__all__ = ["NetCDFFactory"] -- GitLab From f4416c8e97d56497f41281238cb78971d04ef6bc Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Wed, 15 Jan 2025 13:33:39 -0800 Subject: [PATCH 7/8] rebase + poetry lock --- poetry.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index ad3771c9..3d7add0e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiomysql" @@ -3047,4 +3047,4 @@ pycurl = ["pycurl"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "e74f5314dc78dc4ab24fd3afab056baa724eb022c689df183a91854cc9463ad8" +content-hash = "de2ec42ee0160626455b58e0b8f67e2669ad3ef5e13d58bc4500d0b8a1643cb8" -- GitLab From 38548c4cda8d351a6422fddb68d5aa1036272148 Mon Sep 17 00:00:00 2001 From: Nicholas Shavers <nshavers@contractor.usgs.gov> Date: Fri, 17 Jan 2025 10:38:02 -0800 Subject: [PATCH 8/8] lint + rebasing --- geomagio/Controller.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geomagio/Controller.py b/geomagio/Controller.py index fab26a3f..564b54c9 100644 --- a/geomagio/Controller.py +++ b/geomagio/Controller.py @@ -501,7 +501,7 @@ def get_input_factory(args): input_type = args.input # stream/url arguments if args.input_file is not None: - if input_type in ["netcdf", "miniseed"] : + if input_type in ["netcdf", "miniseed"]: input_stream = open(args.input_file, "rb") else: input_stream = open(args.input_file, "r") -- GitLab
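
A minimal round-trip sketch of the factory this series adds (not part of the
patches themselves). The calls used here — NetCDFFactory(), parse_string(),
and write_file() — are the ones defined in PATCH 1/3; the file names and the
channel list are illustrative placeholders only. Note that write_file expects
a binary destination handle, because the factory stages its output through a
temporary file before copying it into the handle.

    from geomagio.netcdf import NetCDFFactory

    factory = NetCDFFactory()

    # Read: parse_string() takes the raw bytes of a NetCDF file.
    with open("example.nc", "rb") as f:  # "example.nc" is a placeholder
        stream = factory.parse_string(f.read())

    # Write: write_file() copies a temporary NetCDF file into the handle,
    # so the destination must be opened in binary mode ("wb").
    with open("copy.nc", "wb") as out:
        factory.write_file(out, stream, channels=["H", "E", "Z", "F"])

The same path is reachable from the command line once the series lands, via
the new "netcdf" choice added to the --input and --output format options in
Controller.parse_args.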