Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
geomag-algorithms
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ghsc
National Geomagnetism Program
geomag-algorithms
Commits
b221ec1d
Commit
b221ec1d
authored
3 months ago
by
Shavers, Nicholas H
Browse files
Options
Downloads
Patches
Plain Diff
network encoding for maximum portability. code cleanup
parent
066e59b4
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!368
Imagcdf factory mvp
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
geomagio/imagcdf/ImagCDFFactory.py
+17
-69
17 additions, 69 deletions
geomagio/imagcdf/ImagCDFFactory.py
with
17 additions
and
69 deletions
geomagio/imagcdf/ImagCDFFactory.py
+
17
−
69
View file @
b221ec1d
...
@@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -132,9 +132,9 @@ class ImagCDFFactory(TimeseriesFactory):
try
:
try
:
# Initialize the CDF writer
# Initialize the CDF writer
cdf_spec
=
{
cdf_spec
=
{
"
Compressed
"
:
9
,
# Enable compression (
0
-9)
"
Compressed
"
:
9
,
# Enable compression (
1
-9)
"
Majority
"
:
CDFWriter
.
ROW_MAJOR
,
# Data layout - gets set automatically
"
Majority
"
:
CDFWriter
.
ROW_MAJOR
,
"
Encoding
"
:
CDFWriter
.
HOST
_ENCODING
,
#
gets set automatically
"
Encoding
"
:
CDFWriter
.
NETWORK
_ENCODING
,
#
XDR Encoding - If a CDF must be portable between two or more different types of computers use network encoded.
"
Checksum
"
:
True
,
# Disable checksum for faster writes (optional)
"
Checksum
"
:
True
,
# Disable checksum for faster writes (optional)
"
rDim_sizes
"
:
[],
# Applicable only if using rVariables - CDF protocol recommends only using zVariables.
"
rDim_sizes
"
:
[],
# Applicable only if using rVariables - CDF protocol recommends only using zVariables.
}
}
...
@@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -259,9 +259,6 @@ class ImagCDFFactory(TimeseriesFactory):
for
urlInterval
in
urlIntervals
:
for
urlInterval
in
urlIntervals
:
interval_start
=
urlInterval
[
"
start
"
]
interval_start
=
urlInterval
[
"
start
"
]
interval_end
=
urlInterval
[
"
end
"
]
interval_end
=
urlInterval
[
"
end
"
]
# Removes last data point ex: if endtime = 02:00:00, this could return 01:59:00 as last data point.
# if interval_start != interval_end:
# interval_end = interval_end - delta
url
=
self
.
_get_url
(
url
=
self
.
_get_url
(
observatory
=
observatory
,
observatory
=
observatory
,
date
=
interval_start
,
date
=
interval_start
,
...
@@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -357,13 +354,7 @@ class ImagCDFFactory(TimeseriesFactory):
try
:
try
:
# Read CDF data and merge
# Read CDF data and merge
cdf
=
CDFReader
(
url_file
)
cdf
=
CDFReader
(
url_file
)
# file_stream = self._read_cdf(cdf, channels)
timeseries
=
self
.
_read_cdf
(
cdf
,
channels
)
timeseries
=
self
.
_read_cdf
(
cdf
,
channels
)
# Attempt to select only requested channelws (redundant as read_cdf can more efficiently filter)
# selected = Stream()
# for ch in channels:
# selected += file_stream.select(channel=ch)
# timeseries += selected
except
Exception
as
e
:
except
Exception
as
e
:
print
(
f
"
Error reading CDF file
'
{
url_file
}
'
:
{
e
}
"
,
file
=
sys
.
stderr
)
print
(
f
"
Error reading CDF file
'
{
url_file
}
'
:
{
e
}
"
,
file
=
sys
.
stderr
)
...
@@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -395,48 +386,6 @@ class ImagCDFFactory(TimeseriesFactory):
timeseries
.
sort
()
timeseries
.
sort
()
return
timeseries
return
timeseries
# Removed - cdflib takes a file path as an input more efficiently than taking in byte data.
# def parse_string(self, data: str, **kwargs):
# """
# Parse ImagCDF binary data into an ObsPy Stream.
# This method writes the provided binary data to a temporary file,
# reads the file using `cdflib`, and converts the data into an ObsPy
# Stream.
# Parameters
# ----------
# data : bytes
# Binary data containing ImagCDF content.
# Returns
# -------
# Stream
# An ObsPy Stream object with the parsed geomagnetic time series data.
# Raises
# ------
# TimeseriesFactoryException
# If an error occurs while parsing the ImagCDF data.
# """
# # Create a temporary file to store the CDF data
# with tempfile.NamedTemporaryFile(delete=False, suffix=".cdf") as tmp_file:
# tmp_file_name = tmp_file.name
# tmp_file.write(data)
# channels = kwargs.get('channels', [])
# try:
# # Read the CDF from the temporary file
# cdf = CDFReader(tmp_file_name)
# stream = self._read_cdf(cdf, channels)
# # no cdf.close() method required
# except Exception as e:
# raise TimeseriesFactoryException(f"Error parsing ImagCDF data: {e}")
# finally:
# # Clean up the temporary file
# os.remove(tmp_file_name)
# return stream
def
_create_global_attributes
(
def
_create_global_attributes
(
self
,
timeseries
:
Stream
,
channels
:
List
[
str
]
self
,
timeseries
:
Stream
,
channels
:
List
[
str
]
)
->
dict
:
)
->
dict
:
...
@@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -654,7 +603,7 @@ class ImagCDFFactory(TimeseriesFactory):
units
=
"
Celsius
"
units
=
"
Celsius
"
validmin
=
-
273.15
# absolute zero
validmin
=
-
273.15
# absolute zero
validmax
=
79_999
validmax
=
79_999
depend_0
=
"
DataTimes
"
#
can be used for nonstandard element
depend_0
=
"
DataTimes
"
#
can be used for nonstandard element
# elif channel in [REAL_TEMPERATURES]:
# elif channel in [REAL_TEMPERATURES]:
# units = "Celsius"
# units = "Celsius"
# fieldnam = f"Temperature {temperature_index} {trace.stats.location}"
# fieldnam = f"Temperature {temperature_index} {trace.stats.location}"
...
@@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -782,7 +731,7 @@ class ImagCDFFactory(TimeseriesFactory):
f
"
{
'
,
'
.
join
(
missing_global_attrs
)
}
"
f
"
{
'
,
'
.
join
(
missing_global_attrs
)
}
"
)
)
raise
TimeseriesFactoryException
(
error_message
)
raise
TimeseriesFactoryException
(
error_message
)
# Map global attributes to Stream-level metadata
# Map global attributes to Stream-level metadata
observatory
=
global_attrs
.
get
(
"
IagaCode
"
,
[
""
])[
0
]
observatory
=
global_attrs
.
get
(
"
IagaCode
"
,
[
""
])[
0
]
station_name
=
global_attrs
.
get
(
"
ObservatoryName
"
,
[
""
])[
0
]
station_name
=
global_attrs
.
get
(
"
ObservatoryName
"
,
[
""
])[
0
]
...
@@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -811,26 +760,25 @@ class ImagCDFFactory(TimeseriesFactory):
# Read data variables and associate them with time variables
# Read data variables and associate them with time variables
for
var
in
cdf
.
cdf_info
().
zVariables
:
for
var
in
cdf
.
cdf_info
().
zVariables
:
# Skip time variables
# Skip time variables
if
var
.
endswith
(
"
Times
"
):
if
var
.
endswith
(
"
Times
"
):
continue
continue
# Map the variable name back to a standard channel code
# Map the variable name back to a standard channel code by removing known prefixes
# Geomagnetic fields are named like GeomagneticFieldH, GeomagneticFieldD, etc.
# Names are like GeomagneticFieldH, GeomagneticFieldD, Temperature1, Temperature2, ...
# Temperatures are named like Temperature1, Temperature2, ...
# Extract channel name by removing known prefixes
if
var
.
startswith
(
"
GeomagneticField
"
):
if
var
.
startswith
(
"
GeomagneticField
"
):
channel
=
var
.
replace
(
"
GeomagneticField
"
,
""
)
channel
=
var
.
replace
(
"
GeomagneticField
"
,
""
)
elif
var
.
startswith
(
"
Temperature
"
):
#
elif var.startswith("Temperature"):
# Temperature variables may not map directly to a geomagnetic channel
#
# Temperature variables may not map directly to a geomagnetic channel
# but to temperature sensors. We can just use the label from LABLAXIS if needed
#
# but to temperature sensors. We can just use the label from LABLAXIS if needed
channel
=
attrs
.
get
(
"
LABLAXIS
"
,
var
)
#
channel = attrs.get("LABLAXIS", var)
else
:
else
:
# fallback if naming doesn't match expected patterns
# fallback if naming doesn't match expected patterns
channel
=
var
channel
=
var
if
channels
and
channel
not
in
channels
:
continue
if
channels
and
channel
not
in
channels
:
continue
data
=
cdf
.
varget
(
var
)
data
=
cdf
.
varget
(
var
)
attrs
=
cdf
.
varattsget
(
var
)
attrs
=
cdf
.
varattsget
(
var
)
...
@@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -852,7 +800,7 @@ class ImagCDFFactory(TimeseriesFactory):
# continue
# continue
times
=
[]
times
=
[]
if
matched_time_key
in
time_vars
:
if
matched_time_key
in
time_vars
:
times
=
time_vars
[
matched_time_key
]
times
=
time_vars
[
matched_time_key
]
# Determine delta (sample interval)
# Determine delta (sample interval)
if
len
(
times
)
>
1
:
if
len
(
times
)
>
1
:
...
@@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -883,7 +831,7 @@ class ImagCDFFactory(TimeseriesFactory):
"
VALIDMAX
"
,
"
VALIDMAX
"
,
"
DISPLAY_TYPE
"
,
"
DISPLAY_TYPE
"
,
"
LABLAXIS
"
,
"
LABLAXIS
"
,
"
DEPEND_0
"
"
DEPEND_0
"
,
]
]
# Validate presence of required variable attributes
# Validate presence of required variable attributes
missing_var_attrs
=
[]
missing_var_attrs
=
[]
...
@@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory):
...
@@ -1014,7 +962,7 @@ class ImagCDFFactory(TimeseriesFactory):
base_path
=
self
.
urlTemplate
[
7
:]
base_path
=
self
.
urlTemplate
[
7
:]
if
not
base_path
or
base_path
==
"
{obs}_{dt}_{t}.cdf
"
:
if
not
base_path
or
base_path
==
"
{obs}_{dt}_{t}.cdf
"
:
base_path
=
os
.
getcwd
()
# Default to current working directory
base_path
=
os
.
getcwd
()
# Default to current working directory
return
os
.
path
.
join
(
base_path
,
"
etc
"
,
"
imagcdf
"
,
filename
)
return
os
.
path
.
join
(
base_path
,
"
etc
"
,
"
imagcdf
"
,
filename
)
return
os
.
path
.
join
(
self
.
urlTemplate
,
filename
)
return
os
.
path
.
join
(
self
.
urlTemplate
,
filename
)
# Unsupported URL scheme
# Unsupported URL scheme
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment