From c361a04ca56eeb0b44d6d01ce7d4decf48aa1aea Mon Sep 17 00:00:00 2001 From: Abram Claycomb <aclaycomb@usgs.gov> Date: Mon, 13 May 2019 17:12:23 -0600 Subject: [PATCH] add one second parsing, tests for headers, simplify for readability --- geomagio/pcdcp/PCDCPFactory.py | 1 + geomagio/pcdcp/PCDCPParser.py | 26 ++++++++++----------- test/pcdcp_test/PCDCPParser_test.py | 35 ++++++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 14 deletions(-) diff --git a/geomagio/pcdcp/PCDCPFactory.py b/geomagio/pcdcp/PCDCPFactory.py index fb3d9ea8c..05d3bdbd9 100644 --- a/geomagio/pcdcp/PCDCPFactory.py +++ b/geomagio/pcdcp/PCDCPFactory.py @@ -10,6 +10,7 @@ from .PCDCPWriter import PCDCPWriter # pattern for pcdcp file names PCDCP_FILE_PATTERN = '%(obs)s%(y)s%(j)s.%(i)s' +# note: seconds files end in .raw, not .sec class PCDCPFactory(TimeseriesFactory): diff --git a/geomagio/pcdcp/PCDCPParser.py b/geomagio/pcdcp/PCDCPParser.py index f82a51a1d..4d3f21f21 100644 --- a/geomagio/pcdcp/PCDCPParser.py +++ b/geomagio/pcdcp/PCDCPParser.py @@ -29,7 +29,11 @@ class PCDCPParser(object): def __init__(self): """Create a new PCDCP parser.""" # header fields + self.header_fields = ['station', 'year', 'yearday', 'date', + 'orientation', 'resolution', 'Version'] self.header = {} + # resolution (float) + self.resolution = 0.0 # array of channel names self.channels = [] # timestamps of data (datetime.datetime) @@ -64,11 +68,10 @@ class PCDCPParser(object): Adds value to ``self.header``. """ - self.header['header'] = line - self.header['station'] = line[0:3] - self.header['year'] = line[5:9] - self.header['yearday'] = line[11:14] - self.header['date'] = line[16:25] + self.header = dict(zip(self.header_fields, + line.split(None, len(self.header_fields)))) + + self.resolution = float(self.header['resolution'].split('nT')[0]) return @@ -78,13 +81,9 @@ class PCDCPParser(object): Adds time to ``self.times``. Adds channel values to ``self.data``. 
""" - t, d1, d2, d3, d4 = self._parsedata - - t.append(line[0:4]) - d1.append(int(line[5:13])) - d2.append(int(line[14:22])) - d3.append(int(line[23:31])) - d4.append(int(line[32:40])) + values = line.split() + for (value, column) in zip(values, self._parsedata): + column.append(value) def _post_process(self): """Post processing after data is parsed. @@ -98,7 +97,8 @@ class PCDCPParser(object): data = numpy.array(data, dtype=numpy.float64) # filter empty values data[data == NINES] = numpy.nan - data = numpy.divide(data, 100) + data[data == NINES_RAW] = numpy.nan + data = numpy.multiply(data, self.resolution) self.data[channel] = data self._parsedata = None diff --git a/test/pcdcp_test/PCDCPParser_test.py b/test/pcdcp_test/PCDCPParser_test.py index e31a4f8ba..192cd911d 100644 --- a/test/pcdcp_test/PCDCPParser_test.py +++ b/test/pcdcp_test/PCDCPParser_test.py @@ -18,8 +18,24 @@ BOU 2015 001 01-Jan-15 HEZF 0.01nT File Version 2.00 0008 2086278 -5571 4745734 5237808 """ +PCDCP_EXAMPLE_SECOND = \ +""" +BOU 2015 001 01-Jan-15 HEZF 0.001nT File Version 2.00 +00000 20861520 -57095 47457409 52377630 +00001 20861533 -57096 47457397 52377650 +00002 20861554 -57077 47457391 52377650 +00003 20861578 -57068 47457389 52377680 +00004 20861600 -57068 47457384 52377660 +00005 20861640 -57047 47457388 52377690 +00006 20861654 -57039 47457378 52377650 +00007 20861699 -57026 47457377 52377690 +00008 20861721 -56995 47457365 52377680 +00009 20861743 -56977 47457350 52377680 +00010 20861750 -56968 47457349 52377690 +""" + -def test__parse_header(): +def test_parse_header(): """pcdcp_test.PCDCPParser_test.test_parse_header() Call the _parse_header method with a header. 
@@ -33,3 +49,20 @@ def test__parse_header(): assert_equals(parser.header['station'], 'BOU') assert_equals(parser.header['year'], '2015') assert_equals(parser.header['yearday'], '001') + assert_equals(parser.header['resolution'], '0.01nT') + +def test_parse_header_sec(): + """pcdcp_test.PCDCPParser_test.test_parse_header_sec() + + Call the _parse_header method with a pcdcp seconds file '.raw' + header. Verify the header name and value are split correctly. + """ + parser = PCDCPParser() + parser._parse_header('BOU 2015 001 01-Jan-15 HEZF 0.001nT' + + ' File Version 2.00') + + assert_equals(parser.header['date'], '01-Jan-15') + assert_equals(parser.header['station'], 'BOU') + assert_equals(parser.header['year'], '2015') + assert_equals(parser.header['yearday'], '001') + assert_equals(parser.header['resolution'], '0.001nT') \ No newline at end of file -- GitLab