diff --git a/geomagio/residual/SpreadsheetAbsolutesFactory.py b/geomagio/residual/SpreadsheetAbsolutesFactory.py index fc22cae1f6cf914ab585210bfe48ed4b03cd239c..e9cbc860c6ee48ea0ddeffef11a9d2cfceefc36c 100644 --- a/geomagio/residual/SpreadsheetAbsolutesFactory.py +++ b/geomagio/residual/SpreadsheetAbsolutesFactory.py @@ -233,15 +233,13 @@ def parse_relative_time(base_date: str, time: str) -> UTCDateTime: return None -def get_summary_flags( +def get_summary_readings( factory: str, observatory: str, starttime: UTCDateTime, endtime: UTCDateTime, - absolute_time: UTCDateTime, ) -> List[Reading]: - """Get valid flags from SpreadsheetSummaryFactory and define "reviewed" metadata. - + """Get summary sheet readings from SpreadsheetSummaryFactory Parameters ---------- factory @@ -252,20 +250,38 @@ def get_summary_flags( search start time endtime search end time - absolute_time - time of the absolute Returns ------- - Array of valid flags for absolutes from summary sheets and metadata + returns summary readings from SpreadsheetSummaryFactory """ - readings = factory.get_readings( + summary_readings = factory.get_readings( observatory=observatory, starttime=UTCDateTime(starttime), endtime=UTCDateTime(endtime), ) + return summary_readings + + +def match_summary_flags( + summary_readings: List, + absolute_time: UTCDateTime, +) -> List: + """Match valid flags from summary readings and define "reviewed" metadata. + + Parameters + ---------- + summary_readings + list of summary spreadsheet readings + absolute_time + time of the absolute + + Returns + ------- + Array of valid flags for absolutes from summary sheets and metadata + """ try: - for reading in readings: + for reading in summary_readings: if reading.time == absolute_time: summary_flags = [v.valid for v in reading.absolutes] reviewed = True @@ -310,21 +326,30 @@ class SpreadsheetAbsolutesFactory(object): # unique and sorted filenames filenames, idxs = numpy.unique(all_files, return_index=True) dirpaths = numpy.array(all_dirs)[idxs] + + # pull readings from summary sheets before iterating + summary_readings = get_summary_readings( + factory=SpreadsheetSummaryFactory(self.base_directory), + observatory=observatory, + starttime=UTCDateTime(starttime), + endtime=UTCDateTime(endtime), + ) for dirpath, filename in zip(dirpaths, filenames): if start_filename <= filename < end_filename: readings.append( self.parse_spreadsheet( path=os.path.join(dirpath, filename), - observatory=observatory, - starttime=starttime, - endtime=endtime, include_measurements=include_measurements, + summary_readings=summary_readings, ) ) return readings def parse_spreadsheet( - self, observatory, starttime, endtime, path: str, include_measurements=True + self, + path: str, + summary_readings: List, + include_measurements=True, ) -> Reading: """Parse a residual spreadsheet file. @@ -339,7 +364,7 @@ class SpreadsheetAbsolutesFactory(object): constants_sheet, measurement_sheet, calculation_sheet, summary_sheet ) absolutes, reviewed = self._parse_absolutes( - summary_sheet, observatory, starttime, endtime, metadata["date"] + summary_sheet, metadata["date"], summary_readings=summary_readings ) metadata["reviewed:"] = reviewed measurements = ( @@ -367,23 +392,17 @@ class SpreadsheetAbsolutesFactory(object): def _parse_absolutes( self, sheet: openpyxl.worksheet, - observatory, - starttime, - endtime, base_date: str, + summary_readings: List, ) -> List[Absolute]: """Parse absolutes from a summary sheet.""" # absolute time should be the same in each spreadsheet absolute_time = parse_relative_time( base_date, "{:04d}".format(sheet["B12"].value) ) - # pull valid flags from summary spreadsheets and match with absolute_time... - # and define metadata "reviewed" based on valid flags - summary_flags, reviewed = get_summary_flags( - factory=SpreadsheetSummaryFactory(self.base_directory), - observatory=observatory, - starttime=UTCDateTime(starttime), - endtime=UTCDateTime(endtime), + # match valid flags using absolute_time and define "reviewed" metadata + summary_flags, reviewed = match_summary_flags( + summary_readings=summary_readings, absolute_time=absolute_time, ) absolutes = [ @@ -496,6 +515,13 @@ class SpreadsheetAbsolutesFactory(object): except: errors.append("Unable to read mark azimuth") year = measurement_sheet["B8"].value + + # scale value changes depending on spreadsheet version + try: + scale_value = summary_sheet["D33"].value + except: + scale_value = summary_sheet["D37"].value + return { # pad in case month starts with zero (which is trimmed) "date": f"{year}{measurement_sheet['C8'].value:04}", @@ -507,7 +533,7 @@ class SpreadsheetAbsolutesFactory(object): "observer": measurement_sheet["E8"].value, "pier_correction": calculation_sheet["I24"].value, "pier_name": summary_sheet["B5"].value, - "scale_value": summary_sheet["D33"].value, + "scale_value": scale_value, "station": measurement_sheet["A8"].value, "temperature": constants_sheet["J58"].value, "year": year,