diff --git a/Pipfile b/Pipfile
index 6c3ab9a6d8f0f1814b93cf700e8c4425c82c21da..b0976d84f49fc125d4148a160608122ee5973ba2 100644
--- a/Pipfile
+++ b/Pipfile
@@ -15,16 +15,18 @@ numpy = "*"
 scipy = "*"
 obspy = ">1.2.0"
 pycurl = "*"
+authlib = "*"
+cryptography = "*"
 databases = {extras = ["postgresql", "sqlite"],version = "*"}
 fastapi = "*"
+httpx = "==0.11.1"
+openpyxl = "*"
 orm = "*"
-pydantic = "*"
+pydantic = "==1.4"
 sqlalchemy = "*"
 uvicorn = "*"
 typesystem = "==0.2.4"
-httpx = "==0.11.1"
-cryptography = "*"
 
 [pipenv]
 allow_prereleases = true
diff --git a/Pipfile.lock b/Pipfile.lock
index af1a54a92dfc669c6c04c965ba4e06bd8f10b38e..732b95e0b49c84ae03702f9fe97cabdd01db160a 100644
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "ae682eeb1615d7cea4d2e5eee7bac37c26ae85cc6405e3190b1ca8b3d21114c6"
+            "sha256": "54200d2b34430d8ae6e6ff03ae3fac3aa0e70bdf65149f8b9208271915fe4c6e"
        },
         "pipfile-spec": 6,
         "requires": {},
@@ -56,10 +56,10 @@
         },
         "certifi": {
             "hashes": [
-                "sha256:b0e07438175de96ab74de9ab5dc40985ef8b44a41e9636a2000099dc3b670ddd",
-                "sha256:e68768546aa055623812ada64aec5e1f02ca20a9e7f3d3432dd8b0f35a6e7951"
+                "sha256:1d987a998c75633c40847cc966fcf5904906c920a7f17ef374f5aa4282abd304",
+                "sha256:51fcb31174be6e6664c5f69e3e1691a2d72a1a12e90f872cbdb1567eb47b6519"
             ],
-            "version": "==2020.4.5"
+            "version": "==2020.4.5.1"
         },
         "cffi": {
             "hashes": [
@@ -158,6 +158,12 @@
             ],
             "version": "==4.4.2"
         },
+        "et-xmlfile": {
+            "hashes": [
+                "sha256:614d9722d572f6246302c4491846d2c393c199cfa4edc9af593437691683335b"
+            ],
+            "version": "==1.0.1"
+        },
         "fastapi": {
             "hashes": [
                 "sha256:33188cc5abe96fb93a9e01bd953c72194f04536eb3d7b87d880d434efd283268",
@@ -240,6 +246,13 @@
             ],
             "version": "==2.9"
         },
+        "jdcal": {
+            "hashes": [
+                "sha256:1abf1305fce18b4e8aa248cf8fe0c56ce2032392bc64bbd61b5dff2a19ec8bba",
+                "sha256:472872e096eb8df219c23f2689fc336668bdb43d194094b5cc1707e1640acfc8"
+            ],
+            "version": "==1.4.1"
+        },
         "kiwisolver": {
             "hashes": [
                 "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c",
@@ -356,6 +369,13 @@
             "index": "pypi",
             "version": "==1.2.1"
         },
+        "openpyxl": {
+            "hashes": [
+                "sha256:547a9fc6aafcf44abe358b89ed4438d077e9d92e4f182c87e2dc294186dc4b64"
+            ],
+            "index": "pypi",
+            "version": "==3.0.3"
+        },
         "orm": {
             "hashes": [
                 "sha256:37cb4757b670c1713f4e0d65874c5afe819acbd712abb9743c97e1d4b00d511c"
@@ -444,10 +464,10 @@
         },
         "pyparsing": {
             "hashes": [
-                "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f",
-                "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"
+                "sha256:67199f0c41a9c702154efb0e7a8cc08accf830eb003b4d9fa42c4059002e2492",
+                "sha256:700d17888d441604b0bd51535908dcb297561b040819cccde647a92439db5a2a"
             ],
-            "version": "==2.4.6"
+            "version": "==3.0.0a1"
         },
         "python-dateutil": {
             "hashes": [
@@ -607,11 +627,11 @@
         },
         "beautifulsoup4": {
             "hashes": [
-                "sha256:05fd825eb01c290877657a56df4c6e4c311b3965bda790c613a3d6fb01a5462a",
-                "sha256:9fbb4d6e48ecd30bcacc5b63b94088192dcda178513b2ae3c394229f8911b887",
-                "sha256:e1505eeed31b0f4ce2dbb3bc8eb256c04cc2b3b72af7d551a4ab6efd5cbe5dae"
+                "sha256:594ca51a10d2b3443cbac41214e12dbb2a1cd57e1a7344659849e2e20ba6a8d8",
+                "sha256:a4bbe77fd30670455c5296242967a123ec28c37e9702a8a81bd2f20a4baf0368",
+                "sha256:d4e96ac9b0c3a6d3f0caae2e4124e6055c5dcafde8e2f831ff194c104f0775a0"
             ],
-            "version": "==4.8.2"
+            "version": "==4.9.0"
         },
         "black": {
             "hashes": [
@@ -750,10 +770,10 @@
         },
         "pyparsing": {
             "hashes": [
-                "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f",
"sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec" + "sha256:67199f0c41a9c702154efb0e7a8cc08accf830eb003b4d9fa42c4059002e2492", + "sha256:700d17888d441604b0bd51535908dcb297561b040819cccde647a92439db5a2a" ], - "version": "==2.4.6" + "version": "==3.0.0a1" }, "pytest": { "hashes": [ diff --git a/geomagio/api/db/metadata.py b/geomagio/api/db/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..384d970e0a7ccf5d8bb7c89e7cdd39e0cdc943cf --- /dev/null +++ b/geomagio/api/db/metadata.py @@ -0,0 +1,93 @@ +import datetime +import enum + +import orm + + +from .common import database, sqlalchemy_metadata + + +# known category values as enumeration +class MetadataCategory(str, enum.Enum): + ADJUSTED_MATRIX = "adjusted-matrix" + FLAG = "flag" + READING = "reading" + + +class Metadata(orm.Model): + """Metadata database model. + + This class is used for Data flagging and other Metadata. + + Flag example: + ``` + automatic_flag = Metadata( + created_by = 'algorithm/version', + start_time = UTCDateTime('2020-01-02T00:17:00.1Z'), + end_time = UTCDateTime('2020-01-02T00:17:00.1Z'), + network = 'NT', + station = 'BOU', + channel = 'BEU', + category = CATEGORY_FLAG, + comment = "spike detected", + priority = 1, + data_valid = False) + ``` + + Adjusted Matrix example: + ``` + adjusted_matrix = Metadata( + created_by = 'algorithm/version', + start_time = UTCDateTime('2020-01-02T00:17:00Z'), + end_time = None, + network = 'NT', + station = 'BOU', + category = CATEGORY_ADJUSTED_MATRIX, + comment = 'automatic adjusted matrix', + priority = 1, + value = { + 'parameters': {'x': 1, 'y': 2, 'z': 3} + 'matrix': [ ... ] + } + ) + ``` + """ + + __tablename__ = "metadata" + __database__ = database + __metadata__ = sqlalchemy_metadata + + id = orm.Integer(primary_key=True) + + # author + created_by = orm.Text(index=True) + created_time = orm.DateTime(default=datetime.datetime.utcnow, index=True) + # reviewer + reviewed_by = orm.Text(allow_null=True, index=True) + reviewed_time = orm.DateTime(allow_null=True, index=True) + + # time range + starttime = orm.DateTime(allow_null=True, index=True) + endtime = orm.DateTime(allow_null=True, index=True) + # what metadata applies to + # channel/location allow_null for wildcard + network = orm.String(index=True, max_length=255) + station = orm.String(index=True, max_length=255) + channel = orm.String(allow_null=True, index=True, max_length=255) + location = orm.String(allow_null=True, index=True, max_length=255) + + # category (flag, matrix, etc) + category = orm.String(index=True, max_length=255) + # higher priority overrides lower priority + priority = orm.Integer(default=1, index=True) + # whether data is valid (primarily for flags) + data_valid = orm.Boolean(default=True, index=True) + # value + metadata = orm.JSON(allow_null=True) + # whether metadata is valid (based on review) + metadata_valid = orm.Boolean(default=True, index=True) + + # general comment + comment = orm.Text(allow_null=True) + # review specific comment + review_comment = orm.Text(allow_null=True) diff --git a/geomagio/api/ws/DataApiQuery.py b/geomagio/api/ws/DataApiQuery.py index 7d89d73815ded285275c6e32ee39107a24689f6f..5d7cb3b2eabbf3867681bff082c4a6ed7d09cd86 100644 --- a/geomagio/api/ws/DataApiQuery.py +++ b/geomagio/api/ws/DataApiQuery.py @@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Union from obspy import UTCDateTime from pydantic import BaseModel, root_validator, validator +from ... 
diff --git a/geomagio/api/ws/DataApiQuery.py b/geomagio/api/ws/DataApiQuery.py
index 7d89d73815ded285275c6e32ee39107a24689f6f..5d7cb3b2eabbf3867681bff082c4a6ed7d09cd86 100644
--- a/geomagio/api/ws/DataApiQuery.py
+++ b/geomagio/api/ws/DataApiQuery.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Union
 
 from obspy import UTCDateTime
 from pydantic import BaseModel, root_validator, validator
+from ... import pydantic_utcdatetime
 
 from .Element import ELEMENTS, ELEMENT_INDEX
 DEFAULT_ELEMENTS = ["X", "Y", "Z", "F"]
@@ -59,8 +60,8 @@ class SamplingPeriod(float, enum.Enum):
 
 class DataApiQuery(BaseModel):
     id: str
-    starttime: datetime.datetime = None
-    endtime: datetime.datetime = None
+    starttime: UTCDateTime = None
+    endtime: UTCDateTime = None
     elements: List[str] = DEFAULT_ELEMENTS
     sampling_period: SamplingPeriod = SamplingPeriod.MINUTE
     data_type: Union[DataType, str] = DataType.VARIATION
@@ -100,32 +101,18 @@ class DataApiQuery(BaseModel):
             )
         return id
 
-    @validator("starttime", pre=True, always=True)
-    def validate_starttime(
-        cls, starttime: Union[datetime.datetime, datetime.date]
-    ) -> datetime.datetime:
+    @validator("starttime", always=True)
+    def validate_starttime(cls, starttime: UTCDateTime) -> UTCDateTime:
         if not starttime:
             # default to start of current day
             now = datetime.datetime.now(tz=datetime.timezone.utc)
-            return datetime.datetime(
-                year=now.year,
-                month=now.month,
-                day=now.day,
-                tzinfo=datetime.timezone.utc,
-            )
-        elif isinstance(starttime, datetime.date):
-            return datetime.datetime(
-                year=starttime.year,
-                month=starttime.month,
-                day=starttime.day,
-                tzinfo=datetime.timezone.utc,
-            )
+            return UTCDateTime(year=now.year, month=now.month, day=now.day)
         return starttime
 
-    @validator("endtime", always=True, pre=True)
+    @validator("endtime", always=True)
     def validate_endtime(
-        cls, endtime: Union[datetime.datetime, datetime.date], *, values: Dict, **kwargs
-    ) -> datetime.datetime:
+        cls, endtime: UTCDateTime, *, values: Dict, **kwargs
+    ) -> UTCDateTime:
         """Default endtime is based on starttime.
 
         This method needs to be after validate_starttime.
@@ -133,14 +120,7 @@ class DataApiQuery(BaseModel):
         if not endtime:
             # endtime defaults to 1 day after startime
             starttime = values.get("starttime")
-            endtime = starttime + datetime.timedelta(seconds=86400 - 0.001)
-        elif isinstance(endtime, datetime.date):
-            return datetime.datetime(
-                year=endtime.year,
-                month=endtime.month,
-                day=endtime.day,
-                tzinfo=datetime.timezone.utc,
-            )
+            endtime = starttime + (86400 - 0.001)
         return endtime
 
     @root_validator
@@ -157,9 +137,7 @@ class DataApiQuery(BaseModel):
         if starttime > endtime:
             raise ValueError("Starttime must be before endtime.")
         # check data volume
-        samples = int(
-            len(elements) * (endtime - starttime).total_seconds() / sampling_period
-        )
+        samples = int(len(elements) * (endtime - starttime) / sampling_period)
         if samples > REQUEST_LIMIT:
             raise ValueError(f"Request exceeds limit ({samples} > {REQUEST_LIMIT})")
         # otherwise okay
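A short sketch of the validator behavior above (times parse through the `pydantic_utcdatetime` registration introduced later in this diff):

```
from geomagio.api.ws.DataApiQuery import DataApiQuery

# starttime/endtime omitted: validators default to the start of the
# current UTC day and one day (minus 1 ms) later
query = DataApiQuery(id="BOU")
print(query.starttime, query.endtime)

# UTCDateTime fields accept ISO 8601 strings (or datetime/epoch values)
query = DataApiQuery(
    id="BOU",
    starttime="2020-01-02T00:00:00Z",
    endtime="2020-01-02T01:00:00Z",
)
```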
""" - if "format" in request.query_params: - data_format = str(request.query_params["format"]) + data_format = ( + "format" in request.query_params + and str(request.query_params["format"]) + or "text" + ) return format_error(400, str(exc), data_format, request) @app.exception_handler(Exception) -async def validation_exception_handler(request: Request, exc: RequestValidationError): +async def server_exception_handler(request: Request, exc: Exception): """Other exceptions are server errors. """ - if "format" in request.query_params: - data_format = str(request.query_params["format"]) + data_format = ( + "format" in request.query_params + and str(request.query_params["format"]) + or "text" + ) return format_error(500, str(exc), data_format, request) def format_error( status_code: int, exception: str, format: str, request: Request ) -> Response: - """Assign error_body value based on error format.""" + """Assign error_body value based on error format. + """ if format == "json": return json_error(status_code, exception, request.url) else: - return Response( - text_error(status_code, exception, request.url), media_type="text/plain" - ) + return text_error(status_code, exception, request.url) -def json_error(code: int, error: Exception, url: str) -> Dict: +def json_error(code: int, error: Exception, url: str) -> Response: """Format json error message. Returns @@ -69,26 +74,31 @@ def json_error(code: int, error: Exception, url: str) -> Dict: error_body : str body of json error message. """ - return { - "type": "Error", - "metadata": { - "title": ERROR_CODE_MESSAGES[code], - "status": code, - "error": str(error), - "generated": datetime.datetime.utcnow(), - "url": str(url), + return JSONResponse( + content={ + "type": "Error", + "metadata": { + "title": ERROR_CODE_MESSAGES[code], + "status": code, + "error": str(error), + "generated": f"{UTCDateTime().isoformat()}Z", + "url": str(url), + "version": VERSION, + }, }, - } + status_code=code, + ) -def text_error(code: int, error: Exception, url: str) -> str: +def text_error(code: int, error: Exception, url: str) -> Response: """Format error message as plain text Returns ------- error message formatted as plain text. 
""" - return f"""Error {code}: {ERROR_CODE_MESSAGES[code]} + return PlainTextResponse( + content=f"""Error {code}: {ERROR_CODE_MESSAGES[code]} {error} @@ -98,8 +108,10 @@ Request: {url} Request Submitted: -{datetime.datetime.utcnow().isoformat()} +{UTCDateTime().isoformat()}Z Service Version: {VERSION} -""" +""", + status_code=code, + ) diff --git a/geomagio/api/ws/data.py b/geomagio/api/ws/data.py index 128126eac3235639aa4b370c85816a141ece29ad..16638de4a17e943aee01612a488aff16a16a505e 100644 --- a/geomagio/api/ws/data.py +++ b/geomagio/api/ws/data.py @@ -1,4 +1,3 @@ -import datetime import os from typing import Any, Dict, List, Union @@ -67,8 +66,8 @@ def get_timeseries(data_factory: TimeseriesFactory, query: DataApiQuery) -> Stre """ # get data timeseries = data_factory.get_timeseries( - starttime=UTCDateTime(query.starttime), - endtime=UTCDateTime(query.endtime), + starttime=query.starttime, + endtime=query.endtime, observatory=query.id, channels=query.elements, type=query.data_type, @@ -83,8 +82,8 @@ router = APIRouter() @router.get("/data/") def get_data( id: str, - starttime: Union[datetime.datetime, datetime.date] = Query(None), - endtime: Union[datetime.datetime, datetime.date] = Query(None), + starttime: UTCDateTime = Query(None), + endtime: UTCDateTime = Query(None), elements: List[str] = Query(DEFAULT_ELEMENTS), sampling_period: Union[SamplingPeriod, float] = Query(SamplingPeriod.MINUTE), data_type: Union[DataType, str] = Query(DataType.ADJUSTED), diff --git a/geomagio/pydantic_utcdatetime.py b/geomagio/pydantic_utcdatetime.py new file mode 100644 index 0000000000000000000000000000000000000000..d7974b9f5fd9f78eefcf1bf13230f58de22a806e --- /dev/null +++ b/geomagio/pydantic_utcdatetime.py @@ -0,0 +1,61 @@ +"""Configure pydantic to allow UTCDateTime attributes on models. 
diff --git a/geomagio/pydantic_utcdatetime.py b/geomagio/pydantic_utcdatetime.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7974b9f5fd9f78eefcf1bf13230f58de22a806e
--- /dev/null
+++ b/geomagio/pydantic_utcdatetime.py
@@ -0,0 +1,61 @@
+"""Configure pydantic to allow UTCDateTime attributes on models.
+"""
+from datetime import datetime
+from typing import Any, Callable, Dict, List, Tuple, TypeVar, Union
+
+from obspy import UTCDateTime
+from pydantic.errors import PydanticValueError
+import pydantic.json
+import pydantic.schema
+import pydantic.validators
+
+
+# placeholder type for register_custom_pydantic_type method
+CustomType = TypeVar("CustomType")
+
+
+def register_custom_pydantic_type(
+    custom_type: CustomType,
+    encoder: Callable[[CustomType], Any],
+    json_schema: Dict,
+    parsers: List[Callable[[Any], CustomType]],
+):
+    try:
+        if custom_type.__custom_pydantic_type__:
+            # already registered
+            return
+    except AttributeError:
+        pass
+    # add encoder
+    pydantic.json.ENCODERS_BY_TYPE[custom_type] = encoder
+    # add openapi mapping
+    pydantic.schema.field_class_to_schema += ((custom_type, json_schema),)
+    # add validator
+    pydantic.validators._VALIDATORS.append((custom_type, parsers))
+    # mark as installed
+    custom_type.__custom_pydantic_type__ = True
+
+
+class UTCDateTimeError(PydanticValueError):
+    msg_template = "invalid date-time format"
+
+
+def format_utcdatetime(o: UTCDateTime) -> str:
+    return o.isoformat()
+
+
+def parse_utcdatetime(
+    value: Union[datetime, float, int, str, UTCDateTime]
+) -> UTCDateTime:
+    try:
+        return UTCDateTime(value)
+    except Exception:
+        raise UTCDateTimeError()
+
+
+register_custom_pydantic_type(
+    UTCDateTime,
+    encoder=format_utcdatetime,
+    json_schema={"type": "string", "format": "date-time"},
+    parsers=[parse_utcdatetime],
+)
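A minimal sketch of what this registration enables: any pydantic model can now declare `UTCDateTime` fields (the `Example` model below is hypothetical):

```
from obspy import UTCDateTime
from pydantic import BaseModel

from geomagio import pydantic_utcdatetime  # noqa: F401 (registers the type)


class Example(BaseModel):
    # accepts datetime, float, int, str, or UTCDateTime inputs
    time: UTCDateTime = None


example = Example(time="2020-01-02T03:04:05Z")
print(type(example.time))  # <class 'obspy.core.utcdatetime.UTCDateTime'>
print(example.json())      # time serialized via format_utcdatetime/isoformat
```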
diff --git a/geomagio/residual/Absolute.py b/geomagio/residual/Absolute.py
new file mode 100644
index 0000000000000000000000000000000000000000..d927757af2167ffc771d6bf7252580ff7a49dae3
--- /dev/null
+++ b/geomagio/residual/Absolute.py
@@ -0,0 +1,41 @@
+from typing import Optional
+
+from obspy import UTCDateTime
+from pydantic import BaseModel
+
+from .. import pydantic_utcdatetime
+
+
+class Absolute(BaseModel):
+    """Computed absolute and baseline measurement.
+
+    Attributes
+    ----------
+    element: the absolute and baseline component.
+    absolute: absolute measurement.
+        nT or ?radians?
+    baseline: baseline measurement.
+        nT or ?radians?
+    starttime: time of first measurement used.
+    endtime: time of last measurement used.
+    shift: used to correct polarity.
+    valid: whether values are considered valid.
+    """
+
+    element: str
+    absolute: Optional[float] = None
+    baseline: Optional[float] = None
+    starttime: Optional[UTCDateTime] = None
+    endtime: Optional[UTCDateTime] = None
+    shift: float = 0
+    valid: bool = True
+
+    def is_valid(self) -> bool:
+        return (
+            self.valid
+            and self.absolute is not None
+            and self.baseline is not None
+            and self.element is not None
+            and self.endtime is not None
+            and self.starttime is not None
+        )
diff --git a/geomagio/residual/Angle.py b/geomagio/residual/Angle.py
new file mode 100644
index 0000000000000000000000000000000000000000..06ce1c89354bd7e4726dcc9677a118ec084c3c68
--- /dev/null
+++ b/geomagio/residual/Angle.py
@@ -0,0 +1,13 @@
+from typing import List
+
+
+def from_dms(degrees: float = 0, minutes: float = 0, seconds: float = 0) -> float:
+    """Convert degrees, minutes, seconds to decimal degrees"""
+    return degrees + (minutes / 60.0) + (seconds / 3600.0)
+
+
+def to_dms(degrees: float) -> List[float]:
+    """Convert decimal degrees to degrees, minutes, seconds"""
+    minutes = (degrees - int(degrees)) * 60
+    seconds = (minutes - int(minutes)) * 60
+    return [int(degrees), int(minutes), seconds]
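A quick round trip through the helpers above (`to_dms` is subject to the usual floating point rounding):

```
from geomagio.residual import Angle

decimal = Angle.from_dms(degrees=12, minutes=30, seconds=36)
print(decimal)  # 12.51

degrees, minutes, seconds = Angle.to_dms(decimal)
print(degrees, minutes, seconds)  # 12 30 36.0 (approximately)
```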
diff --git a/geomagio/residual/CalFileFactory.py b/geomagio/residual/CalFileFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..baf8f87243be6ef9b4513d535a39cae26b3263d7
--- /dev/null
+++ b/geomagio/residual/CalFileFactory.py
@@ -0,0 +1,90 @@
+"""Factory for CalFiles, used by MagProc.
+"""
+from __future__ import print_function
+
+from typing import List
+import itertools
+from io import StringIO
+
+from .Absolute import Absolute
+from .Reading import Reading
+
+
+class CalFileFactory(object):
+    def format_readings(self, readings: List[Reading]) -> str:
+        absolutes = []
+        # list of all absolutes
+        for r in readings:
+            absolutes.extend(r.absolutes)
+        return self._format_absolutes(absolutes)
+
+    def _format_absolutes(self, absolutes: List[Absolute]) -> str:
+        out = StringIO()
+        # filter invalid
+        absolutes = [a for a in absolutes if a.is_valid()]
+        # sort by starttime
+        absolutes = sorted(absolutes, key=lambda a: a.starttime)
+        # group by date
+        for date, cals in itertools.groupby(absolutes, key=lambda a: a.starttime.date):
+            # convert group to list so it can be reused
+            cals = list(cals)
+            # within each day, order by H, then D, then Z
+            for element in ["H", "D", "Z"]:
+                element_cals = [c for c in cals if c.element == element]
+                if not element_cals:
+                    # no matching values
+                    continue
+                # channel header
+                out.write(f"--{date:%Y %m %d} ({element})\n")
+                for c in element_cals:
+                    absolute, baseline = c.absolute, c.baseline
+                    if element == "D":  # convert to minutes
+                        absolute, baseline = absolute * 60, baseline * 60
+                    out.write(  # this is one line...
+                        f"{c.starttime.datetime:%H%M}-{c.endtime.datetime:%H%M}"
+                        f" c{baseline:9.1f}{absolute:9.1f}\n"
+                    )
+        # add new line to end
+        out.write("\n")
+        return out.getvalue()
+
+
+"""
+CAL format example:
+- ordered by date
+- within date, ordered by H, then D, then Z component
+- component values ordered by start time
+- D component values in minutes.
+
+
+--2015 03 30 (H)
+2140-2143 c    175.0  12531.3
+2152-2156 c    174.9  12533.3
+2205-2210 c    174.8  12533.1
+2220-2223 c    174.9  12520.7
+--2015 03 30 (D)
+2133-2137 c   1128.3   1118.5
+2145-2149 c   1128.4   1116.4
+2159-2203 c   1128.3   1113.1
+2212-2216 c   1128.4   1113.5
+--2015 03 30 (Z)
+2140-2143 c    -52.9  55403.4
+2152-2156 c    -52.8  55403.8
+2205-2210 c    -52.8  55404.0
+2220-2223 c    -52.8  55410.5
+--2015 07 27 (H)
+2146-2151 c    173.5  12542.5
+2204-2210 c    173.8  12542.5
+2225-2229 c    173.8  12547.2
+2240-2246 c    173.6  12538.7
+--2015 07 27 (D)
+2137-2142 c   1127.8   1109.2
+2154-2158 c   1128.3   1106.3
+2213-2220 c   1128.0   1106.3
+2232-2237 c   1128.3   1104.7
+--2015 07 27 (Z)
+2146-2151 c    -53.9  55382.7
+2204-2210 c    -54.0  55382.5
+2225-2229 c    -54.1  55383.7
+2240-2246 c    -54.1  55389.0
+"""
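A hedged sketch of producing CAL output from a reading (values invented to mirror the example block above):

```
from obspy import UTCDateTime

from geomagio.residual import Absolute, CalFileFactory, Reading

reading = Reading(
    absolutes=[
        Absolute(
            element="H",
            absolute=12531.3,
            baseline=175.0,
            starttime=UTCDateTime("2015-03-30T21:40:00Z"),
            endtime=UTCDateTime("2015-03-30T21:43:00Z"),
        )
    ]
)
# writes "--2015 03 30 (H)" followed by "2140-2143 c    175.0  12531.3"
print(CalFileFactory().format_readings([reading]))
```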
diff --git a/geomagio/residual/Measurement.py b/geomagio/residual/Measurement.py
new file mode 100644
index 0000000000000000000000000000000000000000..03d968261b321b7a6953b689b74ab12153970d6b
--- /dev/null
+++ b/geomagio/residual/Measurement.py
@@ -0,0 +1,24 @@
+from typing import Optional
+
+from obspy.core import UTCDateTime
+from pydantic import BaseModel
+
+from .. import pydantic_utcdatetime
+from .MeasurementType import MeasurementType
+
+
+class Measurement(BaseModel):
+    """One angle and time measurement with optional residual.
+
+    Attributes
+    ----------
+    measurement_type: type of measurement.
+    angle: measured angle, decimal degrees.
+    residual: residual at time of measurement.
+    time: when measurement was taken.
+    """
+
+    measurement_type: MeasurementType
+    angle: float = 0
+    residual: float = 0
+    time: Optional[UTCDateTime] = None
diff --git a/geomagio/residual/MeasurementType.py b/geomagio/residual/MeasurementType.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a00d90179d785abc26c77a9c9d654f5534efbcb
--- /dev/null
+++ b/geomagio/residual/MeasurementType.py
@@ -0,0 +1,26 @@
+import enum
+
+
+class MeasurementType(str, enum.Enum):
+    """Measurement types used during absolutes."""
+
+    # declination
+    FIRST_MARK_UP = "FirstMarkUp"
+    FIRST_MARK_DOWN = "FirstMarkDown"
+    WEST_DOWN = "WestDown"
+    EAST_DOWN = "EastDown"
+    WEST_UP = "WestUp"
+    EAST_UP = "EastUp"
+    SECOND_MARK_UP = "SecondMarkUp"
+    SECOND_MARK_DOWN = "SecondMarkDown"
+
+    # meridian
+    # meridian is the average of declination measurements,
+    # recorded because it is calculated and used during inclination measurements.
+    MERIDIAN = "Meridian"
+
+    # inclination
+    SOUTH_DOWN = "SouthDown"
+    NORTH_UP = "NorthUp"
+    SOUTH_UP = "SouthUp"
+    NORTH_DOWN = "NorthDown"
diff --git a/geomagio/residual/Reading.py b/geomagio/residual/Reading.py
new file mode 100644
index 0000000000000000000000000000000000000000..d324f147467ffa15f9056ce59e9ab55dfb92233d
--- /dev/null
+++ b/geomagio/residual/Reading.py
@@ -0,0 +1,50 @@
+import collections
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel
+
+from .Absolute import Absolute
+from .Measurement import Measurement
+from .MeasurementType import MeasurementType
+
+
+class Reading(BaseModel):
+    """A collection of absolute measurements.
+
+    Attributes
+    ----------
+    absolutes: absolutes computed from measurements.
+    azimuth: azimuth angle to mark used for measurements, decimal degrees.
+    hemisphere: 1 for northern hemisphere, -1 for southern hemisphere.
+    measurements: raw measurements used to compute absolutes.
+    metadata: metadata used during absolute calculations.
+    pier_correction: pier correction value, nT.
+    """
+
+    absolutes: Optional[List[Absolute]] = None
+    azimuth: float = 0
+    hemisphere: float = 1  # maybe hemisphere should be calculated from latitude
+    measurements: Optional[List[Measurement]] = []
+    metadata: Optional[Dict] = {}
+    pier_correction: float = 0
+
+    def absolutes_index(self) -> Dict[str, Absolute]:
+        """Generate index of absolutes keyed by element.
+        """
+        return {a.element: a for a in self.absolutes}
+
+    def calculate_absolutes(self) -> List[Absolute]:
+        """Use measurements and metadata to (re)calculate absolutes.
+        """
+        raise NotImplementedError("TODO: implement this")
+
+    def measurement_index(self) -> Dict[MeasurementType, List[Measurement]]:
+        """Generate index of measurements keyed by MeasurementType.
+
+        Any missing MeasurementType returns an empty list.
+        There may be multiple measurements of each MeasurementType.
+        """
+        index = collections.defaultdict(list)
+        for m in self.measurements:
+            index[m.measurement_type].append(m)
+        return index
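The index helpers can be exercised directly; a small sketch:

```
from geomagio.residual import Absolute, Measurement, MeasurementType, Reading

reading = Reading(
    absolutes=[Absolute(element="H", absolute=12531.3, baseline=175.0)],
    measurements=[
        Measurement(measurement_type=MeasurementType.WEST_DOWN, angle=270.1),
        Measurement(measurement_type=MeasurementType.WEST_DOWN, angle=270.2),
    ],
)

index = reading.measurement_index()
assert len(index[MeasurementType.WEST_DOWN]) == 2
assert index[MeasurementType.NORTH_UP] == []  # missing types are empty lists

assert reading.absolutes_index()["H"].baseline == 175.0
```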
diff --git a/geomagio/residual/SpreadsheetAbsolutesFactory.py b/geomagio/residual/SpreadsheetAbsolutesFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbbccd92017a9a5e820d48fdb413e7b8c7de79a4
--- /dev/null
+++ b/geomagio/residual/SpreadsheetAbsolutesFactory.py
@@ -0,0 +1,220 @@
+import os
+from typing import Dict, IO, List, Mapping, Optional, Union
+
+from obspy.core import UTCDateTime
+import openpyxl
+
+from .Absolute import Absolute
+from .Measurement import Measurement
+from .MeasurementType import MeasurementType as mt
+from .Reading import Reading
+from . import Angle
+
+
+SPREADSHEET_MEASUREMENTS = [
+    # first mark
+    {"type": mt.FIRST_MARK_UP, "angle": "A13"},
+    {"type": mt.FIRST_MARK_UP, "angle": "B13"},
+    {"type": mt.FIRST_MARK_DOWN, "angle": "C13"},
+    {"type": mt.FIRST_MARK_DOWN, "angle": "D13"},
+    # declination
+    {"type": mt.WEST_DOWN, "angle": "C19", "residual": "E19", "time": "B19"},
+    {"type": mt.WEST_DOWN, "angle": "C20", "residual": "E20", "time": "B20"},
+    {"type": mt.EAST_DOWN, "angle": "C21", "residual": "E21", "time": "B21"},
+    {"type": mt.EAST_DOWN, "angle": "C22", "residual": "E22", "time": "B22"},
+    {"type": mt.WEST_UP, "angle": "C23", "residual": "E23", "time": "B23"},
+    {"type": mt.WEST_UP, "angle": "C24", "residual": "E24", "time": "B24"},
+    {"type": mt.EAST_UP, "angle": "C25", "residual": "E25", "time": "B25"},
+    {"type": mt.EAST_UP, "angle": "C26", "residual": "E26", "time": "B26"},
+    # second mark
+    {"type": mt.SECOND_MARK_UP, "angle": "A31"},
+    {"type": mt.SECOND_MARK_UP, "angle": "B31"},
+    {"type": mt.SECOND_MARK_DOWN, "angle": "C31"},
+    {"type": mt.SECOND_MARK_DOWN, "angle": "D31"},
+    # meridian
+    {"type": mt.MERIDIAN, "angle": "C37"},
+    # inclination
+    {"type": mt.SOUTH_DOWN, "angle": "D37", "residual": "E37", "time": "B37"},
+    {"type": mt.SOUTH_DOWN, "angle": "D38", "residual": "E38", "time": "B38"},
+    {"type": mt.NORTH_UP, "angle": "D39", "residual": "E39", "time": "B39"},
+    {"type": mt.NORTH_UP, "angle": "D40", "residual": "E40", "time": "B40"},
+    {"type": mt.SOUTH_UP, "angle": "D41", "residual": "E41", "time": "B41"},
+    {"type": mt.SOUTH_UP, "angle": "D42", "residual": "E42", "time": "B42"},
+    {"type": mt.NORTH_DOWN, "angle": "D43", "residual": "E43", "time": "B43"},
+    {"type": mt.NORTH_DOWN, "angle": "D44", "residual": "E44", "time": "B44"},
+    {"type": mt.NORTH_DOWN, "angle": "D45", "residual": "E45", "time": "B45"},
+]
+
+
+def parse_relative_time(base_date: str, time: str) -> UTCDateTime:
+    """Parse a relative date.
+
+    Arguments
+    ---------
+    base_date: date when time occurs (YYYYMMDD)
+    time: time on base_date (HHMMSS)
+        left padded with zeros to 6 characters
+    """
+    try:
+        return UTCDateTime(f"{base_date}T{time:06}")
+    except Exception as e:
+        print(f"error parsing relative date '{base_date}T{time:06}': {e}")
+        return None
+
+
+class SpreadsheetAbsolutesFactory(object):
+    """Read absolutes from residual spreadsheets.
+
+    Attributes
+    ----------
+    base_directory: directory where spreadsheets exist.
+        Assumed structure is base/OBS/YEAR/OBS/*.xlsm
+        Where each xlsm file is named OBS-YEARJULHHMM.xlsm
+    """
+
+    def __init__(self, base_directory="/Volumes/geomag/pub/observatories"):
+        self.base_directory = base_directory
+
+    def get_readings(
+        self,
+        observatory: str,
+        starttime: UTCDateTime,
+        endtime: UTCDateTime,
+        include_measurements: bool = True,
+    ) -> List[Reading]:
+        """Read spreadsheet files between starttime/endtime.
+        """
+        readings = []
+        start_filename = f"{observatory}-{starttime.datetime:%Y%j%H%M}.xlsm"
+        end_filename = f"{observatory}-{endtime.datetime:%Y%j%H%M}.xlsm"
+        for year in range(starttime.year, endtime.year + 1):
+            # start in observatory year directory to scan fewer files
+            observatory_directory = os.path.join(
+                self.base_directory, observatory, f"{year}", observatory
+            )
+            for (dirpath, dirnames, filenames) in os.walk(observatory_directory):
+                for filename in filenames:
+                    if start_filename <= filename < end_filename:
+                        readings.append(
+                            self.parse_spreadsheet(
+                                os.path.join(dirpath, filename), include_measurements
+                            )
+                        )
+        return readings
+
+    def parse_spreadsheet(self, path: str, include_measurements=True) -> Reading:
+        """Parse a residual spreadsheet file.
+
+        Be sure to check Reading metadata for errors.
+        """
+        workbook = openpyxl.load_workbook(path, data_only=True)
+        constants_sheet = workbook["constants"]
+        measurement_sheet = workbook["measurement"]
+        summary_sheet = workbook["Summary"]
+        metadata = self._parse_metadata(
+            constants_sheet, measurement_sheet, summary_sheet
+        )
+        absolutes = self._parse_absolutes(summary_sheet, metadata["date"])
+        measurements = (
+            include_measurements
+            and self._parse_measurements(measurement_sheet, metadata["date"])
+            or None
+        )
+        return Reading(
+            absolutes=absolutes,
+            azimuth=metadata["mark_azimuth"],
+            hemisphere=metadata["hemisphere"],
+            measurements=measurements,
+            metadata=metadata,
+            pier_correction=metadata["pier_correction"],
+        )
+
+    def _parse_absolutes(
+        self, sheet: openpyxl.worksheet, base_date: str
+    ) -> List[Absolute]:
+        """Parse absolutes from a summary sheet.
+        """
+        absolutes = [
+            Absolute(
+                element="D",
+                absolute=Angle.from_dms(
+                    degrees=sheet["C12"].value, minutes=sheet["D12"].value
+                ),
+                baseline=Angle.from_dms(minutes=sheet["F12"].value),
+                endtime=parse_relative_time(base_date, sheet["B12"].value),
+                starttime=parse_relative_time(base_date, sheet["B12"].value),
+            ),
+            Absolute(
+                element="H",
+                absolute=sheet["C17"].value,
+                baseline=sheet["F17"].value,
+                endtime=parse_relative_time(base_date, sheet["B17"].value),
+                starttime=parse_relative_time(base_date, sheet["B17"].value),
+            ),
+            Absolute(
+                element="Z",
+                absolute=sheet["C22"].value,
+                baseline=sheet["F22"].value,
+                endtime=parse_relative_time(base_date, sheet["B22"].value),
+                starttime=parse_relative_time(base_date, sheet["B22"].value),
+            ),
+        ]
+        return absolutes
+
+    def _parse_measurements(
+        self, sheet: openpyxl.worksheet, base_date: str
+    ) -> List[Measurement]:
+        """Parse measurements from a measurement sheet.
+        """
+        measurements = []
+        for m in SPREADSHEET_MEASUREMENTS:
+            measurement_type = m["type"]
+            angle = "angle" in m and sheet[m["angle"]].value or None
+            residual = "residual" in m and sheet[m["residual"]].value or None
+            time = (
+                "time" in m
+                and parse_relative_time(base_date, sheet[m["time"]].value)
+                or None
+            )
+            measurements.append(
+                Measurement(
+                    measurement_type=measurement_type,
+                    angle=angle,
+                    residual=residual,
+                    time=time,
+                )
+            )
+        return measurements
+
+    def _parse_metadata(
+        self,
+        constants_sheet: openpyxl.worksheet,
+        measurement_sheet: openpyxl.worksheet,
+        summary_sheet: openpyxl.worksheet,
+    ) -> Dict:
+        """Parse metadata from various sheets.
+        """
+        errors = []
+        mark_azimuth = None
+        try:
+            azimuth_number = measurement_sheet["F8"].value
+            mark_azimuth = Angle.from_dms(
+                minutes=constants_sheet[f"F{azimuth_number + 5}"].value
+            )
+        except Exception:
+            errors.append("Unable to read mark azimuth")
+        year = measurement_sheet["B8"].value
+        return {
+            # pad in case month starts with zero (which is trimmed)
+            "date": f"{year}{measurement_sheet['C8'].value:04}",
+            "di_scale": measurement_sheet["K8"].value,
+            "errors": errors,
+            "hemisphere": measurement_sheet["J8"].value,
+            "instrument": f"{summary_sheet['B4'].value}",
+            "mark_azimuth": mark_azimuth,
+            "observer": measurement_sheet["E8"].value,
+            "pier_correction": constants_sheet["H6"].value,
+            "pier_name": summary_sheet["B5"].value,
+            "station": measurement_sheet["A8"].value,
+            "temperature": constants_sheet["J58"].value,
+            "year": year,
+        }
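Typical usage of the factory above (hypothetical directory; spreadsheets must follow the naming scheme in the class docstring):

```
from obspy import UTCDateTime

from geomagio.residual import SpreadsheetAbsolutesFactory

factory = SpreadsheetAbsolutesFactory(base_directory="/path/to/observatories")
readings = factory.get_readings(
    observatory="BOU",
    starttime=UTCDateTime("2020-01-01"),
    endtime=UTCDateTime("2020-02-01"),
)
for reading in readings:
    # parse problems are collected rather than raised
    if reading.metadata["errors"]:
        print(reading.metadata["errors"])
```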
diff --git a/geomagio/residual/WebAbsolutesFactory.py b/geomagio/residual/WebAbsolutesFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3f062756abeec805911b12c4e12b50efa481027
--- /dev/null
+++ b/geomagio/residual/WebAbsolutesFactory.py
@@ -0,0 +1,109 @@
+import json
+import urllib.parse
+import urllib.request
+from typing import Dict, IO, List, Mapping, Optional, Union
+
+from obspy.core import UTCDateTime
+
+from .Absolute import Absolute
+from .Measurement import Measurement
+from .MeasurementType import MeasurementType
+from .Reading import Reading
+
+
+class WebAbsolutesFactory(object):
+    """Read absolutes from web absolutes service.
+    """
+
+    def __init__(
+        self, url: str = "https://geomag.usgs.gov/baselines/observation.json.php"
+    ):
+        self.url = url
+
+    def get_readings(
+        self,
+        observatory: str,
+        starttime: UTCDateTime,
+        endtime: UTCDateTime,
+        include_measurements: bool = True,
+    ) -> List[Reading]:
+        """Get readings from the Web Absolutes Service."""
+        args = urllib.parse.urlencode(
+            {
+                "observatory": observatory,
+                "starttime": starttime.isoformat(),
+                "endtime": endtime.isoformat(),
+                "includemeasurements": include_measurements and "true" or "false",
+            }
+        )
+        with urllib.request.urlopen(f"{self.url}?{args}") as data:
+            return self.parse_json(data)
+
+    def parse_json(self, jsonstr: IO[str]) -> List[Reading]:
+        """Parse readings from the web absolutes JSON format.
+        """
+        readings = []
+        response = json.load(jsonstr)
+        for data in response["data"]:
+            metadata = self._parse_metadata(data)
+            readings.extend(
+                [self._parse_reading(metadata, r) for r in data["readings"]]
+            )
+        return readings
+
+    def _parse_absolute(self, element: str, data: Mapping) -> Absolute:
+        return Absolute(
+            element=element,
+            absolute=data["absolute"],
+            baseline=data["baseline"],
+            starttime=data["start"] and UTCDateTime(data["start"]) or None,
+            endtime=data["end"] and UTCDateTime(data["end"]) or None,
+            shift="shift" in data and data["shift"] or 0,
+            valid=data["valid"],
+        )
+
+    def _parse_measurement(self, data: Mapping) -> Measurement:
+        return Measurement(
+            measurement_type=MeasurementType(data["type"]),
+            angle=data["angle"],
+            residual=0,
+            time=data["time"] and UTCDateTime(data["time"]) or None,
+        )
+
+    def _parse_metadata(self, data: Mapping) -> Dict:
+        return {
+            "time": data["time"],
+            "reviewed": data["reviewed"],
+            "electronics": data["electronics"]["serial"],
+            "theodolite": data["theodolite"]["serial"],
+            "mark_name": data["mark"]["name"],
+            "mark_azimuth": data["mark"]["azimuth"],
+            "pier_name": data["pier"]["name"],
+            "pier_correction": data["pier"]["correction"],
+            "observer": data["observer"],
+            "reviewer": data["reviewer"],
+        }
+
+    def _parse_reading(self, metadata: Mapping, data: Mapping) -> Reading:
+        """Parse absolutes and measurements from Reading json.
+        """
+        absolutes = [
+            self._parse_absolute(element, data[element])
+            for element in ["D", "H", "Z"]
+            if element in data
+        ]
+        measurements = (
+            [self._parse_measurement(m) for m in data["measurements"]]
+            if "measurements" in data
+            else []
+        )
+        return Reading(
+            absolutes=absolutes,
+            azimuth=("mark_azimuth" in metadata and metadata["mark_azimuth"] or 0),
+            hemisphere=1,
+            measurements=measurements,
+            metadata=metadata,
+            pier_correction=(
+                "pier_correction" in metadata and metadata["pier_correction"] or 0
+            ),
+        )
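And a matching sketch for the web service factory (performs a live network request against the configured service URL):

```
from obspy import UTCDateTime

from geomagio.residual import WebAbsolutesFactory

factory = WebAbsolutesFactory()
readings = factory.get_readings(
    observatory="BOU",
    starttime=UTCDateTime("2020-01-01"),
    endtime=UTCDateTime("2020-02-01"),
    include_measurements=False,  # skip raw measurements for smaller responses
)
print(len(readings))
```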
diff --git a/geomagio/residual/__init__.py b/geomagio/residual/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ec21abb70a7b1603e4008d45ce4b09ddf9aa3d8
--- /dev/null
+++ b/geomagio/residual/__init__.py
@@ -0,0 +1,22 @@
+# residual module
+from __future__ import absolute_import
+
+from .Absolute import Absolute
+from . import Angle
+from .CalFileFactory import CalFileFactory
+from .Measurement import Measurement
+from .MeasurementType import MeasurementType
+from .Reading import Reading
+from .SpreadsheetAbsolutesFactory import SpreadsheetAbsolutesFactory
+from .WebAbsolutesFactory import WebAbsolutesFactory
+
+__all__ = [
+    "Absolute",
+    "Angle",
+    "CalFileFactory",
+    "Measurement",
+    "MeasurementType",
+    "Reading",
+    "SpreadsheetAbsolutesFactory",
+    "WebAbsolutesFactory",
+]