From 55b3ee7eccb180b68c7d226945c2f774981b6e7c Mon Sep 17 00:00:00 2001 From: pcain-usgs <pcain@usgs.gov> Date: Fri, 29 Jan 2021 10:05:01 -0700 Subject: [PATCH] Update documentation, change Transform to BaseModel --- geomagio/adjusted/Affine.py | 64 +++++++++++++++++++++++++++++----- geomagio/adjusted/Metric.py | 13 +++++-- geomagio/adjusted/Transform.py | 24 +++---------- 3 files changed, 71 insertions(+), 30 deletions(-) diff --git a/geomagio/adjusted/Affine.py b/geomagio/adjusted/Affine.py index 7c53cf5e0..1acc42171 100644 --- a/geomagio/adjusted/Affine.py +++ b/geomagio/adjusted/Affine.py @@ -43,10 +43,8 @@ def filter_iqr( True that fall within these multiples of quantile ranges. NOTE: NumPy has a percentile function, but it does not yet handle - weights. This algorithm was adapted shamelessly from the PyPI - package wquantiles (https://pypi.org/project/wquantiles/). If - NumPy should ever implement their own weighted algorithm, we - should use it instead. + weights. This algorithm was adapted from the PyPI + package wquantiles (https://pypi.org/project/wquantiles/) Inputs: series: array of observations to filter @@ -103,7 +101,7 @@ class Affine(BaseModel): endtime: end time for matrix creation acausal: when True, utilizes readings from after set endtime update_interval: window of time a matrix is representative of - transforms: list of methods for matrix calculation + transforms: list of methods for matrix calculations """ observatory: str = None @@ -130,17 +128,21 @@ class Affine(BaseModel): ------- Ms: list of AdjustedMatrix objects created from calculations """ + # default set to create one matrix between starttime and endtime update_interval = self.update_interval or (self.endtime - self.starttime) all_readings = [r for r in readings if r.valid] Ms = [] time = self.starttime epoch_start = None epoch_end = None + # search for "bad" H values epochs = [r.time for r in all_readings if r.get_absolute("H").absolute == 0] while time < self.endtime: + # update epochs for current time epoch_start, epoch_end = self.get_epochs( epoch_start=epoch_start, epoch_end=epoch_end, epochs=epochs, time=time ) + # utilize readings that occur after or before a bad reading readings = [ r for r in all_readings @@ -148,10 +150,9 @@ class Affine(BaseModel): or (epoch_end is None or r.time < epoch_end) ] M = self.calculate_matrix(time, readings) + # if readings are trimmed by bad data, mark the vakid interval M.starttime = epoch_start M.endtime = epoch_end - - # increment start_UTC time += update_interval Ms.append(M) @@ -165,6 +166,20 @@ class Affine(BaseModel): epochs: List[float], time: UTCDateTime, ) -> Tuple[float, float]: + """Updates valid start/end time for a given interval + + Attributes + ---------- + epoch_start: float value signifying start of last valid interval + epoch_end: float value signifying end of last valid interval + epochs: list of floats signifying bad data times + time: current time epoch is being evaluated at + + Outputs + ------- + epoch_start: float value signifying start of current valid interval + epoch_end: float value signifying end of current valid interval + """ for e in epochs: if e > time: if epoch_end is None or e < epoch_end: @@ -237,6 +252,11 @@ class Affine(BaseModel): ) -> AdjustedMatrix: """Calculates affine matrix for a given time + Attributes + ---------- + time: time within calculation interval + readings: list of valid readings + Outputs ------- AdjustedMatrix object containing result @@ -270,7 +290,6 @@ class Affine(BaseModel): inputs = np.dot( M, np.vstack([inputs[0], inputs[1], inputs[2], np.ones_like(inputs[0])]) )[0:3] - metrics.append(transform.metric) Ms.append(M) # compose affine transform matrices using reverse ordered matrices @@ -298,6 +317,21 @@ class Affine(BaseModel): def compute_metrics( self, absolutes: List[float], ordinates: List[float], matrix: List[float] ) -> Tuple[List[float], List[float], float, float]: + """Computes mean absolute error and standard deviation for X, Y, Z, and dF between expected and predicted values. + + Attributes + ---------- + absolutes: X, Y and Z absolutes + ordinates: H, E and Z ordinates + matrix: composed matrix + + Outputs + ------- + std: standard deviation between expected and predicted XYZ values + mae: mean absolute error between expected and predicted XYZ values + std_df: standard deviation of dF computed from expected and predicted XYZ values + mae_df: mean absolute error of dF computed from expected and predicted XYZ values + """ # expected values are absolutes predicted = matrix @ ordinates # mean absolute erros and standard deviations ignore the 4th row comprison, which is trivial @@ -320,6 +354,19 @@ class Affine(BaseModel): times: List[UTCDateTime], transform: Transform, ) -> np.array: + """ + + Attributes + ---------- + time: time within calculation interval + times: times of valid readings + transform: matrix calculation method + + Outputs + ------- + weights: array of weights to apply to absolutes/ordinates within calculations + """ + weights = transform.get_weights(time=time.timestamp, times=times) # set weights for future observations to zero if not acausal if not self.acausal: @@ -332,6 +379,7 @@ class Affine(BaseModel): weights: List[float], threshold=3, ) -> List[float]: + """Filters "bad" weights generated by unreliable readings""" good = ( filter_iqr(baselines[0], threshold=threshold, weights=weights) & filter_iqr(baselines[1], threshold=threshold, weights=weights) diff --git a/geomagio/adjusted/Metric.py b/geomagio/adjusted/Metric.py index 19ba74ada..a8bd5d60b 100644 --- a/geomagio/adjusted/Metric.py +++ b/geomagio/adjusted/Metric.py @@ -2,6 +2,15 @@ from pydantic import BaseModel class Metric(BaseModel): + """Mean absolute error and standard deviation for a given element + + Attributes + ---------- + element: Channel that metrics are representative of + mae: mean absolute error + std: standard deviation + """ + element: str - mae: float - std: float + mae: float = None + std: float = None diff --git a/geomagio/adjusted/Transform.py b/geomagio/adjusted/Transform.py index dd98f1b67..aeb33131a 100644 --- a/geomagio/adjusted/Transform.py +++ b/geomagio/adjusted/Transform.py @@ -1,10 +1,11 @@ import numpy as np from obspy import UTCDateTime +from pydantic import BaseModel import scipy.linalg as spl from typing import List, Tuple, Optional -class Transform(object): +class Transform(BaseModel): """Method for generating an affine matrix. Attributes @@ -13,13 +14,7 @@ class Transform(object): Defaults to infinite(equal weighting) """ - def __init__(self, memory: float = np.inf, metric: float = 0.0): - self.memory = memory - self.metric = metric - - def update_metric(self, sigma): - # indicates the maximum possible number of lost significant values - self.metric = np.log(abs(sigma[0] / sigma[-1])) + memory: Optional[float] = None def get_weights(self, times: UTCDateTime, time: int = None) -> List[float]: """ @@ -32,7 +27,6 @@ class Transform(object): """ # convert to array of floats - # (allows UTCDateTimes, but not datetime.datetimes) times = np.asarray(times).astype(float) if time is None: @@ -80,7 +74,7 @@ class LeastSq(Transform): Output ------ - X, Y and Z absolutes place end to end and transposed + X, Y and Z absolutes placed end to end and transposed """ return np.vstack([absolutes[0], absolutes[1], absolutes[2]]).T.ravel() @@ -181,7 +175,6 @@ class NoConstraints(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -227,7 +220,6 @@ class ZRotationShear(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -274,7 +266,6 @@ class ZRotationHscale(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -321,7 +312,6 @@ class ZRotationHscaleZbaseline(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -365,7 +355,6 @@ class RotationTranslation3D(SVD): # Singular value decomposition, then rotation matrix from L&R eigenvectors # (the determinant guarantees a rotation, and not a reflection) U, S, Vh = np.linalg.svd(H) - self.update_metric(S) if np.sum(S) < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -415,7 +404,6 @@ class Rescale3D(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -470,7 +458,6 @@ class TranslateOrigins(LeastSq): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -510,7 +497,6 @@ class ShearYZ(LeastSq): abs_stacked = self.get_weighted_values(values=abs_stacked, weights=weights) # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 3: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -559,7 +545,6 @@ class RotationTranslationXY(SVD): # Singular value decomposition, then rotation matrix from L&R eigenvectors # (the determinant guarantees a rotation, and not a reflection) U, S, Vh = np.linalg.svd(H) - self.update_metric(S) if np.sum(S) < 2: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) @@ -642,7 +627,6 @@ class QRFactorization(SVD): # regression matrix M that minimizes L2 norm M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T) - self.update_metric(sigma) if rank < 2: print("Poorly conditioned or singular matrix, returning NaNs") return np.nan * np.ones((4, 4)) -- GitLab