From 55b3ee7eccb180b68c7d226945c2f774981b6e7c Mon Sep 17 00:00:00 2001
From: pcain-usgs <pcain@usgs.gov>
Date: Fri, 29 Jan 2021 10:05:01 -0700
Subject: [PATCH] Update documentation, change Transform to BaseModel

---
 geomagio/adjusted/Affine.py    | 64 +++++++++++++++++++++++++++++-----
 geomagio/adjusted/Metric.py    | 13 +++++--
 geomagio/adjusted/Transform.py | 24 +++----------
 3 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/geomagio/adjusted/Affine.py b/geomagio/adjusted/Affine.py
index 7c53cf5e0..1acc42171 100644
--- a/geomagio/adjusted/Affine.py
+++ b/geomagio/adjusted/Affine.py
@@ -43,10 +43,8 @@ def filter_iqr(
     True that fall within these multiples of quantile ranges.
 
     NOTE: NumPy has a percentile function, but it does not yet handle
-          weights. This algorithm was adapted shamelessly from the PyPI
-          package wquantiles (https://pypi.org/project/wquantiles/). If
-          NumPy should ever implement their own weighted algorithm, we
-          should use it instead.
+          weights. This algorithm was adapted from the PyPI
+          package wquantiles (https://pypi.org/project/wquantiles/)
 
     Inputs:
     series: array of observations to filter
@@ -103,7 +101,7 @@ class Affine(BaseModel):
     endtime: end time for matrix creation
     acausal: when True, utilizes readings from after set endtime
     update_interval: window of time a matrix is representative of
-    transforms: list of methods for matrix calculation
+    transforms: list of methods for matrix calculations
     """
 
     observatory: str = None
@@ -130,17 +128,21 @@ class Affine(BaseModel):
         -------
         Ms: list of AdjustedMatrix objects created from calculations
         """
+        # default set to create one matrix between starttime and endtime
         update_interval = self.update_interval or (self.endtime - self.starttime)
         all_readings = [r for r in readings if r.valid]
         Ms = []
         time = self.starttime
         epoch_start = None
         epoch_end = None
+        # search for "bad" H values
         epochs = [r.time for r in all_readings if r.get_absolute("H").absolute == 0]
         while time < self.endtime:
+            # update epochs for current time
             epoch_start, epoch_end = self.get_epochs(
                 epoch_start=epoch_start, epoch_end=epoch_end, epochs=epochs, time=time
             )
+            # utilize readings that occur after or before a bad reading
             readings = [
                 r
                 for r in all_readings
@@ -148,10 +150,9 @@ class Affine(BaseModel):
                 or (epoch_end is None or r.time < epoch_end)
             ]
             M = self.calculate_matrix(time, readings)
+            # if readings are trimmed by bad data, mark the vakid interval
             M.starttime = epoch_start
             M.endtime = epoch_end
-
-            # increment start_UTC
             time += update_interval
 
             Ms.append(M)
@@ -165,6 +166,20 @@ class Affine(BaseModel):
         epochs: List[float],
         time: UTCDateTime,
     ) -> Tuple[float, float]:
+        """Updates valid start/end time for a given interval
+
+        Attributes
+        ----------
+        epoch_start: float value signifying start of last valid interval
+        epoch_end: float value signifying end of last valid interval
+        epochs: list of floats signifying bad data times
+        time: current time epoch is being evaluated at
+
+        Outputs
+        -------
+        epoch_start: float value signifying start of current valid interval
+        epoch_end: float value signifying end of current valid interval
+        """
         for e in epochs:
             if e > time:
                 if epoch_end is None or e < epoch_end:
@@ -237,6 +252,11 @@ class Affine(BaseModel):
     ) -> AdjustedMatrix:
         """Calculates affine matrix for a given time
 
+        Attributes
+        ----------
+        time: time within calculation interval
+        readings: list of valid readings
+
         Outputs
         -------
         AdjustedMatrix object containing result
@@ -270,7 +290,6 @@ class Affine(BaseModel):
             inputs = np.dot(
                 M, np.vstack([inputs[0], inputs[1], inputs[2], np.ones_like(inputs[0])])
             )[0:3]
-            metrics.append(transform.metric)
             Ms.append(M)
 
         # compose affine transform matrices using reverse ordered matrices
@@ -298,6 +317,21 @@ class Affine(BaseModel):
     def compute_metrics(
         self, absolutes: List[float], ordinates: List[float], matrix: List[float]
     ) -> Tuple[List[float], List[float], float, float]:
+        """Computes mean absolute error and standard deviation for X, Y, Z, and dF between expected and predicted values.
+
+        Attributes
+        ----------
+        absolutes: X, Y and Z absolutes
+        ordinates: H, E and Z ordinates
+        matrix: composed matrix
+
+        Outputs
+        -------
+        std: standard deviation between expected and predicted XYZ values
+        mae: mean absolute error between expected and predicted XYZ values
+        std_df: standard deviation of dF computed from expected and predicted XYZ values
+        mae_df: mean absolute error of dF computed from expected and predicted XYZ values
+        """
         # expected values are absolutes
         predicted = matrix @ ordinates
         # mean absolute erros and standard deviations ignore the 4th row comprison, which is trivial
@@ -320,6 +354,19 @@ class Affine(BaseModel):
         times: List[UTCDateTime],
         transform: Transform,
     ) -> np.array:
+        """
+
+        Attributes
+        ----------
+        time: time within calculation interval
+        times: times of valid readings
+        transform: matrix calculation method
+
+        Outputs
+        -------
+        weights: array of weights to apply to absolutes/ordinates within calculations
+        """
+
         weights = transform.get_weights(time=time.timestamp, times=times)
         # set weights for future observations to zero if not acausal
         if not self.acausal:
@@ -332,6 +379,7 @@ class Affine(BaseModel):
         weights: List[float],
         threshold=3,
     ) -> List[float]:
+        """Filters "bad" weights generated by unreliable readings"""
         good = (
             filter_iqr(baselines[0], threshold=threshold, weights=weights)
             & filter_iqr(baselines[1], threshold=threshold, weights=weights)
diff --git a/geomagio/adjusted/Metric.py b/geomagio/adjusted/Metric.py
index 19ba74ada..a8bd5d60b 100644
--- a/geomagio/adjusted/Metric.py
+++ b/geomagio/adjusted/Metric.py
@@ -2,6 +2,15 @@ from pydantic import BaseModel
 
 
 class Metric(BaseModel):
+    """Mean absolute error and standard deviation for a given element
+
+    Attributes
+    ----------
+    element: Channel that metrics are representative of
+    mae: mean absolute error
+    std: standard deviation
+    """
+
     element: str
-    mae: float
-    std: float
+    mae: float = None
+    std: float = None
diff --git a/geomagio/adjusted/Transform.py b/geomagio/adjusted/Transform.py
index dd98f1b67..aeb33131a 100644
--- a/geomagio/adjusted/Transform.py
+++ b/geomagio/adjusted/Transform.py
@@ -1,10 +1,11 @@
 import numpy as np
 from obspy import UTCDateTime
+from pydantic import BaseModel
 import scipy.linalg as spl
 from typing import List, Tuple, Optional
 
 
-class Transform(object):
+class Transform(BaseModel):
     """Method for generating an affine matrix.
 
     Attributes
@@ -13,13 +14,7 @@ class Transform(object):
     Defaults to infinite(equal weighting)
     """
 
-    def __init__(self, memory: float = np.inf, metric: float = 0.0):
-        self.memory = memory
-        self.metric = metric
-
-    def update_metric(self, sigma):
-        # indicates the maximum possible number of lost significant values
-        self.metric = np.log(abs(sigma[0] / sigma[-1]))
+    memory: Optional[float] = None
 
     def get_weights(self, times: UTCDateTime, time: int = None) -> List[float]:
         """
@@ -32,7 +27,6 @@ class Transform(object):
         """
 
         # convert to array of floats
-        # (allows UTCDateTimes, but not datetime.datetimes)
         times = np.asarray(times).astype(float)
 
         if time is None:
@@ -80,7 +74,7 @@ class LeastSq(Transform):
 
         Output
         ------
-        X, Y and Z absolutes place end to end and transposed
+        X, Y and Z absolutes placed end to end and transposed
         """
         return np.vstack([absolutes[0], absolutes[1], absolutes[2]]).T.ravel()
 
@@ -181,7 +175,6 @@ class NoConstraints(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -227,7 +220,6 @@ class ZRotationShear(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -274,7 +266,6 @@ class ZRotationHscale(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -321,7 +312,6 @@ class ZRotationHscaleZbaseline(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -365,7 +355,6 @@ class RotationTranslation3D(SVD):
         # Singular value decomposition, then rotation matrix from L&R eigenvectors
         # (the determinant guarantees a rotation, and not a reflection)
         U, S, Vh = np.linalg.svd(H)
-        self.update_metric(S)
         if np.sum(S) < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -415,7 +404,6 @@ class Rescale3D(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -470,7 +458,6 @@ class TranslateOrigins(LeastSq):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -510,7 +497,6 @@ class ShearYZ(LeastSq):
         abs_stacked = self.get_weighted_values(values=abs_stacked, weights=weights)
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 3:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -559,7 +545,6 @@ class RotationTranslationXY(SVD):
         # Singular value decomposition, then rotation matrix from L&R eigenvectors
         # (the determinant guarantees a rotation, and not a reflection)
         U, S, Vh = np.linalg.svd(H)
-        self.update_metric(S)
         if np.sum(S) < 2:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
@@ -642,7 +627,6 @@ class QRFactorization(SVD):
 
         # regression matrix M that minimizes L2 norm
         M_r, res, rank, sigma = spl.lstsq(ord_stacked.T, abs_stacked.T)
-        self.update_metric(sigma)
         if rank < 2:
             print("Poorly conditioned or singular matrix, returning NaNs")
             return np.nan * np.ones((4, 4))
-- 
GitLab