From f9d3ddb62a2b10f25badbfbf6e84a98521d5411a Mon Sep 17 00:00:00 2001 From: "E. Joshua Rigler" <erigler@usgs.gov> Date: Fri, 12 Oct 2018 14:31:06 -0600 Subject: [PATCH] Add option to trim NaNs from SqDist input stream The SqDistAlgorithm has had an issue since first being deployed wherein its SV+SQ terms de-synchronized with the Dist term, which in turn led to frequent reinitialization in real time operations. Reinitialization requires loading and processing 90 days worth of data, which nullifies two major advantages of this recursive algorithm (i.e., small data footprint, low cpu-usage). This commit adds the ability to optionally trim NaNs off the end of the input stream prior to calling the 'additive' Holt-Winters exponential smoother. This ensures that the SV+SQ+Dist outputs are always synchronized, and that the `next_starttime` element of the state file used to store the state between calls to SqDistAlgorithm will always coincide with the beginning of the most recent gap in a real time data stream. Finally, this change does not address the situation where more than a single "end gap" within a chunk of input data exists. In such a case, the algorithm will still reinitialize because the `next_starttime` will not coincide with the beginning of this gap. If this is undesirable, the calling procedure should be set up such that the requested input chunks should be the same length as the interval since the last call to SqDistAlgorithm. This can be easily done given a recent commit to change the controller's `--realtime` option. --- geomagio/algorithm/SqDistAlgorithm.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/geomagio/algorithm/SqDistAlgorithm.py b/geomagio/algorithm/SqDistAlgorithm.py index acc66392a..e73a8c016 100644 --- a/geomagio/algorithm/SqDistAlgorithm.py +++ b/geomagio/algorithm/SqDistAlgorithm.py @@ -18,6 +18,7 @@ from .Algorithm import Algorithm from .AlgorithmException import AlgorithmException import json import numpy as np +import sys from obspy.core import Stream, UTCDateTime from scipy.optimize import fmin_l_bfgs_b @@ -28,7 +29,7 @@ class SqDistAlgorithm(Algorithm): def __init__(self, alpha=None, beta=None, gamma=None, phi=1, m=1, yhat0=None, s0=None, l0=None, b0=None, sigma0=None, zthresh=6, fc=0, hstep=0, statefile=None, mag=False, - smooth=1): + smooth=1, trim=False): Algorithm.__init__(self, inchannels=None, outchannels=None) self.alpha = alpha self.beta = beta @@ -41,6 +42,7 @@ class SqDistAlgorithm(Algorithm): self.statefile = statefile self.mag = mag self.smooth = smooth + self.trim = trim # state variables self.yhat0 = yhat0 self.s0 = s0 @@ -80,6 +82,8 @@ class SqDistAlgorithm(Algorithm): # state is up to date, only need new data return (start, end) # state not up to date, need to prime + print('WARNING: missing or incompatible state...' + + 'reinitializing.', file=sys.stderr) return (start - 3 * 30 * 24 * 60 * 60, end) def load_state(self): @@ -196,6 +200,10 @@ class SqDistAlgorithm(Algorithm): self.l0 = None self.b0 = None self.sigma0 = None + # trim trailing NaNs if self.trim is set + if self.trim: + trace.stats['npts'] = np.argwhere(~np.isnan(trace.data))[-1][0] + 1 + trace.data = trace.data[:trace.stats['npts']] # process yhat, shat, sigmahat, yhat0, s0, l0, b0, sigma0 = self.additive( yobs=trace.data, @@ -652,6 +660,11 @@ class SqDistAlgorithm(Algorithm): parser.add_argument('--sqdist-smooth', default=1, help='Local SQ smoothing parameter') + parser.add_argument('--sqdist-trim', + action='store_true', + default=False, + help='Trim gap since last observation prior ' + + 'to processing input timeseries.') def configure(self, arguments): """Configure algorithm using comand line arguments. @@ -670,4 +683,5 @@ class SqDistAlgorithm(Algorithm): self.statefile = arguments.sqdist_statefile self.zthresh = arguments.sqdist_zthresh self.smooth = arguments.sqdist_smooth + self.trim = arguments.sqdist_trim self.load_state() -- GitLab