"""Controller class for geomag algorithms"""
import argparse
import sys
from obspy.core import Stream, UTCDateTime
from algorithm import algorithms
from PlotTimeseriesFactory import PlotTimeseriesFactory
import TimeseriesUtility
import edge
import iaga2002
import pcdcp
import temperature
import vbf
class Controller(object):
    """Controller for geomag algorithms.

    Parameters
    ----------
    inputFactory: TimeseriesFactory
        the factory that will read in timeseries data
    outputFactory: TimeseriesFactory
        the factory that will output the timeseries data
    algorithm: Algorithm
        the algorithm(s) that will process the timeseries data

    Notes
    -----
    run reads the requested interval from the input factory, processes it
        with the algorithm, and writes the result to the output factory.
    run_as_update looks for gaps in what the output factory already has
        for the requested interval and calls run for each gap the
        algorithm can fill. When the gap touches the start of the
        interval it first recurses into the previous interval, so older
        missing data is filled before newer data.
    """

    def __init__(self, inputFactory, outputFactory, algorithm):
        self._inputFactory = inputFactory
        self._algorithm = algorithm
        self._outputFactory = outputFactory

    def _get_input_timeseries(self, observatory, channels, starttime, endtime):
        """Get timeseries from the input factory for requested options.

        Parameters
        ----------
        observatory : array_like
            observatories to request.
        channels : array_like
            channels to request.
        starttime : obspy.core.UTCDateTime
            time of first sample to request.
        endtime : obspy.core.UTCDateTime
            time of last sample to request.

        Returns
        -------
        timeseries : obspy.core.Stream
        """
        timeseries = Stream()
        for obs in observatory:
            # get input interval for observatory
            # do this per observatory in case an
            # algorithm needs different amounts of data
            # NOTE(review): the tail of this call was not visible when this
            # block was reviewed; passing channels is assumed - confirm
            # against Algorithm.get_input_interval.
            input_start, input_end = self._algorithm.get_input_interval(
                start=starttime,
                end=endtime,
                observatory=obs,
                channels=channels)
            timeseries += self._inputFactory.get_timeseries(
                observatory=obs,
                starttime=input_start,
                endtime=input_end,
                channels=channels)
        return timeseries

    def _rename_channels(self, timeseries, renames):
        """Rename trace channel names.

        Parameters
        ----------
        timeseries : obspy.core.Stream
            stream with channels to rename
        renames : array_like
            list of channels to rename
            each list item should be array_like:
                the first element is the channel to rename,
                the last element is the new channel name

        Returns
        -------
        timeseries : obspy.core.Stream
            the same stream object, with matching traces renamed in place.
        """
        for r in renames:
            from_name, to_name = r[0], r[-1]
            for t in timeseries.select(channel=from_name):
                t.stats.channel = to_name
        return timeseries

    def _get_output_timeseries(self, observatory, channels, starttime,
            endtime):
        """Get timeseries from the output factory for requested options.

        Parameters
        ----------
        observatory : array_like
            observatories to request.
        channels : array_like
            channels to request.
        starttime : obspy.core.UTCDateTime
            time of first sample to request.
        endtime : obspy.core.UTCDateTime
            time of last sample to request.

        Returns
        -------
        timeseries : obspy.core.Stream
        """
        timeseries = Stream()
        for obs in observatory:
            timeseries += self._outputFactory.get_timeseries(
                observatory=obs,
                starttime=starttime,
                endtime=endtime,
                channels=channels)
        return timeseries

    def run(self, options):
        """run controller

        Parameters
        ----------
        options: argparse.Namespace
            The command line arguments. Attributes read here: inchannels,
            outchannels, observatory, starttime, endtime,
            rename_input_channel and rename_output_channel.
        """
        algorithm = self._algorithm
        input_channels = options.inchannels or \
                algorithm.get_input_channels()
        output_channels = options.outchannels or \
                algorithm.get_output_channels()
        # input
        timeseries = self._get_input_timeseries(
            observatory=options.observatory,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=input_channels)
        if timeseries.count() == 0:
            # nothing was read, so there is nothing to process or write
            return
        # process
        if options.rename_input_channel:
            timeseries = self._rename_channels(
                timeseries=timeseries,
                renames=options.rename_input_channel)
        processed = algorithm.process(timeseries)
        if options.rename_output_channel:
            processed = self._rename_channels(
                timeseries=processed,
                renames=options.rename_output_channel)
        # output
        self._outputFactory.put_timeseries(
            timeseries=processed,
            starttime=options.starttime,
            endtime=options.endtime,
            channels=output_channels)

    def run_as_update(self, options, update_count=0):
        """Updates data.

        Parameters
        ----------
        options: argparse.Namespace
            The command line arguments. In addition to what run reads,
            update_limit is read here; starttime and endtime are
            overwritten while gaps are processed.
        update_count: int
            how many intervals the update has already stepped back;
            callers normally leave this at 0.

        Notes
        -----
        Finds gaps in the target data, and if there's new data in the input
            source, calls run with the start/end time of a given gap to fill
            in.
        It checks the start of the target data, and if it's missing, and
            there's new data available, it backs up the starttime/endtime,
            and recursively calls itself, to check the previous period, to see
            if new data is available there as well. Calls run for each new
            period, oldest to newest.
        """
        # If an update_limit is set, make certain we don't step past it.
        if options.update_limit != 0 and \
                update_count >= options.update_limit:
            return
        # remember the requested interval; options is mutated below
        request_start = options.starttime
        request_end = options.endtime
        sys.stderr.write('checking gaps {0} {1}\n'.format(
            request_start, request_end))
        algorithm = self._algorithm
        input_channels = options.inchannels or \
                algorithm.get_input_channels()
        output_channels = options.outchannels or \
                algorithm.get_output_channels()
        # request output to see what has already been generated
        output_timeseries = self._get_output_timeseries(
            observatory=options.observatory,
            starttime=request_start,
            endtime=request_end,
            channels=output_channels)
        if len(output_timeseries) > 0:
            # find gaps in output, so they can be updated
            output_gaps = TimeseriesUtility.get_merged_gaps(
                TimeseriesUtility.get_stream_gaps(output_timeseries))
        else:
            output_gaps = [[
                request_start,
                request_end,
                # next sample time not used
                None
            ]]
        for output_gap in output_gaps:
            input_timeseries = self._get_input_timeseries(
                observatory=options.observatory,
                starttime=output_gap[0],
                endtime=output_gap[1],
                channels=input_channels)
            if not algorithm.can_produce_data(
                    starttime=output_gap[0],
                    endtime=output_gap[1],
                    stream=input_timeseries):
                continue
            # check for fillable gap at start
            if output_gap[0] == request_start:
                # found fillable gap at start, recurse to previous interval
                interval = request_end - request_start
                options.starttime = request_start - interval - 1
                options.endtime = request_start - 1
                self.run_as_update(options, update_count + 1)
            # fill gap
            options.starttime = output_gap[0]
            options.endtime = output_gap[1]
            # log after resetting options so the message names the gap
            # that is actually being processed
            sys.stderr.write('processing {0} {1}\n'.format(
                options.starttime, options.endtime))
            self.run(options)
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
def get_input_factory(args):
    """Build the input timeseries factory selected by the arguments.

    Parameters
    ----------
    args : argparse.Namespace
        parsed arguments; reads interval, observatory, type, input,
        input_file, input_stdin, input_url and, depending on the input
        format, the input_url_interval, input_host/input_port/locationcode
        or input_goes_* attributes.

    Returns
    -------
    TimeseriesFactory
        input timeseries factory, or None when the format is not
        recognized or a stream/url format has neither a stream nor a url.
    """
    source_stream = None
    source_url = None
    # keyword arguments shared by every factory
    factory_kwargs = {}
    factory_kwargs['interval'] = args.interval
    factory_kwargs['observatory'] = args.observatory
    factory_kwargs['type'] = args.type
    # a file takes precedence over stdin, which takes precedence over a url
    if args.input_file is not None:
        source_stream = open(args.input_file, 'r')
        factory_kwargs['stream'] = source_stream
    elif args.input_stdin:
        source_stream = sys.stdin
        factory_kwargs['stream'] = source_stream
    elif args.input_url is not None:
        source_url = args.input_url
        factory_kwargs['urlInterval'] = args.input_url_interval
        factory_kwargs['urlTemplate'] = source_url

    kind = args.input
    # NOTE(review): imfv122 and imfv283 are referenced below but are not
    # among the imports visible alongside this function - confirm they
    # are imported at the top of the file.
    if kind == 'edge':
        return edge.EdgeFactory(
            host=args.input_host,
            port=args.input_port,
            locationCode=args.locationcode,
            **factory_kwargs)
    if kind == 'goes':
        # TODO: deal with other goes arguments
        return imfv283.GOESIMFV283Factory(
            directory=args.input_goes_directory,
            getdcpmessages=args.input_goes_getdcpmessages,
            server=args.input_goes_server,
            user=args.input_goes_user,
            **factory_kwargs)

    # remaining formats read either from a stream or from a url template
    has_stream = source_stream is not None
    has_url = source_url is not None
    if kind == 'iaga2002':
        if has_stream:
            return iaga2002.StreamIAGA2002Factory(**factory_kwargs)
        if has_url:
            return iaga2002.IAGA2002Factory(**factory_kwargs)
    elif kind == 'imfv122':
        if has_stream:
            return imfv122.StreamIMFV122Factory(**factory_kwargs)
        if has_url:
            return imfv122.IMFV122Factory(**factory_kwargs)
    elif kind == 'imfv283':
        if has_stream:
            return imfv283.StreamIMFV283Factory(**factory_kwargs)
        if has_url:
            return imfv283.IMFV283Factory(**factory_kwargs)
    elif kind == 'pcdcp':
        if has_stream:
            return pcdcp.StreamPCDCPFactory(**factory_kwargs)
        if has_url:
            return pcdcp.PCDCPFactory(**factory_kwargs)
    return None
def get_output_factory(args):
    """Parse output factory arguments.

    Parameters
    ----------
    args : argparse.Namespace
        parsed arguments; reads interval, observatory, type, output,
        output_file, output_stdout, output_url and, depending on the
        output format, output_url_interval or the edge output options.

    Returns
    -------
    TimeseriesFactory
        output timeseries factory, or None when the format is not
        recognized or a stream/url format has neither a stream nor a url.
    """
    output_factory = None
    output_stream = None
    output_url = None
    # standard arguments
    output_factory_args = {}
    output_factory_args['interval'] = args.interval
    output_factory_args['observatory'] = args.observatory
    output_factory_args['type'] = args.type
    # stream/url arguments
    # a file takes precedence over stdout, which takes precedence over a url
    if args.output_file is not None:
        output_stream = open(args.output_file, 'wb')
        output_factory_args['stream'] = output_stream
    elif args.output_stdout:
        output_stream = sys.stdout
        output_factory_args['stream'] = output_stream
    elif args.output_url is not None:
        output_url = args.output_url
        output_factory_args['urlInterval'] = args.output_url_interval
        output_factory_args['urlTemplate'] = output_url
    output_type = args.output
    if output_type == 'edge':
        # TODO: deal with other edge arguments
        locationcode = args.outlocationcode or args.locationcode or None
        output_factory = edge.EdgeFactory(
            host=args.output_host,
            port=args.output_edge_read_port,
            write_port=args.output_port,
            locationCode=locationcode,
            tag=args.output_edge_tag,
            forceout=args.output_edge_forceout,
            **output_factory_args)
    elif output_type == 'iaga2002':
        if output_stream is not None:
            output_factory = iaga2002.StreamIAGA2002Factory(
                **output_factory_args)
        elif output_url is not None:
            output_factory = iaga2002.IAGA2002Factory(
                **output_factory_args)
    elif output_type == 'pcdcp':
        if output_stream is not None:
            output_factory = pcdcp.StreamPCDCPFactory(
                **output_factory_args)
        elif output_url is not None:
            output_factory = pcdcp.PCDCPFactory(
                **output_factory_args)
    elif output_type == 'plot':
        output_factory = PlotTimeseriesFactory()
    elif output_type == 'temperature':
        if output_stream is not None:
            output_factory = temperature.StreamTEMPFactory(
                **output_factory_args)
        elif output_url is not None:
            output_factory = temperature.TEMPFactory(
                **output_factory_args)
    elif output_type == 'vbf':
        # NOTE(review): only the stream variant was visible when this
        # block was reviewed; a url variant may exist - confirm.
        if output_stream is not None:
            output_factory = vbf.StreamVBFFactory(
                **output_factory_args)
    elif output_type == 'binlog':
        # NOTE(review): binlog is not among the imports visible alongside
        # this function - confirm it is imported at the top of the file.
        if output_stream is not None:
            output_factory = binlog.StreamBinLogFactory(
                **output_factory_args)
        elif output_url is not None:
            output_factory = binlog.BinLogFactory(
                output=output_type,
                **output_factory_args)
    return output_factory
def _map_legacy_input_arguments(args):
    """Translate deprecated --input-* flags; return True if one was used."""
    if args.input_edge is not None:
        args.input = 'edge'
        args.input_host = args.input_edge
        args.input_port = args.input_edge_port
    elif args.input_iaga_file is not None:
        args.input = 'iaga2002'
        args.input_file = args.input_iaga_file
    elif args.input_iaga_stdin:
        args.input = 'iaga2002'
        args.input_stdin = True
    elif args.input_iaga_url is not None:
        args.input = 'iaga2002'
        args.input_url = args.input_iaga_url
    elif args.input_imfv283_file is not None:
        args.input = 'imfv283'
        args.input_file = args.input_imfv283_file
    elif getattr(args, 'input_imfv283_stdin', False):
        # getattr: this flag is not referenced anywhere else in the visible
        # code, so tolerate argument objects that do not define it
        args.input = 'imfv283'
        args.input_stdin = True
    elif args.input_imfv283_url is not None:
        args.input = 'imfv283'
        args.input_url = args.input_imfv283_url
    elif args.input_imfv283_goes:
        args.input = 'goes'
    elif args.input_pcdcp_file is not None:
        args.input = 'pcdcp'
        args.input_file = args.input_pcdcp_file
    elif args.input_pcdcp_stdin:
        args.input = 'pcdcp'
        args.input_stdin = True
    elif args.input_pcdcp_url is not None:
        args.input = 'pcdcp'
        args.input_url = args.input_pcdcp_url
    else:
        return False
    return True


def _map_legacy_output_arguments(args):
    """Translate deprecated --output-* flags; return True if one was used."""
    if args.output_edge is not None:
        args.output = 'edge'
        args.output_host = args.output_edge
        args.output_port = args.edge_write_port
    elif args.output_iaga_file is not None:
        args.output = 'iaga2002'
        args.output_file = args.output_iaga_file
    elif args.output_iaga_stdout:
        args.output = 'iaga2002'
        args.output_stdout = True
    elif args.output_iaga_url is not None:
        args.output = 'iaga2002'
        args.output_url = args.output_iaga_url
    elif args.output_pcdcp_file is not None:
        args.output = 'pcdcp'
        args.output_file = args.output_pcdcp_file
    elif args.output_pcdcp_stdout:
        args.output = 'pcdcp'
        args.output_stdout = True
    elif args.output_pcdcp_url is not None:
        args.output = 'pcdcp'
        args.output_url = args.output_pcdcp_url
    elif args.output_plot:
        args.output = 'plot'
    else:
        return False
    return True


def main(args):
    """command line factory for geomag algorithms

    Inputs
    ------
    use geomag.py --help to see inputs, or see parse_args.

    Notes
    -----
    maps deprecated input/output arguments onto their current equivalents,
        normalizes the observatory argument to a tuple, then calls _main
        once, or once per observatory when observatory_foreach is set.
    """
    # TODO: remove argument mapping in future version
    # both mappers must run, so do not short-circuit with "or"
    used_legacy_input = _map_legacy_input_arguments(args)
    used_legacy_output = _map_legacy_output_arguments(args)
    usingDeprecated = used_legacy_input or used_legacy_output
    if usingDeprecated:
        sys.stderr.write('WARNING: you are using deprecated arguments,' +
                ' please update your usage' + '\n')
    # TODO check for unused arguments.

    # make sure observatory is a tuple
    try:
        string_types = (str, unicode)  # Python 2
    except NameError:
        string_types = (str,)  # Python 3 has no separate unicode type
    if isinstance(args.observatory, string_types):
        args.observatory = (args.observatory,)

    if args.observatory_foreach:
        observatory = args.observatory
        for obs in observatory:
            args.observatory = (obs,)
            _main(args)
    else:
        _main(args)
def _main(args):
"""Actual main method logic, called by main
Parameters
----------
args : argparse.Namespace
command line arguments
"""
# create controller
input_factory = get_input_factory(args)
output_factory = get_output_factory(args)
algorithm = algorithms[args.algorithm]()
algorithm.configure(args)
controller = Controller(input_factory, output_factory, algorithm)
if args.realtime:
now = UTCDateTime()
args.endtime = UTCDateTime(now.year, now.month, now.day,
now.hour, now.minute)
if args.interval == 'minute':
args.starttime = args.endtime - 3600
else:
args.starttime = args.endtime - 600
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
if args.update:
controller.run_as_update(args)
else:
controller.run(args)
def parse_args(args):
    """parse input arguments

    Parameters
    ----------
    args : list of strings

    Returns
    -------
    argparse.Namespace
        dictionary like object containing arguments.

    Notes
    -----
    Exactly one option from the input group and exactly one from the
        output group must be given; argparse enforces this because both
        groups are mutually exclusive and required.
    """
    parser = argparse.ArgumentParser(
        description='Use @ to read commands from a file.',
        fromfile_prefix_chars='@',)

    parser.add_argument('--starttime',
            type=UTCDateTime,
            default=None,
            help='UTC date YYYY-MM-DD HH:MM:SS')
    parser.add_argument('--endtime',
            type=UTCDateTime,
            default=None,
            help='UTC date YYYY-MM-DD HH:MM:SS')

    parser.add_argument('--observatory',
            help='Observatory code ie BOU, CMO, etc.' +
                    ' CAUTION: Using multiple observatories is not' +
                    ' recommended in most cases; especially with' +
                    ' single observatory formats like IAGA and PCDCP.',
            nargs='*',
            type=str)
    parser.add_argument('--observatory-foreach',
            action='store_true',
            default=False,
            help='When specifying multiple observatories, process'
                    ' each observatory separately')
    parser.add_argument('--inchannels',
            nargs='*',
            help='Channels H, E, Z, etc')
    parser.add_argument('--outchannels',
            nargs='*',
            default=None,
            help='Channels H, E, Z, etc')
    parser.add_argument('--type',
            default='variation',
            choices=['variation', 'quasi-definitive', 'definitive'])
    parser.add_argument('--rename-input-channel',
            action='append',
            help='Rename an input channel after it is read',
            metavar=('FROM', 'TO'),
            nargs=2)
    parser.add_argument('--rename-output-channel',
            action='append',
            help='Rename an output channel before it is written',
            metavar=('FROM', 'TO'),
            nargs=2)
    parser.add_argument('--locationcode',
            help='EDGE location code, e.g. "R0", "R1"',
            type=edge.LocationCode)
    # same converter as --locationcode so both codes are validated alike
    parser.add_argument('--outlocationcode',
            help='EDGE output location code'
                    ' (if different from --locationcode)',
            type=edge.LocationCode)
    parser.add_argument('--interval',
            default='minute',
            choices=['hourly', 'minute', 'second'])
    parser.add_argument('--update',
            action='store_true',
            default=False,
            help='Used to update data')
    parser.add_argument('--update-limit',
            type=int,
            default=0,
            help='Used to limit the number of iterations update will recurse')
    parser.add_argument('--no-trim',
            action='store_true',
            default=False,
            help='Ensures output data will not be trimmed down')
    parser.add_argument('--input-edge-port',
            type=int,
            default=2060,
            help='deprecated. '
                    'Input port # for edge input, defaults to 2060')
    parser.add_argument('--output-edge-port',
            type=int,
            dest='edge_write_port',
            default=7981,
            help='deprecated. '
                    'Edge port for writing realtime data, defaults to 7981')
    # shares dest with --output-edge-port, as in the original definition
    parser.add_argument('--output-edge-cwb-port',
            type=int,
            dest='edge_write_port',
            default=7981,
            help='deprecated. '
                    'Edge port for writing older data. Not used by geomag.')
    parser.add_argument('--output-edge-read-port',
            type=int,
            default=2060,
            help='deprecated. '
                    'Edge port for reading output data, defaults to 2060')
    parser.add_argument('--output-edge-tag',
            default='GEOMAG',
            help='ID Tag for edge connections, defaults to GEOMAG')
    parser.add_argument('--output-edge-forceout',
            action='store_true',
            default=False,
            help='Flag to force data into miniseed blocks. Should only ' +
                    'be used when certain the data is self contained.')
    parser.add_argument('--realtime',
            action='store_true',
            default=False,
            help='Flag to run the last hour if interval is minute, ' +
                    'or the last 10 minutes if interval is seconds')
    # NOTE(review): this definition was partly unreadable when reviewed;
    # a default for the directory may be missing - confirm.
    parser.add_argument('--input-goes-directory',
            help='Directory for support files for goes input of imfv283 data')
    parser.add_argument('--input-goes-getdcpmessages',
            default='',
            help='Location of getDcpMessages.')
    parser.add_argument('--input-goes-server',
            nargs='*',
            help='The server name(s) to retrieve the GOES data from')
    parser.add_argument('--input-goes-user',
            default='GEOMAG',
            help='The user name to use to retrieve data from GOES')

    # Input group
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('--input',
            help='Input format',
            choices=(
                'edge',
                'goes',
                'iaga2002',
                # handled by get_input_factory, so accept it here too
                'imfv122',
                'imfv283',
                'pcdcp'))

    parser.add_argument('--input-file',
            help='Read from specified file')
    parser.add_argument('--input-host',
            default='cwbpub.cr.usgs.gov',
            help='Hostname or IP address')
    parser.add_argument('--input-port',
            default=2060,
            help='Port number',
            type=int)
    parser.add_argument('--input-stdin',
            action='store_true',
            default=False,
            help='Read from standard input')
    parser.add_argument('--input-url',
            help='Read from a url pattern')
    parser.add_argument('--input-url-interval',
            default=86400,
            help='Read url interval in seconds',
            type=int)

    input_group.add_argument('--input-edge',
            help='deprecated. '
                    'Host IP #, see --input-edge-port for optional args')
    input_group.add_argument('--input-iaga-file',
            help='deprecated. Reads from the specified file.')
    input_group.add_argument('--input-iaga-stdin',
            action='store_true',
            default=False,
            help='deprecated. '
                    'Pass in an iaga file using redirection from stdin.')
    input_group.add_argument('--input-iaga-url',
            help='deprecated. '
                    'Example: file://./%%(obs)s%%(ymd)s%%(t)s%%(i)s.%%(i)s')
    # NOTE(review): the option names of the next three arguments were
    # unreadable when reviewed; they are rebuilt from their help text and
    # from the args.input_imfv283_* attributes read by main - confirm.
    input_group.add_argument('--input-imfv283-file',
            help='deprecated. Reads from the specified file.')
    input_group.add_argument('--input-imfv283-stdin',
            action='store_true',
            default=False,
            help='deprecated. '
                    'Pass in a file using redirection from stdin')
    input_group.add_argument('--input-imfv283-url',
            help='deprecated. Example file://./')
    input_group.add_argument('--input-imfv283-goes',
            action='store_true',
            default=False,
            help='deprecated. '
                    'Retrieves data directly from a goes server to read')
    input_group.add_argument('--input-pcdcp-file',
            help='deprecated. Reads from the specified file.')
    input_group.add_argument('--input-pcdcp-stdin',
            action='store_true',
            default=False,
            help='deprecated. '
                    'Pass in an pcdcp file using redirection from stdin.')
    input_group.add_argument('--input-pcdcp-url',
            help='deprecated. Example: file://./%%(obs)s%%(Y)s%%(j)s.%%(i)s')

    # Output group
    output_group = parser.add_mutually_exclusive_group(required=True)
    output_group.add_argument('--output-iaga-file',
            help='deprecated. Write to a single iaga file.')
    output_group.add_argument('--output-iaga-stdout',
            action='store_true', default=False,
            help='deprecated. Write to stdout.')
    output_group.add_argument('--output-iaga-url',
            help='deprecated. '
                    'Example: file://./%%(obs)s%%(ymd)s%%(t)s%%(i)s.%%(i)s')
    output_group.add_argument('--output-pcdcp-file',
            help='deprecated. Write to a single pcdcp file.')
    output_group.add_argument('--output-pcdcp-stdout',
            action='store_true', default=False,
            help='deprecated. Write to stdout.')
    output_group.add_argument('--output-pcdcp-url',
            help='deprecated. Example: file://./%%(obs)s%%(Y)s%%(j)s.%%(i)s')
    output_group.add_argument('--output-edge',
            help='deprecated. '
                    'Edge IP #. See --output-edge-* for other optional'
                    ' arguments')
    output_group.add_argument('--output-plot',
            action='store_true',
            default=False,
            help='deprecated. Plot the algorithm output using matplotlib')

    # output arguments
    output_group.add_argument('--output',
            choices=(
                'binlog',
                'edge',
                'iaga2002',
                'pcdcp',
                'plot',
                'temperature',
                'vbf'
            ),
            # TODO: set default to 'iaga2002'
            help='Output format')

    parser.add_argument('--output-file',
            help='Write to specified file')
    parser.add_argument('--output-host',
            default='cwbpub.cr.usgs.gov',
            help='Write to specified host')
    parser.add_argument('--output-port',
            default=7981,
            help='Write to specified port',
            type=int)
    parser.add_argument('--output-stdout',
            action='store_true',
            default=False,
            help='Write to standard output')
    parser.add_argument('--output-url',
            help='Write to a file:// url pattern')
    parser.add_argument('--output-url-interval',
            default=86400,
            help='Output interval in seconds',
            type=int)

    # Algorithms group
    parser.add_argument('--algorithm',
            choices=[k for k in algorithms],
            default='identity')

    # let every registered algorithm contribute its own options
    for k in algorithms:
        algorithms[k].add_arguments(parser)

    return parser.parse_args(args)