Newer
Older
"""Factory that loads PCDCP Files."""
import obspy.core
import os
import urllib2
from geomagio import ChannelConverter, \
TimeseriesFactory, \
TimeseriesFactoryException
from PCDCPParser import PCDCPParser
from PCDCPWriter import PCDCPWriter
# Pattern for PCDCP file names: a %-style template with the named
# placeholders ``obs``, ``y``, ``j`` and ``i``.
# NOTE(review): nothing visible in this file references this constant, and
# _get_url supplies 'year'/'julian' keys rather than 'y'/'j' -- confirm the
# intended use before formatting with it.
PCDCP_FILE_PATTERN = '%(obs)s%(y)s%(j)s.%(i)s'
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def read_url(url):
    """Open and read url contents.

    Parameters
    ----------
    url : str
        A urllib2 compatible url, such as http:// or file://.

    Returns
    -------
    str
        contents returned by url.

    Raises
    ------
    urllib2.URLError
        if any occurs while opening or reading the url. The error's
        ``reason`` is printed before the error is re-raised.
    """
    response = None
    try:
        # Open inside the try block: urlopen is the call that normally
        # raises URLError, so it has to be covered by the handler below.
        response = urllib2.urlopen(url)
        return response.read()
    except urllib2.URLError as e:
        # ``as`` form and print(...) are valid on both Python 2.6+ and 3.
        print(getattr(e, 'reason', e))
        raise
    finally:
        # response is still None when urlopen itself failed.
        if response is not None:
            response.close()
class PCDCPFactory(TimeseriesFactory):
    """TimeseriesFactory for PCDCP formatted files.

    Parameters
    ----------
    urlTemplate : str
        A string that contains any of the following replacement patterns:
        - '%(i)s' : interval abbreviation
        - '%(interval)s' interval name
        - '%(julian)s' day of year (``strftime("%j")``) formatted as JJJ
        - '%(obs)s' lowercase observatory code
        - '%(OBS)s' uppercase observatory code
        - '%(t)s' type abbreviation
        - '%(type)s' type name
        - '%(year)s' year formatted as YYYY
        - '%(ymd)s' time formatted as YYYYMMDD
    observatory, channels, type, interval : optional
        forwarded unchanged to ``TimeseriesFactory.__init__``.

    See Also
    --------
    PCDCPParser
    """
def __init__(self, urlTemplate, observatory=None, channels=None, type=None,
        interval=None):
    """Create a factory that builds PCDCP urls from ``urlTemplate``.

    Parameters
    ----------
    urlTemplate : str
        %-style template used by ``_get_url`` to build one url per day.
    observatory, channels, type, interval : optional
        passed straight through to ``TimeseriesFactory.__init__``.
    """
    # The base initializer receives the defaults; only the template is
    # stored by this class itself.
    TimeseriesFactory.__init__(self, observatory, channels, type, interval)
    self.urlTemplate = urlTemplate
def get_timeseries(self, starttime, endtime, observatory=None,
        channels=None, type=None, interval=None):
    """Read one PCDCP file per day and return the combined timeseries.

    Parameters
    ----------
    starttime : obspy.core.UTCDateTime
        time of first sample to return.
    endtime : obspy.core.UTCDateTime
        time of last sample to return.
    observatory : str
        observatory code, falls back to ``self.observatory``.
    channels : array_like
        falls back to ``self.channels``; resolved here but not used to
        filter the returned stream.
    type : {'variation', 'quasi-definitive'}
        data type, falls back to ``self.type``.
    interval : {'minute', 'second'}
        data interval, falls back to ``self.interval``.

    Returns
    -------
    obspy.core.Stream
        timeseries object with requested data.

    Raises
    ------
    TimeseriesFactoryException
        If invalid values are requested, or errors occur while
        retrieving timeseries.
    """
    # anything the caller left out comes from the factory-level settings
    observatory = observatory or self.observatory
    channels = channels or self.channels
    type = type or self.type
    interval = interval or self.interval

    requested_days = self._get_days(starttime, endtime)
    stream = obspy.core.Stream()
    for requested_day in requested_days:
        day_url = self._get_url(observatory, requested_day, type, interval)
        day_contents = read_url(day_url)
        stream += self.parse_string(day_contents)

    # join the per-day traces of each channel, then cut to the exact window
    stream.merge()
    stream.trim(starttime, endtime)
    return stream
def parse_string(self, pcdcpString):
    """Parse the contents of a string in the format of a pcdcp file.

    Parameters
    ----------
    pcdcpString : str
        String containing PCDCP content.

    Returns
    -------
    obspy.core.Stream
        stream built from the parsed header, times and channel data.

    Notes
    -----
    NOTE(review): as this method reads here, ``data`` and ``channel`` are
    used without ever being assigned, so it would fail with a NameError.
    Lines appear to be missing (presumably an assignment of the parsed
    channel data and a loop over its channels) -- restore them from the
    upstream file rather than guessing.
    """
    parser = PCDCPParser()
    parser.parse(pcdcpString)

    year = parser.header['year']
    yearday = parser.header['yearday']

    # The first and last entries of parser.times are split into hours (/ 60)
    # and minutes (% 60), i.e. they are treated as minutes since start of day.
    begin = int(parser.times[0])
    startHour = str(int(begin / 60.0))
    startMinute = str(int(begin % 60.0))
    ending = int(parser.times[-1])
    endHour = str(int(ending / 60.0))
    endMinute = str(int(ending % 60.0))

    # Build "<year><yearday>T<hour>:<minute>:00.0"; hour and minute are not
    # zero padded. NOTE(review): presumably UTCDateTime accepts this form --
    # confirm.
    start = year + yearday + "T" + startHour + ":" + \
            startMinute + ":" + "00.0"
    end = year + yearday + "T" + endHour + ":" + endMinute + ":" + "00.0"

    starttime = obspy.core.UTCDateTime(start)
    endtime = obspy.core.UTCDateTime(end)

    # NOTE(review): ``data`` is not bound anywhere above. ``data.keys()[0]``
    # also needs keys() to return a list (Python 2 dict behaviour).
    length = len(data[data.keys()[0]])
    # Intervals between samples divided by the elapsed time between first
    # and last sample. NOTE(review): divides by zero if both times coincide.
    rate = (length - 1) / (endtime - starttime)

    stream = obspy.core.Stream()
    stats = obspy.core.Stats()
    # network code is hard-coded
    stats.network = 'NT'
    stats.station = parser.header['station']
    stats.starttime = starttime
    stats.sampling_rate = rate
    stats.npts = length
    # NOTE(review): ``channel`` is not bound anywhere above; as written the
    # minutes-to-radians conversion below is applied unconditionally.
    stats.channel = channel
    data[channel] = ChannelConverter.get_radians_from_minutes(
        data[channel])
    stream += obspy.core.Trace(data[channel], stats)
    return stream
def _get_url(self, observatory, date, type='variation', interval='minute'):
    """Fill in ``self.urlTemplate`` for one observatory and day.

    Every replacement pattern described in the class docstring is computed
    from the arguments and substituted into the template.

    Parameters
    ----------
    observatory : str
        observatory code.
    date : obspy.core.UTCDateTime
        day to fetch (only year, month, day are used)
    type : {'variation', 'quasi-definitive'}
        data type.
    interval : {'minute', 'second'}
        data interval.

    Returns
    -------
    str
        the template with all replacement patterns substituted.

    Raises
    ------
    TimeseriesFactoryException
        If type or interval are not supported.
    """
    # values are computed in the same order the template keys are listed,
    # so the first unsupported argument is the one that gets reported
    interval_abbr = self._get_interval_abbreviation(interval)
    interval_name = self._get_interval_name(interval)
    day_of_year = date.strftime("%j")
    obs_lower = observatory.lower()
    obs_upper = observatory.upper()
    type_abbr = self._get_type_abbreviation(type)
    type_name = self._get_type_name(type)
    year = date.strftime("%Y")
    ymd = date.strftime("%Y%m%d")

    replacements = {}
    replacements['i'] = interval_abbr
    replacements['interval'] = interval_name
    replacements['julian'] = day_of_year
    replacements['obs'] = obs_lower
    replacements['OBS'] = obs_upper
    replacements['t'] = type_abbr
    replacements['type'] = type_name
    replacements['year'] = year
    replacements['ymd'] = ymd
    return self.urlTemplate % replacements
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
def _get_interval_abbreviation(self, interval):
"""Get abbreviation for a data interval.
Used by ``_get_url`` to replace ``%(i)s`` in urlTemplate.
Parameters
----------
interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}
Returns
-------
abbreviation for ``interval``.
Raises
------
TimeseriesFactoryException
if ``interval`` is not supported.
"""
interval_abbr = None
if interval == 'daily':
interval_abbr = 'day'
elif interval == 'hourly':
interval_abbr = 'hor'
elif interval == 'minute':
interval_abbr = 'min'
elif interval == 'monthly':
interval_abbr = 'mon'
elif interval == 'second':
interval_abbr = 'sec'
else:
raise TimeseriesFactoryException(
'Unexpected interval "%s"' % interval)
return interval_abbr
def _get_interval_name(self, interval):
"""Get name for a data interval.
Used by ``_get_url`` to replace ``%(interval)s`` in urlTemplate.
Parameters
----------
interval : {'minute', 'second'}
Returns
-------
name for ``interval``.
Raises
------
TimeseriesFactoryException
if ``interval`` is not supported.
"""
interval_name = None
if interval == 'minute':
interval_name = 'OneMinute'
elif interval == 'second':
interval_name = 'OneSecond'
else:
raise TimeseriesFactoryException(
'Unsupported interval "%s"' % interval)
return interval_name
def _get_type_abbreviation(self, type):
"""Get abbreviation for a data type.
Used by ``_get_url`` to replace ``%(t)s`` in urlTemplate.
Parameters
----------
type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}
Returns
-------
name for ``type``.
Raises
------
TimeseriesFactoryException
if ``type`` is not supported.
"""
type_abbr = None
if type == 'definitive':
type_abbr = 'd'
elif type == 'provisional':
type_abbr = 'p'
elif type == 'quasi-definitive':
type_abbr = 'q'
elif type == 'variation':
type_abbr = 'v'
else:
raise TimeseriesFactoryException(
'Unexpected type "%s"' % type)
return type_abbr
def _get_type_name(self, type):
"""Get name for a data type.
Used by ``_get_url`` to replace ``%(type)s`` in urlTemplate.
Parameters
----------
type : {'variation', 'quasi-definitive'}
Returns
-------
name for ``type``.
Raises
------
TimeseriesFactoryException
if ``type`` is not supported.
"""
type_name = None
if type == 'variation':
type_name = ''
elif type == 'quasi-definitive':
type_name = 'QuasiDefinitive'
else:
raise TimeseriesFactoryException(
'Unsupported type "%s"' % type)
return type_name
def _get_days(self, starttime, endtime):
"""Get days between (inclusive) starttime and endtime.
Parameters
----------
starttime : obspy.core.UTCDateTime
the start time
endtime : obspy.core.UTCDateTime
the end time
Returns
-------
array_like
list of times, one per day, for all days between and including
``starttime`` and ``endtime``.
Raises
------
TimeseriesFactoryException
if starttime is after endtime
"""
if starttime > endtime:
raise TimeseriesFactoryException(
'starttime must be before endtime.')
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
days = []
day = starttime
lastday = (endtime.year, endtime.month, endtime.day)
while True:
days.append(day)
if lastday == (day.year, day.month, day.day):
break
# move to next day
day = obspy.core.UTCDateTime(day.timestamp + 86400)
return days
def write_file(self, fh, timeseries, channels):
    """Write timeseries data to an open file object using PCDCPWriter.

    Parameters
    ----------
    fh: file object
        destination handed to the writer.
    timeseries : obspy.core.Stream
        stream containing traces to store.
    channels : array_like
        list of channels to store
    """
    writer = PCDCPWriter()
    writer.write(fh, timeseries, channels)
def put_timeseries(self, timeseries, starttime=None, endtime=None,
        channels=None, type=None, interval=None):
    """Store timeseries data, one file per day.

    The observatory code and the default start/end times are read from the
    stats of the first trace in ``timeseries``.

    Parameters
    ----------
    timeseries : obspy.core.Stream
        stream containing traces to store.
    starttime : UTCDateTime
        time of first sample in timeseries to store.
        uses first sample if unspecified.
    endtime : UTCDateTime
        time of last sample in timeseries to store.
        uses last sample if unspecified.
    channels : array_like
        list of channels to store, optional.
        uses default if unspecified.
    type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}
        data type, optional.
        uses default if unspecified.
    interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}
        data interval, optional.
        uses default if unspecified.

    Raises
    ------
    TimeseriesFactoryException
        if ``self.urlTemplate`` is not a file:// url.
    """
    if not self.urlTemplate.startswith('file://'):
        raise TimeseriesFactoryException('Only file urls are supported')

    # anything the caller left out comes from the factory-level settings
    channels = channels or self.channels
    type = type or self.type
    interval = interval or self.interval

    first_stats = timeseries[0].stats
    observatory = first_stats.station
    starttime = starttime or first_stats.starttime
    endtime = endtime or first_stats.endtime

    for day in self._get_days(starttime, endtime):
        day_url = self._get_url(observatory, day, type, interval)
        day_path = self._get_file_from_url(day_url)
        day_stream = self._get_slice(timeseries, day, interval)
        with open(day_path, 'w') as day_file:
            self.write_file(day_file, day_stream, channels)
def _get_file_from_url(self, url):
"""Get a file for writing.
Ensures parent directory exists.
Parameters
----------
url : str
Url path to PCDCP
Returns
-------
str
path to file without file:// prefix
Raises
------
TimeseriesFactoryException
if url does not start with file://
"""
if not url.startswith('file://'):
raise TimeseriesFactoryException(
'Only file urls are supported for writing')
filename = url.replace('file://', '')
parent = os.path.dirname(filename)
if not os.path.exists(parent):
os.makedirs(parent)
return filename
def _get_slice(self, timeseries, day, interval):
    """Slice a stream down to the calendar day containing ``day``.

    Parameters
    ----------
    timeseries : obspy.core.Stream
        timeseries to slice
    day : UTCDateTime
        time in day to slice
    interval : str
        'minute' ends the slice 86340 seconds after the start of the day;
        any other value ends it 86399.999999 seconds after.

    Returns
    -------
    obspy.core.Stream
        sliced stream
    """
    moment = day.datetime
    day_start = obspy.core.UTCDateTime(
            moment.year, moment.month, moment.day, 0, 0, 0)
    # offset of the last instant of the day that should still be included
    seconds = 86340.0 if interval == 'minute' else 86399.999999
    day_end = day_start + seconds
    return timeseries.slice(day_start, day_end)