Skip to content
Snippets Groups Projects
PCDCPFactory.py 12.9 KiB
Newer Older
  • Learn to ignore specific revisions
  • 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
    """Factory that loads PCDCP Files."""
    
    import urllib2
    import obspy.core
    import os
    from geomagio import TimeseriesFactory, TimeseriesFactoryException
    from PCDCPParser import PCDCPParser
    from PCDCPWriter import PCDCPWriter
    from geomagio import ChannelConverter
    
    
    # pattern for pcdcp file names
    # placeholders are filled with %-style dictionary formatting:
    #   %(obs)s  lowercase observatory code
    #   %(y)s    year formatted as YYYY
    #   %(j)s    julian day formatted as JJJ
    # NOTE(review): not referenced elsewhere in the visible part of this
    # module; presumably appended to a base url by callers - confirm.
    PCDCP_FILE_PATTERN = '%(obs)s%(y)s%(j)s.min'
    
    
    def read_url(url):
        """Open and read url contents.

        Parameters
        ----------
        url : str
            A urllib2 compatible url, such as http:// or file://.

        Returns
        -------
        str
            contents returned by url.

        Raises
        ------
        urllib2.URLError
            if the url cannot be opened or read.  The reason is printed
            before the error is re-raised.
        """
        response = None
        try:
            # open inside the try block so that connection failures are
            # reported the same way as failures while reading.
            response = urllib2.urlopen(url)
            return response.read()
        except urllib2.URLError as e:
            print(e.reason)
            raise
        finally:
            # response is still None when urlopen itself failed.
            if response is not None:
                response.close()
    
    
    class PCDCPFactory(TimeseriesFactory):
        """TimeseriesFactory for PCDCP formatted files.

        Parameters
        ----------
        urlTemplate : str
            A string that contains any of the following replacement patterns:
            - '%(i)s' interval abbreviation
            - '%(interval)s' interval name
            - '%(j)s' julian day formatted as JJJ
            - '%(obs)s' lowercase observatory code
            - '%(OBS)s' uppercase observatory code
            - '%(t)s' type abbreviation
            - '%(type)s' type name
            - '%(y)s' year formatted as YYYY
            - '%(ymd)s' date formatted as YYYYMMDD
        observatory : str
            default observatory code, optional.
        channels : array_like
            default list of channels, optional.
        type : {'variation', 'quasi-definitive'}
            default data type, optional.
        interval : {'minute', 'second'}
            default data interval, optional.

        See Also
        --------
        PCDCPParser
        """

        # interval -> replacement for '%(i)s' in urlTemplate.
        _INTERVAL_ABBREVIATIONS = {
            'daily': 'day',
            'hourly': 'hor',
            'minute': 'min',
            'monthly': 'mon',
            'second': 'sec'}

        # interval -> replacement for '%(interval)s' in urlTemplate.
        _INTERVAL_NAMES = {
            'minute': 'OneMinute',
            'second': 'OneSecond'}

        # type -> replacement for '%(t)s' in urlTemplate.
        _TYPE_ABBREVIATIONS = {
            'definitive': 'd',
            'provisional': 'p',
            'quasi-definitive': 'q',
            'variation': 'v'}

        # type -> replacement for '%(type)s' in urlTemplate.
        _TYPE_NAMES = {
            'variation': '',
            'quasi-definitive': 'QuasiDefinitive'}

        def __init__(self, urlTemplate, observatory=None, channels=None, type=None,
                interval=None):
            TimeseriesFactory.__init__(self, observatory, channels, type, interval)
            self.urlTemplate = urlTemplate

        def get_timeseries(self, starttime, endtime, observatory=None,
                channels=None, type=None, interval=None):
            """Get timeseries data

            One url is read for each day between starttime and endtime, the
            parsed traces are merged and the result is trimmed to the
            requested time range.

            Parameters
            ----------
            starttime : obspy.core.UTCDateTime
                time of first sample.
            endtime : obspy.core.UTCDateTime
                time of last sample.
            observatory : str
                observatory code, optional.
                uses default if unspecified.
            channels : array_like
                accepted for interface compatibility; this method does not
                filter by channel, every channel found in the files is
                returned.
            type : {'variation', 'quasi-definitive'}
                data type, optional.
                uses default if unspecified.
            interval : {'minute', 'second'}
                data interval, optional.
                uses default if unspecified.

            Returns
            -------
            obspy.core.Stream
                timeseries object with requested data.

            Raises
            ------
            TimeseriesFactoryException
                if invalid values are requested, or errors occur while
                retrieving timeseries.
            """
            observatory = observatory or self.observatory
            type = type or self.type
            interval = interval or self.interval
            timeseries = obspy.core.Stream()
            for day in self._get_days(starttime, endtime):
                url = self._get_url(observatory, day, type, interval)
                timeseries += self.parse_string(read_url(url))
            # merge channel traces for multiple days
            timeseries.merge()
            # trim to requested start/end time
            timeseries.trim(starttime, endtime)
            return timeseries

        def parse_string(self, pcdcpString):
            """Parse the contents of a string in the format of an pcdcp file.

            Parameters
            ----------
            pcdcpString : str
                string containing PCDCP content.

            Returns
            -------
            obspy.core.Stream
                parsed data, one trace per channel found by the parser.
            """
            parser = PCDCPParser()
            parser.parse(pcdcpString)
            metadata = parser.metadata
            starttime = obspy.core.UTCDateTime(parser.times[0])
            endtime = obspy.core.UTCDateTime(parser.times[-1])
            data = parser.data
            # snapshot the channel names; works whether keys() is a list or a
            # view.
            channels = list(data)
            # the first channel provides the sample count used for all traces
            # (assumes every channel has the same length - TODO confirm).
            length = len(data[channels[0]])
            # NOTE(review): a file with a single sample has
            # endtime == starttime, which makes this divisor zero.
            rate = (length - 1) / (endtime - starttime)
            stream = obspy.core.Stream()
            for channel in channels:
                stats = obspy.core.Stats(metadata)
                stats.starttime = starttime
                stats.sampling_rate = rate
                stats.npts = length
                stats.channel = channel
                values = data[channel]
                if channel == 'D':
                    # D is converted from minutes to radians before storing.
                    values = ChannelConverter.get_radians_from_minutes(values)
                stream += obspy.core.Trace(values, stats)
            return stream

        def _get_url(self, observatory, date, type='variation', interval='minute'):
            """Get the url for a specified PCDCP file.

            Replaces patterns (described in class docstring) with values based on
            parameter values.

            Parameters
            ----------
            observatory : str
                observatory code.
            date : obspy.core.UTCDateTime
                day to fetch (only year, month, day are used)
            type : {'variation', 'quasi-definitive'}
                data type.
            interval : {'minute', 'second'}
                data interval.

            Returns
            -------
            str
                urlTemplate with all replacement patterns filled in.

            Raises
            ------
            TimeseriesFactoryException
                if type or interval are not supported.
            """
            return self.urlTemplate % {
                    'i': self._get_interval_abbreviation(interval),
                    'interval': self._get_interval_name(interval),
                    # 'j' and 'y' are the patterns advertised in the class
                    # docstring; without them such templates raise KeyError.
                    'j': date.strftime("%j"),
                    'obs': observatory.lower(),
                    'OBS': observatory.upper(),
                    't': self._get_type_abbreviation(type),
                    'type': self._get_type_name(type),
                    'y': date.strftime("%Y"),
                    'ymd': date.strftime("%Y%m%d")}

        def _lookup_or_raise(self, table, key, message):
            """Return ``table[key]`` or raise TimeseriesFactoryException.

            Parameters
            ----------
            table : dict
                mapping of supported values.
            key
                value to look up.
            message : str
                error message with one ``%s`` placeholder for ``key``.

            Raises
            ------
            TimeseriesFactoryException
                if ``key`` is not in ``table``.
            """
            try:
                return table[key]
            except (KeyError, TypeError):
                # TypeError covers unhashable keys, which can never match.
                raise TimeseriesFactoryException(message % key)

        def _get_interval_abbreviation(self, interval):
            """Get abbreviation for a data interval.

            Used by ``_get_url`` to replace ``%(i)s`` in urlTemplate.

            Parameters
            ----------
            interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}

            Returns
            -------
            abbreviation for ``interval``.

            Raises
            ------
            TimeseriesFactoryException
                if ``interval`` is not supported.
            """
            return self._lookup_or_raise(
                    self._INTERVAL_ABBREVIATIONS, interval,
                    'Unexpected interval "%s"')

        def _get_interval_name(self, interval):
            """Get name for a data interval.

            Used by ``_get_url`` to replace ``%(interval)s`` in urlTemplate.

            Parameters
            ----------
            interval : {'minute', 'second'}

            Returns
            -------
            name for ``interval``.

            Raises
            ------
            TimeseriesFactoryException
                if ``interval`` is not supported.
            """
            return self._lookup_or_raise(
                    self._INTERVAL_NAMES, interval,
                    'Unsupported interval "%s"')

        def _get_type_abbreviation(self, type):
            """Get abbreviation for a data type.

            Used by ``_get_url`` to replace ``%(t)s`` in urlTemplate.

            Parameters
            ----------
            type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}

            Returns
            -------
            abbreviation for ``type``.

            Raises
            ------
            TimeseriesFactoryException
                if ``type`` is not supported.
            """
            return self._lookup_or_raise(
                    self._TYPE_ABBREVIATIONS, type,
                    'Unexpected type "%s"')

        def _get_type_name(self, type):
            """Get name for a data type.

            Used by ``_get_url`` to replace ``%(type)s`` in urlTemplate.

            Parameters
            ----------
            type : {'variation', 'quasi-definitive'}

            Returns
            -------
            name for ``type``.

            Raises
            ------
            TimeseriesFactoryException
                if ``type`` is not supported.
            """
            return self._lookup_or_raise(
                    self._TYPE_NAMES, type,
                    'Unsupported type "%s"')

        def _get_days(self, starttime, endtime):
            """Get days between (inclusive) starttime and endtime.

            Parameters
            ----------
            starttime : obspy.core.UTCDateTime
                the start time
            endtime : obspy.core.UTCDateTime
                the end time

            Returns
            -------
            array_like
                list of times, one per day, for all days between and including
                ``starttime`` and ``endtime``.  The first entry is
                ``starttime`` itself; later entries are 86400 second steps
                from it.

            Raises
            ------
            TimeseriesFactoryException
                if starttime is after endtime
            """
            if starttime > endtime:
                raise TimeseriesFactoryException('starttime must be before endtime')

            lastday = (endtime.year, endtime.month, endtime.day)
            day = starttime
            days = [day]
            # step one day at a time until the calendar date of endtime.
            while (day.year, day.month, day.day) != lastday:
                day = obspy.core.UTCDateTime(day.timestamp + 86400)
                days.append(day)
            return days

        def write_file(self, fh, timeseries, channels):
            """writes timeseries data to the given file object.

            Parameters
            ----------
            fh: file object
                open file to write to.
            timeseries : obspy.core.Stream
                stream containing traces to store.
            channels : array_like
                list of channels to store
            """
            PCDCPWriter().write(fh, timeseries, channels)

        def put_timeseries(self, timeseries, starttime=None, endtime=None,
                channels=None, type=None, interval=None):
            """Store timeseries data.

            Writes one file per day; the observatory code is taken from the
            stats of the first trace in ``timeseries``.

            Parameters
            ----------
            timeseries : obspy.core.Stream
                stream containing traces to store.
            starttime : UTCDateTime
                time of first sample in timeseries to store.
                uses first sample if unspecified.
            endtime : UTCDateTime
                time of last sample in timeseries to store.
                uses last sample if unspecified.
            channels : array_like
                list of channels to store, optional.
                uses default if unspecified.
            type : {'variation', 'quasi-definitive'}
                data type, optional.
                uses default if unspecified.
            interval : {'minute', 'second'}
                data interval, optional.
                uses default if unspecified.

            Raises
            ------
            TimeseriesFactoryException
                if urlTemplate is not a file:// url, if type or interval are
                not supported, or if starttime is after endtime.
            """
            if not self.urlTemplate.startswith('file://'):
                raise TimeseriesFactoryException('Only file urls are supported')

            channels = channels or self.channels
            type = type or self.type
            interval = interval or self.interval
            stats = timeseries[0].stats
            observatory = stats.station
            starttime = starttime or stats.starttime
            endtime = endtime or stats.endtime

            for day in self._get_days(starttime, endtime):
                day_filename = self._get_file_from_url(
                        self._get_url(observatory, day, type, interval))
                day_timeseries = self._get_slice(timeseries, day, interval)
                with open(day_filename, 'w') as fh:
                    self.write_file(fh, day_timeseries, channels)

        def _get_file_from_url(self, url):
            """Get a file for writing.

            Ensures parent directory exists.

            Parameters
            ----------
            url : str
                Url path to PCDCP

            Returns
            -------
            str
                path to file without file:// prefix

            Raises
            ------
            TimeseriesFactoryException
                if url does not start with file://
            """
            prefix = 'file://'
            if not url.startswith(prefix):
                raise TimeseriesFactoryException(
                        'Only file urls are supported for writing')

            # strip only the leading scheme; str.replace would also remove any
            # later occurrence of 'file://' inside the path.
            filename = url[len(prefix):]
            parent = os.path.dirname(filename)

            # dirname is '' for a bare file name, and os.makedirs('') fails.
            if parent and not os.path.exists(parent):
                os.makedirs(parent)

            return filename

        def _get_slice(self, timeseries, day, interval):
            """Get the part of a timeseries that falls within one day.

            Parameters
            ----------
            timeseries : obspy.core.Stream
                timeseries to slice
            day : UTCDateTime
                time in day to slice
            interval : str
                data interval; 'minute' ends the slice 86340 seconds after
                midnight (23:59:00), anything else ends it at 86399.999999
                seconds after midnight.

            Returns
            -------
            obspy.core.Stream
                sliced stream
            """
            day = day.datetime
            # midnight at the start of the requested day.
            start = obspy.core.UTCDateTime(day.year, day.month, day.day, 0, 0, 0)

            if interval == 'minute':
                end = start + 86340.0
            else:
                end = start + 86399.999999

            return timeseries.slice(start, end)