Skip to content
Snippets Groups Projects
Url.py 6.59 KiB
Newer Older
  • Learn to ignore specific revisions
  • """Class to read a file from a URL given a template"""
    import os
    import urllib2
    from TimeseriesFactoryException import TimeseriesFactoryException
    
    
    class URL(object):
        """Build urls from a template and read or prepare the files they name.

        Reading is done with the ``urllib2`` module.

        Parameters
        ----------
        urlTemplate : str
            A ``%``-style format string that may contain any of the following
            replacement patterns:
            - '%(i)s' : interval abbreviation
            - '%(interval)s' interval name
            - '%(julian)s' julian date
            - '%(obs)s' lowercase observatory code
            - '%(OBS)s' uppercase observatory code
            - '%(t)s' type abbreviation
            - '%(type)s' type name
            - '%(year)s' year formatted as YYYY
            - '%(ymd)s' time formatted as YYYYMMDD
        """

        # Scheme prefix accepted by ``get_file_from_url``.
        _FILE_PREFIX = 'file://'

        # Lookup tables backing the ``_get_*`` helpers below.
        _INTERVAL_ABBREVIATIONS = {
            'daily': 'day',
            'hourly': 'hor',
            'minute': 'min',
            'monthly': 'mon',
            'second': 'sec',
        }
        _INTERVAL_NAMES = {
            'minute': 'OneMinute',
            'second': 'OneSecond',
        }
        _TYPE_ABBREVIATIONS = {
            'definitive': 'd',
            'provisional': 'p',
            'quasi-definitive': 'q',
            'variation': 'v',
        }
        _TYPE_NAMES = {
            'variation': '',
            'quasi-definitive': 'QuasiDefinitive',
        }

        def __init__(self, urlTemplate):
            self.urlTemplate = urlTemplate

        def get_file_from_url(self, url):
            """Get a file for writing.

            Ensures parent directory exists.

            Parameters
            ----------
            url : str
                path to file, prefixed with file://

            Returns
            -------
            str
                path to file without file:// prefix

            Raises
            ------
            TimeseriesFactoryException
                if url does not start with file://
            """
            prefix = self._FILE_PREFIX
            if not url.startswith(prefix):
                raise TimeseriesFactoryException(
                        'Only file urls are supported for writing')
            # Strip only the leading scheme; a later "file://" inside the
            # path is part of the file name and must be kept.
            filename = url[len(prefix):]
            parent = os.path.dirname(filename)
            # dirname is '' for a bare file name, which needs no directory
            # (and os.makedirs('') would raise).
            if parent and not os.path.exists(parent):
                os.makedirs(parent)
            return filename

        def get_url(self, observatory, date, type='variation', interval='minute'):
            """Get the url for a specified file.

            Replaces patterns (described in class docstring) with values based on
            parameter values.  Every replacement value is computed regardless
            of which patterns the template uses, so ``type`` and ``interval``
            must be accepted by both the abbreviation and the name helpers.

            Parameters
            ----------
            observatory : str
                observatory code.
            date : obspy.core.UTCDateTime
                day to fetch (only year, month, day are used)
            type : {'variation', 'quasi-definitive'}
                data type.  'definitive' and 'provisional' have an
                abbreviation but no name, so they currently raise.
            interval : {'minute', 'second'}
                data interval.  'hourly', 'daily' and 'monthly' have an
                abbreviation but no name, so they currently raise.

            Returns
            -------
            str
                urlTemplate with all replacement patterns filled in.

            Raises
            ------
            TimeseriesFactoryException
                if type or interval are not supported.
            """
            replacements = {
                'i': self._get_interval_abbreviation(interval),
                'interval': self._get_interval_name(interval),
                'julian': date.strftime("%j"),
                'obs': observatory.lower(),
                'OBS': observatory.upper(),
                't': self._get_type_abbreviation(type),
                'type': self._get_type_name(type),
                'year': date.strftime("%Y"),
                'ymd': date.strftime('%Y%m%d'),
            }
            return self.urlTemplate % replacements

        def read_url(self, url):
            """Open and read url contents.

            Parameters
            ----------
            url : str
                A urllib2 compatible url, such as http:// or file://.

            Returns
            -------
            str
                contents returned by url.

            Raises
            ------
            urllib2.URLError
                if any occurs while opening or reading; the reason is printed
                before the error is re-raised.
            """
            response = None
            try:
                # Opening is inside the try because urlopen is where URLError
                # normally originates.
                response = urllib2.urlopen(url)
                return response.read()
            except urllib2.URLError as e:
                # Fall back to the exception itself if it carries no reason.
                print(getattr(e, 'reason', e))
                raise
            finally:
                # response is still None when urlopen itself failed.
                if response is not None:
                    response.close()

        def _get_interval_abbreviation(self, interval):
            """Get abbreviation for a data interval.

            Used by ``get_url`` to replace ``%(i)s`` in urlTemplate.

            Parameters
            ----------
            interval : {'daily', 'hourly', 'minute', 'monthly', 'second'}

            Returns
            -------
            abbreviation for ``interval``.

            Raises
            ------
            TimeseriesFactoryException
                if ``interval`` is not supported.
            """
            try:
                return self._INTERVAL_ABBREVIATIONS[interval]
            except (KeyError, TypeError):
                # TypeError covers unhashable values, which are unsupported too.
                raise TimeseriesFactoryException(
                        'Unexpected interval "%s"' % interval)

        def _get_interval_name(self, interval):
            """Get name for a data interval.

            Used by ``get_url`` to replace ``%(interval)s`` in urlTemplate.

            Parameters
            ----------
            interval : {'minute', 'second'}

            Returns
            -------
            name for ``interval``.

            Raises
            ------
            TimeseriesFactoryException
                if ``interval`` is not supported.
            """
            try:
                return self._INTERVAL_NAMES[interval]
            except (KeyError, TypeError):
                # TypeError covers unhashable values, which are unsupported too.
                raise TimeseriesFactoryException(
                        'Unsupported interval "%s"' % interval)

        def _get_type_abbreviation(self, type):
            """Get abbreviation for a data type.

            Used by ``get_url`` to replace ``%(t)s`` in urlTemplate.

            Parameters
            ----------
            type : {'definitive', 'provisional', 'quasi-definitive', 'variation'}

            Returns
            -------
            abbreviation for ``type``.

            Raises
            ------
            TimeseriesFactoryException
                if ``type`` is not supported.
            """
            try:
                return self._TYPE_ABBREVIATIONS[type]
            except (KeyError, TypeError):
                # TypeError covers unhashable values, which are unsupported too.
                raise TimeseriesFactoryException(
                        'Unexpected type "%s"' % type)

        def _get_type_name(self, type):
            """Get name for a data type.

            Used by ``get_url`` to replace ``%(type)s`` in urlTemplate.

            Parameters
            ----------
            type : {'variation', 'quasi-definitive'}

            Returns
            -------
            name for ``type``; the empty string for 'variation'.

            Raises
            ------
            TimeseriesFactoryException
                if ``type`` is not supported.
            """
            try:
                return self._TYPE_NAMES[type]
            except (KeyError, TypeError):
                # TypeError covers unhashable values, which are unsupported too.
                raise TimeseriesFactoryException(
                        'Unsupported type "%s"' % type)