importNGWMN_wml2.R 9.42 KB
Newer Older
David Watkins's avatar
David Watkins committed
1
#' Function to return data from the National Ground Water Monitoring Network waterML2 format
2
3
4
#'
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development,
#' but the general functionality is correct.
David Watkins's avatar
David Watkins committed
5
#' 
6
7
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
Laura A DeCicco's avatar
Laura A DeCicco committed
8
9
10
11
12
13
#' @param tz character to set timezone attribute of dateTime. Default is "UTC", and converts the 
#' date times to UTC, properly accounting for daylight savings times based on the data's provided time zone offset.
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage", as well as the following which do not use daylight savings time: "America/Honolulu",
#' "America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla". See also  \code{OlsonNames()} 
#' for more information on time zones.
David Watkins's avatar
David Watkins committed
14
#' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID
15
16
17
18
19
#' @export
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
20
#' @importFrom xml2 xml_find_first
21
#' @examples
22
#' \donttest{
Laura A DeCicco's avatar
Laura A DeCicco committed
23
24
25
26
27
#' obs_url <- paste("http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation",
#' "service=SOS","version=2.0.0",
#' "observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel",
#' "responseFormat=text/xml",
#' "featureOfInterest=VW_GWDP_GEOSERVER.USGS.403836085374401",sep="&")
David Watkins's avatar
David Watkins committed
28
#' data <- importNGWMN(obs_url)
David Watkins's avatar
David Watkins committed
29
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
30
31
32
33
34
#' obs_url <- paste("http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation",
#' "service=SOS","version=2.0.0",
#' "observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel",
#' "responseFormat=text/xml",
#' "featureOfInterest=VW_GWDP_GEOSERVER.USGS.474011117072901",sep="&")
David Watkins's avatar
David Watkins committed
35
#' data <- importNGWMN(obs_url)
David Watkins's avatar
David Watkins committed
36
#' }
David Watkins's avatar
David Watkins committed
37
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
38
39
importNGWMN <- function(input, asDateTime=FALSE, tz="UTC"){
  
40
  if(tz != ""){
David Watkins's avatar
David Watkins committed
41
    tz <- match.arg(tz, OlsonNames())
42
43
44
45
46
47
48
49
50
  }else{tz = "UTC"}
  
  raw <- FALSE
  if(class(input) == "character" && file.exists(input)){
    returnedDoc <- read_xml(input)
  }else if(class(input) == 'raw'){
    returnedDoc <- read_xml(input)
    raw <- TRUE
  } else {
51
52
53
54
    returnedDoc <- getWebServiceData(input, encoding='gzip')
    
    returnedDoc <- xml_root(returnedDoc)
    
55
56
  }
  
David Watkins's avatar
David Watkins committed
57
58
59
60
61
62
63
64
65
66
67
  response <- xml_name(returnedDoc)
  if(response == "GetObservationResponse"){
    
    timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo
    
    if(0 == length(timeSeries)){
      df <- data.frame()
      if(!raw){
        attr(df, "url") <- input
      }
      return(df)
68
69
    }
    
David Watkins's avatar
David Watkins committed
70
    mergedDF <- NULL
71
    
David Watkins's avatar
David Watkins committed
72
    for(t in timeSeries){
Laura A DeCicco's avatar
Laura A DeCicco committed
73
      df <- importWaterML2(t, asDateTime, tz)
74
      
David Watkins's avatar
David Watkins committed
75
76
77
78
      if (is.null(mergedDF)){
        mergedDF <- df
      } else {
        similarNames <- intersect(colnames(mergedDF), colnames(df))
Laura A DeCicco's avatar
Laura A DeCicco committed
79
        mergedDF <- dplyr::full_join(mergedDF, df, by=similarNames)
David Watkins's avatar
David Watkins committed
80
      }
81
    }
David Watkins's avatar
David Watkins committed
82
83
84
85
86
    
    if(!raw){
      url <- input
      attr(mergedDF, "url") <- url
    }
David Watkins's avatar
David Watkins committed
87
88
89
    if(asDateTime){
      mergedDF$date <- as.Date(mergedDF$date)
    }
90
91
    nonDateCols <- grep("date",names(mergedDF), value=TRUE, invert = TRUE)
    
David Watkins's avatar
David Watkins committed
92
    if(nrow(mergedDF) > 0){
93
94
      mergedDF[nonDateCols][mergedDF[nonDateCols] == "" | mergedDF[nonDateCols]== -999999.0] <- NA
    }
David Watkins's avatar
David Watkins committed
95
96
    attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier")) 
    attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate")) 
97
98
99
100
    meta <- xml_find_all(returnedDoc, ".//gmd:contact")
    attr(mergedDF, "contact") <- xml_attr(meta, "href")
    attr(mergedDF, "responsibleParty") <- xml_text(xml_find_all(meta, ".//gco:CharacterString"))
    
101
  } else if (response == "GetFeatureOfInterestResponse"){
102
103
    featureMembers <- xml_find_all(returnedDoc, ".//sos:featureMember")
    site <- xml_text(xml_find_all(featureMembers,".//gml:identifier"))
David Watkins's avatar
David Watkins committed
104
105
    site <- substring(site, 8)
    
106
107
    #some sites don't have a description
    siteDesc <- xml_text(xml_find_first(featureMembers, ".//gml:description"))
David Watkins's avatar
David Watkins committed
108
    
109
110
111
    siteLocs <- strsplit(xml_text(xml_find_all(featureMembers, ".//gml:pos")), " ")
    siteLocs <- data.frame(matrix(unlist(siteLocs), nrow=length(siteLocs), byrow=TRUE), stringsAsFactors = FALSE)
    names(siteLocs) <- c("dec_lat_va", "dec_lon_va")
112
113
    dec_lat_va <- "dplyr var"
    dec_lon_va <- "dplyr var"
114
    siteLocs <- mutate(siteLocs, dec_lat_va=as.numeric(dec_lat_va), dec_lon_va=as.numeric(dec_lon_va))
David Watkins's avatar
David Watkins committed
115
    mergedDF <- cbind.data.frame(site, description = siteDesc, siteLocs, stringsAsFactors = FALSE) 
116
117
118
119
  
  } else if (response == "ExceptionReport"){
    return(data.frame())
  } else {
David Watkins's avatar
David Watkins committed
120
    stop("Unrecognized response from the web service")
121
    return(data.frame())
122
123
124
  }
  return(mergedDF)
}
125

Laura A DeCicco's avatar
Laura A DeCicco committed
126
#' Parse the WaterML2 timeseries portion of a waterML2 file
127
#' 
David Watkins's avatar
David Watkins committed
128
129
130
131
#' Returns data frame columns of all information with each time series measurement;
#' Anything defined as a default, is returned as an attribute of that data frame.
#' 
#' @param input XML with only the wml2:MeasurementTimeseries node and children
Laura A DeCicco's avatar
Laura A DeCicco committed
132
133
134
135
136
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the 
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset).
#' Possible values are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
David Watkins's avatar
David Watkins committed
137
138
#' @importFrom xml2 xml_attr xml_find_all xml_text 
#' @importFrom dplyr mutate
139
#' @export
Laura A DeCicco's avatar
Laura A DeCicco committed
140
141
142
143
144
145
146
#' @examples 
#' baseURL <- "https://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
#' URL <- paste(baseURL, "sites=01646500",
#'      "startDT=2014-09-01",
#'      "endDT=2014-09-08",
#'      "statCd=00003",
#'      "parameterCd=00060",sep="&")
147
#' \donttest{
Laura A DeCicco's avatar
Laura A DeCicco committed
148
149
150
151
152
153
154
155
156
#' timesereies <- importWaterML2(URL, asDateTime=TRUE, tz="UTC")
#' } 
importWaterML2 <- function(input, asDateTime=FALSE, tz="UTC") {
  
  returnedDoc <- check_if_xml(input)
  raw <- class(input) == 'raw'
  
  gmlID <- xml_attr(returnedDoc,"id") #TODO: make this an attribute
  TVP <- xml_find_all(returnedDoc, ".//wml2:MeasurementTVP")#time-value pairs
157
  if(length(TVP) == 0) { #empty nodes on some sites
David Watkins's avatar
David Watkins committed
158
159
    return(data.frame(site = character(0), source = character(0), date = character(0),
                      time = character(0), dateTime = character(0), value = numeric(0),
David Watkins's avatar
David Watkins committed
160
                      uom = character(0), comment = character(0), stringsAsFactors = FALSE))
161
  }
162
  rawTime <- xml_text(xml_find_all(returnedDoc, ".//wml2:MeasurementTVP/wml2:time"))
163
  
164
  valueNodes <- xml_find_all(returnedDoc, ".//wml2:MeasurementTVP/wml2:value")
David Watkins's avatar
David Watkins committed
165
166
167
168
  charValues <- xml_text(valueNodes)
  nilValues <- as.logical(xml_attr(valueNodes, "nil"))
  charValues[nilValues] <- NA
  values <- as.numeric(charValues)
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
  nVals <- length(values)
  
  #df of date, time, dateTime
  oneCol <- rep(NA, nVals) 
  timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol)
  splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE)
  if(ncol(splitTime) > 1){ #some sites only have a date
    names(splitTime) <- c("date", "time")
  }else{
    names(splitTime) <- "date"
    splitTime <- mutate(splitTime, time = NA)
  }
  
  timeDF <- mutate(splitTime, dateTime = NA)
  logicVec <- nchar(rawTime) > 19
David Watkins's avatar
David Watkins committed
184
185
186
  if(!all(!logicVec)) { #otherwise sets it to char <NA>
    timeDF$dateTime[logicVec] <- rawTime[logicVec]
  }
187
  if(asDateTime){
Laura A DeCicco's avatar
Laura A DeCicco committed
188
    timeDF$dateTime <- lubridate::parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
189
190
                                                          "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
    #^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
David Watkins's avatar
David Watkins committed
191
    attr(timeDF$dateTime, 'tzone') <- tz
192
193
194
  }
  
  uom <- xml_attr(valueNodes, "uom", default = NA)
David Watkins's avatar
David Watkins committed
195
  
David Watkins's avatar
David Watkins committed
196
197
198
199
200
201
202
  source <- xml_attr(xml_find_all(returnedDoc, 
                                  "./wml2:point/wml2:MeasurementTVP/wml2:metadata/wml2:source"), 
                     "title")
  comment <- xml_text(xml_find_all(returnedDoc, 
                  "./wml2:point/wml2:MeasurementTVP/wml2:metadata/wml2:comment"))
  tvpQuals <- xml_text(xml_find_all(returnedDoc, 
                        "./wml2:point/wml2:MeasurementTVP/wml2:metadata/swe:description"))
Laura A DeCicco's avatar
Laura A DeCicco committed
203
  defaultMeta <- xml_find_all(returnedDoc, ".//wml2:DefaultTVPMeasurementMetadata")
David Watkins's avatar
David Watkins committed
204
205
  defaultQuals <- xml_text(xml_find_all(defaultMeta, ".//swe:description"))
  defaultUOM <- xml_attr(xml_find_all(defaultMeta, ".//wml2:uom"), "title", default = NA)
David Watkins's avatar
David Watkins committed
206
207
208
 
  df_vars <- list(source = source, timeDF, value = values, 
                  uom = uom, comment = comment)
David Watkins's avatar
David Watkins committed
209
210
  df_use <- df_vars[sapply(df_vars, function(x){length(x) > 0 && !all(is.na(x))})]
  df <- data.frame(df_use, stringsAsFactors = FALSE)
David Watkins's avatar
David Watkins committed
211
  if(!"value" %in% names(df)) {df$value <- NA_real_}
David Watkins's avatar
David Watkins committed
212
213
  #from the default metadata section
  #append to existing attributes if they aren't empty
David Watkins's avatar
David Watkins committed
214
   mdAttribs <- list(defaultQualifier=defaultQuals, defaultUOM=defaultUOM, 
David Watkins's avatar
David Watkins committed
215
216
217
                    gmlID=gmlID) #all attributes must have names
  mdAttribs_use <- mdAttribs[sapply(mdAttribs, function(x){length(x) > 0})]
  attributes(df) <- append(attributes(df), mdAttribs_use)
218
219
  return(df)
}