importWaterML1.r 14.9 KB
Newer Older
1
#' Function to return data from the NWISWeb WaterML1.1 service
2
3
#'
#' This function accepts a url parameter that already contains the desired
Laura A DeCicco's avatar
Laura A DeCicco committed
4
#' NWIS site, parameter code, statistic, startdate and enddate. 
5
#'
6
#' @param obs_url character or raw, containing the url for the retrieval or a file path to the data file, or raw XML.
Laura A DeCicco's avatar
Laura A DeCicco committed
7
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date
8
9
#' @param tz character to set timezone attribute of . Default is an empty quote, which converts the 
#' s to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column).
10
11
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
12
13
14
#' @return A data frame with the following columns:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
15
16
#' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr
#' site_no \tab character \tab The USGS site number \cr
17
#'  \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = TRUE), \cr 
18
#' \tab character \tab or raw character string (if asDateTime = FALSE) \cr
19
#' tz_cd \tab character \tab The time zone code for  \cr
20
21
22
23
24
25
26
27
28
29
#' code \tab character \tab Any codes that qualify the corresponding value\cr
#' value \tab numeric \tab The numeric value for the parameter \cr
#' }
#' Note that code and value are repeated for the parameters requested. The names are of the form 
#' X_D_P_S, where X is literal, 
#' D is an option description of the parameter, 
#' P is the parameter code, 
#' and S is the statistic code (if applicable).
#' 
#' There are also several useful attributes attached to the data frame:
Laura A DeCicco's avatar
Laura A DeCicco committed
30
#' \tabular{lll}{
31
32
33
34
35
36
37
38
39
#' Name \tab Type \tab Description \cr
#' url \tab character \tab The url used to generate the data \cr
#' siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr
#' variableInfo \tab data.frame \tab A data frame containing information on the requested parameters \cr
#' statisticInfo \tab data.frame \tab A data frame containing information on the requested statistics on the data \cr
#' queryTime \tab POSIXct \tab The time the data was returned \cr
#' }
#' 
#' @seealso \code{\link{renameNWISColumns}}
40
#' @export
41
42
#' @import utils
#' @import stats
43
44
#' @importFrom lubridate parse_date_time
#' @importFrom dplyr full_join
David Watkins's avatar
David Watkins committed
45
#' @importFrom dplyr bind_rows
David Watkins's avatar
David Watkins committed
46
#' @importFrom dplyr arrange
David Watkins's avatar
David Watkins committed
47
48
49
50
51
52
53
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_children
#' @importFrom xml2 xml_name
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attrs
#' @importFrom xml2 xml_attr
David Watkins's avatar
David Watkins committed
54
#' @importFrom xml2 xml_root
55
#' @examples
56
#' siteNumber <- "02177000"
57
58
59
60
#' startDate <- "2012-09-01"
#' endDate <- "2012-10-01"
#' offering <- '00003'
#' property <- '00060'
61
#' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv')
62
#' \dontrun{
63
#' data <- importWaterML1(obs_url, asDateTime=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
64
#' 
65
66
67
68
#' groundWaterSite <- "431049071324301"
#' startGW <- "2013-10-01"
#' endGW <- "2014-06-30"
#' groundwaterExampleURL <- constructNWISURL(groundWaterSite, NA,
Laura A DeCicco's avatar
Laura A DeCicco committed
69
#'           startGW,endGW, service="gwlevels")
Laura A DeCicco's avatar
Laura A DeCicco committed
70
#' groundWater <- importWaterML1(groundwaterExampleURL)
Laura A DeCicco's avatar
Laura A DeCicco committed
71
#' groundWater2 <- importWaterML1(groundwaterExampleURL, asDateTime=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
72
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
73
#' unitDataURL <- constructNWISURL(siteNumber,property,
Laura A DeCicco's avatar
Laura A DeCicco committed
74
#'          "2013-11-03","2013-11-03",'uv')
75
#' unitData <- importWaterML1(unitDataURL,TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
76
77
78
#' 
#' # Two sites, two pcodes, one site has two data descriptors:
#' siteNumber <- c('01480015',"04085427")
79
80
#' obs_url <- constructNWISURL(siteNumber,c("00060","00010"),startDate,endDate,'dv')
#' data <- importWaterML1(obs_url)
Laura A DeCicco's avatar
Laura A DeCicco committed
81
82
83
84
85
#' data$dateTime <- as.Date(data$dateTime)
#' data <- renameNWISColumns(data)
#' names(attributes(data))
#' attr(data, "url")
#' attr(data, "disclaimer")
86
87
88
89
90
91
92
93
#' 
#' inactiveSite <- "05212700"
#' inactiveSite <- constructNWISURL(inactiveSite, "00060", "2014-01-01", "2014-01-10",'dv')
#' inactiveSite <- importWaterML1(inactiveSite)
#' 
#' inactiveAndAcitive <- c("07334200","05212700")
#' inactiveAndAcitive <- constructNWISURL(inactiveAndAcitive, "00060", "2014-01-01", "2014-01-10",'dv')
#' inactiveAndAcitive <- importWaterML1(inactiveAndAcitive)
94
#' 
95
96
97
98
#' Timezone change with specified local timezone:
#' tzURL <- constructNWISURL("04027000", c("00300","63680"), "2011-11-05", "2011-11-07","uv")
#' tzIssue <- importWaterML1(tzURL, TRUE, "America/Chicago")
#'
David Watkins's avatar
David Watkins committed
99
100
101
102
103
#' #raw XML
#' url <- constructNWISURL(service = 'dv', siteNumber = '02319300', parameterCd = "00060", 
#'                          startDate = "2014-01-01", endDate = "2014-01-01")
#' raw <- content(GET(url), as = 'raw')
#' rawParsed <- importWaterML1(raw)
104
105
106
107
#' }
#' filePath <- system.file("extdata", package="dataRetrieval")
#' fileName <- "WaterML1Example.xml"
#' fullPath <- file.path(filePath, fileName)
108
#' importFile <- importWaterML1(fullPath,TRUE)
109
#'
David Watkins's avatar
David Watkins committed
110

111
112
importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
  #note: obs_url is a dated name, does not have to be a url/path
David Watkins's avatar
David Watkins committed
113
  raw <- FALSE
114
115
116
117
  if(class(obs_url) == "character" && file.exists(obs_url)){
    returnedDoc <- read_xml(obs_url)
  }else if(class(obs_url) == 'raw'){
    returnedDoc <- read_xml(obs_url)
David Watkins's avatar
David Watkins committed
118
    raw <- TRUE
David Watkins's avatar
David Watkins committed
119
  } else {
120
    returnedDoc <- xml_root(getWebServiceData(obs_url, encoding='gzip'))
David Watkins's avatar
David Watkins committed
121
122
  }
  
David Watkins's avatar
David Watkins committed
123
  if(tz != ""){  #check tz is valid if supplied
124
125
126
127
128
    tz <- match.arg(tz, c("America/New_York","America/Chicago",
                          "America/Denver","America/Los_Angeles",
                          "America/Anchorage","America/Honolulu",
                          "America/Jamaica","America/Managua",
                          "America/Phoenix","America/Metlakatla"))
David Watkins's avatar
David Watkins committed
129
  }else{tz <- "UTC"}
Laura A DeCicco's avatar
Laura A DeCicco committed
130
  
David Watkins's avatar
David Watkins committed
131
  timeSeries <- xml_find_all(returnedDoc, ".//ns1:timeSeries") #each parameter/site combo
Laura A DeCicco's avatar
Laura A DeCicco committed
132
  
David Watkins's avatar
David Watkins committed
133
134
135
136
137
138
139
  #some intial attributes
  queryNodes <- xml_children(xml_find_all(returnedDoc,".//ns1:queryInfo"))
  notes <- queryNodes[xml_name(queryNodes)=="note"]
  noteTitles <- xml_attrs(notes)
  noteText <- xml_text(notes)
  noteList <- as.list(noteText)
  names(noteList) <- noteTitles
140
  
141
  if(0 == length(timeSeries)){
142
    df <- data.frame()
David Watkins's avatar
David Watkins committed
143
    attr(df, "queryInfo") <- noteList
David Watkins's avatar
David Watkins committed
144
    if(!raw){
145
      attr(df, "url") <- obs_url
David Watkins's avatar
David Watkins committed
146
    }
147
    return(df)
148
149
  }
  
150
  mergedDF <- NULL
Laura A DeCicco's avatar
Laura A DeCicco committed
151
  
David Watkins's avatar
David Watkins committed
152
  for(t in timeSeries){
153
    #check if there are multiple time series (ie with different descriptors)
154
    #descriptor will be appended to col name if so
155
156
    valParents <- xml_find_all(t,".//ns1:values")
    obsDF <- NULL
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
157
158
    useMethodDesc <- FALSE
    if(length(valParents) > 1){ useMethodDesc <- TRUE} #append the method description to colnames later
Laura A DeCicco's avatar
Laura A DeCicco committed
159
    
David Watkins's avatar
David Watkins committed
160
161
    sourceInfo <- xml_children(xml_find_all(t, ".//ns1:sourceInfo"))
    variable <- xml_children(xml_find_all(t, ".//ns1:variable"))
162
163
164
165
    agency_cd <- xml_attr(sourceInfo[xml_name(sourceInfo)=="siteCode"],"agencyCode")
    pCode <- xml_text(variable[xml_name(variable)=="variableCode"])
    statCode <- xml_attr(xml_find_all(variable,".//ns1:option"),"optionCode")
    
166
167
168
169
170
    #site info
    srsNode <- xml_find_all(sourceInfo,".//ns1:geogLocation")
    srs <- xml_attr(srsNode, 'srs')
    locNodes <- xml_children(srsNode)
    locNames <- xml_name(locNodes)
David Watkins's avatar
David Watkins committed
171
    locText <- as.numeric(xml_text(locNodes))  
172
173
174
175
176
177
178
179
180
    names(locText) <- sub("longitude","dec_lon_va",sub("latitude","dec_lat_va",locNames))
    sitePropNodes <- sourceInfo[xml_name(sourceInfo)=="siteProperty"]
    siteProp <- xml_text(sitePropNodes)
    names(siteProp) <- xml_attr(sitePropNodes, "name")
    tzInfo <- unlist(xml_attrs(xml_find_all(sourceInfo,"ns1:defaultTimeZone")))
    siteName <- xml_text(sourceInfo[xml_name(sourceInfo)=="siteName"])
    siteCodeNode <- sourceInfo[xml_name(sourceInfo)=="siteCode"]
    site_no <- xml_text(siteCodeNode)
    siteCodeAtt <- unlist(xml_attrs(siteCodeNode))
David Watkins's avatar
David Watkins committed
181
182
    siteDF <- cbind.data.frame(t(locText),t(tzInfo),station_nm=siteName,t(siteCodeAtt),srs,t(siteProp),
                               site_no,stringsAsFactors = FALSE)
183
184
    defaultTZ <- xml_attr(xml_find_all(sourceInfo,".//ns1:defaultTimeZone"),"zoneAbbreviation")
    
185
186
187
    for(v in valParents){
      obsColName <- paste(pCode,statCode,sep = "_")
      obs <- xml_find_all(v, ".//ns1:value")
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
      values <- as.numeric(xml_text(obs))  #actual observations

      nObs <- length(values)
      qual <- xml_attr(obs,"qualifiers")
      if(all(is.na(qual))){
        noQual <- TRUE
      }else{noQual <- FALSE}
      
      dateTime <- xml_attr(obs,"dateTime")
      if(asDateTime){
        numChar <- nchar(dateTime)
        dateTime <- parse_date_time(dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M",
                                                "%Y-%m-%dT%H:%M:%S","%Y-%m-%dT%H:%M:%OS",
                                                "%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
        if(any(numChar < 20) & any(numChar > 16)){
          offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0),
                                      code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST",""),
                                      stringsAsFactors = FALSE)
          
          #not sure there is still a case for this (no offset on times)?
          dateTime[numChar < 20 & numChar > 16] <- dateTime[numChar < 20 & numChar > 16] + offsetLibrary[offsetLibrary$code == defaultTZ,"offset"]*60*60
          warning(paste("site",site_no[1], "had data without time zone offsets, so DST could not be accounted for"))
        }                                        
211
        
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
212
213
214
215
216
217
218
219
220
221
222
        #^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
        attr(dateTime, 'tzone') <- tz 
        tzCol <- rep(tz,nObs)
      }else{
        tzCol <- rep(defaultTZ, nObs)
      }
      #create column names, addressing if methodDesc is needed
      if(useMethodDesc){
        methodDesc <- xml_text(xml_find_all(v, ".//ns1:methodDescription"))
        #this keeps column names consistent with old version
        methodDesc <- gsub("\\[|\\]| |\\(|\\)",".",methodDesc)
David Watkins's avatar
David Watkins committed
223
        
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
224
225
226
        #sometimes methodDesc is empty
        if(nchar(methodDesc) > 0){
          obsColName <- paste("X",methodDesc,obsColName, sep = "_")
David Watkins's avatar
David Watkins committed
227
228
229
        }else{
          obsColName <- paste("X",obsColName, sep = "_")
        }
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
      } else{
        obsColName <- paste("X",obsColName, sep = "_")
      }
      qualColName <- paste(obsColName,"cd",sep = "_")
      
      valParentDF <- cbind.data.frame(dateTime, values, qual, tzCol, stringsAsFactors = FALSE)
      names(valParentDF) <- c("dateTime",obsColName, qualColName, "tz_cd")
      #delete qual column if all NA
      if(all(is.na(valParentDF[,eval(qualColName)]))){
        valParentDF <- subset(valParentDF, select = c("dateTime", eval(obsColName), "tz_cd"))
      }
      if(nrow(valParentDF) > 0){
        if(is.null(obsDF)){
          obsDF <- valParentDF
        }else{
          obsDF <- full_join(obsDF, valParentDF, by = c("dateTime","tz_cd"))
David Watkins's avatar
David Watkins committed
246
        }
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
247
248
249
250
      }else{
        #need column names for joining later
        # but don't overwrite:
        if(is.null(obsDF)){
Laura A DeCicco's avatar
Laura A DeCicco committed
251
252
253
254
255
          obsDF <- data.frame(dateTime=character(0), tz_cd=character(0), stringsAsFactors = FALSE)
          if(asDateTime){
            obsDF$dateTime <- as.POSIXct(obsDF$dateTime)
            attr(obsDF$dateTime, "tzone") <- tz
          }
David Watkins's avatar
David Watkins committed
256
        }
257
258
      }
    }
Laura A DeCicco's avatar
TGFtest    
Laura A DeCicco committed
259
    
David Watkins's avatar
David Watkins committed
260
    if(is.null(obsDF)){
David Watkins's avatar
David Watkins committed
261
      mergedSite <- data.frame()
David Watkins's avatar
David Watkins committed
262
263
      next
    }
264
    nObs <- nrow(obsDF)
David Watkins's avatar
David Watkins committed
265
266
267
268
269
270
    
    #statistic info
    options <- xml_find_all(variable,"ns1:option")
    stat <- options[xml_attr(options,"name")=="Statistic"]
    stat_nm <- xml_text(options[xml_attr(stat,"name")=="Statistic"])
    statCd <- xml_attr(stat, "optionCode")
David Watkins's avatar
David Watkins committed
271
    statDF <- cbind.data.frame(statisticCd=statCd,statisticName=stat_nm, stringsAsFactors = FALSE)
David Watkins's avatar
David Watkins committed
272
273
274
275
276
277
278
    
    #variable info
    varText <- as.data.frame(t(xml_text(variable)),stringsAsFactors = FALSE)
    varNames <- xml_name(variable) 
    varName <- sub("unit", "param_unit",varNames) #rename to stay consistent with orig importWaterMl1
    names(varText) <- varNames
    
David Watkins's avatar
David Watkins committed
279
280
    #replace no data vals with NA, change attribute df
    noDataVal <- as.numeric(varText$noDataValue)
David Watkins's avatar
David Watkins committed
281
    if(nObs > 0){
David Watkins's avatar
David Watkins committed
282
283
284
285
      obsDF[obsDF$values == noDataVal] <- NA
    }
    varText$noDataValue <- NA
    
286
    #rep site no & agency, combine into DF
287
288
289
    obsDFrows <- nrow(obsDF)
    df <- cbind.data.frame(agency_cd = rep(agency_cd,obsDFrows), site_no = rep(site_no,obsDFrows), 
                           obsDF, stringsAsFactors = FALSE)
David Watkins's avatar
David Watkins committed
290
    
David Watkins's avatar
David Watkins committed
291
292
293
294
295
296
297
    #join by site no 
    #append siteInfo, stat, and variable if they don't match a previous one
    if (is.null(mergedDF)){
      mergedDF <- df
      mergedSite <- siteDF
      mergedVar <- varText
      mergedStat <- statDF
298
    } else {
David Watkins's avatar
David Watkins committed
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
      if(nrow(df) > 0){
        #merge separately with any same site nos, then recombine
        sameSite <- mergedDF[mergedDF$site_no == site_no,]
        if(nrow(sameSite) > 0){
          diffSite <- mergedDF[mergedDF$site_no != site_no,]
          #first need to delete the obs and qual columns if they have already been filled with NA
          deleteCols <- grepl(obsColName,colnames(sameSite))
          sameSite <- sameSite[,!deleteCols]
          sameSite_simNames <- intersect(colnames(sameSite), colnames(df))
          sameSite <- full_join(sameSite, df, by = sameSite_simNames)
          sameSite <- sameSite[order(as.Date(sameSite$dateTime)),]
          mergedDF <- bind_rows(sameSite, diffSite)
        }else{
          similarNames <- intersect(colnames(mergedDF), colnames(df))
          mergedDF <- full_join(mergedDF, df, by=similarNames)
        }
      }
      mergedSite <- full_join(mergedSite, siteDF, by = colnames(mergedSite))
      mergedVar <- full_join(mergedVar, varText, by = colnames(mergedVar))
      mergedStat <- full_join(mergedStat, statDF, by = colnames(mergedStat))
319
    }
320
  }
David Watkins's avatar
David Watkins committed
321
322
323
324
325
326
327
328
329
330
  
  if(!is.null(mergedSite)){
    #keep attribute df names the same as old version
    names(mergedSite) <- c("dec_lat_va", "dec_lon_va", "timeZoneOffset", "timeZoneAbbreviation",
                           "station_nm","network","agency_cd","srs","siteTypeCd",
                           "hucCd", "stateCd", "countyCd", "site_no")
    mergedSite <- mergedSite[c("station_nm", "site_no", "agency_cd", "timeZoneOffset", 
                               "timeZoneAbbreviation", "dec_lat_va","dec_lon_va","srs","siteTypeCd",
                               "hucCd","stateCd","countyCd","network")]
  }
David Watkins's avatar
David Watkins committed
331
  
David Watkins's avatar
David Watkins committed
332
333
334
335
  #move tz column to far right and sort by increasing site number to be consistent with old version
  mergedNames <- names(mergedDF)
  tzLoc <- grep("tz_cd", names(mergedDF))
  mergedDF <- mergedDF[c(mergedNames[-tzLoc],mergedNames[tzLoc])]
David Watkins's avatar
David Watkins committed
336
  mergedDF <- arrange(mergedDF,site_no, dateTime)
Laura A DeCicco's avatar
Laura A DeCicco committed
337
  
David Watkins's avatar
David Watkins committed
338
  #attach other site info etc as attributes of mergedDF
David Watkins's avatar
David Watkins committed
339
  if(!raw){
340
    attr(mergedDF, "url") <- obs_url
David Watkins's avatar
David Watkins committed
341
  }
David Watkins's avatar
David Watkins committed
342
343
344
345
  attr(mergedDF, "siteInfo") <- mergedSite
  attr(mergedDF, "variableInfo") <- mergedVar
  attr(mergedDF, "disclaimer") <- noteText[noteTitles=="disclaimer"]
  attr(mergedDF, "statisticInfo") <- mergedStat
Laura A DeCicco's avatar
Laura A DeCicco committed
346
  attr(mergedDF, "queryTime") <- Sys.time()
David Watkins's avatar
David Watkins committed
347
  
348
  return (mergedDF)
David Watkins's avatar
David Watkins committed
349
}