importNGWMN_wml2.R 6.3 KB
Newer Older
David Watkins's avatar
David Watkins committed
1
#' Function to return data from the National Ground Water Monitoring Network waterML2 format
2
3
4
#'
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development,
#' but the general functionality is correct.
David Watkins's avatar
David Watkins committed
5
#' 
6
7
8
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the 
David Watkins's avatar
David Watkins committed
9
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset).
10
11
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
David Watkins's avatar
David Watkins committed
12
#' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID
13
14
15
16
17
#' @export
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
18
#' @importFrom xml2 xml_find_first
19
20
#' @importFrom lubridate parse_date_time
#' @examples
David Watkins's avatar
David Watkins committed
21
#' \dontrun{
22
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
23
24
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.403836085374401"
David Watkins's avatar
David Watkins committed
25
#' data <- importNGWMN_wml2(url)
David Watkins's avatar
David Watkins committed
26
#' 
27
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
28
29
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.474011117072901"
David Watkins's avatar
David Watkins committed
30
#' data <- importNGWMN_wml2(url)
David Watkins's avatar
David Watkins committed
31
#' }
David Watkins's avatar
David Watkins committed
32
#' 
33
34
#' 
#TODO: separate id and agency name, give also as separate dimensions
David Watkins's avatar
David Watkins committed
35
importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){
36
  if(tz != ""){
David Watkins's avatar
David Watkins committed
37
    tz <- match.arg(tz, OlsonNames())
38
39
40
41
42
43
44
45
46
47
48
49
  }else{tz = "UTC"}
  
  raw <- FALSE
  if(class(input) == "character" && file.exists(input)){
    returnedDoc <- read_xml(input)
  }else if(class(input) == 'raw'){
    returnedDoc <- read_xml(input)
    raw <- TRUE
  } else {
    returnedDoc <- xml_root(getWebServiceData(input, encoding='gzip'))
  }
  
David Watkins's avatar
David Watkins committed
50
51
52
53
54
55
56
57
58
59
60
  response <- xml_name(returnedDoc)
  if(response == "GetObservationResponse"){
    
    timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo
    
    if(0 == length(timeSeries)){
      df <- data.frame()
      if(!raw){
        attr(df, "url") <- input
      }
      return(df)
61
62
    }
    
David Watkins's avatar
David Watkins committed
63
    mergedDF <- NULL
64
    
David Watkins's avatar
David Watkins committed
65
    for(t in timeSeries){
66
      df <- parseWaterML2Timeseries(t, asDateTime)
67
      
David Watkins's avatar
David Watkins committed
68
69
70
71
72
73
      if (is.null(mergedDF)){
        mergedDF <- df
      } else {
        similarNames <- intersect(colnames(mergedDF), colnames(df))
        mergedDF <- full_join(mergedDF, df, by=similarNames)
      }
74
    }
David Watkins's avatar
David Watkins committed
75
76
77
78
79
    
    if(!raw){
      url <- input
      attr(mergedDF, "url") <- url
    }
David Watkins's avatar
David Watkins committed
80
81
82
    if(asDateTime){
      mergedDF$date <- as.Date(mergedDF$date)
    }
83
84
85
    nonDateCols <- grep("date",names(mergedDF), value=TRUE, invert = TRUE)
    
    mergedDF[nonDateCols][mergedDF[nonDateCols] == "" | mergedDF[nonDateCols]== -999999.0] <- NA
David Watkins's avatar
David Watkins committed
86
87
    attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier")) 
    attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate")) 
88
89
90
91
    meta <- xml_find_all(returnedDoc, ".//gmd:contact")
    attr(mergedDF, "contact") <- xml_attr(meta, "href")
    attr(mergedDF, "responsibleParty") <- xml_text(xml_find_all(meta, ".//gco:CharacterString"))
    
David Watkins's avatar
David Watkins committed
92
93
    
  }else if(response == "GetFeatureOfInterestResponse"){
94
95
    featureMembers <- xml_find_all(returnedDoc, ".//sos:featureMember")
    site <- xml_text(xml_find_all(featureMembers,".//gml:identifier"))
David Watkins's avatar
David Watkins committed
96
97
    site <- substring(site, 8)
    
98
99
    #some sites don't have a description
    siteDesc <- xml_text(xml_find_first(featureMembers, ".//gml:description"))
David Watkins's avatar
David Watkins committed
100
    
101
102
103
    siteLocs <- strsplit(xml_text(xml_find_all(featureMembers, ".//gml:pos")), " ")
    siteLocs <- data.frame(matrix(unlist(siteLocs), nrow=length(siteLocs), byrow=TRUE), stringsAsFactors = FALSE)
    names(siteLocs) <- c("dec_lat_va", "dec_lon_va")
104
105
    dec_lat_va <- "dplyr var"
    dec_lon_va <- "dplyr var"
106
    siteLocs <- mutate(siteLocs, dec_lat_va=as.numeric(dec_lat_va), dec_lon_va=as.numeric(dec_lon_va))
David Watkins's avatar
David Watkins committed
107
108
    mergedDF <- cbind.data.frame(site, description = siteDesc, siteLocs, stringsAsFactors = FALSE) 
  }
David Watkins's avatar
David Watkins committed
109
110
  else{
    stop("Unrecognized response from the web service")
111
112
113
  }
  return(mergedDF)
}
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

#' parse the timeseries portion of a waterML2 file
#' @param input XML with only the wml2:MeasurementTimeseries node and children
#' 
#' @export
parseWaterML2Timeseries <- function(input, asDateTime) {
  gmlID <- xml_attr(input,"id")
  TVP <- xml_find_all(input, ".//wml2:MeasurementTVP")#time-value pairs
  rawTime <- xml_text(xml_find_all(TVP,".//wml2:time"))
  
  valueNodes <- xml_find_all(TVP,".//wml2:value")
  values <- as.numeric(xml_text(valueNodes))
  nVals <- length(values)
  gmlID <- rep(gmlID, nVals)
  
  #df of date, time, dateTime
  oneCol <- rep(NA, nVals) 
  timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol)
  splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE)
  if(ncol(splitTime) > 1){ #some sites only have a date
    names(splitTime) <- c("date", "time")
  }else{
    names(splitTime) <- "date"
    splitTime <- mutate(splitTime, time = NA)
  }
  
  timeDF <- mutate(splitTime, dateTime = NA)
  logicVec <- nchar(rawTime) > 19
  timeDF$dateTime[logicVec] <- rawTime[logicVec]
  if(asDateTime){
    timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
                                                          "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
    #^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
    attr(time, 'tzone') <- tz
  }
  
  uom <- xml_attr(valueNodes, "uom", default = NA)
  source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
  comment <- xml_text(xml_find_all(TVP, ".//wml2:comment"))
  
  df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID,
                         stringsAsFactors=FALSE)
  return(df)
}