importNGWMN_wml2.R 5.78 KB
Newer Older
David Watkins's avatar
David Watkins committed
1
#' Function to return data from the National Ground Water Monitoring Network waterML2 format
2
3
4
#'
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development,
#' but the general functionality is correct.
David Watkins's avatar
David Watkins committed
5
#' 
6
7
8
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the 
David Watkins's avatar
David Watkins committed
9
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided time zone offset).
10
11
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
David Watkins's avatar
David Watkins committed
12
#' @return mergedDF a data frame source, time, value, uom, uomTitle, comment, gmlID
13
14
15
16
17
18
19
#' @export
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
#' @importFrom lubridate parse_date_time
#' @examples
David Watkins's avatar
David Watkins committed
20
#' \dontrun{
21
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
22
23
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.403836085374401"
David Watkins's avatar
David Watkins committed
24
#' data <- importNGWMN_wml2(url)
David Watkins's avatar
David Watkins committed
25
#' 
26
#' url <- "http://cida.usgs.gov/ngwmn_cache/sos?request=GetObservation&service=SOS&version=2.0.0
27
28
#' &observedProperty=urn:ogc:def:property:OGC:GroundWaterLevel&responseFormat=text/xml&featureOf
#' Interest=VW_GWDP_GEOSERVER.USGS.474011117072901"
David Watkins's avatar
David Watkins committed
29
#' data <- importNGWMN_wml2(url)
David Watkins's avatar
David Watkins committed
30
#' }
David Watkins's avatar
David Watkins committed
31
32
#' 
importNGWMN_wml2 <- function(input, asDateTime=FALSE, tz=""){
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
  if(tz != ""){
    tz <- match.arg(tz, c("America/New_York","America/Chicago",
                          "America/Denver","America/Los_Angeles",
                          "America/Anchorage","America/Honolulu",
                          "America/Jamaica","America/Managua",
                          "America/Phoenix","America/Metlakatla"))
  }else{tz = "UTC"}
  
  raw <- FALSE
  if(class(input) == "character" && file.exists(input)){
    returnedDoc <- read_xml(input)
  }else if(class(input) == 'raw'){
    returnedDoc <- read_xml(input)
    raw <- TRUE
  } else {
    returnedDoc <- xml_root(getWebServiceData(input, encoding='gzip'))
  }
  
David Watkins's avatar
David Watkins committed
51
52
53
54
55
56
57
58
59
60
61
  response <- xml_name(returnedDoc)
  if(response == "GetObservationResponse"){
    
    timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo
    
    if(0 == length(timeSeries)){
      df <- data.frame()
      if(!raw){
        attr(df, "url") <- input
      }
      return(df)
62
63
    }
    
David Watkins's avatar
David Watkins committed
64
    mergedDF <- NULL
65
    
David Watkins's avatar
David Watkins committed
66
67
68
    for(t in timeSeries){
      gmlID <- xml_attr(t,"id")
      TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
      rawTime <- xml_text(xml_find_all(TVP,".//wml2:time"))
      
      valueNodes <- xml_find_all(TVP,".//wml2:value")
      values <- as.numeric(xml_text(valueNodes))
      nVals <- length(values)
      gmlID <- rep(gmlID, nVals)
      
      #df of date, time, dateTime
      oneCol <- rep(NA, nVals) 
      timeDF <- data.frame(date=oneCol, time=oneCol, dateTime=oneCol)
      splitTime <- data.frame(matrix(unlist(strsplit(rawTime, "T")), nrow=nVals, byrow = TRUE), stringsAsFactors=FALSE)
      names(splitTime) <- c("date", "time")
      timeDF <- mutate(splitTime, dateTime = NA)
      logicVec <- nchar(rawTime) > 19
      timeDF$dateTime[logicVec] <- rawTime[logicVec]
David Watkins's avatar
David Watkins committed
84
      if(asDateTime){
85
        timeDF$dateTime <- parse_date_time(timeDF$dateTime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
David Watkins's avatar
David Watkins committed
86
87
88
89
                                        "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
        #^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
        attr(time, 'tzone') <- tz 
      }
90
91
       
      
David Watkins's avatar
David Watkins committed
92
      
David Watkins's avatar
David Watkins committed
93
      uom <- xml_attr(valueNodes, "uom", default = NA)
David Watkins's avatar
David Watkins committed
94
      source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
David Watkins's avatar
David Watkins committed
95
      comment <- xml_text(xml_find_all(TVP, ".//wml2:comment"))
David Watkins's avatar
David Watkins committed
96
      
97
      df <- cbind.data.frame(source, timeDF, value=values, uom, comment, gmlID,
David Watkins's avatar
David Watkins committed
98
99
100
101
102
103
104
                             stringsAsFactors=FALSE)
      if (is.null(mergedDF)){
        mergedDF <- df
      } else {
        similarNames <- intersect(colnames(mergedDF), colnames(df))
        mergedDF <- full_join(mergedDF, df, by=similarNames)
      }
105
    }
David Watkins's avatar
David Watkins committed
106
107
108
109
110
    
    if(!raw){
      url <- input
      attr(mergedDF, "url") <- url
    }
111
112
113
114
    mergedDF$date <- as.Date(mergedDF$date)
    nonDateCols <- grep("date",names(mergedDF), value=TRUE, invert = TRUE)
    
    mergedDF[nonDateCols][mergedDF[nonDateCols] == "" | mergedDF[nonDateCols]== -999999.0] <- NA
David Watkins's avatar
David Watkins committed
115
116
    attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier")) 
    attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate")) 
117
   
David Watkins's avatar
David Watkins committed
118
119
120
121
122
123
124
125
126
127
128
129
130
    
  }else if(response == "GetFeatureOfInterestResponse"){
    site <- xml_text(xml_find_all(returnedDoc,".//gml:identifier"))
    site <- substring(site, 8)
    
    #bandaid to work with only single site calls
    #TODO: need better solution when bbox is added
    siteDesc <- xml_text(xml_find_all(returnedDoc, ".//gml:description"))
    if(length(siteDesc) == 0){
      siteDesc <- NA
    }
    
    siteLocs <- strsplit(xml_text(xml_find_all(returnedDoc, ".//gml:pos")), " ")
131
    siteLocs <- data.frame(dec_lat_va=as.numeric(siteLocs[[1]][1]), dec_lon_va=as.numeric(siteLocs[[1]][2]), stringsAsFactors = FALSE)
David Watkins's avatar
David Watkins committed
132
133
    mergedDF <- cbind.data.frame(site, description = siteDesc, siteLocs, stringsAsFactors = FALSE) 
  }
David Watkins's avatar
David Watkins committed
134
135
  else{
    stop("Unrecognized response from the web service")
136
137
138
  }
  return(mergedDF)
}