importNGWMN_wml2.R 3.82 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#' Function to return data from the WaterML2 data
#'
#' This function accepts a url parameter for a WaterML2 getObservation. This function is still under development,
#' but the general functionality is correct.
#'
#' @param input character or raw, containing the url for the retrieval or a path to the data file, or raw XML.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, character
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the 
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @return mergedDF a data frame time, value, description, qualifier, and identifier
#' @export
#' @importFrom xml2 read_xml
#' @importFrom xml2 xml_find_all
#' @importFrom xml2 xml_text
#' @importFrom xml2 xml_attr
#' @importFrom lubridate parse_date_time
#' @examples
#' 
importNGMWN_wml2 <- function(input, asDateTime=FALSE, tz=""){
  
  if(tz != ""){
    tz <- match.arg(tz, c("America/New_York","America/Chicago",
                          "America/Denver","America/Los_Angeles",
                          "America/Anchorage","America/Honolulu",
                          "America/Jamaica","America/Managua",
                          "America/Phoenix","America/Metlakatla"))
  }else{tz = "UTC"}
  
  raw <- FALSE
  if(class(input) == "character" && file.exists(input)){
    returnedDoc <- read_xml(input)
  }else if(class(input) == 'raw'){
    returnedDoc <- read_xml(input)
    raw <- TRUE
  } else {
    returnedDoc <- xml_root(getWebServiceData(input, encoding='gzip'))
  }
  
  
  
  timeSeries <- xml_find_all(returnedDoc, "//wml2:MeasurementTimeseries") #each parameter/site combo
  
  if(0 == length(timeSeries)){
    df <- data.frame()
    if(!raw){
      attr(df, "url") <- input
    }
    return(df)
  }
  
  mergedDF <- NULL
  
  for(t in timeSeries){
    gmlID <- xml_attr(t,"id")
    uomTitle <- xml_attr(xml_find_all(t, ".//wml2:uom"), "title")
    TVP <- xml_find_all(t, ".//wml2:MeasurementTVP")#time-value pairs
    time <- xml_text(xml_find_all(TVP,".//wml2:time"))
    if(asDateTime){
      time <- parse_date_time(time, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M","%Y-%m-%dT%H:%M:%S",
                                      "%Y-%m-%dT%H:%M:%OS","%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
      #^^setting tz in as.POSIXct just sets the attribute, does not convert the time!
      attr(time, 'tzone') <- tz 
    }
    valueNodes <- xml_find_all(TVP,".//wml2:value")
    values <- as.numeric(xml_text(valueNodes))
    nVals <- length(values)
    gmlID <- rep(gmlID, nVals)
    uomTitle <- rep(uomTitle, nVals)
    
    uom <- removeBlanks(xml_attr(valueNodes, "uom", default = NA))
    source <- xml_attr(xml_find_all(TVP, ".//wml2:source"), "title")
    comment <- removeBlanks(xml_text(xml_find_all(TVP, ".//wml2:comment")))
    
    df <- cbind.data.frame(source, time, value=values, uom, uomTitle, comment, gmlID,
                           stringsAsFactors=FALSE)
    if (is.null(mergedDF)){
      mergedDF <- df
    } else {
      similarNames <- intersect(colnames(mergedDF), colnames(df))
      mergedDF <- full_join(mergedDF, df, by=similarNames)
    }
  }
  
  if(!raw){
    url <- input
    attr(mergedDF, "url") <- url
  }
  attr(mergedDF, "gml:identifier") <- xml_text(xml_find_all(returnedDoc, ".//gml:identifier")) 
  attr(mergedDF, "generationDate") <- xml_text(xml_find_all(returnedDoc, ".//wml2:generationDate")) 
  
  return(mergedDF)
  
}


#replace blank cells with NAs
removeBlanks <- function(input){
  input <- sapply(input, function(f){is.na(f)<-which(f == '');f})
  return(input)
}