From 4755182d470a438aad5d2b9e5f43217e7ed141b1 Mon Sep 17 00:00:00 2001
From: unknown <ldecicco@usgs.gov>
Date: Wed, 22 Oct 2014 08:53:21 -0500
Subject: [PATCH] Initial commit, still needs work.

---
 R/getWaterML2Data.r | 80 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 R/getWaterML2Data.r

diff --git a/R/getWaterML2Data.r b/R/getWaterML2Data.r
new file mode 100644
index 00000000..fd5bb469
--- /dev/null
+++ b/R/getWaterML2Data.r
@@ -0,0 +1,80 @@
+#' Function to return data from the WaterML2 data
+#'
+#' This function accepts a url parameter for a WaterML2 getObservation 
+#'
+#' @param obs_url string containing the url for the retrieval
+#' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
+#' @export
+#' @import XML
+#' @importFrom dplyr rbind_all
+#' @examples
+#' baseURL <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
+#' URL <- paste(baseURL, "sites=01646500",
+#'      "startDT=2014-09-01",
+#'      "endDT=2014-09-08",
+#'      "statCd=00003",
+#'      "parameterCd=00060",sep="&")
+#' \dontrun{dataReturned3 <- getWaterML2Data(URL)}
+getWaterML2Data <- function(obs_url){
+  
+  h <- basicHeaderGatherer()
+  doc = tryCatch({
+    returnedDoc <- getURL(obs_url, headerfunction = h$update)
+    if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){
+      xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE)
+    } else {
+      message(paste("URL caused an error:", obs_url))
+      message("Content-Type=",h$value()["Content-Type"])
+      return(NA)
+    }   
+    
+  }, warning = function(w) {
+    message(paste("URL caused a warning:", obs_url))
+    message(w)
+  }, error = function(e) {
+    message(paste("URL does not seem to exist:", obs_url))
+    message(e)
+    return(NA)
+  }) 
+  
+  doc <- xmlRoot(doc)
+  
+  ns <- xmlNamespaceDefinitions(doc, simplify = TRUE)  
+  
+  timeseries2 <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point", namespaces = ns)
+  
+  xp <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", xpathSApply, ".//*[not(*)]", function(x)
+    setNames(ifelse(nzchar(xmlValue(x)), xmlValue(x), 
+                    ifelse("qualifier" == xmlName(x),xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had the "" as xmlAttr(x) 
+             xmlName(x)), namespaces = ns)
+
+  DF2 <- do.call(rbind_all, lapply(xp, t))
+  DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE)
+  DF2$time <- gsub(":","",DF2$time)
+  DF2$time <- with(DF2, ifelse(nchar(time) > 18,as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S%z")),
+                     ifelse("Z" == substr(time,(nchar(time)),nchar(time)),as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="GMT")),
+                            as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="")))))
+  
+  DF2$time <- with(DF2, as.POSIXct(time,origin=as.POSIXct(strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC"))))
+  
+  DF2$value <- as.numeric(gsub("true","",DF2$value))
+  
+  # Very specific to USGS:
+  defaultQualifier <- as.character(xpathApply(doc, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = ns))
+  
+  if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) {
+    defaultQualifier <- "NA"
+  }
+  
+  if("qualifier" %in% names(DF2)){
+    DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier)
+  } else {
+    DF2$qualifier <- rep(defaultQualifier,nrow(DF2))
+  }
+  
+  
+  DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P",
+                             ifelse("Approved for publication. Processing and review completed." == DF2$qualifier, "A", DF2$qualifier))
+  
+  return (DF2)
+}
-- 
GitLab