From b49aed55050303dff90367778862e3215b1fbcd4 Mon Sep 17 00:00:00 2001
From: unknown <ldecicco@usgs.gov>
Date: Thu, 30 Oct 2014 12:05:54 -0500
Subject: [PATCH] Adding WaterML2 parser back in.

---
 DESCRIPTION           |   2 +-
 NAMESPACE             |   2 +-
 R/importWaterML2.r    | 115 ++++++++++++++++++++++++++++++------------
 man/importWaterML2.Rd |  18 ++++++-
 4 files changed, 100 insertions(+), 37 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index cecbbf35..bba26be0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -22,7 +22,7 @@ Imports:
     RCurl,
     reshape2,
     lubridate,
-    dplyr,
+    plyr,
     httr
 Suggests:
     xtable,
diff --git a/NAMESPACE b/NAMESPACE
index de3f9109..03d8ab66 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,5 +26,5 @@ import(RCurl)
 import(XML)
 import(httr)
 import(reshape2)
-importFrom(dplyr,rbind_all)
 importFrom(lubridate,parse_date_time)
+importFrom(plyr,rbind.fill.matrix)
diff --git a/R/importWaterML2.r b/R/importWaterML2.r
index 6ff20c31..23a2ad96 100644
--- a/R/importWaterML2.r
+++ b/R/importWaterML2.r
@@ -6,7 +6,7 @@
 #' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
 #' @export
 #' @import XML
-#' @importFrom dplyr rbind_all
+#' @importFrom plyr rbind.fill.matrix
 #' @examples
 #' baseURL <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0"
 #' URL <- paste(baseURL, "sites=01646500",
@@ -14,13 +14,28 @@
 #'      "endDT=2014-09-08",
 #'      "statCd=00003",
 #'      "parameterCd=00060",sep="&")
-#' \dontrun{dataReturned3 <- importWaterML2(URL)}
-importWaterML2 <- function(obs_url){
+#' URL2 <- paste("http://cida.usgs.gov/noreast-sos/simple?request=GetObservation",
+#'      "featureID=MD-BC-BC-05",
+#'      "offering=RAW",
+#'      "observedProperty=WATER",sep="&")
+#' \dontrun{
+#' dataReturned1 <- importWaterML2(URL)
+#' dataReturn2 <- importWaterML2(URL2, TRUE)
+#' URLmulti <-  paste(baseURL,
+#'   "sites=04024430,04024000",
+#'   "startDT=2014-09-01",
+#'   "endDT=2014-09-08",
+#'   "statCd=00003",
+#'   "parameterCd=00060",sep="&")
+#' dataReturnMulti <- importWaterML2(URLmulti)
+#' }
+importWaterML2 <- function(obs_url, asDateTime=FALSE){
   
   h <- basicHeaderGatherer()
   doc = tryCatch({
     returnedDoc <- getURL(obs_url, headerfunction = h$update)
-    if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){
+    if(h$value()["Content-Type"] == "text/xml;charset=UTF-8" | 
+         h$value()["Content-Type"] == "text/xml; subtype=gml/3.1.1;charset=UTF-8"){
       xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE)
     } else {
       message(paste("URL caused an error:", obs_url))
@@ -41,40 +56,74 @@ importWaterML2 <- function(obs_url){
   
   ns <- xmlNamespaceDefinitions(doc, simplify = TRUE)  
   
-  timeseries2 <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point", namespaces = ns)
   
-  xp <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", xpathSApply, ".//*[not(*)]", function(x)
-    setNames(ifelse(nzchar(xmlValue(x)), xmlValue(x), 
-                    ifelse("qualifier" == xmlName(x),xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had the "" as xmlAttr(x) 
-             xmlName(x)), namespaces = ns)
-
-  DF2 <- do.call(rbind_all, lapply(xp, t))
-  DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE)
-  DF2$time <- gsub(":","",DF2$time)
-  DF2$time <- with(DF2, ifelse(nchar(time) > 18,as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S%z")),
-                     ifelse("Z" == substr(time,(nchar(time)),nchar(time)),as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="GMT")),
-                            as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="")))))
+  timeSeries <- xpathApply(doc, "//wml2:Collection", namespaces = ns)
   
-  DF2$time <- with(DF2, as.POSIXct(time,origin=as.POSIXct(strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC"))))
+  for (i in 1:length(timeSeries)){
   
-  DF2$value <- as.numeric(gsub("true","",DF2$value))
+    chunk <- xmlDoc(timeSeries[[i]])
+    chunk <- xmlRoot(chunk)
+    chunkNS <- xmlNamespaceDefinitions(chunk, simplify = TRUE)
+    
+    xp <- xpathApply(chunk, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", 
+                     xpathSApply, ".//*[not(*)]", 
+                     function(x) setNames(ifelse(nzchar(xmlValue(x)), 
+                                                 xmlValue(x), 
+                                                    ifelse("qualifier" == xmlName(x),
+                                                           xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had the "" as xmlAttr(x) 
+                                                            xmlName(x)), 
+                     namespaces = chunkNS)
   
-  # Very specific to USGS:
-  defaultQualifier <- as.character(xpathApply(doc, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = ns))
+    if(length(xpathApply(doc, 
+                  "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP/wml2:metadata/wml2:TVPMeasurementMetadata", 
+                  xmlValue, namespaces = ns)) != 0){
+      xp <- xp[-1]
+    }
+      
+    DF2 <- do.call(rbind.fill.matrix, lapply(xp, t))
+    DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE)
+    
+    if(asDateTime){
+    
+      DF2$time <- gsub(":","",DF2$time)
+      DF2$time <- ifelse(nchar(DF2$time) > 18,
+                                   as.POSIXct(DF2$time, format="%Y-%m-%dT%H%M%S%z",tz="UTC"),
+                                         as.POSIXct(DF2$time, format="%Y-%m-%dT%H%M%S",tz="UTC"))
+      
+      DF2$time <- as.POSIXct(DF2$time, origin = "1970-01-01", tz="UTC")
+    } else {
+      DF2$time <- as.Date(DF2$time)
+    }
   
-  if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) {
-    defaultQualifier <- "NA"
-  }
+    DF2$value <- as.numeric(DF2$value)
+    # Very specific to USGS:
+    defaultQualifier <- as.character(xpathApply(chunk, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = chunkNS))
+    
+    if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) {
+      defaultQualifier <- "NA"
+    }
+    
+    if("qualifier" %in% names(DF2)){
+      DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier)
+    } else {
+      DF2$qualifier <- rep(defaultQualifier,nrow(DF2))
+    }
+    
+    
+    DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P",
+                               ifelse("Approved for publication. Processing and review completed." == DF2$qualifier, "A", DF2$qualifier))
+    
   
-  if("qualifier" %in% names(DF2)){
-    DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier)
-  } else {
-    DF2$qualifier <- rep(defaultQualifier,nrow(DF2))
+    id <- as.character(xpathApply(chunk, "//gml:identifier", xmlValue, namespaces = chunkNS))
+    DF2$identifier <- rep(id, nrow(DF2))
+    
+    if (1 == i ){
+      mergedDF <- DF2
+    } else {
+      similarNames <- intersect(names(mergedDF), names(DF2))
+      mergedDF <- merge(mergedDF, DF2,by=similarNames,all=TRUE)
+    }
   }
-  
-  
-  DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P",
-                             ifelse("Approved for publication. Processing and review completed." == DF2$qualifier, "A", DF2$qualifier))
-  
-  return (DF2)
+
+  return (mergedDF)
 }
diff --git a/man/importWaterML2.Rd b/man/importWaterML2.Rd
index 262dd6fd..8ff8f438 100644
--- a/man/importWaterML2.Rd
+++ b/man/importWaterML2.Rd
@@ -3,7 +3,7 @@
 \alias{importWaterML2}
 \title{Function to return data from the WaterML2 data}
 \usage{
-importWaterML2(obs_url)
+importWaterML2(obs_url, asDateTime = FALSE)
 }
 \arguments{
 \item{obs_url}{string containing the url for the retrieval}
@@ -21,6 +21,20 @@ URL <- paste(baseURL, "sites=01646500",
      "endDT=2014-09-08",
      "statCd=00003",
      "parameterCd=00060",sep="&")
-\dontrun{dataReturned3 <- importWaterML2(URL)}
+URL2 <- paste("http://cida.usgs.gov/noreast-sos/simple?request=GetObservation",
+     "featureID=MD-BC-BC-05",
+     "offering=RAW",
+     "observedProperty=WATER",sep="&")
+\dontrun{
+dataReturned1 <- importWaterML2(URL)
+dataReturn2 <- importWaterML2(URL2, TRUE)
+URLmulti <-  paste(baseURL,
+  "sites=04024430,04024000",
+  "startDT=2014-09-01",
+  "endDT=2014-09-08",
+  "statCd=00003",
+  "parameterCd=00060",sep="&")
+dataReturnMulti <- importWaterML2(URLmulti)
+}
 }
 
-- 
GitLab