diff --git a/DESCRIPTION b/DESCRIPTION index cecbbf355468bcb282a486c25b2baa53ca273bd5..bba26be039715a7486d58e240d05ebcc9d89affa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,7 @@ Imports: RCurl, reshape2, lubridate, - dplyr, + plyr, httr Suggests: xtable, diff --git a/NAMESPACE b/NAMESPACE index de3f9109e6dabace16f61a383e77b74d08023148..03d8ab66fec5236bea37004aa9e811efa88d2e1e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,5 +26,5 @@ import(RCurl) import(XML) import(httr) import(reshape2) -importFrom(dplyr,rbind_all) importFrom(lubridate,parse_date_time) +importFrom(plyr,rbind.fill.matrix) diff --git a/R/importWaterML2.r b/R/importWaterML2.r index 6ff20c31f6f8c3053683e99dcb88edf4235e651e..23a2ad961562dd5f8941af66fe8ac3f135c5e7f3 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -6,7 +6,8 @@ +#' @param asDateTime logical, if TRUE the time column is converted to POSIXct, otherwise it is converted to Date. Default is FALSE #' @return mergedDF a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations #' @export #' @import XML -#' @importFrom dplyr rbind_all +#' @importFrom plyr rbind.fill.matrix #' @examples #' baseURL <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,2.0" #' URL <- paste(baseURL, "sites=01646500", @@ -14,13 +15,28 @@ #' "endDT=2014-09-08", #' "statCd=00003", #' "parameterCd=00060",sep="&") -#' \dontrun{dataReturned3 <- importWaterML2(URL)} -importWaterML2 <- function(obs_url){ +#' URL2 <- paste("http://cida.usgs.gov/noreast-sos/simple?request=GetObservation", +#' "featureID=MD-BC-BC-05", +#' "offering=RAW", +#' "observedProperty=WATER",sep="&") +#' \dontrun{ +#' dataReturned1 <- importWaterML2(URL) +#' dataReturn2 <- importWaterML2(URL2, TRUE) +#' URLmulti <- paste(baseURL, +#' "sites=04024430,04024000", +#' "startDT=2014-09-01", +#' "endDT=2014-09-08", +#' "statCd=00003", +#' "parameterCd=00060",sep="&") +#' dataReturnMulti <- importWaterML2(URLmulti) +#' } +importWaterML2 <- function(obs_url, asDateTime=FALSE){ h <- basicHeaderGatherer() doc = tryCatch({ returnedDoc <- getURL(obs_url, headerfunction 
= h$update) - if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){ + if(h$value()["Content-Type"] == "text/xml;charset=UTF-8" | + h$value()["Content-Type"] == "text/xml; subtype=gml/3.1.1;charset=UTF-8"){ xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) } else { message(paste("URL caused an error:", obs_url)) @@ -41,40 +56,74 @@ importWaterML2 <- function(obs_url){ ns <- xmlNamespaceDefinitions(doc, simplify = TRUE) - timeseries2 <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point", namespaces = ns) - xp <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", xpathSApply, ".//*[not(*)]", function(x) - setNames(ifelse(nzchar(xmlValue(x)), xmlValue(x), - ifelse("qualifier" == xmlName(x),xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had the "" as xmlAttr(x) - xmlName(x)), namespaces = ns) - - DF2 <- do.call(rbind_all, lapply(xp, t)) - DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE) - DF2$time <- gsub(":","",DF2$time) - DF2$time <- with(DF2, ifelse(nchar(time) > 18,as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S%z")), - ifelse("Z" == substr(time,(nchar(time)),nchar(time)),as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="GMT")), - as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz=""))))) + timeSeries <- xpathApply(doc, "//wml2:Collection", namespaces = ns) - DF2$time <- with(DF2, as.POSIXct(time,origin=as.POSIXct(strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC")))) + for (i in 1:length(timeSeries)){ - DF2$value <- as.numeric(gsub("true","",DF2$value)) + chunk <- xmlDoc(timeSeries[[i]]) + chunk <- xmlRoot(chunk) + chunkNS <- xmlNamespaceDefinitions(chunk, simplify = TRUE) + + xp <- xpathApply(chunk, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", + xpathSApply, ".//*[not(*)]", + function(x) setNames(ifelse(nzchar(xmlValue(x)), + xmlValue(x), + ifelse("qualifier" == xmlName(x), + xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had 
the "" as xmlAttr(x) + xmlName(x)), + namespaces = chunkNS) - # Very specific to USGS: - defaultQualifier <- as.character(xpathApply(doc, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = ns)) + if(length(xpathApply(doc, + "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP/wml2:metadata/wml2:TVPMeasurementMetadata", + xmlValue, namespaces = ns)) != 0){ + xp <- xp[-1] + } + + DF2 <- do.call(rbind.fill.matrix, lapply(xp, t)) + DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE) + + if(asDateTime){ + + DF2$time <- gsub(":","",DF2$time) + DF2$time <- ifelse(nchar(DF2$time) > 18, + as.POSIXct(DF2$time, format="%Y-%m-%dT%H%M%S%z",tz="UTC"), + as.POSIXct(DF2$time, format="%Y-%m-%dT%H%M%S",tz="UTC")) + + DF2$time <- as.POSIXct(DF2$time, origin = "1970-01-01", tz="UTC") + } else { + DF2$time <- as.Date(DF2$time) + } - if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) { - defaultQualifier <- "NA" - } + DF2$value <- as.numeric(DF2$value) + # Very specific to USGS: + defaultQualifier <- as.character(xpathApply(chunk, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = chunkNS)) + + if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) { + defaultQualifier <- "NA" + } + + if("qualifier" %in% names(DF2)){ + DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier) + } else { + DF2$qualifier <- rep(defaultQualifier,nrow(DF2)) + } + + + DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P", + ifelse("Approved for publication. Processing and review completed." 
== DF2$qualifier, "A", DF2$qualifier)) + - if("qualifier" %in% names(DF2)){ - DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier) - } else { - DF2$qualifier <- rep(defaultQualifier,nrow(DF2)) + id <- as.character(xpathApply(chunk, "//gml:identifier", xmlValue, namespaces = chunkNS)) + DF2$identifier <- rep(id, nrow(DF2)) + + if (1 == i ){ + mergedDF <- DF2 + } else { + similarNames <- intersect(names(mergedDF), names(DF2)) + mergedDF <- merge(mergedDF, DF2,by=similarNames,all=TRUE) + } } - - - DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P", - ifelse("Approved for publication. Processing and review completed." == DF2$qualifier, "A", DF2$qualifier)) - - return (DF2) + + return (mergedDF) } diff --git a/man/importWaterML2.Rd b/man/importWaterML2.Rd index 262dd6fd3fe33edb5edfd1209689f6a1769e1e43..8ff8f4381103d1c47ef1d691d0a9f1fff6d8fa4f 100644 --- a/man/importWaterML2.Rd +++ b/man/importWaterML2.Rd @@ -3,7 +3,7 @@ \alias{importWaterML2} \title{Function to return data from the WaterML2 data} \usage{ -importWaterML2(obs_url) +importWaterML2(obs_url, asDateTime = FALSE) } \arguments{ \item{obs_url}{string containing the url for the retrieval} @@ -21,6 +21,20 @@ URL <- paste(baseURL, "sites=01646500", "endDT=2014-09-08", "statCd=00003", "parameterCd=00060",sep="&") -\dontrun{dataReturned3 <- importWaterML2(URL)} +URL2 <- paste("http://cida.usgs.gov/noreast-sos/simple?request=GetObservation", + "featureID=MD-BC-BC-05", + "offering=RAW", + "observedProperty=WATER",sep="&") +\dontrun{ +dataReturned1 <- importWaterML2(URL) +dataReturn2 <- importWaterML2(URL2, TRUE) +URLmulti <- paste(baseURL, + "sites=04024430,04024000", + "startDT=2014-09-01", + "endDT=2014-09-08", + "statCd=00003", + "parameterCd=00060",sep="&") +dataReturnMulti <- importWaterML2(URLmulti) +} }