diff --git a/DESCRIPTION b/DESCRIPTION index f7030f3af8a487fca8be8a7b4d25e7a5caa27b33..db24faf1fa1a3e4939cb31dc191b911b55bb1139 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: dataRetrieval Type: Package -Title: Retrieval functions for USGS data +Title: Retrieval functions for hydrologic data Version: 1.2.2 Date: 2012-12-31 Author: Robert M. Hirsch, Laura De Cicco @@ -55,6 +55,8 @@ Collate: 'padVariable.r' 'getRDB1Data.r' 'getSTORETSampleData.R' + 'getWaterML2Data.r' + 'renameColumns.R' Depends: R (>= 3.0) Imports: diff --git a/NAMESPACE b/NAMESPACE index b9206bbe640951f91f1c8c9b8733dd67dc99c519..a0effc82a4061ca822787666442b9667481b7b04 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(getSampleDataFromFile) export(getSiteFileData) export(getWQPData) export(getWaterML1Data) +export(getWaterML2Data) export(mergeReport) export(padVariable) export(populateConcentrations) @@ -35,6 +36,7 @@ export(populateSampleColumns) export(populateSiteINFO) export(processQWData) export(removeDuplicates) +export(renameColumns) export(retrieveNWISData) export(retrieveNWISqwData) export(retrieveUnitNWISData) diff --git a/R/getWaterML2Data.r b/R/getWaterML2Data.r index ca03fdda343995d6ffe10eef2016e6d5b6488bca..4baac2f8e055364f8d675dae8c0c703538fb6cde 100644 --- a/R/getWaterML2Data.r +++ b/R/getWaterML2Data.r @@ -21,43 +21,40 @@ getWaterML2Data <- function(obs_url){ ns <- xmlNamespaceDefinitions(doc, simplify = TRUE) - timeseries <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", namespaces = ns) - DF <- xmlToDataFrame(timeseries,stringsAsFactors=FALSE) - DF$time <- gsub(":","",DF$time) - - DF$time <- ifelse(nchar(DF$time) > 18,as.POSIXct(strptime(DF$time, format="%Y-%m-%dT%H%M%S%z")), - ifelse("Z" == substr(DF$time,(nchar(DF$time)),nchar(DF$time)),as.POSIXct(strptime(DF$time, format="%Y-%m-%dT%H%M%S",tz="GMT")), - as.POSIXct(strptime(DF$time, format="%Y-%m-%dT%H%M%S",tz="")))) - - DF$time <- as.POSIXct(DF$time,origin=as.POSIXct(strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC"))) - DF$value<- as.numeric(DF$value) - + timeseries2 <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point", namespaces = ns) + + xp <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", xpathSApply, ".//*[not(*)]", function(x) + setNames(ifelse(nzchar(xmlValue(x)), xmlValue(x), + ifelse("qualifier" == xmlName(x),xpathSApply(x,"./@xlink:title",namespaces = ns),"")), #originally I had the "" as xmlAttr(x) + xmlName(x)), namespaces = ns) + library(plyr) + DF2 <- do.call(rbind.fill.matrix, lapply(xp, t)) + DF2 <- as.data.frame(DF2,stringsAsFactors=FALSE) + DF2$time <- gsub(":","",DF2$time) + DF2$time <- with(DF2, ifelse(nchar(time) > 18,as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S%z")), + ifelse("Z" == substr(time,(nchar(time)),nchar(time)),as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz="GMT")), + as.POSIXct(strptime(time, format="%Y-%m-%dT%H%M%S",tz=""))))) + + DF2$time <- with(DF2, as.POSIXct(time,origin=as.POSIXct(strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S", tz="UTC")))) + + DF2$value <- as.numeric(gsub("true","",DF2$value)) + # Very specific to USGS: defaultQualifier <- as.character(xpathApply(doc, "//wml2:defaultPointMetadata/wml2:DefaultTVPMeasurementMetadata/wml2:qualifier/@xlink:title",namespaces = ns)) - - defaultQualifier <- ifelse("Provisional data subject to revision." == defaultQualifier, "P", - ifelse("Approved for publication. Processing and review completed." == defaultQualifier, "A", defaultQualifier)) - qualifier <- rep(defaultQualifier,nrow(DF)) - - realQual <- as.character(xpathSApply(doc,"//wml2:TVPMeasurementMetadata/wml2:qualifier/@xlink:title", namespaces = ns)) + if (length(defaultQualifier) == 0 && (typeof(defaultQualifier) == "character")) { + defaultQualifier <- "NA" + } - if (length(realQual) > 0){ - timeseriesSub <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP/wml2:metadata", namespaces = ns) - DF2 <- xmlToDataFrame(timeseriesSub,stringsAsFactors=FALSE) + if("qualifier" %in% names(DF2)){ + DF2$qualifier <- ifelse(defaultQualifier != DF2$qualifier,DF2$qualifier,defaultQualifier) } else { - if (length(qualifier) > 0){ - DF$qualifier <- qualifier - } + DF2$qualifier <- rep(defaultQualifier,nrow(DF2)) } - timeseries2 <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point", namespaces = ns) - xp <- xpathApply(doc, "//wml2:MeasurementTimeseries/wml2:point/wml2:MeasurementTVP", xpathSApply, ".//*[not(*)]", function(x) - setNames(ifelse(nzchar(xmlValue(x)), xmlValue(x), - ifelse("qualifier" == xmlName(x),xpathSApply(x,"./@xlink:title",namespaces = ns),xmlAttrs(x))), - xmlName(x)), namespaces = ns) - library(plyr) - DF <- do.call(rbind.fill.matrix, lapply(xp, t)) - return (DF) + DF2$qualifier <- ifelse("Provisional data subject to revision." == DF2$qualifier, "P", + ifelse("Approved for publication. Processing and review completed." == DF2$qualifier, "A", DF2$qualifier)) + + return (DF2) } diff --git a/R/renameColumns.R b/R/renameColumns.R new file mode 100644 index 0000000000000000000000000000000000000000..27f98997080e9465bbfcfc6e348af9b14ba09a69 --- /dev/null +++ b/R/renameColumns.R @@ -0,0 +1,68 @@ +#' renameColumns +#' +#' Rename columns coming back from NWIS data retrievals +#' +#' @param rawData dataframe returned from retrieval functions +#' @keywords data import USGS web service +#' @return rawData dataframe with improved column names +#' @export +#' @examples +#' # These examples require an internet connection to run +#' siteNumber <- '05114000' +#' ParameterCd <- c('00060','00065') +#' StartDate <- as.character(Sys.Date()) +#' EndDate <- as.character(Sys.Date()) +#' # These examples require an internet connection to run +#' rawData <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,interactive=FALSE) +#' rawData <- renameColumns(rawData) +#' rawData2 <- retrieveNWISData(siteNumber,c("00010","00060","00300"),"2001-01-01","2002-01-01",StatCd=c("00001","00003"),interactive=FALSE) +#' rawData2 <- renameColumns(rawData2) +#' site <- '04027000' +#' pCodes <- c("00010","00060","00095","00300","00400","63680") +#' rawData3 <- retrieveUnitNWISData(site,pCodes,StartDate,EndDate,interactive=FALSE) +#' rawData3 <- renameColumns(rawData3) +renameColumns <- function(rawData){ + + columnNames <- names(rawData) + + dataCols <- columnNames["X" == substring(columnNames, 1, 1)] + dataCol_cds <- dataCols["cd" == substring(dataCols, nchar(dataCols)-1, nchar(dataCols))] + dataCol_names <- dataCols[!(dataCols %in% dataCol_cds)] + + pCodes <- sapply(strsplit(dataCol_names, "_"), function(x) x[2]) + statCd <- sapply(strsplit(dataCol_names, "_"), function(x) x[3]) + + pcodeINFO <- getParameterInfo(pCodes,interactive=FALSE) + multipleCodes <- anyDuplicated(pCodes) + + statCd <- sub("00001", "_Max", statCd) + statCd <- sub("00002", "_Min", statCd) + statCd <- sub("00003", "", statCd) # Leave mean blank + statCd <- sub("00011", "", statCd) # Also leaving blank + + DDnum <- sapply(strsplit(dataCol_names, "_"), function(x) x[1]) + DDnum <- gsub("X","",DDnum) + + if (!any(duplicated(pCodes))){ + dataColNames <- pcodeINFO$srsname[which(pcodeINFO$parameter_cd %in% pCodes)] + dataColNames <- paste(dataColNames,statCd,sep="") + } else { + dataColNames <- rep(NA,length(dataCol_names)) + for (i in 1:length(dataCol_names)){ + dataColNames[i] <- pcodeINFO$srsname[which(pcodeINFO$parameter_cd %in% pCodes[i])] + if((!(pCodes[i] %in% duplicated(pCodes))) && (pCodes[i] != pCodes[anyDuplicated(pCodes)])){ + dataColNames[i] <- paste(dataColNames[i],statCd[i],sep="") + } else { + dataColNames[i] <- paste(dataColNames[i],statCd[i],"_",DDnum[i],sep="") + } + + } + + } + dataColCDS <- paste(dataColNames, "_cd") + columnNames[which(columnNames %in% dataCol_names)] <- dataColNames + columnNames[which(columnNames %in% dataCol_cds)] <- dataColCDS + names(rawData) <- columnNames + + return(rawData) +} diff --git a/man/getWaterML2Data.Rd b/man/getWaterML2Data.Rd new file mode 100644 index 0000000000000000000000000000000000000000..3e7611bd1d4516cc91bb914fb33339def82b452d --- /dev/null +++ b/man/getWaterML2Data.Rd @@ -0,0 +1,24 @@ +\name{getWaterML2Data} +\alias{getWaterML2Data} +\title{Function to return data from the WaterML2 data} +\usage{ + getWaterML2Data(obs_url) +} +\arguments{ + \item{obs_url}{string containing the url for the + retrieval} +} +\value{ + mergedDF a data frame containing columns agency, site, + dateTime, values, and remark codes for all requested + combinations +} +\description{ + This function accepts a url parameter for a WaterML2 + getObservation +} +\examples{ +url <- "http://webvastage6.er.usgs.gov/ogc-swie/wml2/uv/sos?request=GetObservation&featureID=01446500&observedProperty=00065&offering=UNIT&beginPosition=2013-08-20" +dataReturned <- getWaterML2Data(urlMulti) +} + diff --git a/man/renameColumns.Rd b/man/renameColumns.Rd new file mode 100644 index 0000000000000000000000000000000000000000..1f68f8287b74f95468fbcb9c74310e07a0d7cbb0 --- /dev/null +++ b/man/renameColumns.Rd @@ -0,0 +1,38 @@ +\name{renameColumns} +\alias{renameColumns} +\title{renameColumns} +\usage{ + renameColumns(rawData) +} +\arguments{ + \item{rawData}{dataframe returned from retrieval + functions} +} +\value{ + rawData dataframe with improved column names +} +\description{ + Rename columns coming back from NWIS data retrievals +} +\examples{ +# These examples require an internet connection to run +siteNumber <- '05114000' +ParameterCd <- c('00060','00065') +StartDate <- as.character(Sys.Date()) +EndDate <- as.character(Sys.Date()) +# These examples require an internet connection to run +rawData <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,interactive=FALSE) +rawData <- renameColumns(rawData) +rawData2 <- retrieveNWISData(siteNumber,c("00010","00060","00300"),"2001-01-01","2002-01-01",StatCd=c("00001","00003"),interactive=FALSE) +rawData2 <- renameColumns(rawData2) +site <- '04027000' +pCodes <- c("00010","00060","00095","00300","00400","63680") +rawData3 <- retrieveUnitNWISData(site,pCodes,StartDate,EndDate,interactive=FALSE) +rawData3 <- renameColumns(rawData3) +} +\keyword{USGS} +\keyword{data} +\keyword{import} +\keyword{service} +\keyword{web} +