diff --git a/DESCRIPTION b/DESCRIPTION
index 99fc11f8774489e6aaa27fc7a6147b5c7903c058..b980422ed9696fc3d46349978d23e78aaa1b20dc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -52,8 +52,11 @@ Collate:
     'constructNWISURL.r'
     'getDataAvailability.r'
     'getMultipleParameterNames.r'
+    'getWaterML1Data.r'
+    'padVariable.r'
 Depends:
-    R (>= 2.15.0)
+    R (>= 2.15.0),
+    XML
 Imports:
     zoo
 Suggests:
diff --git a/NAMESPACE b/NAMESPACE
index 040e62c0f0a5b1ea634cdcc328aafd5409f8967e..db66c1f70f3e20743ef45607a28e0ba719337c2a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,10 +6,10 @@ export(dateFormatCheck)
 export(formatCheckDate)
 export(formatCheckParameterCd)
 export(formatCheckSiteNumber)
+export(getDVData)
 export(getDailyDataFromFile)
 export(getDataAvailability)
 export(getDataFromFile)
-export(getDVData)
 export(getMetaData)
 export(getMultipleParameterNames)
 export(getParameterInfo)
@@ -23,7 +23,9 @@ export(getSampleData)
 export(getSampleDataFromFile)
 export(getSiteFileData)
 export(getWQPData)
+export(getWaterML1Data)
 export(mergeReport)
+export(padVariable)
 export(populateConcentrations)
 export(populateDaily)
 export(populateDateColumns)
diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r
index 0e1eeb436893b783fa2faeb14830d956eedede0a..fed2e3615ef4cd597a9311fc1ab19cea87057c38 100644
--- a/R/constructNWISURL.r
+++ b/R/constructNWISURL.r
@@ -10,6 +10,7 @@
 #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD.
 #' @param statCd string or vector USGS statistic code only used for daily value service. This is usually 5 digits. Daily mean (00003) is the default.
 #' @param service string USGS service to call. Possible values are "dv" (daily values), "uv" (unit/instantaneous values), "qw" (water quality data), and "wqp" (water quality portal, which can include STORET).
+#' @param interactive logical Option for interactive mode. If TRUE, there is user interaction for error handling and data checks.
 #' @keywords data import USGS web service
 #' @return url string
 #' @export
@@ -23,7 +24,7 @@
 #' url_qw_single <- constructNWISURL(siteNumber,"34220",startDate,endDate,'qwdata')
 #' url_qw <- constructNWISURL(siteNumber,c('34247','30234','32104','34220'),startDate,endDate,'qwdata')
 #' url_wqp <- constructNWISURL(siteNumber,"34220",startDate,endDate,'wqp')
-constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003"){
+constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003",interactive=FALSE){
   startDate <- formatCheckDate(startDate, "StartDate", interactive=interactive)
   endDate <- formatCheckDate(endDate, "EndDate", interactive=interactive)
 
@@ -112,7 +113,7 @@ constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,st
 
   baseURL <- paste("http://waterservices.usgs.gov/nwis/",service,sep="")
 
-  url <- paste(baseURL,"?site=",siteNumber, "&ParameterCd=",parameterCd, "&format=rdb,1.0", sep = "")
+  url <- paste(baseURL,"/?site=",siteNumber, "&ParameterCd=",parameterCd, "&format=waterml,1.1", sep = "")
 
   if("dv"==service) {
     if(length(statCd) > 1){
diff --git a/R/getWaterML1Data.r b/R/getWaterML1Data.r
new file mode 100644
index 0000000000000000000000000000000000000000..651cf6b60e3941d2358b4ef4a54c57930b1a34b7
--- /dev/null
+++ b/R/getWaterML1Data.r
@@ -0,0 +1,89 @@
+#' Function to return data from the NWISWeb WaterML1.1 daily values service
+#'
+#' This function accepts a url parameter that already contains the desired
+#' NWIS site, parameter code, statistic, start date, and end date. It returns a
+#' data frame with agency, site, dateTime, and value/qualifier columns for each time series.
+#'
+#' @param obs_url string containing the url for the retrieval
+#' @return mergedDF a data frame containing the agency and site codes, a dateTime column, and a value and qualifier column for each requested parameter/statistic combination
+#' @export
+#' @examples
+#' url <- "http://waterservices.usgs.gov/nwis/dv/?format=waterml,1.1&sites="
+#' sites <- "02177000"
+#' startDate <- "2012-09-01"
+#' endDate <- "2012-10-01"
+#' offering <- '00003'
+#' property <- '00060'
+#' obs_url <- constructNWISURL(sites,property,startDate,endDate,'dv')
+#' data <- getWaterML1Data(obs_url)
+#' urlMulti <- constructNWISURL("04085427",c("00060","00010"),startDate,endDate,'dv',statCd=c("00003","00001"))
+#' multiData <- getWaterML1Data(urlMulti)
+getWaterML1Data <- function(obs_url){
+
+  # This approach is more elegant, but requires yet another package dependency (RCurl):
+# content <- getURLContent(obs_url,.opts=list(timeout.ms=500000))
+# test <- capture.output(tryCatch(xmlTreeParse(content, getDTD=FALSE, useInternalNodes=TRUE),"XMLParserErrorList" = function(e) {cat("incomplete",e$message)}))
+# while (length(grep("<?xml",test))==0) {
+# content <- getURLContent(obs_url,.opts=list(timeout.ms=500000))
+# test <- capture.output(tryCatch(xmlTreeParse(content, getDTD=FALSE, useInternalNodes=TRUE),"XMLParserErrorList" = function(e) {cat("incomplete",e$message)}))
+# }
+# doc <- htmlTreeParse(content, getDTD=TRUE, useInternalNodes=TRUE)
+  require(XML)
+  doc <- htmlParse(obs_url)
+
+  timeSeries <- getNodeSet(doc, "//timeseries")
+
+  for (i in 1:length(timeSeries)){
+
+    chunk <- xmlDoc(timeSeries[[i]])
+    chunk <- xmlRoot(chunk)
+    pCode <- as.character(xpathApply(chunk, "variable/variablecode", xmlValue))
+    statCd <- as.character(xpathApply(chunk, "variable/options/option/@optioncode"))
+
+
+    valuesIndex <- as.numeric(which("values" == names(chunk)))
+
+    for (j in valuesIndex){
+      subChunk <- xmlRoot(xmlDoc(chunk[[j]]))
+      methodID <- as.character(xpathSApply(subChunk, "method/@methodid"))
+
+      methodID <- padVariable(methodID,2)
+
+      value <- as.numeric(xpathSApply(subChunk, "value", xmlValue))
+      dateTime <- strptime(xpathSApply(subChunk, "value/@datetime"),"%Y-%m-%dT%H:%M:%S.000")
+      qualifier <- as.character(xpathSApply(subChunk, "value/@qualifiers"))
+
+      valueName <- paste(methodID,pCode,statCd,sep="_")
+      qualName <- paste(methodID,pCode,statCd,"cd",sep="_")
+      valueName <- paste("X",valueName,sep="")
+      qualName <- paste("X",qualName,sep="")
+
+      assign(valueName,value)
+      assign(qualName,qualifier)
+
+      df <- data.frame(dateTime,
+                       get(valueName),
+                       get(qualName)
+                       )
+      names(df) <- c("dateTime",valueName,qualName)
+
+      if (1 == i & valuesIndex[1] == j){
+        mergedDF <- df
+      } else {
+        mergedDF <- merge(mergedDF, df,by="dateTime",all=TRUE)
+      }
+    }
+  }
+
+  agencyCd <- as.character(xpathSApply(timeSeries[[1]], "sourceinfo/sitecode/@agencycode"))
+  siteNo <- as.character(xpathSApply(timeSeries[[1]], "sourceinfo/sitecode", xmlValue))
+
+  mergedDF$agency <- rep(agencyCd, nrow(mergedDF))
+  mergedDF$site <- rep(siteNo, nrow(mergedDF))
+
+  reorder <- c(ncol(mergedDF)-1, ncol(mergedDF), 1:(ncol(mergedDF)-2))
+
+  mergedDF <- mergedDF[,reorder]
+
+  return (mergedDF)
+}
\ No newline at end of file
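This patch exports padVariable() and calls padVariable(methodID,2) above, but padVariable.r itself is not included in the diff. Below is only a minimal sketch of what such a helper could look like, assuming it left-pads a string with zeros to a requested width so method IDs line up in the generated column names; the actual implementation in padVariable.r may differ.

# Hypothetical sketch only -- the real padVariable.r is not shown in this diff.
# Assumption: zero-pad `x` on the left until it is at least `padTo` characters wide.
padVariable <- function(x, padTo){
  numDigits <- nchar(x)
  if (numDigits < padTo) {
    leadingZeros <- paste(rep("0", padTo - numDigits), collapse = "")
    x <- paste(leadingZeros, x, sep = "")
  }
  return(x)
}

# Example (under the assumption above): padVariable("1", 2) would give "01",
# while padVariable("12", 2) would return "12" unchanged.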