diff --git a/DESCRIPTION b/DESCRIPTION index f566b42c673a5cfa96b3a46338ab526c69cf6d91..cd5153936e7834e1ffe6936af8609d9a4266f59c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,6 +54,7 @@ Collate: 'getMultipleParameterNames.r' 'getWaterML1Data.r' 'padVariable.r' + 'getRDB1Data.r' Depends: R (>= 2.15.0), XML, diff --git a/NAMESPACE b/NAMESPACE index db66c1f70f3e20743ef45607a28e0ba719337c2a..35c26203cfff925cc0361a8ce0e9d8701fc96eb8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,10 @@ export(dateFormatCheck) export(formatCheckDate) export(formatCheckParameterCd) export(formatCheckSiteNumber) -export(getDVData) export(getDailyDataFromFile) export(getDataAvailability) export(getDataFromFile) +export(getDVData) export(getMetaData) export(getMultipleParameterNames) export(getParameterInfo) @@ -19,11 +19,12 @@ export(getPreLoadedSampleData) export(getQWData) export(getQWDataFromFile) export(getRawQWData) +export(getRDB1Data) export(getSampleData) export(getSampleDataFromFile) export(getSiteFileData) -export(getWQPData) export(getWaterML1Data) +export(getWQPData) export(mergeReport) export(padVariable) export(populateConcentrations) diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r index c0c7f5c2473f22961ab305817e8a031ab575edc7..028e0541bfb1e0e713a413921b35e91cab5dad61 100644 --- a/R/constructNWISURL.r +++ b/R/constructNWISURL.r @@ -27,6 +27,7 @@ #' url_qw_single <- constructNWISURL(siteNumber,"01075",startDate,endDate,'qw') #' url_qw <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'qw') #' url_wqp <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'wqp') +#' url_daily_tsv <- constructNWISURL(siteNumber,pCode,startDate,endDate,'dv',statCd=c("00003","00001"),format="tsv") constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003", format="xml",interactive=FALSE){ startDate <- formatCheckDate(startDate, "StartDate", interactive=interactive) @@ -113,23 +114,32 @@ constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,st } if ("uv"==service) service <- "iv" + + if ("xml"==format){ + format <- "waterml,1.1" + } else if ("tsv" == format){ + format <- "rdb,1.0" + } else { + warning("non-supported format requested, please choose xml or tsv") + } baseURL <- paste("http://waterservices.usgs.gov/nwis/",service,sep="") - url <- paste(baseURL,"/?site=",siteNumber, "&ParameterCd=",parameterCd, "&format=waterml,1.1", sep = "") + url <- paste(baseURL,"/?site=",siteNumber, "&ParameterCd=",parameterCd, "&format=", format, sep = "") if("dv"==service) { if(length(statCd) > 1){ statCd <- paste(statCd, collapse=",") } - url <- paste(url, "&StatCd=", statCd, sep = "") } if (nzchar(startDate)) { url <- paste(url,"&startDT=",startDate,sep="") } else { - url <- paste(url,"&startDT=","1900-01-01",sep="") + startorgin <- "1850-01-01" + if ("uv" == service) startorgin <- "1900-01-01" + url <- paste(url,"&startDT=",startorgin,sep="") } if (nzchar(endDate)) { diff --git a/R/getRDB1Data.r b/R/getRDB1Data.r new file mode 100644 index 0000000000000000000000000000000000000000..6b94831b313997a84e32da05f319c02d614a85af --- /dev/null +++ b/R/getRDB1Data.r @@ -0,0 +1,45 @@ +#' Function to return data from the NWIS RDB 1.0 format +#' +#' This function accepts a url parameter that already contains the desired +#' NWIS site, parameter code, statistic, startdate and enddate. +#' +#' @param obs_url string containing the url for the retrieval +#' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date +#' @return data a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations +#' @export +#' @examples +#' sites <- "02177000" +#' startDate <- "2012-09-01" +#' endDate <- "2012-10-01" +#' offering <- '00003' +#' property <- '00060' +#' obs_url <- constructNWISURL(sites,property,startDate,endDate,'dv',format='tsv') +#' data <- getRDB1Data(obs_url) +#' urlMulti <- constructNWISURL("04085427",c("00060","00010"),startDate,endDate,'dv',statCd=c("00003","00001"),'tsv') +#' multiData <- getRDB1Data(urlMulti) +getRDB1Data <- function(obs_url,asDateTime=FALSE){ + tmp <- read.delim( + obs_url, + header = TRUE, + quote="\"", + dec=".", + sep='\t', + colClasses=c('character'), + fill = TRUE, + comment.char="#") + + dataType <- tmp[1,] + data <- tmp[-1,] + + if (asDateTime){ + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(strptime(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M")) + } else { + data[,regexpr('d$', dataType) > 0] <- as.Date(data[,regexpr('d$', dataType) > 0]) + } + + tempDF <- data[,which(regexpr('n$', dataType) > 0)] + tempDF <- suppressWarnings(sapply(tempDF, function(x) as.numeric(x))) + data[,which(regexpr('n$', dataType) > 0)] <- tempDF + row.names(data) <- NULL + return(data) +} \ No newline at end of file diff --git a/R/getWaterML1Data.r b/R/getWaterML1Data.r index 2c07b40b4171562a21539d59c6a5eab9bb9bf21c..9f79f295975ddc2b8e9a06ae93ecb143fdf544f4 100644 --- a/R/getWaterML1Data.r +++ b/R/getWaterML1Data.r @@ -19,7 +19,7 @@ #' multiData <- getWaterML1Data(urlMulti) getWaterML1Data <- function(obs_url){ - # This is more elegent, but requires yet another package dependency RCurl + # This is more elegent, but requires yet another package dependency RCurl...which I now require for wqp # content <- getURLContent(obs_url,.opts=list(timeout.ms=500000)) # test <- capture.output(tryCatch(xmlTreeParse(content, getDTD=FALSE, useInternalNodes=TRUE),"XMLParserErrorList" = function(e) {cat("incomplete",e$message)})) # while (length(grep("<?xml",test))==0) { diff --git a/R/retrieveNWISData.r b/R/retrieveNWISData.r index 3f2447b172509fa593bb5a7e9e1eb948e3c77ffc..d4ca7a970486f20ee9f05d2843a8428b6ea782d7 100644 --- a/R/retrieveNWISData.r +++ b/R/retrieveNWISData.r @@ -9,6 +9,9 @@ #' @param StartDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param EndDate string ending date for data retrieval in the form YYYY-MM-DD. #' @param StatCd string USGS statistic code. This is usually 5 digits. Daily mean (00003) is the default. +#' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, +#' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, +#' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". #' @param interactive logical Option for interactive mode. If true, there is user interaction for error handling and data checks. #' @keywords data import USGS web service #' @return data dataframe with agency, site, dateTime, value, and code columns @@ -21,12 +24,18 @@ #' pCode <- "00060" #' rawDailyQ <- retrieveNWISData(siteNumber,pCode, startDate, endDate) #' rawDailyTemperature <- retrieveNWISData(siteNumber,'00010', startDate, endDate, StatCd='00001',interactive=FALSE) +#' rawDailyTemperatureTSV <- retrieveNWISData(siteNumber,'00010', startDate, endDate, StatCd='00001',format="tsv",interactive=FALSE) #' rawDailyQAndTempMeanMax <- retrieveNWISData(siteNumber,c('00010','00060'), startDate, endDate, StatCd=c('00001','00003'), interactive=FALSE) -retrieveNWISData <- function (siteNumber,ParameterCd,StartDate,EndDate,StatCd="00003",interactive=TRUE){ +retrieveNWISData <- function (siteNumber,ParameterCd,StartDate,EndDate,StatCd="00003",format="xml",interactive=TRUE){ + + url <- constructNWISURL(siteNumber,ParameterCd,StartDate,EndDate,"dv",statCd=StatCd,format=format) + + if (format == "xml") { + data <- getWaterML1Data(url) + data$dateTime <- as.Date(data$dateTime) + } else { + data <- getRDB1Data(url,asDateTime=FALSE) + } - url <- constructNWISURL(siteNumber,ParameterCd,StartDate,EndDate,"dv",StatCd) - data <- getWaterML1Data(url) - data$dateTime <- as.Date(data$dateTime) - return (data) } \ No newline at end of file diff --git a/R/retrieveUnitNWISData.r b/R/retrieveUnitNWISData.r index 536fa14a45c4188a0ca3936ffcbb65711a53cee3..591e472f5f4fa025cc2267a05bcd6fdfd3efe196 100644 --- a/R/retrieveUnitNWISData.r +++ b/R/retrieveUnitNWISData.r @@ -9,6 +9,9 @@ #' @param StartDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param EndDate string ending date for data retrieval in the form YYYY-MM-DD. #' @param interactive logical Option for interactive mode. If true, there is user interaction for error handling and data checks. +#' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, +#' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, +#' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". #' @keywords data import USGS web service #' @return data dataframe with agency, site, dateTime, time zone, value, and code columns #' @export @@ -19,10 +22,15 @@ #' EndDate <- as.character(Sys.Date()) #' # These examples require an internet connection to run #' rawData <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,interactive=FALSE) -retrieveUnitNWISData <- function (siteNumber,ParameterCd,StartDate,EndDate,interactive=TRUE){ +#' rawData2 <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,"tsv",interactive=FALSE) +retrieveUnitNWISData <- function (siteNumber,ParameterCd,StartDate,EndDate,format="xml",interactive=TRUE){ - url <- constructNWISURL(siteNumber,ParameterCd,StartDate,EndDate,"uv") - data <- getWaterML1Data(url) + url <- constructNWISURL(siteNumber,ParameterCd,StartDate,EndDate,"uv",format=format) + if (format == "xml") { + data <- getWaterML1Data(url) + } else { + data <- getRDB1Data(url,asDateTime=TRUE) + } return (data) } \ No newline at end of file