diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r index 36508f022e00e0bfed004b0af40a9e8fcc2acb3e..ca19cb455db2ecf0c02fd3a0c2c720c17f7abd98 100644 --- a/R/constructNWISURL.r +++ b/R/constructNWISURL.r @@ -16,7 +16,7 @@ #' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, #' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, #' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". -#' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information, only applicable for qw data. +#' @param expanded logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional information, only applicable for qw data. #' @param ratingType can be "base", "corr", or "exsa". Only applies to rating curve data. #' @keywords data import USGS web service #' @return url string @@ -43,7 +43,7 @@ #' url_meas <- constructNWISURL(siteNumber, service="meas") #' } constructNWISURL <- function(siteNumber,parameterCd="00060",startDate="",endDate="", - service,statCd="00003", format="xml",expanded=FALSE, + service,statCd="00003", format="xml",expanded=TRUE, ratingType="base"){ service <- match.arg(service, c("dv","uv","iv","qw","gwlevels","rating","peak","meas")) diff --git a/R/importRDB1.r b/R/importRDB1.r index c39db0e9e6557cbda8ff6cbb632a69a45da37d7c..b07f23280974214230353ffe7cdad811d911188c 100644 --- a/R/importRDB1.r +++ b/R/importRDB1.r @@ -6,20 +6,20 @@ #' recommended to use the RDB format for importing multi-site data. 
#' #' @param obs_url character containing the url for the retrieval -#' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date -#' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times) +#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date +#' @param qw logical, if \code{TRUE} parses as water quality data (where dates/times are in start and end times) #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", #' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla" -#' @param convertType logical, defaults to TRUE. If TRUE, the function will convert the data to dates, datetimes, +#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates, datetimes, #' numerics based on a standard algorithm. 
If false, everything is returned as a character #' @return A data frame with the following columns: #' \tabular{lll}{ #' Name \tab Type \tab Description \cr #' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr #' site_no \tab character \tab The USGS site number \cr -#' datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = TRUE), \cr +#' datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = \code{TRUE}), \cr #' \tab character \tab or raw character string (if asDateTime = FALSE) \cr #' tz_cd \tab character \tab The time zone code for datetime \cr #' code \tab character \tab Any codes that qualify the corresponding value\cr diff --git a/R/importWaterML1.r b/R/importWaterML1.r index 673b70a6a50583cb6076d6af753762f89f544313..0bd52435127d66f6a9fb5716b125bfa9068de7f9 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -4,7 +4,7 @@ #' NWIS site, parameter code, statistic, startdate and enddate. #' #' @param obs_url character containing the url for the retrieval -#' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date +#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", diff --git a/R/importWaterML2.r b/R/importWaterML2.r index 8879d7710db62ec8bfb3f1a29ea8e550a1667248..3d4c5baf3bef2bd1ed90bee22629717afef1c976 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -4,7 +4,7 @@ #' but the general functionality is correct. 
#' #' @param obs_url character containing the url for the retrieval -#' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date +#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", @@ -42,7 +42,9 @@ #' } importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){ - if(url.exists(obs_url)){ + if(file.exists(obs_url)){ + doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE) + } else { doc = tryCatch({ h <- basicHeaderGatherer() returnedDoc <- getURL(obs_url, headerfunction = h$update) @@ -62,9 +64,7 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){ message(paste("URL does not seem to exist:", obs_url)) message(e) return(NA) - }) - } else { - doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE) + }) } if(tz != ""){ diff --git a/R/readNWISpCode.r b/R/readNWISpCode.r index 416fa19f759c3e52eef228193ce97612dfef87b7..24e8fcdfdbb11e34a6a100b99da2f3933eee4ee5 100644 --- a/R/readNWISpCode.r +++ b/R/readNWISpCode.r @@ -3,7 +3,7 @@ #' Imports data from NWIS about meaured parameter based on user-supplied parameter code. #' This function gets the data from here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes} #' -#' @param parameterCd character of USGS parameter codes. This is usually an 5 digit number. +#' @param parameterCd character of USGS parameter codes (or multiple parameter codes). This is usually an 5 digit number. #' @keywords data import USGS web service #' @return parameterData data frame with all information from the USGS about the particular parameter. 
#' @@ -19,7 +19,6 @@ #' #' @export #' @examples -#' # These examples require an internet connection to run #' paramINFO <- readNWISpCode(c('01075','00060','00931')) readNWISpCode <- function(parameterCd){ diff --git a/R/readNWISqw.r b/R/readNWISqw.r index 9e6229a5e9839f39a7e52832096a2b43b547e013..20b86a2f154f4f01058e57b1b9cf3df3b3e809e8 100644 --- a/R/readNWISqw.r +++ b/R/readNWISqw.r @@ -5,16 +5,18 @@ #' A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} #' #' @param siteNumbers character of USGS site numbers. This is usually an 8 digit number -#' @param pCodes character of USGS parameter code(s). This is usually an 5 digit number. +#' @param parameterCd character of USGS parameter code(s). This is usually an 5 digit number. #' @param startDate character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates #' retrieval for the earliest possible record. #' @param endDate character ending date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates #' retrieval for the latest possible record. -#' @param expanded logical defaults to TRUE. If TRUE, retrieves additional information. Expanded data includes +#' @param expanded logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional information. Expanded data includes #' remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code), -#' dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type). -#' @param reshape logical. Will reshape the data to a wide format if TRUE (default is FALSE). This is only -#' available for 'expanded' data. +#' dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type). If \code{FALSE}, +#' only returns remark_cd (remark code) and result_va (result value). 
Expanded = \code{FALSE} will not give +#' sufficient information for unbiased statistical analysis. +#' @param reshape logical, reshape the data. If \code{TRUE}, then return a wide data frame with all water-quality results in a single row for each sample. +#' If \code{FALSE} (default), then return a long data frame with each water-quality result in a single row. #' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the #' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). #' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", @@ -38,6 +40,8 @@ #' url \tab character \tab The url used to generate the data \cr #' queryTime \tab POSIXct \tab The time the data was returned \cr #' comment \tab character \tab Header comments from the RDB file \cr +#' siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr +#' variableInfo \tab data.frame \tab A data frame containing information on the requested parameters \cr #' } #' @export #' @import reshape2 @@ -47,16 +51,16 @@ #' siteNumbers <- c('04024430','04024000') #' startDate <- '2010-01-01' #' endDate <- '' -#' pCodes <- c('34247','30234','32104','34220') +#' parameterCd <- c('34247','30234','32104','34220') #' -#' rawNWISqwData <- readNWISqw(siteNumbers,pCodes,startDate,endDate) -#' rawNWISqwDataReshaped <- readNWISqw(siteNumbers,pCodes, +#' rawNWISqwData <- readNWISqw(siteNumbers,parameterCd,startDate,endDate) +#' rawNWISqwDataReshaped <- readNWISqw(siteNumbers,parameterCd, #' startDate,endDate,reshape=TRUE) #' -readNWISqw <- function (siteNumbers,pCodes,startDate="",endDate="", +readNWISqw <- function (siteNumbers,parameterCd,startDate="",endDate="", expanded=TRUE,reshape=FALSE,tz=""){ - url <- constructNWISURL(siteNumbers,pCodes,startDate,endDate,"qw",expanded=expanded) + url <- 
constructNWISURL(siteNumbers,parameterCd,startDate,endDate,"qw",expanded=expanded) data <- importRDB1(url,asDateTime=TRUE, qw=TRUE, tz = tz) originalHeader <- comment(data) @@ -70,7 +74,7 @@ readNWISqw <- function (siteNumbers,pCodes,startDate="",endDate="", wideDF <- dcast(longDF, ... ~ variable + parm_cd ) wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x)) - groupByPCode <- as.vector(sapply(pCodes, function(x) grep(x, names(wideDF)) )) + groupByPCode <- as.vector(sapply(parameterCd, function(x) grep(x, names(wideDF)) )) data <- wideDF[,c(1:length(columnsToMelt)-1,groupByPCode)] comment(data) <- originalHeader @@ -81,7 +85,7 @@ readNWISqw <- function (siteNumbers,pCodes,startDate="",endDate="", } siteInfo <- readNWISsite(siteNumbers) - varInfo <- readNWISpCode(pCodes) + varInfo <- readNWISpCode(parameterCd) attr(data, "siteInfo") <- siteInfo attr(data, "variableInfo") <- varInfo diff --git a/R/readNWISsite.r b/R/readNWISsite.r index aa69f3dfad960fa897ce045a094faa438b4b477d..cd29868abd294ad8e2061d24b820f881eef0ef3b 100644 --- a/R/readNWISsite.r +++ b/R/readNWISsite.r @@ -2,7 +2,7 @@ #' #' Imports data from USGS site file site. This function gets data from here: \url{http://waterservices.usgs.gov/} #' -#' @param siteNumbers character USGS site number. This is usually an 8 digit number +#' @param siteNumbers character USGS site number (or multiple sites). 
This is usually an 8 digit number #' @keywords data import USGS web service #' @return A data frame with at least the following columns: #' \tabular{lll}{ diff --git a/R/readNWISunit.r b/R/readNWISunit.r index 5f27695e44837fa318f641d2c3bbcf0b533f6d20..99203ea4da63412cd43d57bb8f281d78446405a3 100644 --- a/R/readNWISunit.r +++ b/R/readNWISunit.r @@ -107,14 +107,14 @@ readNWISuv <- function (siteNumbers,parameterCd,startDate="",endDate="", tz=""){ readNWISpeak <- function (siteNumbers,startDate="",endDate=""){ # Doesn't seem to be a peak xml service - url <- constructNWISURL(siteNumber,NA,startDate,endDate,"peak") + url <- constructNWISURL(siteNumbers,NA,startDate,endDate,"peak") data <- importRDB1(url, asDateTime=FALSE) data$peak_dt <- as.Date(data$peak_dt) data$gage_ht <- as.numeric(data$gage_ht) - siteInfo <- readNWISsite(siteNumber) + siteInfo <- readNWISsite(siteNumbers) attr(data, "siteInfo") <- siteInfo attr(data, "variableInfo") <- NULL diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf index 7a64272609718a7c11616097f9a2b1b33e2ecd77..b4c14374c26f784d7b7c4c382b1baf63407846dd 100644 Binary files a/inst/doc/dataRetrieval.pdf and b/inst/doc/dataRetrieval.pdf differ diff --git a/man/constructNWISURL.Rd b/man/constructNWISURL.Rd index 8f39fcb721a8f221bd45bf64e7a2b75f04baa904..100f4eaaafabd1fde6f67c4c4b1cf511122bd0d9 100644 --- a/man/constructNWISURL.Rd +++ b/man/constructNWISURL.Rd @@ -5,7 +5,7 @@ \usage{ constructNWISURL(siteNumber, parameterCd = "00060", startDate = "", endDate = "", service, statCd = "00003", format = "xml", - expanded = FALSE, ratingType = "base") + expanded = TRUE, ratingType = "base") } \arguments{ \item{siteNumber}{string or vector of strings USGS site number. This is usually an 8 digit number} @@ -27,7 +27,7 @@ retrieval for the latest possible record.} but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). 
It is possible to safely use the "tsv" option, but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml".} -\item{expanded}{logical defaults to FALSE. If TRUE, retrieves additional information, only applicable for qw data.} +\item{expanded}{logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional information, only applicable for qw data.} \item{ratingType}{can be "base", "corr", or "exsa". Only applies to rating curve data.} } diff --git a/man/importRDB1.Rd b/man/importRDB1.Rd index 0812b312ab375e7dc8e7b76aa0fb00fc195e1ed0..5726875b9c16a3449e29d1be8710a4b0679f8a3f 100644 --- a/man/importRDB1.Rd +++ b/man/importRDB1.Rd @@ -9,11 +9,11 @@ importRDB1(obs_url, asDateTime = FALSE, qw = FALSE, convertType = TRUE, \arguments{ \item{obs_url}{character containing the url for the retrieval} -\item{asDateTime}{logical, if TRUE returns date and time as POSIXct, if FALSE, Date} +\item{asDateTime}{logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date} -\item{qw}{logical, if TRUE parses as water quality data (where dates/times are in start and end times)} +\item{qw}{logical, if \code{TRUE} parses as water quality data (where dates/times are in start and end times)} -\item{convertType}{logical, defaults to TRUE. If TRUE, the function will convert the data to dates, datetimes, +\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates, datetimes, numerics based on a standard algorithm. If false, everything is returned as a character} \item{tz}{character to set timezone attribute of datetime. 
Default is an empty quote, which converts the @@ -27,7 +27,7 @@ A data frame with the following columns: Name \tab Type \tab Description \cr agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr site_no \tab character \tab The USGS site number \cr -datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = TRUE), \cr +datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = \code{TRUE}), \cr \tab character \tab or raw character string (if asDateTime = FALSE) \cr tz_cd \tab character \tab The time zone code for datetime \cr code \tab character \tab Any codes that qualify the corresponding value\cr diff --git a/man/importWaterML1.Rd b/man/importWaterML1.Rd index 6ca6214eefdbb120be851aa291f56ce3077e60b2..8f4d2a59a1b24b73c9f4c9529c8bcaddbb0f487b 100644 --- a/man/importWaterML1.Rd +++ b/man/importWaterML1.Rd @@ -8,7 +8,7 @@ importWaterML1(obs_url, asDateTime = FALSE, tz = "") \arguments{ \item{obs_url}{character containing the url for the retrieval} -\item{asDateTime}{logical, if TRUE returns date and time as POSIXct, if FALSE, Date} +\item{asDateTime}{logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date} \item{tz}{character to set timezone attribute of datetime. Default is an empty quote, which converts the datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). 
diff --git a/man/importWaterML2.Rd b/man/importWaterML2.Rd index 9fca77fbe3efab41824e2928eba59ec78901cee2..44c9bf6e8e925032c422b737b2db83c317dd0299 100644 --- a/man/importWaterML2.Rd +++ b/man/importWaterML2.Rd @@ -8,7 +8,7 @@ importWaterML2(obs_url, asDateTime = FALSE, tz = "") \arguments{ \item{obs_url}{character containing the url for the retrieval} -\item{asDateTime}{logical, if TRUE returns date and time as POSIXct, if FALSE, Date} +\item{asDateTime}{logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date} \item{tz}{character to set timezone attribute of datetime. Default is an empty quote, which converts the datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). diff --git a/man/readNWISpCode.Rd b/man/readNWISpCode.Rd index 4025c6b5d7b45ae31c52b72be30f0e67a83b948c..a5c44f97cdbf437916d2b1396e2ac72d44cd9243 100644 --- a/man/readNWISpCode.Rd +++ b/man/readNWISpCode.Rd @@ -6,7 +6,7 @@ readNWISpCode(parameterCd) } \arguments{ -\item{parameterCd}{character of USGS parameter codes. This is usually an 5 digit number.} +\item{parameterCd}{character of USGS parameter codes (or multiple parameter codes). This is usually an 5 digit number.} } \value{ parameterData data frame with all information from the USGS about the particular parameter. 
@@ -26,7 +26,6 @@ Imports data from NWIS about meaured parameter based on user-supplied parameter This function gets the data from here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes} } \examples{ -# These examples require an internet connection to run paramINFO <- readNWISpCode(c('01075','00060','00931')) } \keyword{USGS} diff --git a/man/readNWISqw.Rd b/man/readNWISqw.Rd index b63ee7b7dc196bbc588bd3af5bfb05522a4e1eb5..116e38cff996c4b1411865997ca554e490d01693 100644 --- a/man/readNWISqw.Rd +++ b/man/readNWISqw.Rd @@ -3,13 +3,13 @@ \alias{readNWISqw} \title{Raw Data Import for USGS NWIS QW Data} \usage{ -readNWISqw(siteNumbers, pCodes, startDate = "", endDate = "", +readNWISqw(siteNumbers, parameterCd, startDate = "", endDate = "", expanded = TRUE, reshape = FALSE, tz = "") } \arguments{ \item{siteNumbers}{character of USGS site numbers. This is usually an 8 digit number} -\item{pCodes}{character of USGS parameter code(s). This is usually an 5 digit number.} +\item{parameterCd}{character of USGS parameter code(s). This is usually an 5 digit number.} \item{startDate}{character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates retrieval for the earliest possible record.} @@ -17,12 +17,14 @@ retrieval for the earliest possible record.} \item{endDate}{character ending date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates retrieval for the latest possible record.} -\item{expanded}{logical defaults to TRUE. If TRUE, retrieves additional information. Expanded data includes +\item{expanded}{logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional information. 
Expanded data includes remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code), -dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type).} +dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type). If \code{FALSE}, +only returns remark_cd (remark code) and result_va (result value). Expanded = \code{FALSE} will not give +sufficient information for unbiased statistical analysis.} -\item{reshape}{logical. Will reshape the data to a wide format if TRUE (default is FALSE). This is only -available for 'expanded' data.} +\item{reshape}{logical, reshape the data. If \code{TRUE}, then return a wide data frame with all water-quality results in a single row for each sample. +If \code{FALSE} (default), then return a long data frame with each water-quality result in a single row.} \item{tz}{character to set timezone attribute of datetime. Default is an empty quote, which converts the datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column). 
@@ -48,6 +50,8 @@ Name \tab Type \tab Description \cr url \tab character \tab The url used to generate the data \cr queryTime \tab POSIXct \tab The time the data was returned \cr comment \tab character \tab Header comments from the RDB file \cr +siteInfo \tab data.frame \tab A data frame containing information on the requested sites \cr +variableInfo \tab data.frame \tab A data frame containing information on the requested parameters \cr } } \description{ @@ -59,10 +63,10 @@ A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov siteNumbers <- c('04024430','04024000') startDate <- '2010-01-01' endDate <- '' -pCodes <- c('34247','30234','32104','34220') +parameterCd <- c('34247','30234','32104','34220') -rawNWISqwData <- readNWISqw(siteNumbers,pCodes,startDate,endDate) -rawNWISqwDataReshaped <- readNWISqw(siteNumbers,pCodes, +rawNWISqwData <- readNWISqw(siteNumbers,parameterCd,startDate,endDate) +rawNWISqwDataReshaped <- readNWISqw(siteNumbers,parameterCd, startDate,endDate,reshape=TRUE) } \seealso{ diff --git a/man/readNWISsite.Rd b/man/readNWISsite.Rd index e740573e6ba7fc41f473c77037dc5ff425e011a4..8802bcd84bc2256931467ad5312a53c9c59009df 100644 --- a/man/readNWISsite.Rd +++ b/man/readNWISsite.Rd @@ -6,7 +6,7 @@ readNWISsite(siteNumbers) } \arguments{ -\item{siteNumbers}{character USGS site number. This is usually an 8 digit number} +\item{siteNumbers}{character USGS site number (or multiple sites). This is usually an 8 digit number} } \value{ A data frame with at least the following columns: diff --git a/vignettes/figure/getNWIStemperaturePlot-1.pdf b/vignettes/figure/getNWIStemperaturePlot-1.pdf index 690d55aeb71ae546305160ead49a81e76e8f87f4..579e593554b281fe8a4012fa060bc46b8f112444 100644 Binary files a/vignettes/figure/getNWIStemperaturePlot-1.pdf and b/vignettes/figure/getNWIStemperaturePlot-1.pdf differ