diff --git a/DESCRIPTION b/DESCRIPTION index 30925fcbb8a41824dbd0c09758037f9d16116c8c..b2c8a69f22f63a5ad12640fc331ec8b58acaaf18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: dataRetrieval Type: Package Title: Retrieval functions for hydrologic data Version: 1.3.3 -Date: 2014-08-04 +Date: 2014-09-12 Author: Robert M. Hirsch, Laura De Cicco Maintainer: Laura De Cicco <ldecicco@usgs.gov> Description: Collection of functions to help retrieve USGS data from either web diff --git a/NAMESPACE b/NAMESPACE index ae86ab235dd98080bc95ce3978f1aa0f677a25ee..f1d2b6d2a24a646aa5a176b6dfe5be2b2e29933b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2 (4.0.2): do not edit by hand +export(basicWQPData) export(checkStartEndDate) export(compressData) export(constructNWISURL) diff --git a/R/basicWQPData.R b/R/basicWQPData.R new file mode 100644 index 0000000000000000000000000000000000000000..2e2e13158c5ea8026235b34579162c52d13da58a --- /dev/null +++ b/R/basicWQPData.R @@ -0,0 +1,139 @@
+#' Basic Water Quality Portal Data grabber
+#'
+#' Imports data from the Water Quality Portal based on a specified url.
+#'
+#' The response is parsed only when its Content-Type header is exactly
+#' \code{text/tab-separated-values;charset=UTF-8} and its Total-Result-Count
+#' header is a non-zero number; in every other case \code{NA} is returned.
+#'
+#' @param url string URL to Water Quality Portal
+#' @keywords data import USGS web service
+#' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for
+#' start and end times. If the rows use several different time zone codes that are all recognised, these columns are shown in UTC.
+#' \code{NA} is returned when the request fails or there is no data to retrieve.
+#' @export
+#' @import RCurl
+#' @examples
+#' # These examples require an internet connection to run
+#' rawSampleURL <- constructNWISURL('USGS-01594440','01075', '1985-01-01', '1985-03-31',"wqp")
+#' rawSample <- basicWQPData(rawSampleURL)
+basicWQPData <- function(url){
+
+  h <- basicHeaderGatherer()
+
+  # tryCatch() returns the value of the handler that ran, so a failed request
+  # is turned into NULL here and checked below (return() inside a handler only
+  # leaves the handler, not basicWQPData()).
+  doc <- tryCatch({
+    getURL(url, headerfunction = h$update)
+  }, warning = function(w) {
+    message(paste("URL caused a warning:", url))
+    message(conditionMessage(w))
+    NULL
+  }, error = function(e) {
+    message(paste("URL does not seem to exist:", url))
+    message(conditionMessage(e))
+    NULL
+  })
+
+  if(is.null(doc)){
+    return(NA)
+  }
+
+  # isTRUE() keeps the exact-match test but treats a missing header as a
+  # mismatch instead of handing NA to if().
+  contentType <- unname(h$value()["Content-Type"])
+  if(!isTRUE(contentType == "text/tab-separated-values;charset=UTF-8")){
+    message(paste("URL caused an error:", url))
+    message("Content-Type=", contentType)
+    return(NA)
+  }
+
+  numToBeReturned <- as.numeric(h$value()["Total-Result-Count"])
+
+  # A missing count and a zero count both mean there is nothing to parse.
+  if(is.na(numToBeReturned) || numToBeReturned == 0){
+    warning("No data to retrieve")
+    return(NA)
+  }
+
+  # Every read gets its own text connection, closed as soon as the read ends.
+  readPortalText <- function(...){
+    con <- textConnection(doc)
+    on.exit(close(con))
+    read.delim(con, header = TRUE, quote="\"", dec=".", sep='\t', fill = TRUE, ...)
+  }
+
+  # First pass: read one row as text, only to learn the column names.
+  namesData <- readPortalText(colClasses='character', nrows=1)
+  classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
+  # Guarded so that a response without this column cannot make the vector
+  # longer than the number of columns.
+  if("ActivityStartDate" %in% names(classColumns)){
+    classColumns["ActivityStartDate"] <- "Date"
+  }
+  # NA leaves the type of the measurement columns for read.delim() to decide.
+  classColumns[grep("MeasureValue",names(classColumns))] <- NA
+
+  retval <- readPortalText(colClasses=as.character(classColumns))
+  actualNumReturned <- nrow(retval)
+
+  if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
+
+  timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago",
+                                "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles",
+                                "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"),
+                              c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
+
+  # Build one POSIXct vector from date, time and time zone code columns.
+  # Returns NULL when no row has a recognised code, so no column is added.
+  toDateTime <- function(dates, times, timeZoneCodes){
+    timeZones <- as.character(timeZoneLibrary[timeZoneCodes])
+    if(!any(!is.na(timeZones))){
+      return(NULL)
+    }
+
+    stamps <- paste(dates, times)
+    stampFormat <- "%Y-%m-%d %H:%M:%S"
+
+    if(length(unique(timeZones)) == 1){
+      return(as.POSIXct(stamps, format=stampFormat, tz=unique(timeZones)))
+    }
+
+    warning("Mixed time zone information")
+    if(any(is.na(timeZones))){
+      warning("Missing time zone information, all dateTimes default to user's local time")
+      # tz="" is how as.POSIXct() spells the current (local) time zone.
+      return(as.POSIXct(stamps, format=stampFormat, tz=""))
+    }
+
+    # A POSIXct vector carries a single tzone attribute, so the rows of each
+    # zone are parsed in that zone and the resulting instants are shown in UTC.
+    dateTimes <- as.POSIXct(stamps, format=stampFormat, tz="UTC")
+    for(zone in unique(timeZones)){
+      inZone <- timeZones == zone
+      dateTimes[inZone] <- as.POSIXct(stamps[inZone], format=stampFormat, tz=zone)
+    }
+    dateTimes
+  }
+
+  startDateTime <- toDateTime(retval$ActivityStartDate, retval$ActivityStartTime.Time,
+                              retval$ActivityStartTime.TimeZoneCode)
+  if(!is.null(startDateTime)){
+    retval$ActivityStartDateTime <- startDateTime
+  }
+
+  endDateTime <- toDateTime(retval$ActivityEndDate, retval$ActivityEndTime.Time,
+                            retval$ActivityEndTime.TimeZoneCode)
+  if(!is.null(endDateTime)){
+    retval$ActivityEndDateTime <- endDateTime
+  }
+
+  # An explicit format turns blank entries into NA rather than relying on
+  # as.Date() to guess a format from the values.
+  if(any(retval$ActivityEndDate != "", na.rm = TRUE)){
+    retval$ActivityEndDate <- as.Date(retval$ActivityEndDate, format="%Y-%m-%d")
+  }
+
+  return(retval)
+}
\ No newline at end of file
diff --git a/R/getGeneralWQPData.R b/R/getGeneralWQPData.R index 3e7eb95f47316200c8cf590a0a79a4f1ed01bcf0..4faf41f2c250cf3e906b59b797fecb8d151b30a2 100644 --- a/R/getGeneralWQPData.R +++ b/R/getGeneralWQPData.R @@ -30,45 +30,7 @@ getGeneralWQPData <- function(...){ urlCall <- paste(baseURL, urlCall, "&mimeType=tsv",sep = "") - - doc = tryCatch({ - h <- basicHeaderGatherer() - doc <- getURL(urlCall, headerfunction = h$update) - - }, warning = function(w) { - message(paste("URL caused a warning:", urlCall)) - message(w) - }, error = function(e) { - message(paste("URL does not seem to exist:", urlCall)) - message(e) - return(NA) - }) - - if(h$value()["Content-Type"] == "text/tab-separated-values;charset=UTF-8"){ - - numToBeReturned <- as.numeric(h$value()["Total-Result-Count"]) - - if (!is.na(numToBeReturned) | numToBeReturned != 0){ - retval <- read.delim(textConnection(doc), header = TRUE, quote="\"", - dec=".", sep='\t', - colClasses=c('character'), - fill = TRUE) - - actualNumReturned <- nrow(retval) - - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") - - return(retval) - } else { - warning(paste("No data to retrieve from",urlCall)) - return(NA) - } - - } else { - message(paste("URL caused an error:", urlCall)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } - + retVal <- basicWQPData(urlCall) + return(retVal) } \ No newline at end of file diff --git a/R/getRawQWData.r b/R/getRawQWData.r index 3b748d0bf2a6cb0b489cc10ed9477c047ba4820d..a491059d3a909188ab2980205bb766376c2070f3 100644 --- a/R/getRawQWData.r +++ b/R/getRawQWData.r @@ -1,17 +1,16 @@ -#' Raw Data Import for USGS NWIS Water Quality Data +#' Raw Data Import for Water Quality Portal #' -#' Imports data from NWIS web service.
This function gets the data from here: \url{http://www.waterqualitydata.us} -#' A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/} -#' A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} +#' Imports data from the Water Quality Portal. This function gets the data from here: \url{http://www.waterqualitydata.us} #' -#' @param siteNumber string USGS site number. This is usually an 8 digit number -#' @param parameterCd vector of USGS 5-digit parameter code or string of characteristicNames. Leaving this blank will return all of the measured values during the specified time period. +#' @param siteNumber string site number. This needs to include the full agency code prefix. +#' @param parameterCd vector of USGS 5-digit parameter code or string of characteristicNames. +#' Leaving this blank will return all of the measured values during the specified time period. #' @param startDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD. #' @param interactive logical Option for interactive mode. If true, there is user interaction for error handling and data checks. #' @keywords data import USGS web service -#' @return retval dataframe with first column dateTime, and at least one qualifier and value columns -#' (subsequent qualifier/value columns could follow depending on requested parameter codes) +#' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +#' start and end times. 
#' @export #' @import RCurl #' @examples @@ -23,44 +22,7 @@ retrieveWQPqwData <- function(siteNumber,parameterCd,startDate,endDate,interactive=TRUE){ url <- constructNWISURL(siteNumber,parameterCd,startDate,endDate,"wqp",interactive=interactive) - - retval = tryCatch({ - h <- basicHeaderGatherer() - doc <- getURL(url, headerfunction = h$update) - - }, warning = function(w) { - message(paste("URL caused a warning:", url)) - message(w) - }, error = function(e) { - message(paste("URL does not seem to exist:", url)) - message(e) - return(NA) - }) - - if(h$value()["Content-Type"] == "text/tab-separated-values;charset=UTF-8"){ - - numToBeReturned <- as.numeric(h$value()["Total-Result-Count"]) - - if (!is.na(numToBeReturned) | numToBeReturned != 0){ - - retval <- read.delim(textConnection(doc), header = TRUE, quote="\"", - dec=".", sep='\t', - colClasses=c('character'), - fill = TRUE) - actualNumReturned <- nrow(retval) - - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") - - return(retval) - - } else { - warning("No data to retrieve") - return(NA) - } - } else { - message(paste("URL caused an error:", url)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } + retVal <- basicWQPData(url) + return(retVal) } diff --git a/man/basicWQPData.Rd b/man/basicWQPData.Rd new file mode 100644 index 0000000000000000000000000000000000000000..d7a1768157e0ff9094822ed6de9e8454db8f551d --- /dev/null +++ b/man/basicWQPData.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{basicWQPData} +\alias{basicWQPData} +\title{Basic Water Quality Portal Data grabber} +\usage{ +basicWQPData(url) +} +\arguments{ +\item{url}{string URL to Water Quality Portal} +} +\value{ +retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +start and end times. 
+} +\description{ +Imports data from the Water Quality Portal based on a specified url. +} +\examples{ +# These examples require an internet connection to run +rawSampleURL <- constructNWISURL('USGS-01594440','01075', '1985-01-01', '1985-03-31',"wqp") +rawSample <- basicWQPData(rawSampleURL) +} +\keyword{USGS} +\keyword{data} +\keyword{import} +\keyword{service} +\keyword{web} + diff --git a/man/retrieveWQPqwData.Rd b/man/retrieveWQPqwData.Rd index 79b0c0ce34dc4d7c02a7b4c9c89a42ba46e16888..e79341e04b5daae51c77f0178278bec9fcf3ed9b 100644 --- a/man/retrieveWQPqwData.Rd +++ b/man/retrieveWQPqwData.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2 (4.0.2): do not edit by hand \name{retrieveWQPqwData} \alias{retrieveWQPqwData} -\title{Raw Data Import for USGS NWIS Water Quality Data} +\title{Raw Data Import for Water Quality Portal} \usage{ retrieveWQPqwData(siteNumber, parameterCd, startDate, endDate, interactive = TRUE) } \arguments{ -\item{siteNumber}{string USGS site number. This is usually an 8 digit number} +\item{siteNumber}{string site number. This needs to include the full agency code prefix.} -\item{parameterCd}{vector of USGS 5-digit parameter code or string of characteristicNames. Leaving this blank will return all of the measured values during the specified time period.} +\item{parameterCd}{vector of USGS 5-digit parameter code or string of characteristicNames. +Leaving this blank will return all of the measured values during the specified time period.} \item{startDate}{string starting date for data retrieval in the form YYYY-MM-DD.} @@ -18,13 +19,11 @@ retrieveWQPqwData(siteNumber, parameterCd, startDate, endDate, \item{interactive}{logical Option for interactive mode. 
If true, there is user interaction for error handling and data checks.} } \value{ -retval dataframe with first column dateTime, and at least one qualifier and value columns -(subsequent qualifier/value columns could follow depending on requested parameter codes) +retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +start and end times. } \description{ -Imports data from NWIS web service. This function gets the data from here: \url{http://www.waterqualitydata.us} -A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/} -A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} +Imports data from the Water Quality Portal. This function gets the data from here: \url{http://www.waterqualitydata.us} } \examples{ # These examples require an internet connection to run