From 84c91b456502d814a6c05afee278cea70717bb34 Mon Sep 17 00:00:00 2001 From: unknown <ldecicco@usgs.gov> Date: Thu, 11 Sep 2014 16:02:17 -0500 Subject: [PATCH] Added dates and date/times to raw wqp data pulls. --- DESCRIPTION | 2 +- NAMESPACE | 1 + R/basicWQPData.R | 109 +++++++++++++++++++++++++++++++++++++++ R/getGeneralWQPData.R | 42 +-------------- R/getRawQWData.r | 56 ++++---------------- man/basicWQPData.Rd | 28 ++++++++++ man/retrieveWQPqwData.Rd | 15 +++--- 7 files changed, 157 insertions(+), 96 deletions(-) create mode 100644 R/basicWQPData.R create mode 100644 man/basicWQPData.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 30925fcb..b2c8a69f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: dataRetrieval Type: Package Title: Retrieval functions for hydrologic data Version: 1.3.3 -Date: 2014-08-04 +Date: 2014-09-12 Author: Robert M. Hirsch, Laura De Cicco Maintainer: Laura De Cicco <ldecicco@usgs.gov> Description: Collection of functions to help retrieve USGS data from either web diff --git a/NAMESPACE b/NAMESPACE index ae86ab23..f1d2b6d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2 (4.0.2): do not edit by hand +export(basicWQPData) export(checkStartEndDate) export(compressData) export(constructNWISURL) diff --git a/R/basicWQPData.R b/R/basicWQPData.R new file mode 100644 index 00000000..2e2e1315 --- /dev/null +++ b/R/basicWQPData.R @@ -0,0 +1,109 @@ +#' Basic Water Quality Portal Data grabber +#' +#' Imports data from the Water Quality Portal based on a specified url. +#' +#' @param url string URL to Water Quality Portal#' @keywords data import USGS web service +#' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +#' start and end times. 
+#' @export
+#' @import RCurl
+#' @examples
+#' # These examples require an internet connection to run
+#' rawSampleURL <- constructNWISURL('USGS-01594440','01075', '1985-01-01', '1985-03-31',"wqp")
+#' rawSample <- basicWQPData(rawSampleURL)
+basicWQPData <- function(url){
+  tsvType <- "text/tab-separated-values;charset=UTF-8"
+  stampFormat <- "%Y-%m-%d %H:%M:%S"
+  # Time zone codes found in the returned data, mapped to the Olson names passed to as.POSIXct.
+  timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago",
+                                "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles",
+                                "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"),
+                              c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
+
+  # Helper: combine date, time and time zone code columns into one POSIXct vector.
+  # Returns NULL when no row has a recognised time zone code, so no column gets added.
+  toDateTime <- function(dates, times, codes){
+    zones <- unname(timeZoneLibrary[as.character(codes)])
+    if(all(is.na(zones))) return(NULL)
+    stamps <- paste(dates, times)
+    zoneSet <- unique(zones)
+    if(length(zoneSet) == 1){
+      return(as.POSIXct(stamps, format = stampFormat, tz = zoneSet))
+    }
+    warning("Mixed time zone information")
+    if(any(is.na(zones))){
+      warning("Missing time zone information, all dateTimes default to user's local time")
+      # tz = "" means the current (local) time zone
+      return(as.POSIXct(stamps, format = stampFormat, tz = ""))
+    }
+    # A POSIXct vector carries a single time zone, so each group of rows is parsed in its
+    # own zone and the resulting instants are stored together in a UTC vector.
+    parsed <- as.POSIXct(rep(NA_character_, length(stamps)), format = stampFormat, tz = "UTC")
+    for(zone in zoneSet){
+      rows <- which(zones == zone)
+      parsed[rows] <- as.POSIXct(stamps[rows], format = stampFormat, tz = zone)
+    }
+    parsed
+  }
+
+  h <- basicHeaderGatherer()
+  # The downloaded text is the value of tryCatch; a warning or an error yields NA so the
+  # function stops cleanly instead of continuing with an undefined document.
+  doc <- tryCatch({
+    getURL(url, headerfunction = h$update)
+  }, warning = function(w) {
+    message(paste("URL caused a warning:", url))
+    message(conditionMessage(w))
+    return(NA)
+  }, error = function(e) {
+    message(paste("URL does not seem to exist:", url))
+    message(conditionMessage(e))
+    return(NA)
+  })
+  if(length(doc) != 1 || is.na(doc)) return(NA)
+
+  headerInfo <- h$value()
+  contentType <- unname(headerInfo["Content-Type"])
+  # A missing header gives NA, which must not reach if() unguarded.
+  if(is.na(contentType) || contentType != tsvType){
+    message(paste("URL caused an error:", url))
+    message("Content-Type=", contentType)
+    return(NA)
+  }
+
+  numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
+  # Data is only parsed when the count is both present and non-zero.
+  if(is.na(numToBeReturned) || numToBeReturned == 0){
+    warning("No data to retrieve")
+    return(NA)
+  }
+
+  # Read one row first to learn the column names, then choose a class per column.
+  # text= is used instead of textConnection() so that no connection is left open.
+  namesData <- read.delim(text = doc, header = TRUE, quote="\"",
+                          dec=".", sep='\t',
+                          colClasses='character',
+                          fill = TRUE, nrows = 1)
+  classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
+  if("ActivityStartDate" %in% names(classColumns)) classColumns["ActivityStartDate"] <- "Date"
+  # NA leaves the measured value columns to read.delim's own type detection.
+  classColumns[grep("MeasureValue",names(classColumns))] <- NA
+  retval <- read.delim(text = doc, header = TRUE, quote="\"",
+                       dec=".", sep='\t',
+                       colClasses=as.character(classColumns),
+                       fill = TRUE)
+  actualNumReturned <- nrow(retval)
+  if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
+
+  # Assigning NULL to a data frame column that does not exist is a no-op.
+  retval$ActivityStartDateTime <- toDateTime(retval$ActivityStartDate, retval$ActivityStartTime.Time, retval$ActivityStartTime.TimeZoneCode)
+  retval$ActivityEndDateTime <- toDateTime(retval$ActivityEndDate, retval$ActivityEndTime.Time, retval$ActivityEndTime.TimeZoneCode)
+
+  # With an explicit format, empty end dates become NA rather than depending on format guessing.
+  endDates <- retval$ActivityEndDate
+  if(any(!is.na(endDates) & endDates != "")){
+    retval$ActivityEndDate <- as.Date(endDates, format = "%Y-%m-%d")
+  }
+  return(retval)
+}
\ No newline at end of file
diff --git a/R/getGeneralWQPData.R 
b/R/getGeneralWQPData.R index 3e7eb95f..4faf41f2 100644 --- a/R/getGeneralWQPData.R +++ b/R/getGeneralWQPData.R @@ -30,45 +30,7 @@ getGeneralWQPData <- function(...){ urlCall <- paste(baseURL, urlCall, "&mimeType=tsv",sep = "") - - doc = tryCatch({ - h <- basicHeaderGatherer() - doc <- getURL(urlCall, headerfunction = h$update) - - }, warning = function(w) { - message(paste("URL caused a warning:", urlCall)) - message(w) - }, error = function(e) { - message(paste("URL does not seem to exist:", urlCall)) - message(e) - return(NA) - }) - - if(h$value()["Content-Type"] == "text/tab-separated-values;charset=UTF-8"){ - - numToBeReturned <- as.numeric(h$value()["Total-Result-Count"]) - - if (!is.na(numToBeReturned) | numToBeReturned != 0){ - retval <- read.delim(textConnection(doc), header = TRUE, quote="\"", - dec=".", sep='\t', - colClasses=c('character'), - fill = TRUE) - - actualNumReturned <- nrow(retval) - - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") - - return(retval) - } else { - warning(paste("No data to retrieve from",urlCall)) - return(NA) - } - - } else { - message(paste("URL caused an error:", urlCall)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } - + retVal <- basicWQPData(urlCall) + return(retVal) } \ No newline at end of file diff --git a/R/getRawQWData.r b/R/getRawQWData.r index 3b748d0b..a491059d 100644 --- a/R/getRawQWData.r +++ b/R/getRawQWData.r @@ -1,17 +1,16 @@ -#' Raw Data Import for USGS NWIS Water Quality Data +#' Raw Data Import for Water Quality Portal #' -#' Imports data from NWIS web service. 
This function gets the data from here: \url{http://www.waterqualitydata.us} -#' A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/} -#' A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} +#' Imports data from the Water Quality Portal. This function gets the data from here: \url{http://www.waterqualitydata.us} #' -#' @param siteNumber string USGS site number. This is usually an 8 digit number -#' @param parameterCd vector of USGS 5-digit parameter code or string of characteristicNames. Leaving this blank will return all of the measured values during the specified time period. +#' @param siteNumber string site number. This needs to include the full agency code prefix. +#' @param parameterCd vector of USGS 5-digit parameter code or string of characteristicNames. +#' Leaving this blank will return all of the measured values during the specified time period. #' @param startDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD. #' @param interactive logical Option for interactive mode. If true, there is user interaction for error handling and data checks. #' @keywords data import USGS web service -#' @return retval dataframe with first column dateTime, and at least one qualifier and value columns -#' (subsequent qualifier/value columns could follow depending on requested parameter codes) +#' @return retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +#' start and end times. 
#' @export #' @import RCurl #' @examples @@ -23,44 +22,7 @@ retrieveWQPqwData <- function(siteNumber,parameterCd,startDate,endDate,interactive=TRUE){ url <- constructNWISURL(siteNumber,parameterCd,startDate,endDate,"wqp",interactive=interactive) - - retval = tryCatch({ - h <- basicHeaderGatherer() - doc <- getURL(url, headerfunction = h$update) - - }, warning = function(w) { - message(paste("URL caused a warning:", url)) - message(w) - }, error = function(e) { - message(paste("URL does not seem to exist:", url)) - message(e) - return(NA) - }) - - if(h$value()["Content-Type"] == "text/tab-separated-values;charset=UTF-8"){ - - numToBeReturned <- as.numeric(h$value()["Total-Result-Count"]) - - if (!is.na(numToBeReturned) | numToBeReturned != 0){ - - retval <- read.delim(textConnection(doc), header = TRUE, quote="\"", - dec=".", sep='\t', - colClasses=c('character'), - fill = TRUE) - actualNumReturned <- nrow(retval) - - if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") - - return(retval) - - } else { - warning("No data to retrieve") - return(NA) - } - } else { - message(paste("URL caused an error:", url)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } + retVal <- basicWQPData(url) + return(retVal) } diff --git a/man/basicWQPData.Rd b/man/basicWQPData.Rd new file mode 100644 index 00000000..d7a17681 --- /dev/null +++ b/man/basicWQPData.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2 (4.0.2): do not edit by hand +\name{basicWQPData} +\alias{basicWQPData} +\title{Basic Water Quality Portal Data grabber} +\usage{ +basicWQPData(url) +} +\arguments{ +\item{url}{string URL to Water Quality Portal#'} +} +\value{ +retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +start and end times. +} +\description{ +Imports data from the Water Quality Portal based on a specified url. 
+} +\examples{ +# These examples require an internet connection to run +rawSampleURL <- constructNWISURL('USGS-01594440','01075', '1985-01-01', '1985-03-31',"wqp") +rawSample <- basicWQPData(rawSampleURL) +} +\keyword{USGS} +\keyword{data} +\keyword{import} +\keyword{service} +\keyword{web} + diff --git a/man/retrieveWQPqwData.Rd b/man/retrieveWQPqwData.Rd index 79b0c0ce..e79341e0 100644 --- a/man/retrieveWQPqwData.Rd +++ b/man/retrieveWQPqwData.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2 (4.0.2): do not edit by hand \name{retrieveWQPqwData} \alias{retrieveWQPqwData} -\title{Raw Data Import for USGS NWIS Water Quality Data} +\title{Raw Data Import for Water Quality Portal} \usage{ retrieveWQPqwData(siteNumber, parameterCd, startDate, endDate, interactive = TRUE) } \arguments{ -\item{siteNumber}{string USGS site number. This is usually an 8 digit number} +\item{siteNumber}{string site number. This needs to include the full agency code prefix.} -\item{parameterCd}{vector of USGS 5-digit parameter code or string of characteristicNames. Leaving this blank will return all of the measured values during the specified time period.} +\item{parameterCd}{vector of USGS 5-digit parameter code or string of characteristicNames. +Leaving this blank will return all of the measured values during the specified time period.} \item{startDate}{string starting date for data retrieval in the form YYYY-MM-DD.} @@ -18,13 +19,11 @@ retrieveWQPqwData(siteNumber, parameterCd, startDate, endDate, \item{interactive}{logical Option for interactive mode. If true, there is user interaction for error handling and data checks.} } \value{ -retval dataframe with first column dateTime, and at least one qualifier and value columns -(subsequent qualifier/value columns could follow depending on requested parameter codes) +retval dataframe raw data returned from the Water Quality Portal. Additionally, a POSIXct dateTime column is supplied for +start and end times. 
} \description{ -Imports data from NWIS web service. This function gets the data from here: \url{http://www.waterqualitydata.us} -A list of parameter codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/} -A list of statistic codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} +Imports data from the Water Quality Portal. This function gets the data from here: \url{http://www.waterqualitydata.us} } \examples{ # These examples require an internet connection to run -- GitLab