diff --git a/R/importRDB1.r b/R/importRDB1.r
index adc8da795a75cdeed6f1b42100331f9d04566d10..28622f4246b3987395552302084efd41aa10926a 100644
--- a/R/importRDB1.r
+++ b/R/importRDB1.r
@@ -7,6 +7,12 @@
 #' @param obs_url string containing the url for the retrieval
 #' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date
 #' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times)
+#' @param tz string to set the timezone attribute of the datetimes. The default is an empty string (""), which converts the
+#' datetimes to UTC (properly accounting for daylight saving time based on the data's provided tz_cd column).
+#' Possible values are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles",
+#' "America/Anchorage", "America/Honolulu", "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla"
+#' @param convertType logical, defaults to TRUE. If TRUE, the function will convert the data to dates, datetimes, and
+#' numerics based on a standard algorithm. If FALSE, everything is returned as character columns.
 #' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
 #' @export
 #' @import RCurl
@@ -28,13 +34,21 @@
 #' qwURL <- constructNWISURL(c('04024430','04024000'),
 #' c('34247','30234','32104','34220'),
 #' "2010-11-03","","qw",format="rdb")
-#' qwData <- importRDB1(qwURL, qw=TRUE)
+#' qwData <- importRDB1(qwURL, qw=TRUE, tz="America/Chicago")
 #' # User file:
 #' filePath <- system.file("extdata", package="dataRetrievaldemo")
 #' fileName <- "RDB1Example.txt"
 #' fullPath <- file.path(filePath, fileName)
 #' importUserRDB <- importRDB1(fullPath)
-importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
+importRDB1 <- function(obs_url, asDateTime=FALSE, qw=FALSE, convertType = TRUE, tz=""){
+  
+  if(tz != ""){
+    tz <- match.arg(tz, c("America/New_York","America/Chicago",
+                          "America/Denver","America/Los_Angeles",
+                          "America/Anchorage","America/Honolulu",
+                          "America/Jamaica","America/Managua",
+                          "America/Phoenix","America/Metlakatla"))
+  }
   
   if(url.exists(obs_url)){
     retval = tryCatch({
@@ -74,96 +88,104 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
     dataType <- tmp[1,]
     data <- tmp[-1,]
     
-    multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2])))))
-    
-    if(length(multiSiteCorrections) > 0){
-      data <- data[multiSiteCorrections,]
+    if(convertType){
+      multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2])))))
      
-      findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0))
-      findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0)
-      data <- data[findRowsWithHeaderInfo,]
-    }
-    
-    timeZoneLibrary <- setNames(c("America/New_York","America/New_York","America/Chicago","America/Chicago",
-                                  "America/Denver","America/Denver","America/Los_Angeles","America/Los_Angeles",
-                                  "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"),
-                                c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
-    
-    
-    offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
-                              c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
-    
-    data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x)))
-    
-    
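# A minimal self-contained sketch of the conversion the block below performs
# (assumed values; not part of the retrieval code): local clock times are
# parsed as if they were UTC, then the tz_cd offset in hours is added to
# recover the true UTC instant.
offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
                          c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
clockTime <- as.POSIXct("2013-11-03 01:00", format = "%Y-%m-%d %H:%M", tz = "UTC")
utcTime <- clockTime + offsetLibrary["CDT"] * 60 * 60   # 2013-11-03 06:00:00 UTC
# The new tz argument only sets the tzone attribute: the instant is unchanged,
# only the printed representation moves to local time.
attr(utcTime, "tzone") <- "America/Chicago"             # prints 2013-11-03 01:00:00 CDT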
if(length(grep('d$', dataType)) > 0){ - if (asDateTime & !qw){ + if(length(multiSiteCorrections) > 0){ + data <- data[multiSiteCorrections,] - if("tz_cd" %in% names(data)){ - offset <- offsetLibrary[data$tz_cd] - } else { - offset <- 0 - } - offset[is.na(offset)] <- 0 - - data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC") - data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60 - data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0]) - - } else if (qw){ - - if("sample_start_time_datum_cd" %in% names(data)){ - timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd] - timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0 - } else { - timeZoneStartOffset <- 0 - } - - if("sample_end_time_datum_cd" %in% names(data)){ - timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd] - timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0 - composite <- TRUE - } else { - composite <- FALSE - if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){ - if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){ - composite <- TRUE + findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0)) + findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0) + data <- data[findRowsWithHeaderInfo,] + } + + offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), + c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + + data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x))) + + if(length(grep('d$', dataType)) > 0){ + if (asDateTime & !qw){ + + if("tz_cd" %in% names(data)){ + offset <- offsetLibrary[data$tz_cd] + } else { + offset <- 0 + } + offset[is.na(offset)] <- 0 + + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC") + data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60 + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0]) + + if(tz != ""){ + attr(data[,regexpr('d$', dataType) > 0], "tzone") <- tz + } + + } else if (qw){ + + if("sample_start_time_datum_cd" %in% names(data)){ + timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd] + timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0 + } else { + timeZoneStartOffset <- 0 + } + + if("sample_end_time_datum_cd" %in% names(data)){ + timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd] + timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0 + composite <- TRUE + } else { + composite <- FALSE + if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){ + if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){ + composite <- TRUE + } } + timeZoneEndOffset <- 0 } - timeZoneEndOffset <- 0 - } - - if("sample_dt" %in% names(data)){ - if(any(data$sample_dt != "")){ - suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY")))) + + if("sample_dt" %in% names(data)){ + if(any(data$sample_dt != "")){ + suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY")))) + } } - } - - if("sample_end_dt" %in% names(data)){ - if(any(data$sample_end_dt != "")){ - suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY")))) - } - } - - data$startDateTime <- with(data, 
as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) - data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60 - data$startDateTime <- as.POSIXct(data$startDateTime) - - if(composite){ - data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) - data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60 - data$endDateTime <- as.POSIXct(data$endDateTime) - } - - } else { - for (i in grep('d$', dataType)){ - if (all(data[,i] != "")){ - data[,i] <- as.Date(data[,i]) + + if("sample_end_dt" %in% names(data)){ + if(any(data$sample_end_dt != "")){ + suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY")))) + } + } + + data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60 + data$startDateTime <- as.POSIXct(data$startDateTime) + + if(tz != ""){ + attr(data$startDateTime, "tzone") <- tz + } + + if(composite){ + data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60 + data$endDateTime <- as.POSIXct(data$endDateTime) + + if(tz != ""){ + attr(data$endDateTime, "tzone") <- tz + } + } + + } else { + for (i in grep('d$', dataType)){ + if (all(data[,i] != "")){ + data[,i] <- as.Date(data[,i]) + } } } } - } - row.names(data) <- NULL + row.names(data) <- NULL + } return(data) } diff --git a/R/readNWISunit.r b/R/readNWISunit.r index 140f27ae97db2a4dd951bf838c5ea1c4da285414..07b12738048f1a4351b6d3c93fd6b02baa7dfd51 100644 --- a/R/readNWISunit.r +++ b/R/readNWISunit.r @@ -8,9 +8,6 @@ #' @param parameterCd string USGS parameter code. This is usually an 5 digit number. #' @param startDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD. -#' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, -#' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, -#' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". 
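# A usage sketch of the simplified interface (assumptions: site '04024430'
# from the examples in this file and the discharge code "00060"; requires an
# internet connection). The format switch is gone, so retrieval is always
# WaterML 1.1 parsed by importWaterML1:
rawData <- readNWISunit("04024430", "00060", "2014-10-10", "2014-10-10")
head(rawData)   # agency, site, dateTime, time zone, value, and code columns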
#' @keywords data import USGS web service
#' @return data dataframe with agency, site, dateTime, time zone, value, and code columns
#' @export
@@ -21,20 +18,16 @@
 #' endDate <- "2014-10-10"
 #' # These examples require an internet connection to run
 #' rawData <- readNWISunit(siteNumber,parameterCd,startDate,endDate)
-#' summary(rawData)
-#' rawData2 <- readNWISunit(siteNumber,parameterCd,startDate,endDate,"tsv")
-#' summary(rawData2)
+#'
 #' timeZoneChange <- readNWISunit(c('04024430','04024000'),parameterCd,
-#' "2013-11-03","2013-11-03","tsv")
+#' "2013-11-03","2013-11-03")
 #' firstSite <- timeZoneChange[timeZoneChange$site_no == '04024430',]
-readNWISunit <- function (siteNumber,parameterCd,startDate,endDate,format="xml"){
+readNWISunit <- function (siteNumber,parameterCd,startDate,endDate){
+  
+  url <- constructNWISURL(siteNumber,parameterCd,startDate,endDate,"uv",format="xml")
+  
+  data <- importWaterML1(url,asDateTime=TRUE)
 
-  url <- constructNWISURL(siteNumber,parameterCd,startDate,endDate,"uv",format=format)
-  if (format == "xml" | format == "wml1") {
-    data <- importWaterML1(url,asDateTime=TRUE)
-  } else {
-    data <- importRDB1(url,asDateTime=TRUE)
-  }
   return (data)
 }
diff --git a/inst/doc/dataRetrieval.R b/inst/doc/dataRetrieval.R
index 015b15f6b6b1db7ca1c5c9429a7e409147640607..a550c4b09b8921cd11cbc26adcce86584c6c2cf0 100644
--- a/inst/doc/dataRetrieval.R
+++ b/inst/doc/dataRetrieval.R
@@ -35,6 +35,8 @@ addSpace <- function(x) ifelse(x != "1", "[5pt]","")
 # qwData <- readNWISqw(siteNumber,parameterCd,
 #           "1980-01-01","2010-01-01")
 #
+# pCode <- readNWISpCode(parameterCd)
+#
 ## ----tableParameterCodes, echo=FALSE,results='asis'-------
 pCode <- c('00060', '00065', '00010','00045','00400')
@@ -86,7 +88,7 @@
 siteINFO$station.nm
 # Continuing from the previous example:
 # This pulls out just the daily data:
-dailyDataAvailable <- whatNWISData(siteNumbers,
+dailyDataAvailable <- whatNWISdata(siteNumbers,
 service="dv")
@@ -183,7 +185,7 @@
 endDate <- "2012-05-13"
 dischargeToday <- readNWISunit(siteNumber, parameterCd,
 startDate, endDate)
-## ----dischargeData, echo=FALSE----------------------------
+## ----dischargeData, echo=TRUE-----------------------------
 head(dischargeToday)
 ## ----label=getQW, echo=TRUE-------------------------------
@@ -268,7 +270,7 @@ readNWISpCode
 # library(dataRetrievaldemo)
 ## ----label=getSiteApp, echo=TRUE--------------------------
-availableData <- whatNWISData(siteNumber, "dv")
+availableData <- whatNWISdata(siteNumber, "dv")
 dailyData <- availableData["00003" == availableData$statCd,]
 tableData <- with(dailyData,
diff --git a/inst/doc/dataRetrieval.Rnw b/inst/doc/dataRetrieval.Rnw
index edf969f18b15c1576ce4c77cb9dca51c288abc80..6302b4e0b2c9087bf1f057cc1a4689d1d14891d0 100644
--- a/inst/doc/dataRetrieval.Rnw
+++ b/inst/doc/dataRetrieval.Rnw
@@ -181,14 +181,11 @@ addSpace <- function(x) ifelse(x != "1", "[5pt]","")
 %------------------------------------------------------------
 \section{Introduction to dataRetrievaldemo}
 %------------------------------------------------------------
-The dataRetrievaldemo package was created to simplify the process of loading hydrologic data into the R environment. It has been specifically designed to work seamlessly with the EGRETdemo R package: Exploration and Graphics for RivEr Trends. See: \url{https://github.com/USGS-R/EGRETdemo/wiki} or \url{http://dx.doi.org/10.3133/tm4A10} for information on EGRETdemo.
EGRETdemo is designed to provide analysis of water quality data sets using the Weighted Regressions on Time, Discharge and Season (WRTDS) method as well as analysis of discharge trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets.
-
-
-The dataRetrievaldemo package is designed to retrieve many of the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web. Users may also load data from other sources (text files, spreadsheets) using dataRetrievaldemo. Section \ref{sec:genRetrievals} provides examples of how one can obtain raw data from USGS sources on the Web and load them into dataframes within the R environment. The functionality described in section \ref{sec:genRetrievals} is for general use and is not tailored for the specific uses of the EGRETdemo package. The functionality described in section \ref{sec:EGRETdfs} is tailored specifically to obtaining input from the Web and structuring it for use in the EGRETdemo package. The functionality described in section \ref{sec:userFiles} is for converting hydrologic data from user-supplied files and structuring it specifically for use in the EGRETdemo package.
+The dataRetrievaldemo package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal, which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. For information on getting started in R and installing the package, see Section \ref{sec:appendix1}: Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.
 
-A quick workflow for major dataRetrievaldemo functions:
+A quick workflow for USGS dataRetrievaldemo functions:
 
 <<workflow, echo=TRUE,eval=FALSE>>=
 library(dataRetrievaldemo)
@@ -206,14 +203,76 @@
 parameterCd <- "00618"
 qwData <- readNWISqw(siteNumber,parameterCd,
           "1980-01-01","2010-01-01")
 
+pCode <- readNWISpCode(parameterCd)
+
 @
 
+USGS data are made available through the National Water Information System (NWIS).
+
+Table \ref{tab:func} describes the functions available in the dataRetrievaldemo package.
+
+\begin{table}[!ht]
+\begin{minipage}{\linewidth}
+{\footnotesize
+\caption{dataRetrievaldemo functions}
+\label{tab:func}
+\begin{tabular}{lll}
+  \hline
+\multicolumn{1}{c}{\textbf{\textsf{Function Name}}} &
+\multicolumn{1}{c}{\textbf{\textsf{Arguments}}} &
+\multicolumn{1}{c}{\textbf{\textsf{Description}}} \\ [0pt]
+  \hline
+  \texttt{readNWISdata} & \texttt{...} & NWIS data using user-specified queries\\
+  & service & \\
+  [5pt]\texttt{readNWISdv} & siteNumber & NWIS daily data\\
+  & parameterCd & \\
+  & startDate & \\
+  & endDate & \\
+  & statCd & \\
+  [5pt]\texttt{readNWISqw} & siteNumber & NWIS water quality data\\
+  & parameterCd & \\
+  & startDate & \\
+  & endDate & \\
+  & expanded & \\
+  [5pt]\texttt{readNWISunit} & siteNumber & NWIS unit value (instantaneous) data\\
+  & parameterCd & \\
+  & startDate & \\
+  & endDate & \\
+  [5pt]\texttt{readNWISrating} & siteNumber & NWIS rating table for active streamgage \\
+  & type & \\
+  [5pt]\texttt{readNWISmeas} & siteNumber & NWIS surface-water measurements \\
+  & startDate & \\
+  & endDate & \\
+  [5pt]\texttt{readNWISpeak} & siteNumber & NWIS peak flow data \\
+  & startDate & \\
+  & endDate & \\
+  [5pt]\texttt{readNWISgwl} & siteNumber & NWIS groundwater level measurements \\
+  & startDate & \\
+  & endDate & \\
+  [5pt]\texttt{readNWISpCode} & parameterCd & NWIS parameter code information\\
+  [5pt]\texttt{readNWISsite} & siteNumber & NWIS site information \\
+  [5pt]\texttt{whatNWISsites} & \texttt{...} & NWIS site search using user-specified queries \\
+  [5pt]\texttt{whatNWISdata} & siteNumber & NWIS data availability, including period of record and count \\
+  & service & \\
+  [5pt]\texttt{readWQPdata} & \texttt{...} & WQP data using user-specified queries \\
+  [5pt]\texttt{readWQPqw} & siteNumber & WQP data \\
+  & parameterCd (or characteristic name) & \\
+  & startDate & \\
+  & endDate & \\
+  [5pt]\texttt{whatWQPsites} & \texttt{...} & WQP site search using user-specified queries \\
+  \hline
+\end{tabular}
+}
+\end{minipage}
+\end{table}
+
+\clearpage
 
 %------------------------------------------------------------
 \section{USGS Web Retrievals}
 \label{sec:genRetrievals}
 %------------------------------------------------------------
-In this section, five examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values(\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), and water quality data (\ref{sec:usgsWQP}) or (\ref{sec:usgsSTORET}). We will use the Choptank River near Greensboro, MD as an example. Daily discharge measurements are available as far back as 1948. Additionally, nitrate has been measured since 1964.
+In this section, five examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values (\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), and water quality data (\ref{sec:usgsWQP}). We will use the Choptank River near Greensboro, MD as an example. Daily discharge measurements are available as far back as 1948. Additionally, nitrate has been measured since 1964.
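A compact sketch of the calls these five examples build up to (the Choptank site number 01491000 is assumed here; the chunk is shown but not evaluated):

<<choptankSketch, echo=TRUE, eval=FALSE>>=
siteNumber <- "01491000"                 # Choptank River near Greensboro, MD
siteINFO <- readNWISsite(siteNumber)     # site information
dailyQ <- readNWISdv(siteNumber, "00060",
                     "1948-01-01", "")   # historical daily discharge
nitrate <- readNWISqw(siteNumber, "00618",
                      "1964-01-01", "")  # nitrate water quality samples
@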
% %------------------------------------------------------------ % \subsection{Introduction} @@ -306,17 +365,17 @@ Site information is obtained from \url{http://waterservices.usgs.gov/rest/Site-T \FloatBarrier %------------------------------------------------------------ -\subsubsection{whatNWISData} +\subsubsection{whatNWISdata} \label{sec:usgsDataAvailability} %------------------------------------------------------------ -To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{whatNWISData} function. It is possible to limit the retrieval information to a subset of services (\texttt{"}dv\texttt{"}, \texttt{"}uv\texttt{"}, or \texttt{"}qw\texttt{"}). In the following example, we limit the retrieved Choptank data to only daily data. Leaving the \texttt{"}service\texttt{"} argument blank returns all of the available data for that site. +To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{whatNWISdata} function. It is possible to limit the retrieval information to a subset of services (\texttt{"}dv\texttt{"}, \texttt{"}uv\texttt{"}, or \texttt{"}qw\texttt{"}). In the following example, we limit the retrieved Choptank data to only daily data. Leaving the \texttt{"}service\texttt{"} argument blank returns all of the available data for that site. <<getSiteExtended, echo=TRUE>>= # Continuing from the previous example: # This pulls out just the daily data: -dailyDataAvailable <- whatNWISData(siteNumbers, +dailyDataAvailable <- whatNWISdata(siteNumbers, service="dv") @ @@ -380,7 +439,7 @@ Parameter information can obtained from \url{http://nwis.waterdata.usgs.gov/usa/ \subsection{Daily Values} \label{sec:usgsDaily} %------------------------------------------------------------ -To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. By setting the \texttt{"}interactive\texttt{"} option to FALSE, the operation of the function will advance automatically. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to \texttt{"}00003\texttt{"}). If you want to use the default values, you do not need to list them in the function call. The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. @@ -468,11 +527,13 @@ dischargeToday <- readNWISunit(siteNumber, parameterCd, The retrieval produces the following dataframe: -<<dischargeData, echo=FALSE>>= +<<dischargeData, echo=TRUE>>= head(dischargeToday) @ -Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. 
Data are retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package.
+Note that time now becomes important, so the variable datetime is a POSIXct, and the reported time zone is included in a separate column. The datetime column is converted automatically to \enquote{UTC} (Coordinated Universal Time). To override the timezone conversion, supply one of the accepted timezone names (for example, \enquote{America/Chicago}) to the \texttt{tz} argument where it is available, such as in \texttt{importRDB1}.
+
+Data are retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package.
 
 \newpage
 
 
@@ -483,7 +544,7 @@ Note that time now becomes important, so the variable datetime is a POSIXct, and
 
 \subsection{Water Quality Values}
 \label{sec:usgsWQP}
 %------------------------------------------------------------
-To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, endDate, and interactive (similar to \texttt{readNWISunit} and \texttt{readNWISdv}). Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level.
+To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, and endDate (similar to \texttt{readNWISunit} and \texttt{readNWISdv}). Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level.
 
 
 <<label=getQW, echo=TRUE>>=
 
@@ -721,7 +782,7 @@ library(dataRetrievaldemo)
 
 There are a few steps that are required in order to create a table in Microsoft\textregistered\ software (Excel, Word, PowerPoint, etc.) from an R dataframe. There are certainly a variety of good methods, one of which is detailed here.
The example we will step through here will be to create a table in Microsoft Excel based on the dataframe tableData:
 
 <<label=getSiteApp, echo=TRUE>>=
-availableData <- whatNWISData(siteNumber, "dv")
+availableData <- whatNWISdata(siteNumber, "dv")
 dailyData <- availableData["00003" == availableData$statCd,]
 
 tableData <- with(dailyData,
diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf
index b6916c3962b260d29d02c2e22ef5d993f1c8903e..33e495f3558059af19cf9005d686d9094819650b 100644
Binary files a/inst/doc/dataRetrieval.pdf and b/inst/doc/dataRetrieval.pdf differ
diff --git a/man/importRDB1.Rd b/man/importRDB1.Rd
index 7a3fb700f0ded6105d3fed1de9ef3670e66f1fa0..f80c8bd53c6ea0b69a77bc725c696e7b498d4440 100644
--- a/man/importRDB1.Rd
+++ b/man/importRDB1.Rd
@@ -3,7 +3,8 @@
 \alias{importRDB1}
 \title{Function to return data from the NWIS RDB 1.0 format}
 \usage{
-importRDB1(obs_url, asDateTime = FALSE, qw = FALSE)
+importRDB1(obs_url, asDateTime = FALSE, qw = FALSE, convertType = TRUE,
+  tz = "")
 }
 \arguments{
 \item{obs_url}{string containing the url for the retrieval}
@@ -11,6 +12,14 @@ importRDB1(obs_url, asDateTime = FALSE, qw = FALSE)
 \item{asDateTime}{logical, if TRUE returns date and time as POSIXct, if FALSE, Date}
 
 \item{qw}{logical, if TRUE parses as water quality data (where dates/times are in start and end times)}
+
+\item{convertType}{logical, defaults to TRUE. If TRUE, the function will convert the data to dates, datetimes, and
+numerics based on a standard algorithm. If FALSE, everything is returned as character columns.}
+
+\item{tz}{string to set the timezone attribute of the datetimes. The default is an empty string (""), which converts the
+datetimes to UTC (properly accounting for daylight saving time based on the data's provided tz_cd column).
+Possible values are "America/New_York", "America/Chicago", "America/Denver", "America/Los_Angeles",
+"America/Anchorage", "America/Honolulu", "America/Jamaica", "America/Managua", "America/Phoenix", and "America/Metlakatla"}
 }
 \value{
 data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
@@ -37,7 +46,7 @@ unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
 qwURL <- constructNWISURL(c('04024430','04024000'),
 c('34247','30234','32104','34220'),
 "2010-11-03","","qw",format="rdb")
-qwData <- importRDB1(qwURL, qw=TRUE)
+qwData <- importRDB1(qwURL, qw=TRUE, tz="America/Chicago")
 # User file:
 filePath <- system.file("extdata", package="dataRetrievaldemo")
 fileName <- "RDB1Example.txt"
diff --git a/man/readNWISunit.Rd b/man/readNWISunit.Rd
index 0aa9778e0ac6d74ea951556c096dabbfb6ca5aff..64201c06e3a2acc8dc7c750d0054213a01d92957 100644
--- a/man/readNWISunit.Rd
+++ b/man/readNWISunit.Rd
@@ -3,7 +3,7 @@
 \alias{readNWISunit}
 \title{Raw Data Import for Instantaneous USGS NWIS Data}
 \usage{
-readNWISunit(siteNumber, parameterCd, startDate, endDate, format = "xml")
+readNWISunit(siteNumber, parameterCd, startDate, endDate)
 }
 \arguments{
 \item{siteNumber}{string USGS site number. This is usually an 8 digit number}
 
 \item{parameterCd}{string USGS parameter code. This is usually an 5 digit number.}
 
 \item{startDate}{string starting date for data retrieval in the form YYYY-MM-DD.}
 
 \item{endDate}{string ending date for data retrieval in the form YYYY-MM-DD.}
-
-\item{format}{string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning.
XML is slower,
-but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option,
-but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml".}
 }
 \value{
 data dataframe with agency, site, dateTime, time zone, value, and code columns
 }
@@ -33,9 +29,7 @@
 startDate <- "2014-10-10"
 endDate <- "2014-10-10"
 # These examples require an internet connection to run
 rawData <- readNWISunit(siteNumber,parameterCd,startDate,endDate)
-summary(rawData)
-rawData2 <- readNWISunit(siteNumber,parameterCd,startDate,endDate,"tsv")
-summary(rawData2)
+
 timeZoneChange <- readNWISunit(c('04024430','04024000'),parameterCd,
-"2013-11-03","2013-11-03","tsv")
+"2013-11-03","2013-11-03")
 firstSite <- timeZoneChange[timeZoneChange$site_no == '04024430',]
diff --git a/vignettes/dataRetrieval.Rnw b/vignettes/dataRetrieval.Rnw
index 8a660a8bbc1d2ad0c4460e584ac4e14110025f68..6302b4e0b2c9087bf1f057cc1a4689d1d14891d0 100644
--- a/vignettes/dataRetrieval.Rnw
+++ b/vignettes/dataRetrieval.Rnw
@@ -181,7 +181,7 @@ addSpace <- function(x) ifelse(x != "1", "[5pt]","")
 %------------------------------------------------------------
 \section{Introduction to dataRetrievaldemo}
 %------------------------------------------------------------
-The dataRetrievaldemo package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal, which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Section \ref{sec:genRetrievals} provides examples of how one can obtain raw data from USGS sources and load them into dataframes within the R environment.
+The dataRetrievaldemo package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal, which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. For information on getting started in R and installing the package, see Section \ref{sec:appendix1}: Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.
 
 
 
@@ -266,6 +266,8 @@ Table \ref{tab:func} describes the functions available in the dataRetrievaldemo
 \end{minipage}
 \end{table}
 
+\clearpage
+
 %------------------------------------------------------------
 \section{USGS Web Retrievals}
 \label{sec:genRetrievals}
@@ -437,7 +439,7 @@ Parameter information can obtained from \url{http://nwis.waterdata.usgs.gov/usa/
 \subsection{Daily Values}
 \label{sec:usgsDaily}
 %------------------------------------------------------------
-To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call.
By setting the \texttt{"}interactive\texttt{"} option to FALSE, the operation of the function will advance automatically. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to \texttt{"}00003\texttt{"}). If you want to use the default values, you do not need to list them in the function call. The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. @@ -525,11 +527,13 @@ dischargeToday <- readNWISunit(siteNumber, parameterCd, The retrieval produces the following dataframe: -<<dischargeData, echo=FALSE>>= +<<dischargeData, echo=TRUE>>= head(dischargeToday) @ -Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data are retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package. +Note that time now becomes important, so the variable datetime is a POSIXct, and the reported time zone is included in a separate column. The datetime column is converted automatically to \enquote{UTC} (Coordinated Universal Time). To override the timezone + +Data are retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package. \newpage @@ -540,7 +544,7 @@ Note that time now becomes important, so the variable datetime is a POSIXct, and \subsection{Water Quality Values} \label{sec:usgsWQP} %------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, endDate, and interactive (similar to \texttt{readNWISunit} and \texttt{readNWISdv}). Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level. +To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, and endDate and \texttt{readNWISdv}). Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). 
Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level. <<label=getQW, echo=TRUE>>= diff --git a/vignettes/figure/getNWIStemperaturePlot-1.pdf b/vignettes/figure/getNWIStemperaturePlot-1.pdf index 71ee37b7688cbd05a69505b141b96c7cb2f68086..2497ae840f6d7f3010fe72296a53421eba4d1e50 100644 Binary files a/vignettes/figure/getNWIStemperaturePlot-1.pdf and b/vignettes/figure/getNWIStemperaturePlot-1.pdf differ diff --git a/vignettes/figure/getQWtemperaturePlot-1.pdf b/vignettes/figure/getQWtemperaturePlot-1.pdf index 90d537ef7f0df406e492e2eee464211eed1e73d7..a81a777a7eddca4e64a26aed8510f2ebe5366732 100644 Binary files a/vignettes/figure/getQWtemperaturePlot-1.pdf and b/vignettes/figure/getQWtemperaturePlot-1.pdf differ
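As a closing sketch of the new qw/tz behavior added to importRDB1 above (taken from its documented example; requires an internet connection):

qwURL <- constructNWISURL(c('04024430','04024000'),
                          c('34247','30234','32104','34220'),
                          "2010-11-03","","qw",format="rdb")
qwData <- importRDB1(qwURL, qw=TRUE, tz="America/Chicago")
# startDateTime (and endDateTime for composite samples) are POSIXct values
# stored as instants; the tzone attribute only controls how they print:
attr(qwData$startDateTime, "tzone")   # "America/Chicago"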