diff --git a/vignettes/Rhelp.png b/vignettes/Rhelp.png new file mode 100644 index 0000000000000000000000000000000000000000..6806da1ca474f57423f6e54be3c10741d03b63d7 Binary files /dev/null and b/vignettes/Rhelp.png differ diff --git a/vignettes/dataRetrieval.Rnw b/vignettes/dataRetrieval.Rnw new file mode 100644 index 0000000000000000000000000000000000000000..866a27962dd18beeeec2a8ca3101bc99b3a4ab93 --- /dev/null +++ b/vignettes/dataRetrieval.Rnw @@ -0,0 +1,797 @@ +%\VignetteIndexEntry{Introduction to the dataRetrieval package} +%\VignetteEngine{knitr::knitr} +%\VignetteDepends{} +%\VignetteSuggests{xtable,EGRET} +%\VignetteImports{zoo, XML, RCurl, reshape2,lubridate} +%\VignettePackage{dataRetrieval} + +\documentclass[a4paper,11pt]{article} + +\usepackage{amsmath} +\usepackage{times} +\usepackage{hyperref} +\usepackage[numbers, round]{natbib} +\usepackage[american]{babel} +\usepackage{authblk} +\usepackage{subfig} +\usepackage{placeins} +\usepackage{footnote} +\usepackage{tabularx} +\usepackage{threeparttable} +\usepackage{parskip} + +\usepackage{csquotes} +\usepackage{setspace} + +% \doublespacing + +\renewcommand{\topfraction}{0.85} +\renewcommand{\textfraction}{0.1} +\usepackage{graphicx} + + +\usepackage{mathptmx}% Times Roman font +\usepackage[scaled=.90]{helvet}% Helvetica, served as a model for arial + +% \usepackage{indentfirst} +% \setlength\parindent{20pt} +\setlength{\parskip}{0pt} + +\usepackage{courier} + +\usepackage{titlesec} +\usepackage{titletoc} + +\titleformat{\section} + {\normalfont\sffamily\bfseries\LARGE} + {\thesection}{0.5em}{} +\titleformat{\subsection} + {\normalfont\sffamily\bfseries\Large} + {\thesubsection}{0.5em}{} +\titleformat{\subsubsection} + {\normalfont\sffamily\large} + {\thesubsubsection}{0.5em}{} + +\titlecontents{section} +[2em] % adjust left margin +{\sffamily} % font formatting +{\contentslabel{2.3em}} % section label and offset +{\hspace*{-2.3em}} +{\titlerule*[0.25pc]{.}\contentspage} + +\titlecontents{subsection} 
+[4.6em] % adjust left margin +{\sffamily} % font formatting +{\contentslabel{2.3em}} % section label and offset +{\hspace*{-2.3em}} +{\titlerule*[0.25pc]{.}\contentspage} + +\titlecontents{subsubsection} +[6.9em] % adjust left margin +{\sffamily} % font formatting +{\contentslabel{2.3em}} % section label and offset +{\hspace*{-2.3em}} +{\titlerule*[0.25pc]{.}\contentspage} + +\titlecontents{table} +[0em] % adjust left margin +{\sffamily} % font formatting +{Table\hspace*{2em} \contentslabel {2em}} % section label and offset +{\hspace*{4em}} +{\titlerule*[0.25pc]{.}\contentspage} + +\titlecontents{figure} +[0em] % adjust left margin +{\sffamily} % font formatting +{Figure\hspace*{2em} \contentslabel {2em}} % section label and offset +{\hspace*{4em}} +{\titlerule*[0.25pc]{.}\contentspage} + +%Italisize and change font of urls: +\urlstyle{sf} +\renewcommand\UrlFont\itshape + +\usepackage{caption} +\captionsetup{ + font={sf}, + labelfont={bf,sf}, + labelsep=period, + justification=justified, + singlelinecheck=false +} + + + +\textwidth=6.2in +\textheight=8.5in +\parskip=.3cm +\oddsidemargin=.1in +\evensidemargin=.1in +\headheight=-.3in + + +%------------------------------------------------------------ +% newcommand +%------------------------------------------------------------ +\newcommand{\scscst}{\scriptscriptstyle} +\newcommand{\scst}{\scriptstyle} +\newcommand{\Robject}[1]{{\texttt{#1}}} +\newcommand{\Rfunction}[1]{{\texttt{#1}}} +\newcommand{\Rclass}[1]{\textit{#1}} +\newcommand{\Rpackage}[1]{\textit{#1}} +\newcommand{\Rexpression}[1]{\texttt{#1}} +\newcommand{\Rmethod}[1]{{\texttt{#1}}} +\newcommand{\Rfunarg}[1]{{\texttt{#1}}} + +\begin{document} + +<<openLibrary, echo=FALSE>>= +library(xtable) +options(continue=" ") +options(width=60) +library(knitr) + +@ + +\renewenvironment{knitrout}{\begin{singlespace}}{\end{singlespace}} +\renewcommand*\listfigurename{Figures} + +\renewcommand*\listtablename{Tables} + + 
+%------------------------------------------------------------ +\title{The dataRetrieval R package} +%------------------------------------------------------------ +\author[1]{Laura De Cicco} +\author[1]{Robert Hirsch} +\affil[1]{United States Geological Survey} + + +<<include=TRUE ,echo=FALSE,eval=TRUE>>= +# Global chunk options; tidy is given exactly once so the setting is unambiguous +opts_chunk$set(highlight=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE,comment="") +# Inline hook: round numeric results to 3 decimals and pass any other value through unchanged +knit_hooks$set(inline = function(x) { + if (is.numeric(x)) round(x, 3) else x}) +knit_hooks$set(crop = hook_pdfcrop) + +# Column-name formatter used with xtable: puts a digit following '^' into math mode, +# escapes '%', replaces '_' with a space, then wraps each name in a centered +# bold sans-serif \multicolumn cell. Takes and returns a character vector. +bold.colHeaders <- function(x) { + x <- gsub("\\^(\\d)","$\\^\\1$",x) + x <- gsub("\\%","\\\\%",x) + x <- gsub("\\_"," ",x) + paste0("\\multicolumn{1}{c}{\\textbf{\\textsf{", x, "}}}") +} +# Row-name formatter used with xtable: returns "" for row name "1" and "[5pt]" for every other row +addSpace <- function(x) ifelse(x != "1", "[5pt]","") +@ + +\noindent{\huge\textsf{\textbf{The dataRetrieval R package}}} + +\noindent\textsf{By Laura De Cicco and Robert Hirsch} + +\noindent\textsf{\today} + +% \maketitle +% +% \newpage + +\tableofcontents +\listoffigures +\listoftables + +\newpage + +%------------------------------------------------------------ +\section{Introduction to dataRetrieval} +%------------------------------------------------------------ +The dataRetrieval package was created to simplify the process of loading hydrologic data into the R environment. It has been specifically designed to work seamlessly with the EGRET R package: Exploration and Graphics for RivEr Trends. See: \url{https://github.com/USGS-R/EGRET/wiki} or \url{http://dx.doi.org/10.3133/tm4A10} for information on EGRET. EGRET is designed to provide analysis of water quality data sets using the Weighted Regressions on Time, Discharge and Season (WRTDS) method as well as analysis of discharge trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets. + + +The dataRetrieval package is designed to retrieve many of the major data types of U.S. 
Geological Survey (USGS) hydrologic data that are available on the Web. Users may also load data from other sources (text files, spreadsheets) using dataRetrieval. Section \ref{sec:genRetrievals} provides examples of how one can obtain raw data from USGS sources on the Web and load them into dataframes within the R environment. The functionality described in section \ref{sec:genRetrievals} is for general use and is not tailored for the specific uses of the EGRET package. The functionality described in section \ref{sec:EGRETdfs} is tailored specifically to obtaining input from the Web and structuring it for use in the EGRET package. The functionality described in section \ref{sec:userFiles} is for converting hydrologic data from user-supplied files and structuring it specifically for use in the EGRET package. + +For information on getting started in R and installing the package, see (\ref{sec:appendix1}): Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government. 
+ +A quick workflow for major dataRetrieval functions: + +<<workflow, echo=TRUE,eval=FALSE>>= +library(dataRetrieval) +# Choptank River near Greensboro, MD +siteNumber <- "01491000" +ChoptankInfo <- getNWISSiteInfo(siteNumber) +parameterCd <- "00060" + +#Raw daily data: +rawDailyData <- getNWISdvData(siteNumber,parameterCd, + "1980-01-01","2010-01-01") +# Data compiled for EGRET analysis +Daily <- getNWISDaily(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +# Sample data Nitrate: +parameterCd <- "00618" +Sample <- getNWISSample(siteNumber,parameterCd, + "1980-01-01","2010-01-01") + +# Metadata on site and nitrate: +INFO <- getNWISInfo(siteNumber,parameterCd) + +# Merge discharge and nitrate data to one dataframe: +Sample <- mergeReport() + +@ + + +%------------------------------------------------------------ +\section{USGS Web Retrievals} +\label{sec:genRetrievals} +%------------------------------------------------------------ +In this section, five examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values(\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), and water quality data (\ref{sec:usgsWQP}) or (\ref{sec:usgsSTORET}). We will use the Choptank River near Greensboro, MD as an example. Daily discharge measurements are available as far back as 1948. Additionally, nitrate has been measured since 1964. + +% %------------------------------------------------------------ +% \subsection{Introduction} +% %------------------------------------------------------------ +The USGS organizes hydrologic data in a standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID (referred in this document and throughout the dataRetrieval package as \enquote{siteNumber}). 
Often (but not always), these ID's are 8 digits. The first step to finding data is discovering this siteNumber. There are many ways to do this, one is the National Water Information System: Mapper \url{http://maps.waterdata.usgs.gov/mapper/index.html}. + +Once the siteNumber is known, the next required input for USGS data retrievals is the \enquote{parameter code}. This is a 5-digit code that specifies the measured parameter being requested. For example, parameter code 00631 represents \enquote{Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen}, with units of \enquote{mg/l as N}. A complete list of possible USGS parameter codes can be found at \url{http://nwis.waterdata.usgs.gov/usa/nwis/pmcodes?help}. + +Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table \ref{tab:params}. + + +<<tableParameterCodes, echo=FALSE,results='asis'>>= +pCode <- c('00060', '00065', '00010','00045','00400') +shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") + +data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) + +print(xtable(data.df, + label="tab:params", + caption="Common USGS Parameter Codes"), + caption.placement="top", + size = "\\footnotesize", + latex.environment=NULL, + sanitize.text.function = function(x) {x}, + sanitize.colnames.function = bold.colHeaders, + sanitize.rownames.function = addSpace + ) + +@ + +A complete list (as of September 25, 2013) is available as data attached to the package. It is accessed by the following: + +<<tableParameterCodesDataRetrieval>>= +library(dataRetrieval) +parameterCdFile <- parameterCdFile +names(parameterCdFile) +@ + + +For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and siteNumber is enough to make a request for data. 
For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values. These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code. A complete list of statistic codes can be found here: + +\url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} + +Some common codes are shown in Table \ref{tab:stat}. + +<<tableStatCodes, echo=FALSE,results='asis'>>= +StatCode <- c('00001', '00002', '00003','00008') +shortName <- c("Maximum","Minimum","Mean", "Median") + +data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) + +print(xtable(data.df,label="tab:stat", + caption="Commonly used USGS Stat Codes"), + caption.placement="top", + size = "\\footnotesize", + latex.environment=NULL, + sanitize.colnames.function = bold.colHeaders, + sanitize.rownames.function = addSpace + ) + +@ + +Examples for using these siteNumber's, parameter codes, and stat codes will be presented in subsequent sections. + +\FloatBarrier + +%------------------------------------------------------------ +\subsection{Site Information} +\label{sec:usgsSite} +%------------------------------------------------------------ + +%------------------------------------------------------------ +\subsubsection{getNWISSiteInfo} +\label{sec:usgsSiteFileData} +%------------------------------------------------------------ +Use the \texttt{getNWISSiteInfo} function to obtain all of the information available for a particular USGS site such as full station name, drainage area, latitude, and longitude. \texttt{getNWISSiteInfo} can also access information about multiple sites with a vector input. 
+ + +<<getSite, echo=TRUE>>= +siteNumbers <- c("01491000","01645000") +siteINFO <- getNWISSiteInfo(siteNumbers) +@ + +A specific example piece of information can be retrieved, in this case a station name, as follows: + +<<siteNames2, echo=TRUE>>= +siteINFO$station.nm +@ +Site information is obtained from \url{http://waterservices.usgs.gov/rest/Site-Test-Tool.html} +\FloatBarrier + +%------------------------------------------------------------ +\subsubsection{getNWISDataAvailability} +\label{sec:usgsDataAvailability} +%------------------------------------------------------------ +To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{getNWISDataAvailability} function. It is possible to limit the retrieval information to a subset of types (\texttt{"}dv\texttt{"}, \texttt{"}uv\texttt{"}, or \texttt{"}qw\texttt{"}). In the following example, we limit the retrieved Choptank data to only daily data. Leaving the \texttt{"}type\texttt{"} argument blank returns all of the available data for that site. + + +<<getSiteExtended, echo=TRUE>>= +# Continuing from the previous example: +# This pulls out just the daily data: + +dailyDataAvailable <- getNWISDataAvailability(siteNumbers, + type="dv") + +@ + +<<tablegda, echo=FALSE,results='asis'>>= +# Select and rename the columns shown in the availability table; +# dates and counts are converted to character before printing +tableData <- with(dailyDataAvailable, + data.frame( + siteNumber= site_no, + srsname=srsname, + startDate=as.character(startDate), + endDate=as.character(endDate), + count=as.character(count), + units=parameter_units, + statCd = statCd, + stringsAsFactors=FALSE) + ) + +# Swap plain-text units for LaTeX markup (cell text is passed through unsanitized below) +tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s" +tableData$units[which(tableData$units == "uS/cm @25C")] <- "$\\mu$S/cm @25C" + + +print(xtable(tableData,label="tab:gda", + caption="Daily mean data available at the Choptank River near Greensboro, MD. 
[Some columns deleted for space considerations]"), + caption.placement="top", + size = "\\footnotesize", + latex.environment=NULL, + sanitize.text.function = function(x) {x}, + sanitize.colnames.function = bold.colHeaders, + sanitize.rownames.function = addSpace + ) + +@ + + + +See Section \ref{app:createWordTable} for instructions on converting an R dataframe to a table in Microsoft\textregistered\ software Excel or Word to display a data availability table similar to Table \ref{tab:gda}. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries. + +\FloatBarrier + +%------------------------------------------------------------ +\subsection{Parameter Information} +\label{sec:usgsParams} +%------------------------------------------------------------ +To obtain all of the available information concerning a measured parameter (or multiple parameters), use the \texttt{getNWISPcodeInfo} function: + +<<label=getPCodeInfo, echo=TRUE>>= +# Using defaults: +parameterCd <- "00618" +parameterINFO <- getNWISPcodeInfo(parameterCd) +colnames(parameterINFO) +@ + +A specific example piece of information, in this case parameter name, can be obtained as follows: + +<<siteNames, echo=TRUE>>= +parameterINFO$parameter_nm +@ +Parameter information can be obtained from \url{http://nwis.waterdata.usgs.gov/usa/nwis/pmcodes} +\FloatBarrier +%------------------------------------------------------------ +\subsection{Daily Values} +\label{sec:usgsDaily} +%------------------------------------------------------------ +To obtain daily records of USGS data, use the \texttt{getNWISdvData} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). 
If you want to use the default values, you do not need to list them in the function call. By setting the \texttt{"}interactive\texttt{"} option to FALSE, the operation of the function will advance automatically. It might make more sense to run large batch collections with the interactive option set to FALSE. + +The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. + +<<label=getNWISDaily, echo=TRUE, eval=TRUE>>= + +# Continuing with our Choptank River example +siteNumber <- "01491000" +parameterCd <- "00060" # Discharge +startDate <- "" # Will request earliest date +endDate <- "" # Will request latest date + +discharge <- getNWISdvData(siteNumber, + parameterCd, startDate, endDate) +names(discharge) +@ + +The column \texttt{"}datetime\texttt{"} in the returned dataframe is automatically imported as a variable of class \texttt{"}Date\texttt{"} in R. Each requested parameter has a value and remark code column. The names of these columns depend on the requested parameter and stat code combinations. USGS remark codes are often \texttt{"}A\texttt{"} (approved for publication) or \texttt{"}P\texttt{"} (provisional data subject to revision). 
A more complete list of remark codes can be found here: +\url{http://nwis.waterdata.usgs.gov/usa/nwis/pmcodes} + +Another example that doesn't use the defaults would be a request for mean and maximum daily temperature and discharge in early 2012: +<<label=getNWIStemperature, echo=TRUE>>= + +parameterCd <- c("00010","00060") # Temperature and discharge +statCd <- c("00001","00003") # Maximum (00001) and mean (00003) +startDate <- "2012-01-01" +endDate <- "2012-05-01" + +temperatureAndFlow <- getNWISdvData(siteNumber, parameterCd, + startDate, endDate, statCd=statCd) + +@ + +Daily data is pulled from \url{http://waterservices.usgs.gov/rest/DV-Test-Tool.html}. + +The column names can be automatically adjusted based on the parameter and statistic codes using the \texttt{renameColumns} function. This is not necessary, but may be useful when analyzing the data. + +<<label=renameColumns, echo=TRUE>>= +names(temperatureAndFlow) + +temperatureAndFlow <- renameColumns(temperatureAndFlow) +names(temperatureAndFlow) +@ + +An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}): + +<<getNWIStemperaturePlot, echo=TRUE, fig.cap="Temperature and discharge plot of Choptank River in 2012.",out.width='1\\linewidth',out.height='1\\linewidth',fig.show='hold'>>= +par(mar=c(5,5,5,5)) # sets the plot margins (bottom, left, top, right), in lines of text + +# First series: maximum temperature against date, using the left axis +with(temperatureAndFlow, plot( + datetime, Temperature_water_degrees_Celsius_Max_01, + xlab="Date",ylab="Max Temperature [C]" + )) +# Draw the next plot on top of the existing one instead of starting a new figure +par(new=TRUE) +# Second series: discharge as a red line, with its own axes suppressed +with(temperatureAndFlow, plot( + datetime, Discharge_cubic_feet_per_second, + col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE + )) +# Right-hand axis and label for the discharge series +axis(4,col="red",col.axis="red") +mtext(expression(paste("Mean Discharge [ft"^"3","/s]", + sep="")),side=4,line=3,col="red") +title(paste(siteINFO$station.nm[1],"2012",sep=" ")) +legend("topleft", c("Max Temperature", "Mean Discharge"), + col=c("black","red"),lty=c(NA,1),pch=c(1,NA)) +@ + + +There are occasions where NWIS values are not reported as numbers, instead there 
might be text describing a certain event such as \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns). + +\FloatBarrier + +%------------------------------------------------------------ +\subsection{Unit Values} +\label{sec:usgsRT} +%------------------------------------------------------------ +Any data collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values.} Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{getNWISunitData}. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. + +<<label=getNWISUnit, echo=TRUE>>= + +parameterCd <- "00060" # Discharge +startDate <- "2012-05-12" +endDate <- "2012-05-13" +dischargeToday <- getNWISunitData(siteNumber, parameterCd, + startDate, endDate) +@ + +The retrieval produces the following dataframe: + +<<dischargeData, echo=FALSE>>= +head(dischargeToday) +@ + +Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data are retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice.} Any value that cannot be converted to a number will be reported as NA in this package. 
+ +\newpage + + +\FloatBarrier + +%------------------------------------------------------------ +\subsection{Water Quality Values} +\label{sec:usgsWQP} +%------------------------------------------------------------ +To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{getNWISqwData}, with the input arguments: siteNumber, parameterCd, startDate, endDate, and interactive (similar to \texttt{getNWISunitData} and \texttt{getNWISdvData}). Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level. + + +<<label=getQW, echo=TRUE>>= + +# Dissolved Nitrate parameter codes: +parameterCd <- c("00618","71851") +startDate <- "1985-10-01" +endDate <- "2012-09-30" + +dissolvedNitrate <- getNWISqwData(siteNumber, parameterCd, + startDate, endDate, expanded=TRUE) +names(dissolvedNitrate) + + +@ + + +<<getQWtemperaturePlot, echo=TRUE, fig.cap=paste(parameterINFO$parameter_nm, "at", siteINFO$station.nm[1])>>= +with(dissolvedNitrate, plot( + startDateTime, result_va_00618, + xlab="Date",ylab = paste(parameterINFO$srsname, + "[",parameterINFO$parameter_units,"]") + )) +title(siteINFO$station.nm[1]) +@ + +\FloatBarrier + +%------------------------------------------------------------ +\subsection{URL Construction} +\label{sec:usgsURL} +%------------------------------------------------------------ +There may be times when you might be interested in seeing the URL (Web address) that was used to obtain the raw data. The \texttt{constructNWISURL} function returns the URL. 
In addition to input variables that have been described, there is a new argument \texttt{"}service\texttt{"}. The service argument can be \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (NWIS water quality values), or \texttt{"}wqp\texttt{"} (general Water Quality Portal values). + + +<<label=geturl, echo=TRUE, eval=FALSE>>= +# Dissolved Nitrate parameter codes: +pCode <- c("00618","71851") +startDate <- "1964-06-11" +endDate <- "2012-12-18" +url_qw <- constructNWISURL(siteNumber,pCode,startDate,endDate,'qw') +url_dv <- constructNWISURL(siteNumber,"00060",startDate,endDate, + 'dv',statCd="00003") +url_uv <- constructNWISURL(siteNumber,"00060",startDate,endDate,'uv') +@ + + + +%------------------------------------------------------------ +\section{Water Quality Portal Web Retrievals} +\label{sec:usgsSTORET} +%------------------------------------------------------------ +There are additional water quality data sets available from the Water Quality Data Portal (\url{http://www.waterqualitydata.us/}). These data sets can be housed in either the STORET database (data from EPA), NWIS database (data from USGS), STEWARDS database (data from USDA), and additional databases are slated to be included. Because only USGS uses parameter codes, a \texttt{"}characteristic name\texttt{"} must be supplied. The \texttt{getWQPqwData} function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. + + +<<label=getQWData, echo=TRUE, eval=FALSE>>= +specificCond <- getWQPqwData('WIDNR_WQX-10032762', + 'Specific conductance','2011-05-01','2011-09-30') +@ + +Guidance for finding characteristic names can be found at: \url{http://www.waterqualitydata.us/webservices_documentation.jsp}. 
+ +\FloatBarrier + +%------------------------------------------------------------ +\section{Generalized Retrievals} +\label{sec:general} +%------------------------------------------------------------ +The previous examples all took specific input arguments: siteNumber, parameterCd (or characteristic name), startDate, endDate, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. + +%------------------------------------------------------------ +\subsubsection{NWIS sites} +\label{sec:NWISGenSite} +%------------------------------------------------------------ +The function \texttt{getNWISSites} can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the \texttt{"..."} argument, which allows the user to use any arbitrary input argument. We can then use the service here: + +\url{http://waterservices.usgs.gov/rest/Site-Test-Tool.html} + +to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites on tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). 
The generated URL is: + +\url{http://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5&parameterCd=00010,00060&hasDataTypeCd=dv} + +The following dataRetrieval code can be used to get those sites: + +<<siteSearch>>= +sites <- getNWISSites(bBox="-83.0,36.5,-81.0,38.5", + parameterCd="00010,00060", + hasDataTypeCd="dv") + +names(sites) +nrow(sites) +@ + + +%------------------------------------------------------------ +\subsubsection{NWIS data} +\label{sec:NWISGenData} +%------------------------------------------------------------ +For NWIS data, the function \texttt{getNWISData} can be used. The arguments listed in the R help file are \texttt{"..."} and \texttt{"}service\texttt{"} (only for data requests). Table \ref{tab:NWISGeneral} describes the services that are available. + +\begin{table}[!ht] +\begin{minipage}{\linewidth} +{\footnotesize +\caption{NWIS general data calls} +\label{tab:NWISGeneral} +\begin{tabular}{lll} + \hline +\multicolumn{1}{c}{\textbf{\textsf{Description}}} & +\multicolumn{1}{c}{\textbf{\textsf{Service}}} & +\multicolumn{1}{c}{\textbf{\textsf{Reference URL}}} \\ [0pt] + \hline + daily values & dv & \url{http://waterservices.usgs.gov/rest/DV-Test-Tool.html}\\ + [5pt]instantaneous & iv & \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}\\ + [5pt]groundwater levels & gwlevels & \url{http://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html}\\ + [5pt]water quality & qwdata & \url{http://nwis.waterdata.usgs.gov/nwis/qwdata}\\ + \hline +\end{tabular} +} +\end{minipage} +\end{table} + +The \texttt{"..."} argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area of at least 50 mi$^2$, and the most recent daily discharge data. 
That request would be done as follows: + +<<dataExample>>= +dischargeWI <- getNWISData(stateCd="WI", + parameterCd="00060", + drainAreaMin="50", + statCd="00003") +names(dischargeWI) +nrow(dischargeWI) +@ + +%------------------------------------------------------------ +\subsubsection{Water Quality Portal sites} +\label{sec:WQPGenSite} +%------------------------------------------------------------ + +Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches are found here: + +\url{http://www.waterqualitydata.us/webservices_documentation.jsp} + +To discover available sites in the WQP in New Jersey that have measured Chloride, use the function \texttt{getWQPSites}. + +<<NJChloride, eval=FALSE>>= + +sitesNJ <- getWQPSites(statecode="US:34", + characteristicName="Chloride") + +@ + + +%------------------------------------------------------------ +\subsubsection{Water Quality Portal data} +\label{sec:WQPGenData} +%------------------------------------------------------------ +Finally, to get data from the WQP using generalized Web service calls, use the function \texttt{getWQPData}. For example, to get all the pH data in Wisconsin: + +<<phData, eval=FALSE>>= + +dataPH <- getWQPData(statecode="US:55", + characteristicName="pH") + +@ + + + +\FloatBarrier + +\clearpage + + +%------------------------------------------------------------ +\section{Getting Started in R} +\label{sec:appendix1} +%------------------------------------------------------------ +This section describes the options for downloading and installing the dataRetrieval package. + +%------------------------------------------------------------ +\subsection{New to R?} +%------------------------------------------------------------ +If you are new to R, you will need to first install the latest version of R, which can be found here: \url{http://www.r-project.org/}. 
+ +At any time, you can get information about any function in R by typing a question mark before the function's name. This will open a file (in RStudio, in the Help window) that describes the function, the required arguments, and provides working examples. + +<<helpFunc,eval = FALSE>>= +?getNWISPcodeInfo +@ + +This will open a help file similar to Figure \ref{fig:help}. + +\FloatBarrier + +To see the raw code for a particular function, type the name of the function, without parentheses: +<<rawFunc,eval = TRUE>>= +getNWISPcodeInfo +@ + + +\begin{figure}[ht!] +\centering + \resizebox{0.95\textwidth}{!}{\includegraphics{Rhelp.png}} +\caption{A simple R help file} +\label{fig:help} +\end{figure} + +Additionally, many R packages have vignette files attached (such as this paper). To view the vignette: +<<seeVignette,eval = FALSE>>= +# The vignette topic must be given as a quoted character string +vignette("dataRetrieval") +@ + +\FloatBarrier +\clearpage +%------------------------------------------------------------ +\subsection{R User: Installing dataRetrieval} +%------------------------------------------------------------ +The following command installs dataRetrieval and subsequent required packages: + +<<installFromCran,eval = FALSE>>= +install.packages("dataRetrieval", +repos=c("http://usgs-r.github.com","http://cran.us.r-project.org"), +dependencies=TRUE, +type="both") +@ + +After installing the package, you need to open the library each time you re-start R. This is done with the simple command: +<<openLibraryTest, eval=FALSE>>= +library(dataRetrieval) +@ + + +%------------------------------------------------------------ +\section{Creating tables in Microsoft\textregistered\ software from R} +\label{app:createWordTable} +%------------------------------------------------------------ +There are a few steps that are required in order to create a table in Microsoft\textregistered\ software (Excel, Word, PowerPoint, etc.) from an R dataframe. There are certainly a variety of good methods, one of which is detailed here. 
The example we will step through here will be to create a table in Microsoft Excel based on the dataframe tableData: + +<<label=getSiteApp, echo=TRUE>>= +availableData <- getNWISDataAvailability(siteNumber) +dailyData <- availableData["dv" == availableData$service,] +dailyData <- dailyData["00003" == dailyData$statCd,] + +tableData <- with(dailyData, + data.frame( + shortName=srsname, + Start=startDate, + End=endDate, + Count=count, + Units=parameter_units) + ) +tableData +@ + +First, save the dataframe as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements): + + +<<label=saveData, echo=TRUE, eval=FALSE>>= +write.table(tableData, file="tableData.tsv",sep="\t", + row.names = FALSE,quote=FALSE) +@ + +This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following: + +\begin{verbatim} +shortName Start End Count Units +Temperature, water 2010-10-01 2012-06-24 575 deg C +Stream flow, mean. daily 1948-01-01 2013-03-13 23814 ft3/s +Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C +Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +\end{verbatim} + +Next, follow the steps below to open this file in Excel: +\begin{enumerate} +\item Open Excel +\item Click on the File tab +\item Click on the Open option +\item Navigate to the working directory (as shown in the results of \texttt{getwd()}) +\item Next to the File name text box, change the dropdown type to All Files (*.*) +\item Double click tableData.tsv +\item A text import wizard will open up. In the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. +\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finish. 
+\item Use the many formatting tools within Excel to customize the table +\end{enumerate} + +From Excel, it is simple to copy and paste the tables into other Microsoft\textregistered\ software. An example using one of the default Excel table formats is here. + +\begin{figure}[ht!] +\centering + \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} +\caption{A simple table produced in Microsoft\textregistered\ Excel. Additional formatting will be required, for example, converting u to $\mu$.} +\label{overflow} +\end{figure} + +\clearpage + +%------------------------------------- +\section{Disclaimer} +%------------------------------------ +This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information. + + +\end{document} diff --git a/vignettes/functionOrg.png b/vignettes/functionOrg.png new file mode 100644 index 0000000000000000000000000000000000000000..045b7f4aa831ef524dc8c0e600f1cd0c258e4b9c Binary files /dev/null and b/vignettes/functionOrg.png differ diff --git a/vignettes/table1.png b/vignettes/table1.png new file mode 100644 index 0000000000000000000000000000000000000000..7749eaea99398767a012bccae4508e94e49362e7 Binary files /dev/null and b/vignettes/table1.png differ