dataRetrieval.Rnw 41.2 KB
Newer Older
Laura A DeCicco's avatar
Laura A DeCicco committed
1
%\VignetteIndexEntry{Introduction to the dataRetrieval package}
Laura A DeCicco's avatar
Laura A DeCicco committed
2
3
%\VignetteEngine{knitr::knitr}
%\VignetteDepends{}
4
%\VignetteSuggests{xtable, testthat}
5
%\VignetteImports{XML, httr, reshape2,lubridate,utils,stats}
Laura A DeCicco's avatar
Laura A DeCicco committed
6
%\VignettePackage{dataRetrieval}
Laura A DeCicco's avatar
Laura A DeCicco committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

\documentclass[a4paper,11pt]{article}

\usepackage{amsmath}
\usepackage{times}
\usepackage{hyperref}
\usepackage[numbers, round]{natbib}
\usepackage[american]{babel}
\usepackage{authblk}
\usepackage{subfig}
\usepackage{placeins}
\usepackage{footnote}
\usepackage{tabularx}
\usepackage{threeparttable}
\usepackage{parskip}

\usepackage{csquotes}
\usepackage{setspace}

% \doublespacing

\renewcommand{\topfraction}{0.85}
\renewcommand{\textfraction}{0.1}
\usepackage{graphicx}


\usepackage{mathptmx}% Times Roman font
\usepackage[scaled=.90]{helvet}% Helvetica, served as a model for arial

% \usepackage{indentfirst}
% \setlength\parindent{20pt}
\setlength{\parskip}{0pt}

\usepackage{courier}

\usepackage{titlesec}
\usepackage{titletoc}

\titleformat{\section}
  {\normalfont\sffamily\bfseries\LARGE}
  {\thesection}{0.5em}{}
\titleformat{\subsection}
  {\normalfont\sffamily\bfseries\Large}
  {\thesubsection}{0.5em}{}
\titleformat{\subsubsection}
  {\normalfont\sffamily\large}
  {\thesubsubsection}{0.5em}{}
  
% Table-of-contents, list-of-tables and list-of-figures entry layouts (titletoc).
% Argument order of each \titlecontents below:
%   {entry type}[left margin]{code applied to the whole entry}
%   {format used when the entry has a number}{format used when it has none}
%   {filler and page number}
% Every entry ends with a dotted leader (\titlerule*) followed by the page number.
\titlecontents{section}
[2em]                 % left margin of the entry
{\sffamily}             % font for the whole entry
{\contentslabel{2.3em}} % numbered entry: section number in a 2.3em-wide label box
{\hspace*{-2.3em}} % unnumbered entry: shift left by the label width
{\titlerule*[0.25pc]{.}\contentspage}
  
\titlecontents{subsection}
[4.6em]                 % left margin of the entry
{\sffamily}             % font for the whole entry
{\contentslabel{2.3em}} % numbered entry: subsection number in a 2.3em-wide label box
{\hspace*{-2.3em}} % unnumbered entry: shift left by the label width
{\titlerule*[0.25pc]{.}\contentspage}
  
\titlecontents{subsubsection}
[6.9em]                 % left margin of the entry
{\sffamily}             % font for the whole entry
{\contentslabel{2.3em}} % numbered entry: subsubsection number in a 2.3em-wide label box
{\hspace*{-2.3em}} % unnumbered entry: shift left by the label width
{\titlerule*[0.25pc]{.}\contentspage}

\titlecontents{table}
[0em]                 % no left margin for list-of-tables entries
{\sffamily}             % font for the whole entry
{Table\hspace*{2em} \contentslabel {2em}} % literal "Table", then the table number in a 2em-wide label box
{\hspace*{4em}} % unnumbered entry: indent by 4em instead
{\titlerule*[0.25pc]{.}\contentspage}

\titlecontents{figure}
[0em]                 % no left margin for list-of-figures entries
{\sffamily}             % font for the whole entry
{Figure\hspace*{2em} \contentslabel {2em}} % literal "Figure", then the figure number in a 2em-wide label box
{\hspace*{4em}} % unnumbered entry: indent by 4em instead
{\titlerule*[0.25pc]{.}\contentspage}

% Italicize and change the font of URLs:
\urlstyle{sf}
\renewcommand\UrlFont\itshape

\usepackage{caption}
\captionsetup{
  font={sf},
  labelfont={bf,sf},
  labelsep=period,
  justification=justified,
  singlelinecheck=false
}



\textwidth=6.2in
\textheight=8.5in
\parskip=.3cm
\oddsidemargin=.1in
\evensidemargin=.1in
\headheight=-.3in


%------------------------------------------------------------
% newcommand
%------------------------------------------------------------
% Shorthands for the TeX math style switches.
\newcommand{\scscst}{\scriptscriptstyle}
\newcommand{\scst}{\scriptstyle}
% Semantic markup for R entities: objects, functions, expressions, methods and
% function arguments are set in typewriter type; classes and packages in italics.
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\newcommand{\Rclass}[1]{\textit{#1}}
\newcommand{\Rpackage}[1]{\textit{#1}}
\newcommand{\Rexpression}[1]{\texttt{#1}}
\newcommand{\Rmethod}[1]{{\texttt{#1}}}
\newcommand{\Rfunarg}[1]{{\texttt{#1}}}

\begin{document}

<<openLibrary, echo=FALSE>>=
# xtable is attached first; it turns data frames into LaTeX tables.
library(xtable)
# Use a single blank as the continuation prompt and wrap console output at
# 60 characters.
options(continue = " ", width = 60)
# knitr is attached so that opts_chunk, knit_hooks and hook_pdfcrop can be
# called by name in the following setup chunk.
library(knitr)

@

\renewenvironment{knitrout}{\begin{singlespace}}{\end{singlespace}}
\renewcommand*\listfigurename{Figures}

\renewcommand*\listtablename{Tables}


%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
143
\title{The dataRetrieval R package}
Laura A DeCicco's avatar
Laura A DeCicco committed
144
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
145
146
\author[1]{Laura A. De Cicco}
\author[1]{Robert M. Hirsch}
Laura A DeCicco's avatar
Laura A DeCicco committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
\affil[1]{United States Geological Survey}


<<include=TRUE ,echo=FALSE,eval=TRUE>>=
# Default chunk options for the whole vignette. `tidy` used to be supplied
# twice (TRUE, then FALSE); it is now given once, with the later value.
opts_chunk$set(highlight=TRUE, keep.space=TRUE, keep.blank.space=FALSE,
               keep.comment=TRUE, tidy=FALSE, comment="")
# Inline (\Sexpr) output: numbers are rounded to 3 decimal places, anything
# else is passed through unchanged. Without the `else` branch the hook
# returned NULL for every non-numeric inline result.
knit_hooks$set(inline = function(x) {
   if (is.numeric(x)) round(x, 3) else x
})
# Make knitr's PDF-cropping hook available through the chunk option `crop`.
knit_hooks$set(crop = hook_pdfcrop)

# Format column names as bold, sans-serif, centred LaTeX header cells; passed
# to print.xtable as `sanitize.colnames.function`.
#   x: character vector of column names.
# Returns a character vector the same length as x, each element wrapped in
# \multicolumn{1}{c}{\textbf{\textsf{...}}}.
bold.colHeaders <- function(x) {
  # A caret followed by a digit becomes a math-mode superscript ("^3" -> "$^3$").
  x <- gsub("\\^(\\d)","$\\^\\1$",x)
  # Escape percent signs so LaTeX does not read them as comment starts.
  x <- gsub("\\%","\\\\%",x)
  # Underscores are replaced by spaces.
  x <- gsub("\\_"," ",x)
  # Return the value explicitly and visibly (previously it was only the value
  # of an assignment to an otherwise unused local).
  paste("\\multicolumn{1}{c}{\\textbf{\\textsf{", x, "}}}", sep = "")
}
# Row-name sanitizer handed to print.xtable: the label "1" becomes an empty
# string and every other label becomes "[5pt]" (NA stays NA). In the printed
# tables the "[5pt]" lands directly after the previous row's "\\".
addSpace <- function(x) {
  ifelse(x == "1", "", "[5pt]")
}
Laura A DeCicco's avatar
Laura A DeCicco committed
163
library(dataRetrieval)
Laura A DeCicco's avatar
Laura A DeCicco committed
164
165
@

Laura A DeCicco's avatar
Laura A DeCicco committed
166
\noindent{\huge\textsf{\textbf{The dataRetrieval R package}}}
Laura A DeCicco's avatar
Laura A DeCicco committed
167

Laura A DeCicco's avatar
Laura A DeCicco committed
168
\noindent\textsf{By Laura A. De Cicco and Robert M. Hirsch}
Laura A DeCicco's avatar
Laura A DeCicco committed
169
170
171
172
173
174
175
176
177
178
179
180
181
182

\noindent\textsf{\today}

% \maketitle
% 
% \newpage 

\tableofcontents
\listoffigures
\listoftables

\newpage

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
183
\section{Introduction to dataRetrieval}
Laura A DeCicco's avatar
Laura A DeCicco committed
184
%------------------------------------------------------------ 
Laura A DeCicco's avatar
Laura A DeCicco committed
185
The dataRetrieval package was created to simplify the process of loading hydrologic data into the R environment. It is designed to retrieve the major data types of U.S. Geological Survey (USGS) hydrologic data that are available on the Web, as well as data from the Water Quality Portal (WQP), which currently houses water quality data from the Environmental Protection Agency (EPA), U.S. Department of Agriculture (USDA), and USGS. Direct USGS data is obtained from a service called the National Water Information System (NWIS). 
Laura A DeCicco's avatar
Laura A DeCicco committed
186
187
188

For information on getting started in R and installing the package, see (\ref{sec:appendix1}): Getting Started. Any use of trade, firm, or product names is for descriptive purposes only and does not imply endorsement by the U.S. Government.

Laura A DeCicco's avatar
Laura A DeCicco committed
189
A quick workflow for USGS dataRetrieval functions:
Laura A DeCicco's avatar
Laura A DeCicco committed
190
191

<<workflow, echo=TRUE,eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
192
library(dataRetrieval)
Laura A DeCicco's avatar
Laura A DeCicco committed
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# Choptank River near Greensboro, MD
siteNumber <- "01491000" 
ChoptankInfo <- readNWISsite(siteNumber)
parameterCd <- "00060"

#Raw daily data:
rawDailyData <- readNWISdv(siteNumber,parameterCd,
                      "1980-01-01","2010-01-01")

# Sample data Nitrate:
parameterCd <- "00618"
qwData <- readNWISqw(siteNumber,parameterCd,
                      "1980-01-01","2010-01-01")

207
208
pCode <- readNWISpCode(parameterCd)

Laura A DeCicco's avatar
Laura A DeCicco committed
209
210
@

Laura A DeCicco's avatar
Laura A DeCicco committed
211
USGS data are made available through the National Water Information System (NWIS).
212

Laura A DeCicco's avatar
Laura A DeCicco committed
213
Table \ref{tab:func} describes the functions available in the dataRetrieval package.
214
215
216
217

\begin{table}[!ht]
\begin{minipage}{\linewidth}
{\footnotesize
Laura A DeCicco's avatar
Laura A DeCicco committed
218
\caption{dataRetrieval functions} 
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
\label{tab:func}
\begin{tabular}{lll}
  \hline
\multicolumn{1}{c}{\textbf{\textsf{Function Name}}} &
\multicolumn{1}{c}{\textbf{\textsf{Arguments}}}  &
\multicolumn{1}{c}{\textbf{\textsf{Description}}} \\  [0pt]
  \hline
  \texttt{readNWISdata} &  \texttt{...} & NWIS data using user-specified queries\\
   & service & \\
  [5pt]\texttt{readNWISdv} & siteNumber & NWIS daily data\\
  & parameterCd & \\
  & startDate & \\
  & endDate & \\
  & statCd & \\
  [5pt]\texttt{readNWISqw} & siteNumber & NWIS water quality data\\
    & parameterCd & \\
  & startDate & \\
  & endDate & \\
  & expanded & \\
David Watkins's avatar
David Watkins committed
238
  [5pt]\texttt{readNWISuv} & siteNumber & NWIS instantaneous value data\\
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
  & parameterCd & \\
  & startDate & \\
  & endDate & \\
  [5pt]\texttt{readNWISrating} & siteNumber & NWIS rating table for active streamgage \\
  & type & \\
  [5pt]\texttt{readNWISmeas} & siteNumber & NWIS surface-water measurements \\
  & startDate & \\
  & endDate & \\
  [5pt]\texttt{readNWISpeak} & siteNumber & NWIS peak flow data \\
  & startDate & \\
  & endDate & \\
  [5pt]\texttt{readNWISgwl} & siteNumber & NWIS groundwater level measurements \\
  & startDate & \\
  & endDate & \\  
  [5pt]\texttt{readNWISpCode} & parameterCd & NWIS parameter code information\\
  [5pt]\texttt{readNWISsite} & siteNumber & NWIS site information \\
  [5pt]\texttt{whatNWISsites} & \texttt{...} & NWIS site search using user-specified queries \\
  [5pt]\texttt{whatNWISdata} & siteNumber & NWIS data availability, including period of record and count \\ 
   & service & \\
   [5pt]\texttt{readWQPdata} & \texttt{...} & WQP data using user-specified queries \\
   [5pt]\texttt{readWQPqw} & siteNumber & WQP data \\
     & parameterCd (or characteristic name) & \\
  & startDate & \\
  & endDate & \\
  [5pt]\texttt{whatWQPsites} & \texttt{...} & WQP site search using user-specified queries \\  
   \hline
\end{tabular}
}
\end{minipage}
\end{table}

\clearpage
Laura A DeCicco's avatar
Laura A DeCicco committed
271
272
273
274
275

%------------------------------------------------------------
\section{USGS Web Retrievals}
\label{sec:genRetrievals}
%------------------------------------------------------------ 
Laura A DeCicco's avatar
Laura A DeCicco committed
276
In this section, examples of Web retrievals document how to get raw data. This data includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values (\ref{sec:usgsDaily}), unit values (which include real-time data but can also include other sensor data stored at regular time intervals) (\ref{sec:usgsRT}), water quality data (\ref{sec:usgsWQP}), groundwater level data (\ref{sec:gwl}), peak flow data (\ref{sec:peak}), rating curve data (\ref{sec:rating}), and surface-water measurement data (\ref{sec:meas}). Section~\ref{sec:metadata} shows instructions for getting metadata that is attached to each returned data frame.
Laura A DeCicco's avatar
Laura A DeCicco committed
277

278
The USGS organizes hydrologic data in a standard structure.  Streamgages are located throughout the United States, and each streamgage has a unique ID (referred to in this document and throughout the dataRetrieval package as \enquote{siteNumber}).  Often (but not always), these IDs are 8 digits for surface-water sites and 15 digits for groundwater sites.  The first step to finding data is discovering this siteNumber. There are many ways to do this; one is the National Water Information System: Mapper \url{https://maps.waterdata.usgs.gov/mapper/index.html}.
Laura A DeCicco's avatar
Laura A DeCicco committed
279
280

Once the siteNumber is known, the next required input for USGS data retrievals is the \enquote{parameter code}.  This is a 5-digit code that specifies the measured parameter being requested.  For example, parameter code 00631 represents \enquote{Nitrate plus nitrite, water, filtered, milligrams per liter as nitrogen}, with units of \enquote{mg/l as N}. 
Laura A DeCicco's avatar
Laura A DeCicco committed
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302

Not every station will measure all parameters. A short list of commonly measured parameters is shown in Table \ref{tab:params}.

<<tableParameterCodes, echo=FALSE,results='asis'>>=
# Build the "Common USGS Parameter Codes" table (label tab:params) that the
# surrounding text refers to.
# Parameter codes and their short descriptions, matched by position.
pCode <- c('00060', '00065', '00010','00045','00400')
shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH")

data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE)

# Print the table as LaTeX with the caption above it and in \footnotesize.
# The identity sanitize.text.function leaves cell text untouched, so the
# math-mode "$^3$" in shortName reaches LaTeX as written; column headers and
# row names go through the bold.colHeaders and addSpace helpers.
print(xtable(data.df,
       label="tab:params",
       caption="Common USGS Parameter Codes"),
       caption.placement="top",
       size = "\\footnotesize",
       latex.environment=NULL,
       sanitize.text.function = function(x) {x},
       sanitize.colnames.function =  bold.colHeaders,
       sanitize.rownames.function = addSpace
      )

@

Laura A DeCicco's avatar
Laura A DeCicco committed
303
304
305
306
307
308
309
Two output columns that may not be obvious are \enquote{srsname} and \enquote{casrn}. Srsname stands for \enquote{Substance Registry Services}. More information on the srs name can be found here:

\url{http://ofmpub.epa.gov/sor_internet/registry/substreg/home/overview/home.do}

Casrn stands for \enquote{Chemical Abstracts Service (CAS) Registry Number}. More information on CAS can be found here:

\url{http://www.cas.org/content/chemical-substances/faqs}
Laura A DeCicco's avatar
Laura A DeCicco committed
310

Laura A DeCicco's avatar
Laura A DeCicco committed
311
For unit values data (sensor data measured at regular time intervals such as 15 minutes or hourly), knowing the parameter code and siteNumber is enough to make a request for data.  For most variables that are measured on a continuous basis, the USGS also stores the historical data as daily values.  These daily values are statistical summaries of the continuous data, e.g. maximum, minimum, mean, or median. The different statistics are specified by a 5-digit statistics code.  
Laura A DeCicco's avatar
Laura A DeCicco committed
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331

Some common codes are shown in Table \ref{tab:stat}.

<<tableStatCodes, echo=FALSE,results='asis'>>=
# Build the "Commonly used USGS Stat Codes" table (label tab:stat).
# Statistic codes and their names, matched by position.
StatCode <- c('00001', '00002', '00003','00008')
shortName <- c("Maximum","Minimum","Mean", "Median")

data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE)

# Print the table as LaTeX with the caption above it and in \footnotesize.
# No sanitize.text.function is given here, so cell text uses xtable's default
# sanitizer; column headers and row names go through the bold.colHeaders and
# addSpace helpers.
print(xtable(data.df,label="tab:stat",
           caption="Commonly used USGS Stat Codes"),
       caption.placement="top",
       size = "\\footnotesize",
       latex.environment=NULL,
       sanitize.colnames.function = bold.colHeaders,
       sanitize.rownames.function = addSpace
      )

@

Laura A DeCicco's avatar
Laura A DeCicco committed
332
Examples for using these siteNumbers, parameter codes, and stat codes will be presented in subsequent sections.
Laura A DeCicco's avatar
Laura A DeCicco committed
333
334
335
336
337
338
339
340
341
342
343
344

\FloatBarrier

%------------------------------------------------------------
\subsection{Site Information}
\label{sec:usgsSite}
%------------------------------------------------------------

%------------------------------------------------------------
\subsubsection{readNWISsite}
\label{sec:usgsSiteFileData}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
345
Use the \texttt{readNWISsite} function to obtain all of the information available for a particular USGS site (or sites) such as full station name, drainage area, latitude, and longitude. \texttt{readNWISsite} can also access information about multiple sites with a vector input.
Laura A DeCicco's avatar
Laura A DeCicco committed
346
347


Laura A DeCicco's avatar
Laura A DeCicco committed
348
<<getSite, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
349
350
351
352
siteNumbers <- c("01491000","01645000") 
siteINFO <- readNWISsite(siteNumbers)
@

Laura A DeCicco's avatar
Laura A DeCicco committed
353
Site information is obtained from:
354
\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html}
Laura A DeCicco's avatar
Laura A DeCicco committed
355

Laura A DeCicco's avatar
Laura A DeCicco committed
356
Information on the returned data can be found with the \texttt{comment} function as described in section \ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
357

Laura A DeCicco's avatar
Laura A DeCicco committed
358
<<siteNames3, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
359
360
361
362
363
comment(siteINFO)
@



Laura A DeCicco's avatar
Laura A DeCicco committed
364
365
366
\FloatBarrier

%------------------------------------------------------------
367
\subsubsection{whatNWISdata}
Laura A DeCicco's avatar
Laura A DeCicco committed
368
369
\label{sec:usgsDataAvailability}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
370
371
To discover what data is available for a particular USGS site, including measured parameters, period of record, and number of samples (count), use the \texttt{whatNWISdata} function. It is possible to limit the retrieval information to a subset of services. The possible choices for services are: \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"}, \texttt{"}rt\texttt{"}, or \texttt{"}iv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (water-quality), \texttt{"}sv\texttt{"} (site visits), \texttt{"}pk\texttt{"} (peak measurements), \texttt{"}gw\texttt{"} (groundwater levels), \texttt{"}ad\texttt{"} (sites included in USGS Annual Water Data Reports), \texttt{"}aw\texttt{"} (sites monitored by the USGS Active Groundwater Level Network), and \texttt{"}id\texttt{"} (historical instantaneous values).

Laura A DeCicco's avatar
Laura A DeCicco committed
372
In the following example, we limit the retrieved data to only daily data. The default for \texttt{"}service\texttt{"} is \enquote{all}, which returns all of the available data for that site. Likewise, there are arguments for parameter code (\texttt{parameterCd}) and statistic code (\texttt{statCd}) to filter the results. The default for both is to return all possible values (\enquote{all}). The returned \texttt{"}count\_nu\texttt{"} for \texttt{"}uv\texttt{"} data is the count of days with returned data, not the actual count of returned values.
Laura A DeCicco's avatar
Laura A DeCicco committed
373
374


Laura A DeCicco's avatar
Laura A DeCicco committed
375
<<getSiteExtended, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
376
# Continuing from the previous example:
Laura A DeCicco's avatar
Laura A DeCicco committed
377
# This pulls out just the daily, mean data:
Laura A DeCicco's avatar
Laura A DeCicco committed
378

379
dailyDataAvailable <- whatNWISdata(siteNumbers,
Laura A DeCicco's avatar
Laura A DeCicco committed
380
381
                    service="dv", statCd="00003")

Laura A DeCicco's avatar
Laura A DeCicco committed
382
383
384

@

Laura A DeCicco's avatar
Laura A DeCicco committed
385
<<tablegda, echo=FALSE,eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
386
387
388
389
tableData <- with(dailyDataAvailable, 
      data.frame( 
      siteNumber= site_no,
      srsname=srsname, 
Laura A DeCicco's avatar
Laura A DeCicco committed
390
391
392
      startDate=as.character(begin_date), 
      endDate=as.character(end_date), 
      count=as.character(count_nu),
Laura A DeCicco's avatar
Laura A DeCicco committed
393
      units=parameter_units,
Laura A DeCicco's avatar
Laura A DeCicco committed
394
#       statCd = stat_cd,
Laura A DeCicco's avatar
Laura A DeCicco committed
395
396
397
398
399
400
401
402
      stringsAsFactors=FALSE)
      )

tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s"
tableData$units[which(tableData$units == "uS/cm @25C")] <- "$\\mu$S/cm @25C"


print(xtable(tableData,label="tab:gda",
Laura A DeCicco's avatar
Laura A DeCicco committed
403
    caption="Reformatted version of output from \\texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]"),
Laura A DeCicco's avatar
Laura A DeCicco committed
404
405
406
407
408
409
410
411
412
413
       caption.placement="top",
       size = "\\footnotesize",
       latex.environment=NULL,
       sanitize.text.function = function(x) {x},
       sanitize.colnames.function =  bold.colHeaders,
       sanitize.rownames.function = addSpace
      )

@

Laura A DeCicco's avatar
Laura A DeCicco committed
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
\begin{table}[ht]
\caption{Reformatted version of output from \texttt{whatNWISdata} function for the Choptank River near Greensboro, MD, and from Seneca Creek at Dawsonville, MD from the daily values service [Some columns deleted for space considerations]} 
\label{tab:gda}
{\footnotesize
\begin{tabular}{rllllll}
  \hline
 & \multicolumn{1}{c}{\textbf{\textsf{siteNumber}}} & \multicolumn{1}{c}{\textbf{\textsf{srsname}}} & \multicolumn{1}{c}{\textbf{\textsf{startDate}}} & \multicolumn{1}{c}{\textbf{\textsf{endDate}}} & \multicolumn{1}{c}{\textbf{\textsf{count}}} & \multicolumn{1}{c}{\textbf{\textsf{units}}} \\ 
  \hline
 & 01491000 & Temperature, water & 2010-10-01 & 2012-05-09 & 529 & deg C \\ 
  [5pt] & 01645000 & Stream flow, mean. daily & 1930-09-26 & 2015-02-19 & 30828 & ft$^3$/s \\ 
  [5pt] & 01491000 & Stream flow, mean. daily & 1948-01-01 & 2015-02-19 & 24522 & ft$^3$/s \\ 
  [5pt] & 01491000 & Specific conductance & 2010-10-01 & 2012-05-09 & 527 & $\mu$S/cm @25C \\ 
  [5pt] & 01491000 & Suspended sediment concentration (SSC) & 1980-10-01 & 1991-09-30 & 3651 & mg/l \\ 
  [5pt] & 01491000 & Suspended sediment discharge & 1980-10-01 & 1991-09-30 & 3652 & tons/day \\ 
   \hline
\end{tabular}
}
\end{table}
Laura A DeCicco's avatar
Laura A DeCicco committed
432

Laura A DeCicco's avatar
Laura A DeCicco committed
433
See Section \ref{app:createWordTable} for instructions on converting an R data frame to a table in Microsoft\textregistered\ software Excel or Word to display a data availability table similar to Table \ref{tab:gda}. Excel, Microsoft, PowerPoint, Windows, and Word are registered trademarks of Microsoft Corporation in the United States and other countries.
Laura A DeCicco's avatar
Laura A DeCicco committed
434
435
436
437
438
439
440
441
442

\FloatBarrier

%------------------------------------------------------------
\subsection{Parameter Information}
\label{sec:usgsParams}
%------------------------------------------------------------
To obtain all of the available information concerning a measured parameter (or multiple parameters), use the \texttt{readNWISpCode} function:

Laura A DeCicco's avatar
Laura A DeCicco committed
443
<<label=getPCodeInfo, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
444
445
446
447
448
# Using defaults:
parameterCd <- "00618" 
parameterINFO <- readNWISpCode(parameterCd)
@

Laura A DeCicco's avatar
Laura A DeCicco committed
449

Laura A DeCicco's avatar
Laura A DeCicco committed
450
\FloatBarrier
Laura A DeCicco's avatar
Laura A DeCicco committed
451

Laura A DeCicco's avatar
Laura A DeCicco committed
452
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
453
\subsection{Daily Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
454
455
\label{sec:usgsDaily}
%------------------------------------------------------------
456
To obtain daily records of USGS data, use the \texttt{readNWISdv} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, and statCd (defaults to \texttt{"}00003\texttt{"}).  If you want to use the default values, you do not need to list them in the function call. Daily data is pulled from \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}.
Laura A DeCicco's avatar
Laura A DeCicco committed
457
458
459

The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes).  Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date.

Laura A DeCicco's avatar
Laura A DeCicco committed
460
<<label=getNWISDaily, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
461

Laura A DeCicco's avatar
Laura A DeCicco committed
462
# Choptank River near Greensboro, MD:
Laura A DeCicco's avatar
Laura A DeCicco committed
463
464
siteNumber <- "01491000"
parameterCd <- "00060"  # Discharge
Laura A DeCicco's avatar
Laura A DeCicco committed
465
466
startDate <- "2009-10-01"  
endDate <- "2012-09-30" 
Laura A DeCicco's avatar
Laura A DeCicco committed
467
468
469
470
471

discharge <- readNWISdv(siteNumber, 
                    parameterCd, startDate, endDate)
@

Laura A DeCicco's avatar
Laura A DeCicco committed
472
The column \texttt{"}datetime\texttt{"} in the returned data frame is automatically imported as a variable of class \texttt{"}Date\texttt{"} in R. Each requested parameter has a value and remark code column.  The names of these columns depend on the requested parameter and stat code combinations. USGS daily value qualification codes are often \texttt{"}A\texttt{"} (approved for publication) or \texttt{"}P\texttt{"} (provisional data subject to revision). 
Laura A DeCicco's avatar
Laura A DeCicco committed
473

Laura A DeCicco's avatar
Laura A DeCicco committed
474
Another example would be a request for mean and maximum daily temperature and discharge in early 2012:
Laura A DeCicco's avatar
Laura A DeCicco committed
475

Laura A DeCicco's avatar
Laura A DeCicco committed
476
477
<<label=getNWIStemperature, echo=TRUE, eval=FALSE>>=
siteNumber <- "01491000"
Laura A DeCicco's avatar
Laura A DeCicco committed
478
479
480
481
482
483
484
485
486
487
parameterCd <- c("00010","00060")  # Temperature and discharge
statCd <- c("00001","00003")  # Maximum and mean
startDate <- "2012-01-01"
endDate <- "2012-05-01"

temperatureAndFlow <- readNWISdv(siteNumber, parameterCd, 
        startDate, endDate, statCd=statCd)

@

Laura A DeCicco's avatar
Laura A DeCicco committed
488
489
490
491
492
493
494
<<label=getNWIStemperature2, echo=FALSE, eval=TRUE>>=
# Load a saved .RData file from the installed package's "extdata" directory.
# The retrieval chunk above is not evaluated (eval=FALSE); presumably this
# file supplies the `temperatureAndFlow` object used by the following chunks
# -- TODO confirm against the file's contents.
filePath <- system.file("extdata", package="dataRetrieval")
fileName <- "temperatureAndFlow.RData"
fullPath <- file.path(filePath, fileName)
load(fullPath)

@
Laura A DeCicco's avatar
Laura A DeCicco committed
495

Laura A DeCicco's avatar
Laura A DeCicco committed
496
497
The column names can be shortened and simplified using the \texttt{renameNWISColumns} function.  This is not necessary, but may streamline subsequent data analysis and presentation. Site information, daily statistic information, and measured parameter information is attached to the data frame as attributes. This is discussed further in section \ref{sec:metadata}.

Laura A DeCicco's avatar
Laura A DeCicco committed
498
499
500
501

<<label=renameColumns, echo=TRUE>>=
names(temperatureAndFlow)

Laura A DeCicco's avatar
Laura A DeCicco committed
502
temperatureAndFlow <- renameNWISColumns(temperatureAndFlow)
Laura A DeCicco's avatar
Laura A DeCicco committed
503
names(temperatureAndFlow)
Laura A DeCicco's avatar
Laura A DeCicco committed
504
505
506
507
508
509
510
511
512
513
514

@

<<label=attr1, echo=TRUE>>=
#Information about the data frame attributes:
names(attributes(temperatureAndFlow))

statInfo <- attr(temperatureAndFlow, "statisticInfo")
variableInfo <- attr(temperatureAndFlow, "variableInfo")
siteInfo <- attr(temperatureAndFlow, "siteInfo")

Laura A DeCicco's avatar
Laura A DeCicco committed
515
516
@

Laura A DeCicco's avatar
Laura A DeCicco committed
517
518


Laura A DeCicco's avatar
Laura A DeCicco committed
519
520
521
An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}):

<<getNWIStemperaturePlot, echo=TRUE, fig.cap="Temperature and discharge plot of Choptank River in 2012.",out.width='1\\linewidth',out.height='1\\linewidth',fig.show='hold'>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
522
523
524
variableInfo <- attr(temperatureAndFlow, "variableInfo")
siteInfo <- attr(temperatureAndFlow, "siteInfo")

Laura A DeCicco's avatar
Laura A DeCicco committed
525
526
par(mar=c(5,5,5,5)) # sets the plot margins (bottom, left, top, right), in lines of text

Laura A DeCicco's avatar
Laura A DeCicco committed
527
plot(temperatureAndFlow$Date, temperatureAndFlow$Wtemp_Max,
Laura A DeCicco's avatar
Laura A DeCicco committed
528
  ylab=variableInfo$parameter_desc[1],xlab="" )
Laura A DeCicco's avatar
Laura A DeCicco committed
529
par(new=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
530
plot(temperatureAndFlow$Date, temperatureAndFlow$Flow,
Laura A DeCicco's avatar
Laura A DeCicco committed
531
  col="red",type="l",xaxt="n",yaxt="n",xlab="",ylab="",axes=FALSE
Laura A DeCicco's avatar
Laura A DeCicco committed
532
  )
Laura A DeCicco's avatar
Laura A DeCicco committed
533
axis(4,col="red",col.axis="red")
Laura A DeCicco's avatar
Laura A DeCicco committed
534
535
536
mtext(variableInfo$parameter_desc[2],side=4,line=3,col="red")
title(paste(siteInfo$station_nm,"2012"))
legend("topleft", variableInfo$param_units, 
Laura A DeCicco's avatar
Laura A DeCicco committed
537
538
539
540
541
542
543
544
545
       col=c("black","red"),lty=c(NA,1),pch=c(1,NA))
@


There are occasions where NWIS values are not reported as numbers; instead, there might be text describing a certain event, such as \enquote{Ice.}  Any value that cannot be converted to a number will be reported as NA in this package (not including remark code columns).

\FloatBarrier

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
546
\subsection{Unit Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
547
548
\label{sec:usgsRT}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
549
Any data collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values.} Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{readNWISuv}.  Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days.  Here is an example of a retrieval of such data.  
Laura A DeCicco's avatar
Laura A DeCicco committed
550

Laura A DeCicco's avatar
Laura A DeCicco committed
551
<<label=readNWISuv, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
552
553
554
555

parameterCd <- "00060"  # Discharge
startDate <- "2012-05-12" 
endDate <- "2012-05-13" 
Laura A DeCicco's avatar
Laura A DeCicco committed
556
dischargeUnit <- readNWISuv(siteNumber, parameterCd, 
Laura A DeCicco's avatar
Laura A DeCicco committed
557
        startDate, endDate)
Laura A DeCicco's avatar
Laura A DeCicco committed
558
dischargeUnit <- renameNWISColumns(dischargeUnit)
Laura A DeCicco's avatar
Laura A DeCicco committed
559
560
@

Laura A DeCicco's avatar
Laura A DeCicco committed
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
The retrieval produces a data frame that contains 96 rows (one for every 15-minute period in the day).  They include all data collected from the startDate through the endDate (starting and ending with midnight locally-collected time). The dateTime column is converted to \enquote{UTC} (Coordinated Universal Time), so midnight EST will appear 5 hours later in the dateTime column (5:00 am on the same day).

To override the UTC timezone, specify a valid timezone in the tz argument. Default is \texttt{""}, which will keep the dateTime column in UTC. Other valid timezones are:

\begin{verbatim}
America/New_York
America/Chicago
America/Denver
America/Los_Angeles
America/Anchorage
America/Honolulu
America/Jamaica
America/Managua
America/Phoenix
America/Metlakatla
\end{verbatim}
577

578
Data are retrieved from \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers; a common example is the text \enquote{Ice.}  Any value that cannot be converted to a number will be reported as NA in this package. Site information and measured parameter information are attached to the data frame as attributes. This is discussed further in section~\ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
579
580
581
582
583
584
585

\newpage


\FloatBarrier

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
586
\subsection{Water Quality Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
587
588
\label{sec:usgsWQP}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
589
To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{readNWISqw}, with the input arguments: siteNumber, parameterCd, startDate, and endDate. Additionally, the argument \texttt{"}expanded\texttt{"} is a logical input that allows the user to choose between a simple return of datetimes/qualifier/values (expanded=FALSE), or a more complete and verbose output (expanded=TRUE). Expanded = TRUE includes such columns as remark codes, value qualifying text, and detection level for each parameter code. There is also an argument \texttt{"}reshape\texttt{"} that converts the expanded dataset to a \texttt{"}wide\texttt{"} format (each requested parameter code gets individual columns). The defaults are expanded=TRUE, and reshape=FALSE. 
Laura A DeCicco's avatar
Laura A DeCicco committed
590

Laura A DeCicco's avatar
Laura A DeCicco committed
591
<<label=getQW, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
592
593
594
595
596
597
 
# Dissolved Nitrate parameter codes:
parameterCd <- c("00618","71851")
startDate <- "1985-10-01"
endDate <- "2012-09-30"

Laura A DeCicco's avatar
Laura A DeCicco committed
598
dfLong <- readNWISqw(siteNumber, parameterCd, 
Laura A DeCicco's avatar
Laura A DeCicco committed
599
      startDate, endDate)
Laura A DeCicco's avatar
Laura A DeCicco committed
600

Laura A DeCicco's avatar
Laura A DeCicco committed
601
602
# Or the wide return:
# dfWide <- readNWISqw(siteNumber, parameterCd, 
Laura A DeCicco's avatar
Laura A DeCicco committed
603
#       startDate, endDate, reshape=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
604
605
606

@

Laura A DeCicco's avatar
Laura A DeCicco committed
607
Site information and measured parameter information are attached to the data frame as attributes. This is discussed further in section~\ref{sec:metadata}. Additional metadata, such as information about the column names, can be found by using the \texttt{comment} function, also described in section~\ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
608

Laura A DeCicco's avatar
Laura A DeCicco committed
609
<<qwmeta, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
610

Laura A DeCicco's avatar
Laura A DeCicco committed
611
comment(dfLong)
Laura A DeCicco's avatar
Laura A DeCicco committed
612

Laura A DeCicco's avatar
Laura A DeCicco committed
613
614
615
616
@

\FloatBarrier

Laura A DeCicco's avatar
Laura A DeCicco committed
617
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
618
\subsection{Groundwater Level Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
619
620
\label{sec:gwl}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
621
Groundwater level measurements can be obtained with the \texttt{readNWISgwl} function. Information on the returned data can be found with the \texttt{comment} function, and attached attributes as described in section \ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
622

Laura A DeCicco's avatar
Laura A DeCicco committed
623
<<gwlexample, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
624
siteNumber <- "434400121275801"
Laura A DeCicco's avatar
Laura A DeCicco committed
625
groundWater <- readNWISgwl(siteNumber)
Laura A DeCicco's avatar
Laura A DeCicco committed
626
@
Laura A DeCicco's avatar
Laura A DeCicco committed
627
628

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
629
\subsection{Peak Flow Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
630
631
632
\label{sec:peak}
%------------------------------------------------------------

Laura A DeCicco's avatar
Laura A DeCicco committed
633
Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event.  They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the \texttt{readNWISpeak} function. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
634

Laura A DeCicco's avatar
Laura A DeCicco committed
635
<<peakexample, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
636
637
638
639
640
641
siteNumber <- '01594440'
peakData <- readNWISpeak(siteNumber)

@


Laura A DeCicco's avatar
Laura A DeCicco committed
642
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
643
\subsection{Rating Curve Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
644
645
\label{sec:rating}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
646
Rating curves are the calibration curves that are used to convert measurements of stage to discharge.  Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
647

Laura A DeCicco's avatar
Laura A DeCicco committed
648
649
Rating curves can be obtained with the \texttt{readNWISrating} function.

Laura A DeCicco's avatar
Laura A DeCicco committed
650
<<ratingexample, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
651
652
653
654
655
656
ratingData <- readNWISrating(siteNumber, "base")
attr(ratingData, "RATING")

@


Laura A DeCicco's avatar
Laura A DeCicco committed
657
658

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
659
\subsection{Surface-Water Measurement Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
660
661
\label{sec:meas}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
662
These data are the discrete measurements of discharge that are made for the purpose of developing or revising the rating curve.  Information on the returned data can be found with the \texttt{comment} function and attached attributes as described in section \ref{sec:metadata}.
Laura A DeCicco's avatar
Laura A DeCicco committed
663

Laura A DeCicco's avatar
Laura A DeCicco committed
664
665
Surface-water measurement data can be obtained with the \texttt{readNWISmeas} function.

Laura A DeCicco's avatar
Laura A DeCicco committed
666
<<surfexample, echo=TRUE, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
667
668
669
670
671
surfaceData <- readNWISmeas(siteNumber)

@


Laura A DeCicco's avatar
Laura A DeCicco committed
672
673
674
675
%------------------------------------------------------------
\section{Water Quality Portal Web Retrievals}
\label{sec:usgsSTORET}
%------------------------------------------------------------
676
There are additional water quality data sets available from the Water Quality Data Portal (\url{https://www.waterqualitydata.us/}).  These data sets can be housed in the STORET database (data from EPA), the NWIS database (data from USGS), or the STEWARDS database (data from USDA); additional databases are slated to be included in the future.  Because only USGS uses parameter codes, a \texttt{"}characteristic name\texttt{"} must be supplied.  The \texttt{readWQPqw} function can take either a USGS parameter code, or a more general characteristic name in the parameterCd input argument. The Water Quality Data Portal includes data discovery tools and information on characteristic names. The following example retrieves specific conductance from a DNR site in Wisconsin. 
Laura A DeCicco's avatar
Laura A DeCicco committed
677
678
679
680
681
682
683


<<label=getQWData, echo=TRUE, eval=FALSE>>=
specificCond <- readWQPqw('WIDNR_WQX-10032762',
                'Specific conductance','2011-05-01','2011-09-30')
@

Laura A DeCicco's avatar
Laura A DeCicco committed
684
685
A tool for finding NWIS characteristic names can be found at: 

686
\url{https://www.waterqualitydata.us/public_srsnames/}
Laura A DeCicco's avatar
Laura A DeCicco committed
687
688
689
690
691
692
693
694
695
696

\FloatBarrier

%------------------------------------------------------------
\section{Generalized Retrievals}
\label{sec:general}
%------------------------------------------------------------
The previous examples all took specific input arguments: siteNumber, parameterCd (or characteristic name), startDate, endDate, etc. However, the Web services that supply the data can accept a wide variety of additional arguments. 

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
697
\subsubsection{NWIS Sites}
Laura A DeCicco's avatar
Laura A DeCicco committed
698
699
700
701
\label{sec:NWISGenSite}
%------------------------------------------------------------
The function \texttt{whatNWISsites} can be used to discover NWIS sites based on any query that the NWIS Site Service offers. This is done by using the \texttt{"..."} argument, which allows the user to use any arbitrary input argument. We can then use the service here:

702
\url{https://waterservices.usgs.gov/rest/Site-Test-Tool.html}
Laura A DeCicco's avatar
Laura A DeCicco committed
703
704
705

to discover many options for searching for NWIS sites. For example, you may want to search for sites in a lat/lon bounding box, or only sites on tidal streams, or sites with water quality samples, sites above a certain altitude, etc. The results of this site query generate a URL. For example, the tool provided a search within a specified bounding box, for sites that have daily discharge (parameter code = 00060) and temperature (parameter code = 00010). The generated URL is:

706
\url{https://waterservices.usgs.gov/nwis/site/?format=rdb&bBox=-83.0,36.5,-81.0,38.5&parameterCd=00010,00060&hasDataTypeCd=dv}
Laura A DeCicco's avatar
Laura A DeCicco committed
707

Laura A DeCicco's avatar
Laura A DeCicco committed
708
The following dataRetrieval code can be used to get those sites:
Laura A DeCicco's avatar
Laura A DeCicco committed
709

Laura A DeCicco's avatar
Laura A DeCicco committed
710
711
712
<<siteSearch, eval=FALSE>>=
sites <- whatNWISsites(bBox=c(-83.0,36.5,-81.0,38.5), 
                      parameterCd=c("00010","00060"),
Laura A DeCicco's avatar
Laura A DeCicco committed
713
714
715
716
717
                      hasDataTypeCd="dv")
@


%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
718
\subsubsection{NWIS Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
\label{sec:NWISGenData}
%------------------------------------------------------------
For NWIS data, the function \texttt{readNWISdata} can be used. The arguments listed in the R help file are \texttt{"..."} and \texttt{"}service\texttt{"} (only for data requests). Table~\ref{tab:NWISGeneral} describes the services that are available.

\begin{table}[!ht]
\begin{minipage}{\linewidth}
{\footnotesize
\caption{NWIS general data calls} 
\label{tab:NWISGeneral}
\begin{tabular}{lll}
  \hline
\multicolumn{1}{c}{\textbf{\textsf{Description}}} &
\multicolumn{1}{c}{\textbf{\textsf{Service}}}  &
\multicolumn{1}{c}{\textbf{\textsf{Reference URL}}} \\  [0pt]
  \hline
734
735
736
737
  daily values &  dv & \url{https://waterservices.usgs.gov/rest/DV-Test-Tool.html}\\
  [5pt]instantaneous & iv & \url{https://waterservices.usgs.gov/rest/IV-Test-Tool.html}\\
  [5pt]groundwater levels & gwlevels & \url{https://waterservices.usgs.gov/rest/GW-Levels-Test-Tool.html}\\
  [5pt]water quality & qwdata & \url{https://nwis.waterdata.usgs.gov/nwis/qwdata}\\
Laura A DeCicco's avatar
Laura A DeCicco committed
738
739
740
741
742
743
744
745
   \hline
\end{tabular}
}
\end{minipage}
\end{table}

The \texttt{"..."} argument allows the user to create their own queries based on the instructions found in the web links above. The links provide instructions on how to create a URL to request data. Perhaps you want sites only in Wisconsin, with a drainage area of at least 50 mi$^2$, and the most recent daily discharge data. That request would be done as follows:

746
<<dataExample, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
747
748
dischargeWI <- readNWISdata(service="dv",
                           stateCd="WI",
Laura A DeCicco's avatar
Laura A DeCicco committed
749
750
751
                           parameterCd="00060",
                           drainAreaMin="50",
                           statCd="00003")
Laura A DeCicco's avatar
Laura A DeCicco committed
752
753
754

siteInfo <- attr(dischargeWI, "siteInfo")

Laura A DeCicco's avatar
Laura A DeCicco committed
755
756
757
@

%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
758
\subsubsection{WQP Sites}
Laura A DeCicco's avatar
Laura A DeCicco committed
759
760
761
762
763
\label{sec:WQPGenSite}
%------------------------------------------------------------

Just as with NWIS, the Water Quality Portal (WQP) offers a variety of ways to search for sites and request data. The possible Web service arguments for WQP site searches are found here:

764
\url{https://www.waterqualitydata.us/webservices_documentation}
Laura A DeCicco's avatar
Laura A DeCicco committed
765
766
767
768
769
770
771
772
773
774
775
776

To discover available sites in the WQP in New Jersey that have measured Chloride, use the function \texttt{whatWQPsites}.

<<NJChloride, eval=FALSE>>=

sitesNJ <- whatWQPsites(statecode="US:34",
                       characteristicName="Chloride")

@


%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
777
\subsubsection{WQP Data}
Laura A DeCicco's avatar
Laura A DeCicco committed
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
\label{sec:WQPGenData}
%------------------------------------------------------------
Finally, to get data from the WQP using generalized Web service calls, use the function \texttt{readWQPdata}. For example, to get all the pH data in Wisconsin:

<<phData, eval=FALSE>>=

dataPH <- readWQPdata(statecode="US:55", 
                 characteristicName="pH")

@



\FloatBarrier

\clearpage

Laura A DeCicco's avatar
Laura A DeCicco committed
795
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
796
\section{Embedded Metadata}
Laura A DeCicco's avatar
Laura A DeCicco committed
797
798
\label{sec:metadata}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
799
All data frames returned from the Web services have some form of associated metadata. This information is included as attributes to the data frame. All data frames will have a \texttt{url} (returning a character of the url used to obtain the data), \texttt{siteInfo} (returning a data frame with information on sites),  and \texttt{queryTime} (returning a POSIXct datetime) attributes. For example, the url and query time used to obtain the data can be found as follows:
Laura A DeCicco's avatar
Laura A DeCicco committed
800

801
<<meta1, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
802
803
804
805
806

attr(dischargeWI, "url")

attr(dischargeWI, "queryTime")

Laura A DeCicco's avatar
Laura A DeCicco committed
807
808
siteInfo <- attr(dischargeWI, "siteInfo")

Laura A DeCicco's avatar
Laura A DeCicco committed
809
810
@

Laura A DeCicco's avatar
Laura A DeCicco committed
811
Depending on the format in which the data were obtained (RDB, WaterML1, etc.), there will be additional information embedded in the data frame as attributes. To discover the available attributes:
Laura A DeCicco's avatar
Laura A DeCicco committed
812

813
<<meta2, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
814
815
816
817
818
819
820

names(attributes(dischargeWI))

@

For data obtained from \texttt{readNWISuv}, \texttt{readNWISdv}, and \texttt{readNWISgwl}, there are two attributes that are particularly useful: \texttt{siteInfo} and \texttt{variableInfo}.

821
<<meta3, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
822
823
824
825
826
827
828
829

siteInfo <- attr(dischargeWI, "siteInfo")

variableInfo <- attr(dischargeWI, "variableInfo")


@

Laura A DeCicco's avatar
Laura A DeCicco committed
830
831
For data obtained from \texttt{readNWISpeak}, \texttt{readNWISmeas}, and \texttt{readNWISrating}, the \texttt{comment} attribute is useful.

Laura A DeCicco's avatar
Laura A DeCicco committed
832
<<meta5, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
833
834
835
836
837
838
comment(peakData)

#Which is equivalent to:
# attr(peakData, "comment")
@

Laura A DeCicco's avatar
Laura A DeCicco committed
839

Laura A DeCicco's avatar
Laura A DeCicco committed
840
841
842
843
844

%------------------------------------------------------------ 
\section{Getting Started in R}
\label{sec:appendix1}
%------------------------------------------------------------ 
Laura A DeCicco's avatar
Laura A DeCicco committed
845
This section describes the options for downloading and installing the dataRetrieval package.
Laura A DeCicco's avatar
Laura A DeCicco committed
846
847
848
849
850
851

%------------------------------------------------------------
\subsection{New to R?}
%------------------------------------------------------------ 
If you are new to R, you will need to first install the latest version of R, which can be found here: \url{http://www.r-project.org/}.

Laura A DeCicco's avatar
Laura A DeCicco committed
852
853
At any time, you can get information about any function in R by typing a question mark before the function's name.  This will open a help file (in RStudio, in the Help window), similar to Figure~\ref{fig:help}, that describes the function, the required arguments, and provides working examples. To see the raw code for a particular function, type the name of the function, without parentheses.

Laura A DeCicco's avatar
Laura A DeCicco committed
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870

<<helpFunc,eval = FALSE>>=
?readNWISpCode
@

\FloatBarrier


\begin{figure}[ht!]
\centering
 \resizebox{0.95\textwidth}{!}{\includegraphics{Rhelp.png}} 
\caption{A simple R help file}
\label{fig:help}
\end{figure}

Additionally, many R packages have vignette files attached (such as this paper). To view the vignette:
<<seeVignette,eval = FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
871
vignette(dataRetrieval)
Laura A DeCicco's avatar
Laura A DeCicco committed
872
873
874
875
876
@

\FloatBarrier
\clearpage
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
877
\subsection{R User: Installing dataRetrieval}
Laura A DeCicco's avatar
Laura A DeCicco committed
878
%------------------------------------------------------------ 
Laura A DeCicco's avatar
Laura A DeCicco committed
879
The following command installs dataRetrieval and subsequent required packages:
Laura A DeCicco's avatar
Laura A DeCicco committed
880
881

<<installFromCran,eval = FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
882
install.packages("dataRetrieval")
Laura A DeCicco's avatar
Laura A DeCicco committed
883
884
885
886
@

After installing the package, you need to open the library each time you re-start R.  This is done with the simple command:
<<openLibraryTest, eval=FALSE>>=
Laura A DeCicco's avatar
Laura A DeCicco committed
887
library(dataRetrieval)
Laura A DeCicco's avatar
Laura A DeCicco committed
888
889
890
891
@


%------------------------------------------------------------ 
Laura A DeCicco's avatar
Laura A DeCicco committed
892
\section{Creating Tables in Microsoft\textregistered\ Software from R}
Laura A DeCicco's avatar
Laura A DeCicco committed
893
894
\label{app:createWordTable}
%------------------------------------------------------------
Laura A DeCicco's avatar
Laura A DeCicco committed
895
There are a few steps that are required in order to create a table in Microsoft\textregistered\ software (Excel, Word, PowerPoint, etc.) from an R data frame. There are certainly a variety of good methods, one of which is detailed here. The example we will step through here will be to create a table in Microsoft Excel based on the data frame tableData:
Laura A DeCicco's avatar
Laura A DeCicco committed
896

Laura A DeCicco's avatar
Laura A DeCicco committed
897
<<label=getSiteApp, echo=TRUE, eval=FALSE>>=
898
availableData <- whatNWISdata(siteNumber, "dv")
Laura A DeCicco's avatar
Laura A DeCicco committed
899
dailyData <- availableData["00003" == availableData$stat_cd,]
Laura A DeCicco's avatar
Laura A DeCicco committed
900
901
902
903

tableData <- with(dailyData, 
      data.frame(
        shortName=srsname, 
Laura A DeCicco's avatar
Laura A DeCicco committed
904
905
906
        Start=begin_date, 
        End=end_date, 
        Count=count_nu,
Laura A DeCicco's avatar
Laura A DeCicco committed
907
908
909
910
        Units=parameter_units)
      )
@

Laura A DeCicco's avatar
Laura A DeCicco committed
911
First, save the data frame as a tab delimited file (you don't want to use comma delimited because there are commas in some of the data elements):
Laura A DeCicco's avatar
Laura A DeCicco committed
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942


<<label=saveData, echo=TRUE, eval=FALSE>>=
write.table(tableData, file="tableData.tsv",sep="\t",
            row.names = FALSE,quote=FALSE)
@

This will save a file in your working directory called tableData.tsv.  You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following:

\begin{verbatim}
shortName  Start  End	Count	Units
Temperature, water	2010-10-01	2012-06-24	575	deg C
Stream flow, mean. daily	1948-01-01	2013-03-13	23814	ft3/s
Specific conductance	2010-10-01	2012-06-24	551	uS/cm @25C
Suspended sediment concentration (SSC)	1980-10-01	1991-09-30	3651	mg/l
Suspended sediment discharge	1980-10-01	1991-09-30	3652	tons/day
\end{verbatim}

Next, follow the steps below to open this file in Excel:
\begin{enumerate}
\item Open Excel
\item Click on the File tab
\item Click on the Open option
\item Navigate to the working directory (as shown in the results of \texttt{getwd()})
\item Next to the File name text box, change the dropdown type to All Files (*.*)
\item Double click tableData.tsv
\item A text import wizard will open up. In the first window, choose the Delimited radio button if it is not automatically picked, then click on Next.
\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finish.
\item Use the many formatting tools within Excel to customize the table
\end{enumerate}

Laura A DeCicco's avatar
Laura A DeCicco committed
943
From Excel, it is simple to copy and paste the tables into other Microsoft\textregistered\ software. An example using one of the default Excel table formats is here. Additional formatting could be required in Excel, for example converting u to  $\mu$.
Laura A DeCicco's avatar
Laura A DeCicco committed
944
945
946
947

\begin{figure}[ht!]
\centering
 \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} 
Laura A DeCicco's avatar
Laura A DeCicco committed
948
\caption{A simple table produced in Microsoft\textregistered\ Excel.}
Laura A DeCicco's avatar
Laura A DeCicco committed
949
950
951
952
953
954
955
956
957
958
959
960
\label{overflow}
\end{figure}

\clearpage

%-------------------------------------
\section{Disclaimer}
%------------------------------------
This information is preliminary and is subject to revision. It is being provided to meet the need for timely best science. The information is provided on the condition that neither the U.S. Geological Survey nor the U.S. Government may be held liable for any damages resulting from the authorized or unauthorized use of the information.


\end{document}