diff --git a/inst/doc/Rplots.pdf b/inst/doc/Rplots.pdf index 2245d91a15b36dd9216658bd3698057f29f340c4..32a1d08d8aa29e3234d319ebc1ed7e336e61aa12 100644 Binary files a/inst/doc/Rplots.pdf and b/inst/doc/Rplots.pdf differ diff --git a/inst/doc/dataRetrieval-concordance.tex b/inst/doc/dataRetrieval-concordance.tex index 64ad8a1c9c4240f2ec2b06c03718c85b13bf7cb3..573b84d2421210e7db4f581e7fe977674971ee9f 100644 --- a/inst/doc/dataRetrieval-concordance.tex +++ b/inst/doc/dataRetrieval-concordance.tex @@ -1,13 +1,16 @@ \Sconcordance{concordance:dataRetrieval.tex:dataRetrieval.Rnw:% -1 82 1 1 8 1 1 1 10 16 0 1 2 5 1 1 10 15 0 1 2 6 1 1 2 1 0 1 2 1 0 1 1 % -3 0 1 2 2 1 1 2 7 0 1 2 6 1 1 3 2 0 2 1 7 0 1 2 1 1 1 2 7 0 1 2 9 1 1 3 % -2 0 3 1 1 2 3 0 1 2 1 1 1 2 10 0 1 2 4 1 1 3 2 0 4 1 1 3 4 0 1 2 4 1 1 % -6 4 0 1 1 1 4 3 0 3 1 3 0 1 2 3 1 1 -5 1 9 14 1 1 2 1 0 3 1 1 2 4 0 2 2 % -10 0 1 2 3 1 1 5 4 0 1 1 3 0 1 2 3 1 1 -5 1 9 12 1 1 2 1 0 1 2 1 0 2 1 % -1 3 4 0 1 2 4 1 1 3 2 0 1 1 7 0 1 2 3 1 1 6 5 0 1 1 3 0 1 2 2 1 1 -4 1 % -8 10 1 1 3 2 0 1 1 12 0 1 2 13 1 1 2 4 0 1 2 7 1 1 2 1 0 3 1 1 2 3 0 1 % -2 2 1 1 11 18 0 1 2 8 1 1 3 5 0 1 2 2 1 1 11 20 0 1 2 12 1 1 14 12 0 1 % -2 9 1 1 2 17 0 1 3 5 1 1 2 1 0 5 1 11 0 1 1 9 0 1 2 30 1 1 2 1 0 2 1 3 % -0 1 2 15 1 1 2 1 0 2 1 3 0 1 2 18 1 1 2 4 0 1 2 1 1 1 2 12 0 1 2 6 1 1 % -2 1 0 1 1 3 0 1 2 3 1 1 2 4 0 1 2 7 1 1 2 1 0 1 1 3 0 1 2 1 1 1 2 4 0 1 % -2 9 1 1 5 47 0 1 2 9 1 1 6 45 0 1 2 1 1 1 6 27 0 1 2 20 1} +1 84 1 1 8 1 1 1 10 16 0 1 2 5 1 1 10 15 0 1 2 13 1 1 2 1 0 1 2 1 0 1 1 % +3 0 1 2 2 1 1 2 7 0 1 2 7 1 1 3 2 0 1 1 12 0 1 2 4 1 1 4 3 0 1 2 1 0 1 % +3 7 0 1 3 4 0 1 2 3 1 1 8 7 0 1 4 1 0 1 2 21 0 1 2 7 1 1 3 2 0 2 1 7 0 % +1 2 1 1 1 2 7 0 1 2 9 1 1 3 2 0 3 1 1 2 3 0 1 2 1 1 1 2 10 0 1 2 4 1 1 % +3 2 0 4 1 1 3 4 0 1 2 4 1 1 6 4 0 1 1 1 4 3 0 3 1 3 0 1 2 3 1 1 -5 1 9 % +14 1 1 2 1 0 3 1 1 2 4 0 2 2 10 0 1 2 3 1 1 5 4 0 1 1 3 0 1 2 3 1 1 -5 % +1 9 12 1 1 2 1 0 1 2 1 0 2 1 1 3 4 0 1 2 2 1 1 3 2 0 1 1 7 0 1 2 3 1 1 % +6 5 0 1 1 3 0 1 2 2 1 
1 -4 1 8 10 1 1 3 2 0 1 1 12 0 1 2 13 1 1 2 4 0 1 % +2 7 1 1 2 1 0 2 1 1 3 5 0 1 2 2 1 1 11 18 0 1 2 8 1 1 3 5 0 1 2 2 1 1 % +12 24 0 1 2 10 1 1 14 12 0 1 2 9 1 1 2 17 0 1 3 27 1 1 2 1 0 2 1 3 0 1 % +2 15 1 1 2 1 0 2 1 3 0 1 2 6 1 1 2 1 0 3 1 1 2 2 1 11 0 1 1 19 0 1 2 24 % +1 1 2 4 0 1 2 1 1 1 2 13 0 1 2 6 1 1 2 1 0 1 1 3 0 1 2 3 1 1 2 4 0 1 2 % +7 1 1 2 1 0 1 1 3 0 1 2 1 1 1 2 4 0 1 2 12 1 1 5 47 0 1 2 9 1 1 6 45 0 % +1 2 2 1 1 6 27 0 1 2 8 1 1 2 1 0 4 1 1 9 7 0 1 1 11 0 1 2 4 1 1 3 5 0 1 % +2 53 1} diff --git a/inst/doc/dataRetrieval-fig1.pdf b/inst/doc/dataRetrieval-fig1.pdf index eb62f13a13afb234b72dd89820561414966965b5..3ea96ee2cd6ff5f86e849c701af6e3cfb55a5eb1 100644 Binary files a/inst/doc/dataRetrieval-fig1.pdf and b/inst/doc/dataRetrieval-fig1.pdf differ diff --git a/inst/doc/dataRetrieval-fig2.pdf b/inst/doc/dataRetrieval-fig2.pdf index 3dba06cd93efb5bca776e9bd5e0f69809912e10b..3495a7a39205ff053538e5301b624137b79e7919 100644 Binary files a/inst/doc/dataRetrieval-fig2.pdf and b/inst/doc/dataRetrieval-fig2.pdf differ diff --git a/inst/doc/dataRetrieval-fig3.pdf b/inst/doc/dataRetrieval-fig3.pdf index 7624e84c04b0537ed3ec2f497a79fb22af0f7b8a..42e70b553aceffd65341e7cc834679e5545ef9a7 100644 Binary files a/inst/doc/dataRetrieval-fig3.pdf and b/inst/doc/dataRetrieval-fig3.pdf differ diff --git a/inst/doc/dataRetrieval.Rnw b/inst/doc/dataRetrieval.Rnw index 2fca66a961d5fc9d2aa51588ef6e4af3146bb9b9..6c841e6fd1ef0afbd5db055325c0b90b74d3d7b6 100644 --- a/inst/doc/dataRetrieval.Rnw +++ b/inst/doc/dataRetrieval.Rnw @@ -13,6 +13,7 @@ \usepackage[american]{babel} \usepackage{authblk} \usepackage{footnote} +\usepackage{placeins} \renewcommand\Affilfont{\itshape\small} \usepackage{Sweave} \renewcommand{\topfraction}{0.85} @@ -151,31 +152,49 @@ Site information is obtained from \url{http://waterservices.usgs.gov/rest/Site-T \subsubsection{getDataAvailability} \label{sec:usgsDataAvailability} %------------------------------------------------------------ -To find out the 
data available at a particular USGS site, such as measured parameters, period of record, and number of samples (count), use the getDataAvailability function: +To find out the available data at a particular USGS site, including measured parameters, period of record, and number of samples (count), use the getDataAvailability function: -<<label=getSite, echo=TRUE>>= -# Site ID for Choptank River near Greensboro, MD -siteNumber <- "01491000" +<<label=getSiteAvailability, echo=TRUE>>= +# Continuing from the previous example: ChoptankAvailableData <- getDataAvailability(siteNumber) head(ChoptankAvailableData) @ There is an additional argument to the getDataAvailability called longNames, which defaults to FALSE. Setting longNames to TRUE will cause the function to make a web service call for each parameter and return expanded information on that parameter. Currently, this is a very slow process because each parameter code makes a unique web service call. If the site does not have many measured parameters, setting longNames to TRUE is reasonable. -It is also possible to only request information for certain variables. In the following example, we retrieve just the daily value (dv) parameter information from the Choptank data availability dataframe (excluding all unit value and water quality values). +It is also possible to only request information for certain variables. In the following example, we retrieve just the daily mean parameter information from the Choptank data availability dataframe (excluding all unit value and water quality values). 
-<<label=getSite, echo=TRUE>>= +<<label=getSiteExtended, echo=TRUE>>= # Continuing from the previous example: # This pulls out just the daily data: ChoptankDailyData <- ChoptankAvailableData["dv" == ChoptankAvailableData$service,] +# This pulls out the mean: +ChoptankDailyData <- ChoptankDailyData["00003" == ChoptankDailyData$statCd,] #Now, make a call to get all of the parameter information: pCodeINFO <- getMultipleParameterNames(ChoptankDailyData$parameter_cd) #Merge the available dataframe with the parameter information dataframe: ChoptankDailyData <- merge(ChoptankDailyData,pCodeINFO,by="parameter_cd") -head(ChoptankDailyData) +@ + +The daily data at the Choptank River site can be displayed in a nice \LaTeX{} table using the xtable package. See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a nice table in Microsoft Excel or Word. + +<<label=tablegda, echo=TRUE,results=tex>>= +tableData <- with(ChoptankDailyData, + data.frame(shortName=srsname, + Start=as.character(startDate), + End=as.character(endDate), + Count=as.character(count), + Units=parameter_units) + ) + + +data.table <- xtable(tableData,label="tab:gda", + caption="Daily mean data available at the Choptank River") +print(data.table, + caption.placement="top",include.rownames=FALSE) @ @@ -569,9 +588,11 @@ head(Sample) %------------------------------------------------------------ The EGRET package.... + +\clearpage \appendix %------------------------------------------------------------ -\section{Appendix 1: Getting Started} +\section{Getting Started in R} \label{sec:appendix1} %------------------------------------------------------------ This section describes the options for downloading and installing the dataRetrieval package. 
@@ -629,15 +650,18 @@ To then open the library, simply type: library(dataRetrieval) @ +\FloatBarrier %------------------------------------------------------------ -\section{Appendix 2: Columns Names} -%------------------------------------------------------------ +\section{Column Names} \label{sec:appendix2} +%------------------------------------------------------------ + %------------------------------------------------------------ \subsection{INFO dataframe} -%------------------------------------------------------------ \label{sec:appendix2INFO} +%------------------------------------------------------------ + <<label=colNamesQW, echo=FALSE,results=tex>>= infoDF <- data.frame(ColumnNames=names(INFO)) data.table <- xtable(infoDF, @@ -645,13 +669,13 @@ data.table <- xtable(infoDF, print(data.table, caption.placement="top",floating="FALSE",latex.environments=NULL,include.rownames=FALSE,include.colnames=FALSE) @ -\\* - +\FloatBarrier %------------------------------------------------------------ \subsection{Water Quality Portal} -%------------------------------------------------------------ \label{sec:appendix2WQP} +%------------------------------------------------------------ + There are 62 columns returned from the water quality portal. 
<<label=colNamesQW, echo=FALSE,results=tex>>= @@ -661,8 +685,9 @@ data.table <- xtable(infoDF, print(data.table, caption.placement="top",floating="FALSE",latex.environments=NULL, include.rownames=FALSE,include.colnames=FALSE) @ -\\* -\newpage + +\FloatBarrier + <<label=colNamesQW2, echo=FALSE,results=tex>>= infoDF <- data.frame(ColumnNames_Continued=names(dissolvedNitrate[41:62])) data.table <- xtable(infoDF, @@ -670,8 +695,76 @@ data.table <- xtable(infoDF, print(data.table, caption.placement="top",floating="FALSE",latex.environments=NULL, include.rownames=FALSE,include.colnames=FALSE) @ -\\* -\newpage + +\clearpage + +%------------------------------------------------------------ +\section{Creating tables in Microsoft from R} +\label{app:createWordTable} +%------------------------------------------------------------ +There are a few steps that are required in order to create a table in a Microsoft product (Excel, Word, Powerpoint, etc.) from an R dataframe. There are actually a variety of methods, one of which is detailed here. The example we will step through here will be to create the following data. + +<<label=getSiteApp, echo=TRUE>>= +ChoptankAvailableData <- getDataAvailability(siteNumber) +ChoptankDailyData <- ChoptankAvailableData["dv" == ChoptankAvailableData$service,] +ChoptankDailyData <- ChoptankDailyData["00003" == ChoptankDailyData$statCd,] +pCodeINFO <- getMultipleParameterNames(ChoptankDailyData$parameter_cd, interactive=FALSE) +ChoptankDailyData <- merge(ChoptankDailyData,pCodeINFO,by="parameter_cd") + +tableData <- with(ChoptankDailyData, + data.frame( + shortName=srsname, + Start=startDate, + End=endDate, + Count=count, + Units=parameter_units) + ) +tableData +@ + +Our goal now is to get the data from the dataframe tableData to a Microsoft Word table. 
+ +First, save the dataframe as a tab delimited file: + +<<label=saveData, echo=TRUE, eval=FALSE>>= +write.table(tableData, file="tableData.tsv",sep="\t", + row.names = FALSE,quote=FALSE) +@ + +This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. If you open the file in a general-purpose text editor, you should see the following: + +\begin{verbatim} +shortName Start End Count Units +Temperature, water 2010-10-01 2012-06-24 575 deg C +Stream flow, mean. daily 1948-01-01 2013-03-13 23814 cfs +Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C +Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +\end{verbatim} + +To open this file in Excel: +\begin{enumerate} +\item Open Excel +\item Click on the File tab +\item Click on the Open option +\item Browse to the working directory (as shown in the results of getwd()) +\item Next to the File name text box, change the dropdown type to All Files (*.*) +\item Double click tableData.tsv +\item A text import wizard will open up. In the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. +\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finish. +\item Use the many formatting tools within Excel to customize the table +\end{enumerate} + +From Excel, it is simple to copy and paste the tables into other Microsoft products. An example using one of the default Excel table formats is here. + +\begin{figure}[ht!] 
+\centering + \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} +\caption{A simple table produced in Microsoft Excel} +\label{overflow} +\end{figure} + +\clearpage %------------------------------------------------------------ % BIBLIO %------------------------------------------------------------ diff --git a/inst/doc/dataRetrieval.log b/inst/doc/dataRetrieval.log index 2915e0db5cf9ee7c4063bf88f1fe90f7588db356..28d2e8b578b3d81483138f18f20d5e612073e2a0 100644 --- a/inst/doc/dataRetrieval.log +++ b/inst/doc/dataRetrieval.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 13 MAR 2013 17:00 +This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 14 MAR 2013 14:54 entering extended mode **dataRetrieval.tex (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.tex @@ -238,6 +238,14 @@ Package: authblk 2009/11/18 1.3 (PWD) \c@authors=\count112 \c@affil=\count113 ) +(C:\Users\ldecicco\AppData\Roaming\MiKTeX\2.9\tex\latex\mdwtools\footnote.sty +Package: footnote 1997/01/28 1.13 Save footnotes around boxes +\fn@notes=\box28 +\fn@width=\dimen116 +) +(C:\Users\ldecicco\AppData\Roaming\MiKTeX\2.9\tex\latex\placeins\placeins.sty +Package: placeins 2005/04/18 v 2.2 +) (C:/PROGRA~1/R/R-215~1.3/share/texmf/tex/latex\Sweave.sty Package: Sweave @@ -262,8 +270,8 @@ Package graphics Info: Driver file: pdftex.def on input line 91. 
File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX \Gread@gobject=\count114 )) -\Gin@req@height=\dimen116 -\Gin@req@width=\dimen117 +\Gin@req@height=\dimen117 +\Gin@req@width=\dimen118 ) (C:\Users\ldecicco\AppData\Roaming\MiKTeX\2.9\tex\latex\fancyvrb\fancyvrb.sty Package: fancyvrb 2008/02/07 @@ -272,7 +280,7 @@ Style option: `fancyvrb' v2.7a, with DG/SPQR fixes, and firstline=lastline fix <2008/02/07> (tvz) \FV@CodeLineNo=\count115 \FV@InFile=\read1 -\FV@TabBox=\box28 +\FV@TabBox=\box29 \c@FancyVerbLine=\count116 \FV@StepNumber=\count117 \FV@OutFile=\write3 @@ -385,29 +393,29 @@ LaTeX Font Info: Try loading font information for T1+aer on input line 100. File: t1aer.fd 1997/11/16 Font definitions for T1/aer. )))) (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.aux) -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 42. -LaTeX Font Info: ... okay on input line 42. -LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 42. -LaTeX Font Info: Try loading font information for TS1+cmr on input line 42. +LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. 
+LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 44. +LaTeX Font Info: ... okay on input line 44. +LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 44. +LaTeX Font Info: Try loading font information for TS1+cmr on input line 44. ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\base\ts1cmr.fd" File: ts1cmr.fd 1999/05/25 v2.5h Standard LaTeX font definitions ) -LaTeX Font Info: ... okay on input line 42. -\AtBeginShipoutBox=\box29 -Package hyperref Info: Link coloring OFF on input line 42. +LaTeX Font Info: ... okay on input line 44. +\AtBeginShipoutBox=\box30 +Package hyperref Info: Link coloring OFF on input line 44. ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\hyperref\nameref.sty" Package: nameref 2010/04/30 v2.40 Cross-referencing by name of section @@ -417,9 +425,9 @@ Package: gettitlestring 2010/12/03 v1.4 Cleanup title references (HO) ) \c@section@level=\count118 ) -LaTeX Info: Redefining \ref on input line 42. -LaTeX Info: Redefining \pageref on input line 42. -LaTeX Info: Redefining \nameref on input line 42. +LaTeX Info: Redefining \ref on input line 44. +LaTeX Info: Redefining \pageref on input line 44. +LaTeX Info: Redefining \nameref on input line 44. (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.out) (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.out) @@ -428,13 +436,13 @@ LaTeX Info: Redefining \nameref on input line 42. (C:\Users\ldecicco\AppData\Roaming\MiKTeX\2.9\tex\context\base\supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] 
\scratchcounter=\count119 -\scratchdimen=\dimen118 -\scratchbox=\box30 +\scratchdimen=\dimen119 +\scratchbox=\box31 \nofMPsegments=\count120 \nofMParguments=\count121 \everyMPshowfont=\toks20 \MPscratchCnt=\count122 -\MPscratchDim=\dimen119 +\MPscratchDim=\dimen120 \MPnumerator=\count123 \makeMPintoPDFobject=\count124 \everyMPtoPDFconversion=\toks21 @@ -447,7 +455,7 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] {C:/Users/ldecicco/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]) \tf@toc=\write5 -LaTeX Font Info: Try loading font information for T1+aett on input line 60. +LaTeX Font Info: Try loading font information for T1+aett on input line 62. ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ae\t1aett.fd" File: t1aett.fd 1997/11/16 Font definitions for T1/aett. @@ -456,19 +464,19 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [2] -Overfull \hbox (22.21066pt too wide) in paragraph at lines 80--81 +Overfull \hbox (22.21066pt too wide) in paragraph at lines 82--83 [][]$\T1/aett/m/n/10.95 http : / / nwis . waterdata . usgs . gov / usa / nwis / pmcodes ? 
radio _ pm _ search = param _ group&pm _$ [] -Overfull \hbox (23.424pt too wide) in paragraph at lines 80--81 +Overfull \hbox (23.424pt too wide) in paragraph at lines 82--83 $\T1/aett/m/n/10.95 group = All + -[]-[] + include + all + parameter + groups&p m _ search = &casrn _ search = &srsname _ search =$ [] -Overfull \hbox (68.32622pt too wide) in paragraph at lines 80--81 +Overfull \hbox (68.32622pt too wide) in paragraph at lines 82--83 $\T1/aett/m/n/10.95 &format = html _ table&show = parameter _ group _ nm&show = parameter _ nm&show = casrn&show = srsname&show =$ [] @@ -486,70 +494,85 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [5] +Overfull \hbox (3.57079pt too wide) in paragraph at lines 232--243 + [][] + [] + + Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[6] <dataRetrieval-fig1.pdf, id=193, 433.62pt x 289.08pt> +[6] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] + + +[7] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] + + +[8] <dataRetrieval-fig1.pdf, id=223, 433.62pt x 289.08pt> File: dataRetrieval-fig1.pdf Graphic file (type pdf) <use dataRetrieval-fig1.pdf> -Package pdftex.def Info: dataRetrieval-fig1.pdf used on input line 255. +Package pdftex.def Info: dataRetrieval-fig1.pdf used on input line 349. (pdftex.def) Requested size: 358.46039pt x 238.98355pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[7 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig1.pdf>] +[9 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig1.pdf>] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[8] <dataRetrieval-fig2.pdf, id=213, 433.62pt x 289.08pt> +[10] <dataRetrieval-fig2.pdf, id=242, 433.62pt x 289.08pt> File: dataRetrieval-fig2.pdf Graphic file (type pdf) <use dataRetrieval-fig2.pdf> -Package pdftex.def Info: dataRetrieval-fig2.pdf used on input line 310. 
+Package pdftex.def Info: dataRetrieval-fig2.pdf used on input line 404. (pdftex.def) Requested size: 358.46039pt x 238.98355pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[9 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig2.pdf>] -<dataRetrieval-fig3.pdf, id=227, 433.62pt x 289.08pt> +[11 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig2.pdf>] +<dataRetrieval-fig3.pdf, id=256, 433.62pt x 289.08pt> File: dataRetrieval-fig3.pdf Graphic file (type pdf) <use dataRetrieval-fig3.pdf> -Package pdftex.def Info: dataRetrieval-fig3.pdf used on input line 368. +Package pdftex.def Info: dataRetrieval-fig3.pdf used on input line 460. (pdftex.def) Requested size: 358.46039pt x 238.98355pt. -Overfull \hbox (35.98744pt too wide) in paragraph at lines 378--379 -\T1/aer/m/n/10.95 There are ad-di-tional data sets avail-able on the Wa-ter Qua -l-ity Por-tal ([]$\T1/aett/m/n/10.95 http : / / www . waterqualitydata .$ +Overfull \hbox (0.79091pt too wide) in paragraph at lines 470--471 +\T1/aer/m/n/10.95 EPA) or NWIS database. Since STORET does not use USGS pa-ram- +e-ter codes, a \T1/aett/m/n/10.95 "\T1/aer/m/n/10.95 characteristic [] -LaTeX Font Info: Try loading font information for TS1+aett on input line 382 + +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] + + +[12 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig3.pdf>] +LaTeX Font Info: Try loading font information for TS1+aett on input line 474 . + (C:/PROGRA~1/R/R-215~1.3/share/texmf/tex/latex\ts1aett.fd File: ts1aett.fd ) -LaTeX Font Info: Try loading font information for TS1+cmtt on input line 382 +LaTeX Font Info: Try loading font information for TS1+cmtt on input line 474 . 
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\base\ts1cmtt.fd" File: ts1cmtt.fd 1999/05/25 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: Font shape `TS1/aett/m/sl' in size <10.95> not available -(Font) Font shape `TS1/cmtt/m/sl' tried instead on input line 382. - - -Overfull \vbox (21.68121pt too high) has occurred while \output is active [] +(Font) Font shape `TS1/cmtt/m/sl' tried instead on input line 474. -[10] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[11 <D:/LADData/RCode/dataRetrieval/inst/doc/dataRetrieval-fig3.pdf>] -Underfull \hbox (badness 10000) in paragraph at lines 437--455 +[13] +Underfull \hbox (badness 10000) in paragraph at lines 530--548 [] @@ -557,14 +580,6 @@ Underfull \hbox (badness 10000) in paragraph at lines 437--455 Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[12] -Overfull \vbox (21.68121pt too high) has occurred while \output is active [] - - -[13] -Overfull \vbox (21.68121pt too high) has occurred while \output is active [] - - [14] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] @@ -582,65 +597,78 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [18] -Underfull \hbox (badness 10000) in paragraph at lines 736--784 +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] - [] + +[19] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] +[20 + +] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[19] +[21] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[20] -Overfull \vbox (21.66835pt too high) has occurred while \output is active [] +[22] +Overfull \vbox (15.16835pt too high) has occurred while \output is active [] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[21] -Underfull \hbox (badness 10000) in paragraph at lines 794--839 - - [] +[23] +Overfull \vbox 
(21.68121pt too high) has occurred while \output is active [] +[24] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[22] -Underfull \hbox (badness 10000) in paragraph at lines 842--869 +[25] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] - [] +[26 + +] <table1.png, id=338, 554.07pt x 125.71968pt> +File: table1.png Graphic file (type png) + <use table1.png> +Package pdftex.def Info: table1.png used on input line 1058. +(pdftex.def) Requested size: 358.46039pt x 81.33507pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[23] -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 886. +[27 <D:/LADData/RCode/dataRetrieval/inst/doc/table1.png>] +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1080. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[24] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 886. +[28 + +] +Package atveryend Info: Empty hook `AfterLastShipout' on input line 1080. (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 886. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 886. +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1080. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1080. + Package rerunfilecheck Info: File `dataRetrieval.out' has not changed. -(rerunfilecheck) Checksum: 614BBE003F9372697FA43A46BAFF5BE8;1901. +(rerunfilecheck) Checksum: 40BB526E32B9712C796C3616F1DDBE3B;2038. 
) Here is how much of TeX's memory you used: - 7426 strings out of 494045 - 106538 string characters out of 3145961 - 190779 words of memory out of 3000000 - 10519 multiletter control sequences out of 15000+200000 - 40005 words of font info for 82 fonts, out of 3000000 for 9000 + 7534 strings out of 494045 + 108149 string characters out of 3145961 + 191631 words of memory out of 3000000 + 10589 multiletter control sequences out of 15000+200000 + 45443 words of font info for 92 fonts, out of 3000000 for 9000 715 hyphenation exceptions out of 8191 - 35i,8n,28p,913b,481s stack positions out of 5000i,500n,10000p,200000b,50000s + 35i,12n,28p,913b,487s stack positions out of 5000i,500n,10000p,200000b,50000s <C:\Users\ldecicco\AppData\Local\MiKTeX\2.9\fonts\pk\ljfour\jknappen\ec\dpi6 00\tcst1095.pk><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/c m/cmbx10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/ @@ -648,14 +676,16 @@ cmbx12.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cm mi10.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr1 0.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr12.p fb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr17.pfb> -<C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr7.pfb><C:/ -Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb><C:/Prog -ram Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsltt10.pfb><C:/Prog -ram Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti10.pfb><C:/Progra -m Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmtt10.pfb> -Output written on dataRetrieval.pdf (24 pages, 307962 bytes). 
+<C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr6.pfb><C:/ +Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr7.pfb><C:/Prog +ram Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8.pfb><C:/Program +Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr9.pfb><C:/Program File +s (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmsltt10.pfb><C:/Program File +s (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti10.pfb><C:/Program Files +(x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmtt10.pfb> +Output written on dataRetrieval.pdf (28 pages, 354429 bytes). PDF statistics: - 368 PDF objects out of 1000 (max. 8388607) - 60 named destinations out of 1000 (max. 500000) - 220 words of extra memory for PDF output out of 10000 (max. 10000000) + 433 PDF objects out of 1000 (max. 8388607) + 80 named destinations out of 1000 (max. 500000) + 257 words of extra memory for PDF output out of 10000 (max. 10000000) diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf index 6f67dfb4fcc13c6511b3567fd6a8e50a681666d5..e1e16abeb10a474158ed839f5c3b85d6d969b10f 100644 Binary files a/inst/doc/dataRetrieval.pdf and b/inst/doc/dataRetrieval.pdf differ diff --git a/inst/doc/dataRetrieval.synctex.gz b/inst/doc/dataRetrieval.synctex.gz index 3d0bb31822a76c8c39589dad8cf4419f03c14689..cf8f93768a30c91fc4462f1b1046c051fc518a5c 100644 Binary files a/inst/doc/dataRetrieval.synctex.gz and b/inst/doc/dataRetrieval.synctex.gz differ diff --git a/inst/doc/dataRetrieval.tex b/inst/doc/dataRetrieval.tex index c03ab5dd9d062e0f03406afb95a59f77b26b23b5..75f8d8adeaa76d0f68e69aef568a6f699d0bcffe 100644 --- a/inst/doc/dataRetrieval.tex +++ b/inst/doc/dataRetrieval.tex @@ -12,6 +12,8 @@ \usepackage[numbers, round]{natbib} \usepackage[american]{babel} \usepackage{authblk} +\usepackage{footnote} +\usepackage{placeins} \renewcommand\Affilfont{\itshape\small} \usepackage{Sweave} \renewcommand{\topfraction}{0.85} @@ -43,7 +45,7 @@ 
\input{dataRetrieval-concordance} %------------------------------------------------------------ -\title{Introduction to the dataRetrieval package} +\title{The dataRetrieval R package} %------------------------------------------------------------ \author[1]{Laura De Cicco} \author[1]{Robert Hirsch} @@ -66,14 +68,14 @@ For information on getting started in R, downloading and installing the package, %------------------------------------------------------------ -\section{General USGS Web Retrieval Examples} +\section{General USGS Web Retrievals} %------------------------------------------------------------ In this section, we will run through 5 examples, documenting how to get raw data from the web. This includes site information (\ref{sec:usgsSite}), measured parameter information (\ref{sec:usgsParams}), historical daily values(\ref{sec:usgsDaily}), real-time current values (\ref{sec:usgsRT}), and water quality data (\ref{sec:usgsWQP}) or (\ref{sec:usgsSTORET}). We will use the Choptank River near Greensboro, MD as an example. The site-ID for this gage station is 01491000. Daily discharge measurements are available as far back as 1948. Additionally, forms of nitrate have been measured dating back to 1964. The functions/examples in this section are for raw data retrieval. This may or may not be the easiest data to work with. In the next section, we will use functions that retrieve and process the data in a dataframe that may prove more friendly for R analysis. %------------------------------------------------------------ -\subsection{USGS Web Retrieval Introduction} +\subsection{Introduction} %------------------------------------------------------------ -The United States Geological Survey organizes their hydrological data in fairly standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. 
One potential tool for discovering data is Environmental Data Discovery and Transformation (EnDDaT): \url{http://cida.usgs.gov/enddat/}. Follow the example on the EnDDaT web page to learn how to discover USGS stations and available data from any location in the United States. +The United States Geological Survey organizes their hydrological data in standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. One potential tool for discovering data is Environmental Data Discovery and Transformation (EnDDaT): \url{http://cida.usgs.gov/enddat/}. Follow the example on the EnDDaT web page to learn how to discover USGS stations and available data from any location in the United States. Once the site-ID is known, the next required input for USGS data retrievals is the 'parameter code'. This is a 5-digit code that specifies what measured paramater is being requested. A complete list of possible USGS parameter codes can be found at: @@ -84,7 +86,7 @@ Not every station will measure all parameters. A list of commonly measured param % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 16:59:57 2013 +% Thu Mar 14 14:54:17 2013 \begin{table}[ht] \centering \caption{Commonly found USGS Parameter Codes} @@ -101,13 +103,13 @@ pCode & shortName \\ \hline \end{tabular} \end{table} -For real-time data, the parameter code and site ID will suffice. For most variables that are measured on a continuous basis, the USGS stores the historical data as daily values. These daily values may be in the form statistics such as the daily mean values, but they can also include daily maximums, minimums or medians. These different statistics are specified by a 5-digit \texttt{"}stat code\texttt{"}. A complete list of stat codes can be found here: +For real-time data, the parameter code and site ID will suffice. 
For most variables that are measured on a continuous basis, the USGS stores the historical data as daily values. These daily values may be in the form of statistics such as the daily mean values, but they can also include daily maximums, minimums or medians. These different statistics are specified by a 5-digit \texttt{"}stat code\texttt{"}. A complete list of stat codes can be found here: \url{http://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table} Some common stat codes are shown in Table \ref{tab:stat}. % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 16:59:57 2013 +% Thu Mar 14 14:54:17 2013 \begin{table}[ht] \centering \caption{Commonly found USGS Stat Codes} @@ -125,10 +127,17 @@ StatCode & shortName \\ \end{table} %------------------------------------------------------------ -\subsection{USGS Site Information Retrievals} +\subsection{Site Information} \label{sec:usgsSite} %------------------------------------------------------------ -To obtain all of the available site information, use the getSiteFileData function: + +%------------------------------------------------------------ +\subsubsection{getSiteFileData} +\label{sec:usgsSiteFileData} +%------------------------------------------------------------ +Use the getSiteFileData function to obtain all of the information available for a particular USGS site such as full station name, drainage area, latitude, and longitude: + + \begin{Schunk} \begin{Sinput} > library(dataRetrieval) @@ -151,7 +160,92 @@ A list of the available columns are found in Appendix 2: INFO dataframe (\ref{se Site information is obtained from \url{http://waterservices.usgs.gov/rest/Site-Test-Tool.html} %------------------------------------------------------------ -\subsection{USGS Parameter Information Retrievals} +\subsubsection{getDataAvailability} +\label{sec:usgsDataAvailability} +%------------------------------------------------------------ +To find out the available data at a particular USGS 
site, including measured parameters, period of record, and number of samples (count), use the getDataAvailability function: + +\begin{Schunk} +\begin{Sinput} +> # Continuing from the previous example: +> ChoptankAvailableData <- getDataAvailability(siteNumber) +> head(ChoptankAvailableData) +\end{Sinput} +\begin{Soutput} + parameter_cd statCd startDate endDate count service +2 00010 00001 1988-10-01 2012-06-24 940 dv +3 00010 00002 2010-10-01 2012-06-24 575 dv +4 00010 00003 2010-10-01 2012-06-24 575 dv +5 00060 00003 1948-01-01 2013-03-13 23814 dv +6 00095 00001 2010-10-01 2012-06-24 551 dv +7 00095 00002 2010-10-01 2012-06-24 551 dv +\end{Soutput} +\end{Schunk} + +There is an additional argument to the getDataAvailability called longNames, which defaults to FALSE. Setting longNames to TRUE will cause the function to make a web service call for each parameter and return expanded information on that parameter. Currently, this is a very slow process because each parameter code makes a unique web service call. If the site does not have many measured parameters, setting longNames to TRUE is reasonable. + +It is also possible to only request information for certain variables. In the following example, we retrieve just the daily mean parameter information from the Choptank data availability dataframe (excluding all unit value and water quality values). 
+ +\begin{Schunk} +\begin{Sinput} +> # Continuing from the previous example: +> # This pulls out just the daily data: +> ChoptankDailyData <- ChoptankAvailableData["dv" == ChoptankAvailableData$service,] +> # This pulls out the mean: +> ChoptankDailyData <- ChoptankDailyData["00003" == ChoptankDailyData$statCd,] +> #Now, make a call to get all of the parameter information: +> pCodeINFO <- getMultipleParameterNames(ChoptankDailyData$parameter_cd) +\end{Sinput} +\begin{Soutput} +Percent complete: +20 40 60 80 100 +\end{Soutput} +\begin{Sinput} +> #Merge the available dataframe with the parameter information dataframe: +> ChoptankDailyData <- merge(ChoptankDailyData,pCodeINFO,by="parameter_cd") +\end{Sinput} +\end{Schunk} + +The daily data at the Choptank River site can be displayed in a nice \LaTeX{} table using the xtable package. See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a nice table in Microsoft Excel or Word. + + +\begin{Schunk} +\begin{Sinput} +> tableData <- with(ChoptankDailyData, + data.frame(shortName=srsname, + Start=as.character(startDate), + End=as.character(endDate), + Count=as.character(count), + Units=parameter_units) + ) +> data.table <- xtable(tableData,label="tab:gda", + caption="Daily mean data available at the Choptank River") +> print(data.table, + caption.placement="top",include.rownames=FALSE) +\end{Sinput} +% latex table generated in R 2.15.3 by xtable 1.7-1 package +% Thu Mar 14 14:54:22 2013 +\begin{table}[ht] +\centering +\caption{Daily mean data available at the Choptank River} +\label{tab:gda} +\begin{tabular}{lllll} + \hline +shortName & Start & End & Count & Units \\ + \hline +Temperature, water & 2010-10-01 & 2012-06-24 & 575 & deg C \\ + Stream flow, mean. 
daily & 1948-01-01 & 2013-03-13 & 23814 & cfs \\ + Specific conductance & 2010-10-01 & 2012-06-24 & 551 & uS/cm @25C \\ + Suspended sediment concentration (SSC) & 1980-10-01 & 1991-09-30 & 3651 & mg/l \\ + Suspended sediment discharge & 1980-10-01 & 1991-09-30 & 3652 & tons/day \\ + \hline +\end{tabular} +\end{table}\end{Schunk} + + + +%------------------------------------------------------------ +\subsection{Parameter Information} \label{sec:usgsParams} %------------------------------------------------------------ To obtain all of the available information concerning a measured parameter, use the getParameterInfo function: @@ -180,12 +274,12 @@ Pulling out a specific example piece of information, in this case parameter name Parameter information is obtained from \url{http://nwis.waterdata.usgs.gov/nwis/pmcodes/} %------------------------------------------------------------ -\subsection{USGS Daily Value Retrievals} +\subsection{Daily Values} \label{sec:usgsDaily} %------------------------------------------------------------ -To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (true/false) interactive. There are 2 default argument: statCd defaults to \texttt{"}00003\texttt{"} and interactive defaults to TRUE. If you want to use the default values, you do not need to list them in the function call. Setting the 'interactive' option to true will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (true/false) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). 
If you want to use the default values, you do not need to list them in the function call. Setting the 'interactive' option to true will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. -The dates (start and end) need to be in the format \texttt{"}YYYY-MM-DD\texttt{"}. Setting the start date to \texttt{"}\texttt{"} will indicate to the program to ask for the earliest date, setting the end date to \texttt{"}\texttt{"} will ask for the latest available date. +The dates (start and end) need to be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user does need to include the quotes). Setting the start date to \texttt{"}\texttt{"} will indicate to the program to ask for the earliest date, setting the end date to \texttt{"}\texttt{"} will ask for the latest available date. \begin{Schunk} \begin{Sinput} @@ -263,7 +357,7 @@ There are occasions where NWIS values are not reported as numbers, instead there %------------------------------------------------------------ -\subsection{USGS Unit Value Retrievals} +\subsection{Unit Values} \label{sec:usgsRT} %------------------------------------------------------------ Any data that are collected at regular time intervals (such as 15-minute or hourly) are known as \texttt{"}Unit Values\texttt{"} - many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function retrieveUnitNWISData. Some of these Unit Values are available for the past several years, and some are only available for a recent time period such as 120 days or a year. Here is an example of a retrieval of such data. 
@@ -315,10 +409,10 @@ A simple plotting example is shown in Figure \ref{fig:RT}: %------------------------------------------------------------ -\subsection{USGS Water Quality Retrievals} +\subsection{Water Quality Values} \label{sec:usgsWQP} %------------------------------------------------------------ -To get water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the dataRetrieval package from the water quality data portal: \url{http://www.waterqualitydata.us/}. The raw data are obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a \texttt{"};\texttt{"} separator, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwelming (as will be demonstrated), a simplified version of the data can be obtained using getQWData. +To get USGS water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the Water Quality Data Portal: \url{http://www.waterqualitydata.us/}. The raw data are obtained from the function getRawQWData, with similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd: in this function multiple parameters can be queried using a \texttt{"};\texttt{"} separator, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwhelming; a simplified version of the data can be obtained using getQWData. \begin{Schunk} @@ -333,9 +427,7 @@ To get water quality data from water samples collected at the streamgage (as dis \end{Sinput} \end{Schunk} -There is a large amount of data returned for each observation. 
The column names are listed in Appendix 2 (\ref{sec:appendix2WQP}). - -To get a simplified dataframe that contains only datetime, value, and qualifier, use the function getQWData: +There is a large amount of data returned for each observation. The column names are listed in Appendix 2 (\ref{sec:appendix2WQP}). To get a simplified dataframe that contains only datetime, value, and qualifier, use the function getQWData: \begin{Schunk} \begin{Sinput} @@ -372,10 +464,10 @@ An example of plotting the above data (Figure \ref{fig:nitrate}): \end{figure} %------------------------------------------------------------ -\subsection{Other Water Quality Retrievals} +\subsection{STORET Water Quality Retrievals} \label{sec:usgsSTORET} %------------------------------------------------------------ -There are additional data sets available on the Water Quality Portal (\url{http://www.waterqualitydata.us/}). These data sets can be housed in either the STORET or NWIS database. Since STORET does not use USGS parameter codes, a 'characteristic name' must be supplied. The following example retrieves specific conductance from a DNR site in Wisconsin. +There are additional data sets available on the Water Quality Data Portal (\url{http://www.waterqualitydata.us/}). These data sets can be housed in either the STORET (data from EPA) or NWIS database. Since STORET does not use USGS parameter codes, a \texttt{"}characteristic name\texttt{"} must be supplied. The following example retrieves specific conductance from a DNR site in Wisconsin. 
\begin{Schunk} \begin{Sinput} @@ -396,7 +488,7 @@ There are additional data sets available on the Water Quality Portal (\url{http: %------------------------------------------------------------ -\section{USGS Web Retrieval Examples Structured For Use In The EGRET Package} +\section{Data Retrievals Structured For Use In The EGRET Package} %------------------------------------------------------------ Rather than using the raw data as retrieved by the web, the dataRetrieval package also includes functions that return the data in a structure that has been designed to work with the EGRET R package (\url{https://github.com/USGS-R/EGRET/wiki}). In general, these dataframes may be much more 'R-friendly' than the raw data, and will contain additional date information that allows for efficient data analysis. @@ -405,7 +497,7 @@ In this section, we use 3 dataRetrieval functions to get sufficient data to perf %------------------------------------------------------------ \subsection{INFO Data} %------------------------------------------------------------ -The function to obtain \texttt{"}metadata\texttt{"}, data about the streamgage and measured parameters is getMetaData. This function essentially combines getSiteFileData and getParameterInfo, producing one dataframe called INFO. +The function to obtain metadata, or data about the streamgage and measured parameters is getMetaData. This function combines getSiteFileData and getParameterInfo, producing one dataframe called INFO. \begin{Schunk} \begin{Sinput} @@ -418,14 +510,15 @@ Column names in the INFO dataframe are listed in Appendix 2 (\ref{sec:appendix2I %------------------------------------------------------------ \subsection{Daily Data} %------------------------------------------------------------ -The function to obtain the daily values (discharge in this case) is getDVData. It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. 
Most of these arguments are described in the previous section, however 'convert' is a new argument, the default is TRUE, and it tells the program to convert the values from cubic feet per second (cfs) to cubic meters per second (cms). For EGRET applications do not use this argument (the default is TRUE), EGRET assumes that discharge is always in cubic meters per second. If you don't want this conversion and are not using EGRET, set convert=FALSE in the function call. +The function to obtain the daily values (discharge in this case) is getDVData. It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. Most of these arguments are described in the previous section, however \texttt{"}convert\texttt{"} is a new argument (defaults to TRUE), and it tells the program to convert the values from cubic feet per second (cfs) to cubic meters per second (cms). For EGRET applications with NWIS web retrieval, do not use this argument (the default is TRUE), EGRET assumes that discharge is always in cubic meters per second. If you don't want this conversion and are not using EGRET, set convert=FALSE in the function call. \begin{Schunk} \begin{Sinput} > siteNumber <- "01491000" -> parameterCd <- "00631" # Nitrate > startDate <- "1964-01-01" > endDate <- "2013-01-01" +> # This call will get NWIS data that is in cfs, and convert it +> # to cms since we didn't override the default in the convert argument: > Daily <- getDVData(siteNumber, "00060", startDate, endDate,interactive=FALSE) \end{Sinput} \end{Schunk} @@ -433,7 +526,7 @@ The function to obtain the daily values (discharge in this case) is getDVData. 
Details of the Daily dataframe are listed below: % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:07 2013 +% Thu Mar 14 14:54:30 2013 \begin{tabular}{llll} \hline ColumnName & Type & Description & Units \\ @@ -453,7 +546,7 @@ Date & Date & Date & date \\ \hline \end{tabular}\\* -The code will shift the discharge values to 0.001 times the mean if there are zero values detected in order to perform the logarithm. Columns Q7 and Q30 are 7 and 30 day running averages. +If there are discharge values of zero, the code will add a small constant to all of the daily discharges. This constant is 0.001 times the mean discharge. The code will also report on the number of zero values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days). Columns Q7 and Q30 are the 7- and 30-day running averages for the 7 or 30 days ending on this specific date. %------------------------------------------------------------ \subsection{Sample Data} @@ -470,7 +563,10 @@ The function to obtain sample data from the water quality portal is getSampleDat Details of the Sample dataframe are listed below: % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:08 2013 +% Thu Mar 14 14:54:32 2013 +\begin{table}[!ht] +\centering +\caption{Sample dataframe} \begin{tabular}{llll} \hline ColumnName & Type & Description & Units \\ @@ -487,24 +583,23 @@ Date & Date & Date & date \\ MonthSeq & integer & Number of months since January 1, 1850 & months \\ SinDY & number & Sine of DecYear & numeric \\ CosDY & number & Cosine of DecYear & numeric \\ - Q & number & Discharge ** & cms \\ - LogQ & number & Natural logarithm of flow ** & numeric \\ + Q \footnotemark[1] & number & Discharge & cms \\ + LogQ \footnotemark[1] & number & Natural logarithm of flow & numeric \\ \hline -\end{tabular}\\ -** Flow columns are populated from data in the Daily 
dataframe after calling the mergeReport function. - - -In a more complex situation, the Sample data frame will combine all of the measured parameters. An example is provided to explain how the values are combined: +\end{tabular} +\end{table}\footnotetext[1]{Flow columns are populated from data in the Daily dataframe after calling the mergeReport function.} %------------------------------------------------------------ -\subsection{Complex Sample Data Example} +\subsection{Censored Data Evaluation} %------------------------------------------------------------ -As an example, let us say that in 2004 and earlier, we computed a total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). From 2005 and onward, we have direct measurements of total phosphorus (tp). A small subset of this fictional data looks like this: +In the typical case where none of the data are censored (that is, no values are reported as \texttt{"}less-than\texttt{"} values), ConcLow = ConcHigh = ConcAve, all of which are equal to the reported value, and Uncen = 1. In the typical form of censoring, where a value is reported as less than the reporting limit, ConcLow = NA, ConcHigh = reporting limit, ConcAve = 0.5 * reporting limit, and Uncen = 0. The next section describes a more complex situation where concentrations are computed as the sum of one or more measured parameters. + +As an example to understand how the dataRetrieval package handles a more complex censoring problem, let us say that in 2004 and earlier, we computed a total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). From 2005 and onward, we have direct measurements of total phosphorus (tp). 
A small subset of this fictional data looks like this: \begin{center} % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:08 2013 +% Thu Mar 14 14:54:32 2013 \begin{tabular}{llrlrlr} \hline cdate & rdp & dp & rpp & pp & rtp & tp \\ @@ -529,12 +624,12 @@ For the more complex example case, let us say dp is reported as <0.01 and pp is \begin{Schunk} \begin{Soutput} Date ConcLow ConcHigh Uncen ConcAve Julian Month Day DecYear MonthSeq -1 2003-02-15 0.52 0.520 1 0.5200 55927 2 46 2003.124 1838 -2 2003-06-30 0.30 0.310 0 0.3050 56062 6 181 2003.493 1842 -3 2004-09-15 NA 0.205 0 0.1025 56505 9 259 2004.706 1857 -4 2005-01-30 0.43 0.430 1 0.4300 56642 1 30 2005.081 1861 -5 2005-05-30 NA 0.050 0 0.0250 56762 5 150 2005.408 1865 -6 2005-10-30 NA 0.020 0 0.0100 56915 10 303 2005.827 1870 +1 2003-02-15 0.520 0.520 1 0.520 55927 2 46 2003.124 1838 +2 2003-06-30 0.310 0.310 1 0.310 56062 6 181 2003.493 1842 +3 2004-09-15 0.205 0.205 1 0.205 56505 9 259 2004.706 1857 +4 2005-01-30 0.430 0.430 1 0.430 56642 1 30 2005.081 1861 +5 2005-05-30 0.050 0.050 1 0.050 56762 5 150 2005.408 1865 +6 2005-10-30 0.020 0.020 1 0.020 56915 10 303 2005.827 1870 SinDY CosDY 1 0.70406552 0.7101350 2 0.04290476 -0.9990792 @@ -545,66 +640,29 @@ For the more complex example case, let us say dp is reported as <0.01 and pp is \end{Soutput} \end{Schunk} -%------------------------------------------------------------ -\subsection{Merge Report} -%------------------------------------------------------------ -Finally, there is a function called mergeReport that will look at both the Daily and Sample dataframe, and populate Q and LogQ columns into the Sample dataframe. The default arguments are Daily and Sample, however if you want to use other similarly structured dataframes, you can specify localDaily or localSample. 
- -\begin{Schunk} -\begin{Sinput} -> startDate <-'1985-01-01' -> endDate <- '1985-03-31' -> site <- '01594440' -> Daily <- getDVData(site,'00060', startDate, endDate, interactive=FALSE) -> Sample <- getSampleData(site,'01075', startDate, endDate, interactive=FALSE) -> Sample <- mergeReport() -\end{Sinput} -\begin{Soutput} - Discharge Record is 90 days long, which is 0 years - First day of the discharge record is 1985-01-01 and last day is 1985-03-31 - The water quality record has 1 samples - The first sample is from 1985-03-13 and the last sample is from 1985-03-13 - Discharge: Minimum, mean and maximum 2.83 8.41 106 - Concentration: Minimum, mean and maximum 1 1 1 - Percentage of the sample values that are censored is 100 % -\end{Soutput} -\begin{Sinput} -> head(Sample) -\end{Sinput} -\begin{Soutput} - Date ConcLow ConcHigh Uncen ConcAve Julian Month Day DecYear MonthSeq -1 1985-03-13 NA 1 0 0.5 49379 3 72 1985.195 1623 - SinDY CosDY Q LogQ -1 0.9416344 0.3366373 5.2103 1.650637 -\end{Soutput} -\end{Schunk} - - - -\newpage %------------------------------------------------------------ -\section{Ingesting User-Generated Data Files To Structure Them For Use In The EGRET Package} +\subsection{User-Generated Data Files} %------------------------------------------------------------ Aside from retrieving data from the USGS web services, the dataRetrieval package includes functions to generate the Daily and Sample data frame from local files. %------------------------------------------------------------ -\subsection{getDailyDataFromFile} +\subsubsection{getDailyDataFromFile} %------------------------------------------------------------ getDailyDataFromFile will load a user-supplied text file and convert it to the Daily dataframe. The file should have two columns, the first dates, the second values. The dates should be formatted either mm/dd/yyyy or yyyy-mm-dd. Using a 4-digit year is required. 
This function has the following inputs: filePath, fileName,hasHeader (TRUE/FALSE), separator, qUnit, and interactive (TRUE/FALSE). filePath is a string that defines the path to your file. This can either be a full path, or path relative to your R working directory. The input fileName is a string that defines the file name (including the extension). Text files that contain this sort of data require some sort of a separator, for example, a 'csv' file (comma-separated value) file uses a comma to separate the date and value column. A tab delimited file would use a tab (\texttt{"}\verb@\t@\texttt{"}) rather than the comma (\texttt{"},\texttt{"}). The type of separator you use can be defined in the function call in the \texttt{"}separator\texttt{"} argument, the default is \texttt{"},\texttt{\texttt{"}}. Another function input is a logical variable: hasHeader. The default is TRUE. If your data does not have column names, set this variable to FALSE. -Finally, qUnit is a numeric input that defines the discharge units. Flow from the NWIS web results are typically given in cubic feet per second (qUnit=1), but the EGRET package requires flow to be given in cubic meters per second (qUnit=2). Other allowed values are 10\verb@^@3 cubic feet per second (qUnit=3) and 10\verb@^@3 cubic meters per second (qUnit=4). If you do not want your data to be converted, use qUnit=2. The default is qUnit=1 (assumes flow is in cubic feet per second). +Finally, qUnit is a numeric argument that defines the discharge units used in the input file. The default is qUnit = 1 which assumes discharge is in cubic feet per second. If the discharge in the file is already in cubic meters per second then set qUnit = 2. If it is in some other units (like liters per second or acre-feet per day), the user will have to pre-process the data with a unit conversion that changes it to either cubic feet per second or cubic meters per second. 
So, if you have a file called \texttt{"}ChoptankRiverFlow.txt\texttt{"} located in a folder called \texttt{"}RData\texttt{"} on the C drive (this is a Window's example), and the file is structured as follows (tab-separated): \begin{verbatim} date Qdaily -10/1/1999 3.029902561 -10/2/1999 2.406931941 -10/3/1999 2.152080324 -10/4/1999 2.152080324 -10/5/1999 3.19980364 -10/6/1999 2.775050944 +10/1/1999 107 +10/2/1999 85 +10/3/1999 76 +10/4/1999 76 +10/5/1999 113 +10/6/1999 98 ... \end{verbatim} @@ -618,7 +676,7 @@ The call to open this file, convert the flow to cubic meters per second, and pop \end{Schunk} %------------------------------------------------------------ -\subsection{getSampleDataFromFile} +\subsubsection{getSampleDataFromFile} %------------------------------------------------------------ Similarly to the previous section, getSampleDataFromFile will import a user-generated file and populate the Sample dataframe. The difference between sample data and flow data is that the code requires a third column that contains a remark code, either blank or \texttt{"}\verb@<@\texttt{"}, which will tell the program that the data was 'left-censored' (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. If multiple constituents are going to be used, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... An example of a comma-delimited file would be: @@ -640,11 +698,63 @@ The call to open this file, and populate the Sample dataframe would be: \end{Sinput} \end{Schunk} +%------------------------------------------------------------ +\subsection{Merge Report} +%------------------------------------------------------------ +Finally, there is a function called mergeReport that will look at both the Daily and Sample dataframe, and populate Q and LogQ columns into the Sample dataframe. 
The default arguments are Daily and Sample; however, if you want to use other similarly structured dataframes, you can specify localDaily or localSample. Once mergeReport has been run, the Sample dataframe will be augmented with the daily discharges for all the days with samples. None of the water quality functions in EGRET will work without first having run the mergeReport function. + + +\begin{Schunk} +\begin{Sinput} +> siteNumber <- "01491000" +> parameterCd <- "00631" # Nitrate +> startDate <- "1964-01-01" +> endDate <- "2013-01-01" +> Daily <- getDVData(siteNumber, "00060", startDate, endDate,interactive=FALSE) +> Sample <- getSampleData(siteNumber,parameterCd, startDate, endDate, interactive=FALSE) +> Sample <- mergeReport() +\end{Sinput} +\begin{Soutput} + Discharge Record is 17899 days long, which is 49 years + First day of the discharge record is 1964-01-01 and last day is 2013-01-01 + The water quality record has 627 samples + The first sample is from 1973-06-04 and the last sample is from 2012-12-18 + Discharge: Minimum, mean and maximum 0.00991 4.02 246 + Concentration: Minimum, mean and maximum 0.05 1.1 2.4 + Percentage of the sample values that are censored is 0.16 % +\end{Soutput} +\begin{Sinput} +> head(Sample) +\end{Sinput} +\begin{Soutput} + Date ConcLow ConcHigh Uncen ConcAve Julian Month Day DecYear MonthSeq +1 1973-06-04 1.30 1.30 1 1.30 45079 6 155 1973.422 1482 +2 1979-09-25 0.52 0.52 1 0.52 47383 9 268 1979.731 1557 +3 1979-10-24 0.62 0.62 1 0.62 47412 10 297 1979.810 1558 +4 1979-12-05 1.40 1.40 1 1.40 47454 12 339 1979.925 1560 +5 1979-12-21 1.20 1.20 1 1.20 47470 12 355 1979.969 1560 +6 1980-01-24 0.84 0.84 1 0.84 47504 1 24 1980.064 1561 + SinDY CosDY Q LogQ +1 0.4699767 -0.8826788 3.256437 1.180634 +2 -0.9927882 -0.1198812 3.398022 1.223193 +3 -0.9295235 0.3687629 3.199804 1.163089 +4 -0.4547551 0.8906165 2.973269 1.089662 +5 -0.1961425 0.9805754 2.944952 1.080093 +6 0.3925740 0.9197204 10.901986 2.388945 +\end{Soutput} +\end{Schunk} + 
-\newpage +%------------------------------------------------------------ +\subsection{EGRET Plots} +%------------------------------------------------------------ +The EGRET package.... + + +\clearpage \appendix %------------------------------------------------------------ -\section{Appendix 1: Getting Started} +\section{Getting Started in R} \label{sec:appendix1} %------------------------------------------------------------ This section describes the options for downloading and installing the dataRetrieval package. @@ -670,10 +780,11 @@ To see the raw code for a particular code, type the name of the function: > removeDuplicates \end{Sinput} \begin{Soutput} -function(localSample=Sample) { - Sample1 <- localSample[!duplicated(localSample[c("DecYear","ConcHigh")]),] - - return(Sample1) +function (localSample = Sample) +{ + Sample1 <- localSample[!duplicated(localSample[c("DecYear", + "ConcHigh")]), ] + return(Sample1) } <environment: namespace:dataRetrieval> \end{Soutput} @@ -722,17 +833,20 @@ To then open the library, simply type: \end{Sinput} \end{Schunk} +\FloatBarrier %------------------------------------------------------------ -\section{Appendix 2: Columns Names} -%------------------------------------------------------------ +\section{Columns Names} \label{sec:appendix2} +%------------------------------------------------------------ + %------------------------------------------------------------ \subsection{INFO dataframe} -%------------------------------------------------------------ \label{sec:appendix2INFO} +%------------------------------------------------------------ + % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:09 2013 +% Thu Mar 14 14:54:34 2013 \begin{tabular}{l} \hline \hline @@ -780,17 +894,17 @@ agency.cd \\ constitAbbrev \\ \hline \end{tabular} -\\* - +\FloatBarrier %------------------------------------------------------------ \subsection{Water Quality Portal} 
-%------------------------------------------------------------ \label{sec:appendix2WQP} +%------------------------------------------------------------ + There are 62 columns returned from the water quality portal. % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:09 2013 +% Thu Mar 14 14:54:34 2013 \begin{tabular}{l} \hline \hline @@ -835,10 +949,11 @@ OrganizationIdentifier \\ ResultValueTypeName \\ ResultWeightBasisText \\ \hline -\end{tabular}\\* -\newpage +\end{tabular} +\FloatBarrier + % latex table generated in R 2.15.3 by xtable 1.7-1 package -% Wed Mar 13 17:00:09 2013 +% Thu Mar 14 14:54:34 2013 \begin{tabular}{l} \hline \hline @@ -865,8 +980,87 @@ ResultTimeBasisText \\ DetectionQuantitationLimitMeasure.MeasureUnitCode \\ PreparationStartDate \\ \hline -\end{tabular}\\* -\newpage +\end{tabular} +\clearpage + +%------------------------------------------------------------ +\section{Creating tables in Microsoft from R} +\label{app:createWordTable} +%------------------------------------------------------------ +There are a few steps that are required in order to create a table in a Microsoft product (Excel, Word, Powerpoint, etc.) from an R dataframe. There are actually a variety of methods, one of which is detailed here. The example we will step through here will be to create the following data. 
+ +\begin{Schunk} +\begin{Sinput} +> ChoptankAvailableData <- getDataAvailability(siteNumber) +> ChoptankDailyData <- ChoptankAvailableData["dv" == ChoptankAvailableData$service,] +> ChoptankDailyData <- ChoptankDailyData["00003" == ChoptankDailyData$statCd,] +> pCodeINFO <- getMultipleParameterNames(ChoptankDailyData$parameter_cd, interactive=FALSE) +> ChoptankDailyData <- merge(ChoptankDailyData,pCodeINFO,by="parameter_cd") +> tableData <- with(ChoptankDailyData, + data.frame( + shortName=srsname, + Start=startDate, + End=endDate, + Count=count, + Units=parameter_units) + ) +> tableData +\end{Sinput} +\begin{Soutput} + shortName Start End Count Units +1 Temperature, water 2010-10-01 2012-06-24 575 deg C +2 Stream flow, mean. daily 1948-01-01 2013-03-13 23814 cfs +3 Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C +4 Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +5 Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +\end{Soutput} +\end{Schunk} + +Our goal now is to get the data from the dataframe tableData to a Microsoft Word table. + +First, save the dataframe as a tab delimited file: + +\begin{Schunk} +\begin{Sinput} +> write.table(tableData, file="tableData.tsv",sep="\t", + row.names = FALSE,quote=FALSE) +\end{Sinput} +\end{Schunk} + +This will save a file in your working directory called tableData.tsv. You can see your working directory by typing getwd() in the R console. Opening the file in a general-purpose text editor, you should see the following: + +\begin{verbatim} +shortName Start End Count Units +Temperature, water 2010-10-01 2012-06-24 575 deg C +Stream flow, mean. 
daily 1948-01-01 2013-03-13 23814 cfs +Specific conductance 2010-10-01 2012-06-24 551 uS/cm @25C +Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l +Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day +\end{verbatim} + +To open this file in Excel: +\begin{enumerate} +\item Open Excel +\item Click on the File tab +\item Click on the Open option +\item Browse to the working directory (as shown in the results of getwd()) +\item Next to the File name text box, change the dropdown type to All Files (*.*) +\item Double-click tableData.tsv +\item A text import wizard will open. In the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. +\item In the second window, click on the Tab delimiter if it is not automatically checked, then click Finish. +\item Use the many formatting tools within Excel to customize the table +\end{enumerate} + +From Excel, it is simple to copy and paste the table into other Microsoft products. An example using one of the default Excel table formats is shown here. + +\begin{figure}[ht!] 
+\centering + \resizebox{0.9\textwidth}{!}{\includegraphics{table1.png}} +\caption{A simple table produced in Microsoft Excel} +\label{overflow} +\end{figure} + +\clearpage %------------------------------------------------------------ % BIBLIO %------------------------------------------------------------ diff --git a/inst/doc/dataRetrieval.toc b/inst/doc/dataRetrieval.toc index b02fa1b0cff01cef3d711359cd0e4f64526307ec..df3eefd5c4fd23a57ddae6aea05b09b51dd9bd27 100644 --- a/inst/doc/dataRetrieval.toc +++ b/inst/doc/dataRetrieval.toc @@ -1,26 +1,30 @@ \select@language {american} \contentsline {section}{\numberline {1}Introduction to dataRetrieval}{2}{section.1} -\contentsline {section}{\numberline {2}General USGS Web Retrieval Examples}{3}{section.2} -\contentsline {subsection}{\numberline {2.1}USGS Web Retrieval Introduction}{3}{subsection.2.1} -\contentsline {subsection}{\numberline {2.2}USGS Site Information Retrievals}{4}{subsection.2.2} -\contentsline {subsection}{\numberline {2.3}USGS Parameter Information Retrievals}{4}{subsection.2.3} -\contentsline {subsection}{\numberline {2.4}USGS Daily Value Retrievals}{5}{subsection.2.4} -\contentsline {subsection}{\numberline {2.5}USGS Unit Value Retrievals}{7}{subsection.2.5} -\contentsline {subsection}{\numberline {2.6}USGS Water Quality Retrievals}{9}{subsection.2.6} -\contentsline {subsection}{\numberline {2.7}Other Water Quality Retrievals}{10}{subsection.2.7} -\contentsline {section}{\numberline {3}USGS Web Retrieval Examples Structured For Use In The EGRET Package}{11}{section.3} -\contentsline {subsection}{\numberline {3.1}INFO Data}{11}{subsection.3.1} -\contentsline {subsection}{\numberline {3.2}Daily Data}{12}{subsection.3.2} -\contentsline {subsection}{\numberline {3.3}Sample Data}{13}{subsection.3.3} -\contentsline {subsection}{\numberline {3.4}Complex Sample Data Example}{13}{subsection.3.4} -\contentsline {subsection}{\numberline {3.5}Merge Report}{15}{subsection.3.5} -\contentsline {section}{\numberline 
{4}Ingesting User-Generated Data Files To Structure Them For Use In The EGRET Package}{16}{section.4} -\contentsline {subsection}{\numberline {4.1}getDailyDataFromFile}{16}{subsection.4.1} -\contentsline {subsection}{\numberline {4.2}getSampleDataFromFile}{17}{subsection.4.2} -\contentsline {section}{\numberline {A}Appendix 1: Getting Started}{18}{appendix.A} -\contentsline {subsection}{\numberline {A.1}New to R?}{18}{subsection.A.1} -\contentsline {subsection}{\numberline {A.2}R User: Installing dataRetrieval}{18}{subsection.A.2} -\contentsline {subsection}{\numberline {A.3}R Developers: Installing dataRetrieval from gitHub}{19}{subsection.A.3} -\contentsline {section}{\numberline {B}Appendix 2: Columns Names}{21}{appendix.B} -\contentsline {subsection}{\numberline {B.1}INFO dataframe}{21}{subsection.B.1} -\contentsline {subsection}{\numberline {B.2}Water Quality Portal}{22}{subsection.B.2} +\contentsline {section}{\numberline {2}General USGS Web Retrievals}{3}{section.2} +\contentsline {subsection}{\numberline {2.1}Introduction}{3}{subsection.2.1} +\contentsline {subsection}{\numberline {2.2}Site Information}{4}{subsection.2.2} +\contentsline {subsubsection}{\numberline {2.2.1}getSiteFileData}{4}{subsubsection.2.2.1} +\contentsline {subsubsection}{\numberline {2.2.2}getDataAvailability}{5}{subsubsection.2.2.2} +\contentsline {subsection}{\numberline {2.3}Parameter Information}{6}{subsection.2.3} +\contentsline {subsection}{\numberline {2.4}Daily Values}{7}{subsection.2.4} +\contentsline {subsection}{\numberline {2.5}Unit Values}{9}{subsection.2.5} +\contentsline {subsection}{\numberline {2.6}Water Quality Values}{11}{subsection.2.6} +\contentsline {subsection}{\numberline {2.7}STORET Water Quality Retrievals}{13}{subsection.2.7} +\contentsline {section}{\numberline {3}Data Retrievals Structured For Use In The EGRET Package}{13}{section.3} +\contentsline {subsection}{\numberline {3.1}INFO Data}{13}{subsection.3.1} +\contentsline {subsection}{\numberline {3.2}Daily 
Data}{14}{subsection.3.2} +\contentsline {subsection}{\numberline {3.3}Sample Data}{15}{subsection.3.3} +\contentsline {subsection}{\numberline {3.4}Censored Data Evaluation}{15}{subsection.3.4} +\contentsline {subsection}{\numberline {3.5}User-Generated Data Files}{17}{subsection.3.5} +\contentsline {subsubsection}{\numberline {3.5.1}getDailyDataFromFile}{17}{subsubsection.3.5.1} +\contentsline {subsubsection}{\numberline {3.5.2}getSampleDataFromFile}{18}{subsubsection.3.5.2} +\contentsline {subsection}{\numberline {3.6}Merge Report}{18}{subsection.3.6} +\contentsline {subsection}{\numberline {3.7}EGRET Plots}{19}{subsection.3.7} +\contentsline {section}{\numberline {A}Getting Started in R}{20}{appendix.A} +\contentsline {subsection}{\numberline {A.1}New to R?}{20}{subsection.A.1} +\contentsline {subsection}{\numberline {A.2}R User: Installing dataRetrieval}{20}{subsection.A.2} +\contentsline {subsection}{\numberline {A.3}R Developers: Installing dataRetrieval from gitHub}{21}{subsection.A.3} +\contentsline {section}{\numberline {B}Columns Names}{23}{appendix.B} +\contentsline {subsection}{\numberline {B.1}INFO dataframe}{23}{subsection.B.1} +\contentsline {subsection}{\numberline {B.2}Water Quality Portal}{24}{subsection.B.2} +\contentsline {section}{\numberline {C}Creating tables in Microsoft from R}{26}{appendix.C} diff --git a/inst/doc/table1.png b/inst/doc/table1.png new file mode 100644 index 0000000000000000000000000000000000000000..7749eaea99398767a012bccae4508e94e49362e7 Binary files /dev/null and b/inst/doc/table1.png differ diff --git a/man/checkStartEndDate.Rd b/man/checkStartEndDate.Rd index aa1aa87150fb5d96f1f4f053fd04972bebad9d9e..892f64cca9c0ad84ad4ad691484ae396ed210d2e 100644 --- a/man/checkStartEndDate.Rd +++ b/man/checkStartEndDate.Rd @@ -26,6 +26,6 @@ startDate <- '1985-01-01' endDate <- '1990-01-01' checkStartEndDate(startDate, endDate, interactive = FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow}