diff --git a/inst/doc/Rplots.pdf b/inst/doc/Rplots.pdf index 621263b66ef1eb5e74710e9ed85e6bd425ff6ced..fae910beddbb0c1420d8e40d77b551a44c7a643e 100644 Binary files a/inst/doc/Rplots.pdf and b/inst/doc/Rplots.pdf differ diff --git a/inst/doc/dataRetrieval-concordance.tex b/inst/doc/dataRetrieval-concordance.tex index 959494902083fe9e0cfc62826276dec1c0892f5d..c7609fa3302d2c2d931302846122e69b4b3d7338 100644 --- a/inst/doc/dataRetrieval-concordance.tex +++ b/inst/doc/dataRetrieval-concordance.tex @@ -5,8 +5,9 @@ 1 3 4 0 1 2 4 1 1 6 4 0 1 1 1 4 3 0 2 1 3 0 1 2 3 1 1 -5 1 9 11 1 1 2 1 % 0 2 1 1 2 1 0 1 4 6 0 2 2 10 0 1 2 3 1 1 5 7 0 1 2 3 1 1 -5 1 9 10 1 1 % 2 1 0 1 2 1 0 2 1 1 3 4 0 1 2 3 1 1 5 37 0 1 2 3 1 1 3 2 0 1 1 7 0 1 2 % -3 1 1 6 8 0 1 2 2 1 1 -4 1 8 8 1 1 3 2 0 1 1 12 0 1 2 10 1 1 2 1 0 3 1 % -1 2 1 1 29 0 1 2 1 10 18 0 1 2 4 1 1 3 2 0 1 1 28 0 1 2 1 10 20 0 1 2 3 % -1 1 2 4 0 1 2 1 5 27 0 1 2 31 1 1 2 1 0 3 1 12 0 1 2 15 1 1 2 1 0 3 1 % -19 0 1 2 19 1 1 3 5 0 1 2 2 1 1 4 6 0 1 2 2 1 1 4 6 0 1 2 3 1 1 2 4 0 1 % -2 6 1 1 2 1 0 1 1 3 0 1 2 1 1 1 2 4 0 1 2 22 1} +3 1 1 6 8 0 1 2 2 1 1 -4 1 8 8 1 1 3 2 0 1 1 12 0 1 2 13 1 1 2 1 0 3 1 % +1 2 1 1 29 0 1 2 2 1 1 10 18 0 1 2 8 1 1 3 2 0 1 1 28 0 1 2 2 1 1 10 20 % +0 1 2 7 1 1 14 12 0 1 2 7 1 1 2 17 0 1 3 7 1 1 2 4 0 1 2 2 1 1 5 27 0 1 % +2 31 1 1 2 1 0 3 1 12 0 1 2 15 1 1 2 1 0 3 1 19 0 1 2 19 1 1 3 5 0 1 2 % +2 1 1 4 6 0 1 2 2 1 1 4 6 0 1 2 3 1 1 2 4 0 1 2 6 1 1 2 1 0 1 1 3 0 1 2 % +1 1 1 2 4 0 1 2 22 1} diff --git a/inst/doc/dataRetrieval-fig1.pdf b/inst/doc/dataRetrieval-fig1.pdf index 7b14512009128caed1019b26888126d6d2b0e5ea..61919310ccb773070876660c1837751912b6ab10 100644 Binary files a/inst/doc/dataRetrieval-fig1.pdf and b/inst/doc/dataRetrieval-fig1.pdf differ diff --git a/inst/doc/dataRetrieval-fig2.pdf b/inst/doc/dataRetrieval-fig2.pdf index fddc2e53cece25256ebbb56c7346f742bf31e574..a17f642fdb300a78de521ff0a29d635b21c8f578 100644 Binary files a/inst/doc/dataRetrieval-fig2.pdf and 
b/inst/doc/dataRetrieval-fig2.pdf differ diff --git a/inst/doc/dataRetrieval-fig3.pdf b/inst/doc/dataRetrieval-fig3.pdf index f15e2b1f4423c1290cbc82297adc8dbd601cdeea..d1a4520ab2b06dee7b24712ea5840e5094d5954b 100644 Binary files a/inst/doc/dataRetrieval-fig3.pdf and b/inst/doc/dataRetrieval-fig3.pdf differ diff --git a/inst/doc/dataRetrieval.Rnw b/inst/doc/dataRetrieval.Rnw index ceb505171ccf048a418844cc249d12c63ae85b11..efd32c9f106268f650f27f768db3b4ba69764a2c 100644 --- a/inst/doc/dataRetrieval.Rnw +++ b/inst/doc/dataRetrieval.Rnw @@ -404,6 +404,43 @@ data.table <- xtable(DF, print(data.table, caption.placement="top",floating="FALSE",latex.environments=NULL) @ +In a more complex situation, the Sample data frame will combine all of the measured parameters. An example is provided to explain how the values are combined: + +%------------------------------------------------------------ +\subsection{Complex Sample Data Example} +%------------------------------------------------------------ +As an example, let us say that in 2004 and earlier, we computed a total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). From 2005 and onward, we have direct measurements of total phosphorus (tp). A small subset of this fictional data looks like this: + +<<label=exampleComplexQW, echo=FALSE,results=tex>>= +cdate <- c("2003-02-15","2003-06-30","2004-09-15","2005-01-30","2005-05-30","2005-10-30") +rdp <- c("", "<","<","","","") +dp <- c(0.02,0.01,0.005,NA,NA,NA) +rpp <- c("", "","<","","","") +pp <- c(0.5,0.3,0.2,NA,NA,NA) +rtp <- c("","","","","<","<") +tp <- c(NA,NA,NA,0.43,0.05,0.02) + +DF <- data.frame(cdate,rdp,dp,rpp,pp,rtp,tp) + +data.table <- xtable(DF, + caption="Example data") +print(data.table, caption.placement="top",floating="FALSE",latex.environments=NULL) +@ +\\* + +The dataRetrieval package will "add up" all the values in a given row to form the total for that sample. 
Thus, you only want to enter data that should be added together. For example, we might know the value for dp on 5/30/2005, but we don't want to put it in the table because under the rules of this data set, we are not supposed to add it in to the values in 2005. + +For every sample, the EGRET package requires a pair of numbers to define an interval in which the true value lies (ConcLow and ConcHigh). In a simple non-censored case (the reported value is above the detection limit), ConcLow equals ConcHigh and the interval collapses down to a single point. In a simple censored case, the value might be reported as <0.2, then ConcLow=NA and ConcHigh=0.2. We use NA instead of 0 as a way to elegantly handle future logarithm calculations. + +For the more complex example case, let us say dp is reported as <0.01 and pp is reported as 0.3. We know that the total must be at least 0.3 and could be as much as 0.31. Therefore, ConcLow=0.3 and ConcHigh=0.31. Another case would be if dp is reported as <0.005 and pp is reported <0.2. We know in this case that the true value could be as low as zero, but could be as high as 0.205. Therefore, in this case, ConcLow=NA and ConcHigh=0.205. 
The Sample dataframe for the example data is therefore: + +<<thirdExample,echo=FALSE>>= +getPreLoadedSampleData(DF) + +@ + + + %------------------------------------------------------------ \subsection{INFO Data} %------------------------------------------------------------ diff --git a/inst/doc/dataRetrieval.log b/inst/doc/dataRetrieval.log index 0d1e8a63bb9cb347182d3b494eac6fd87d92c531..b8833590faa3c5022e267f5bc306e4ca8a4864ab 100644 --- a/inst/doc/dataRetrieval.log +++ b/inst/doc/dataRetrieval.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 5 FEB 2013 15:37 +This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 14 FEB 2013 17:09 entering extended mode **dataRetrieval.tex (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.tex @@ -439,15 +439,14 @@ LaTeX Info: Redefining \nameref on input line 42. \makeMPintoPDFobject=\count124 \everyMPtoPDFconversion=\toks21 ) (D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval-concordance.tex) -(D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.toc) -\tf@toc=\write5 - +(D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.toc Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [1 -{C:/Users/ldecicco/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}] +{C:/Users/ldecicco/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map}]) +\tf@toc=\write5 LaTeX Font Info: Try loading font information for T1+aett on input line 60. 
("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\ae\t1aett.fd" @@ -477,7 +476,7 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[6] <dataRetrieval-fig1.pdf, id=137, 433.62pt x 289.08pt> +[6] <dataRetrieval-fig1.pdf, id=157, 433.62pt x 289.08pt> File: dataRetrieval-fig1.pdf Graphic file (type pdf) <use dataRetrieval-fig1.pdf> @@ -491,7 +490,7 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[8] <dataRetrieval-fig2.pdf, id=155, 433.62pt x 289.08pt> +[8] <dataRetrieval-fig2.pdf, id=175, 433.62pt x 289.08pt> File: dataRetrieval-fig2.pdf Graphic file (type pdf) <use dataRetrieval-fig2.pdf> @@ -510,7 +509,7 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[10] <dataRetrieval-fig3.pdf, id=172, 433.62pt x 289.08pt> +[10] <dataRetrieval-fig3.pdf, id=192, 433.62pt x 289.08pt> File: dataRetrieval-fig3.pdf Graphic file (type pdf) <use dataRetrieval-fig3.pdf> @@ -541,25 +540,35 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [12] LaTeX Font Info: Font shape `TS1/aett/m/n' in size <10.95> not available -(Font) Font shape `TS1/cmtt/m/n' tried instead on input line 486. +(Font) Font shape `TS1/cmtt/m/n' tried instead on input line 489. 
-Overfull \vbox (21.68121pt too high) has occurred while \output is active [] +Underfull \hbox (badness 10000) in paragraph at lines 497--515 + [] -[13] -Underfull \hbox (badness 10000) in paragraph at lines 580--608 - [] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] +[13] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [14] +Underfull \hbox (badness 10000) in paragraph at lines 589--601 + + [] + + Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [15] +Underfull \hbox (badness 10000) in paragraph at lines 644--672 + + [] + + Overfull \vbox (21.68121pt too high) has occurred while \output is active [] @@ -572,7 +581,15 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [18] -Overfull \hbox (63.21521pt too wide) in paragraph at lines 742--743 +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] + + +[19] +Overfull \vbox (21.68121pt too high) has occurred while \output is active [] + + +[20] +Overfull \hbox (63.21521pt too wide) in paragraph at lines 806--807 \T1/aer/m/n/10.95 library/2.15/dataRetrieval, and the de-fault for a Mac: /User s/userA/Library/R/2.15/library/dataRetrieval. [] @@ -581,29 +598,29 @@ s/userA/Library/R/2.15/library/dataRetrieval. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[19] +[21] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[20] -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 790. +[22] +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 854. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[21] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 790. +[23] +Package atveryend Info: Empty hook `AfterLastShipout' on input line 854. 
(D:\LADData\RCode\dataRetrieval\inst\doc\dataRetrieval.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 790. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 790. +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 854. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 854. Package rerunfilecheck Info: File `dataRetrieval.out' has not changed. -(rerunfilecheck) Checksum: EC7004D55096E88EE19A0D81007FF8E5;1310. +(rerunfilecheck) Checksum: 02DBB41E477B3A8A750BA3946A061AE5;1577. ) Here is how much of TeX's memory you used: - 7390 strings out of 494045 - 105940 string characters out of 3145961 - 191876 words of memory out of 3000000 - 10493 multiletter control sequences out of 15000+200000 + 7400 strings out of 494045 + 106074 string characters out of 3145961 + 191879 words of memory out of 3000000 + 10497 multiletter control sequences out of 15000+200000 40303 words of font info for 83 fonts, out of 3000000 for 9000 715 hyphenation exceptions out of 8191 35i,8n,28p,866b,483s stack positions out of 5000i,500n,10000p,200000b,50000s @@ -621,9 +638,9 @@ cmr7.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmr8 .pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmti10.p fb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/public/amsfonts/cm/cmtt10.pfb > -Output written on dataRetrieval.pdf (21 pages, 290588 bytes). +Output written on dataRetrieval.pdf (23 pages, 297741 bytes). PDF statistics: - 300 PDF objects out of 1000 (max. 8388607) - 49 named destinations out of 1000 (max. 500000) - 156 words of extra memory for PDF output out of 10000 (max. 10000000) + 329 PDF objects out of 1000 (max. 8388607) + 55 named destinations out of 1000 (max. 500000) + 188 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf index 50be436e4ea625207daca044b64159f017f2a507..e543c1ea044cec033b46a95124804dae5ad3f332 100644 Binary files a/inst/doc/dataRetrieval.pdf and b/inst/doc/dataRetrieval.pdf differ diff --git a/inst/doc/dataRetrieval.synctex.gz b/inst/doc/dataRetrieval.synctex.gz index 36568d724ad12d0d7576143c2c59ed52ea2f6a8b..b98a5e4bc0405493828df71d860cee0e3eb8d4ce 100644 Binary files a/inst/doc/dataRetrieval.synctex.gz and b/inst/doc/dataRetrieval.synctex.gz differ diff --git a/inst/doc/dataRetrieval.tex b/inst/doc/dataRetrieval.tex index 1e305048004d2f4527970d89198f9de2e4437c36..6847e80296ff17bfe7ecc8cf7b34ee79a2e6df66 100644 --- a/inst/doc/dataRetrieval.tex +++ b/inst/doc/dataRetrieval.tex @@ -79,7 +79,7 @@ Once the site-ID is known, the next required input for USGS data retrievals is t % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:17 2013 +% Thu Feb 14 17:09:15 2013 \begin{table}[ht] \begin{center} \caption{Commonly found USGS Parameter Codes} @@ -102,7 +102,7 @@ For real-time data, the parameter code and site ID will suffice. 
The USGS store The most common stat codes are: % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:17 2013 +% Thu Feb 14 17:09:16 2013 \begin{table}[ht] \begin{center} \caption{Commonly found USGS Stat Codes} @@ -134,7 +134,7 @@ To obtain all of the available site information, use the getSiteFileData functio % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:18 2013 +% Thu Feb 14 17:09:16 2013 \begin{tabular}{rllll} \hline & ColumnNames & ColumnNames.1 & ColumnNames.2 & ColumnNames.3 \\ @@ -293,12 +293,12 @@ Which produces the following dataframe: \begin{Schunk} \begin{Soutput} agency_cd site_no datetime tz_cd X02_00060 X02_00060_cd -1 USGS 01491000 2013-02-04 00:00:00 EST 175 P -2 USGS 01491000 2013-02-04 00:15:00 EST 175 P -3 USGS 01491000 2013-02-04 00:30:00 EST 178 P -4 USGS 01491000 2013-02-04 00:45:00 EST 178 P -5 USGS 01491000 2013-02-04 01:00:00 EST 178 P -6 USGS 01491000 2013-02-04 01:15:00 EST 178 P +1 USGS 01491000 2013-02-13 00:00:00 EST 209 P +2 USGS 01491000 2013-02-13 00:15:00 EST 209 P +3 USGS 01491000 2013-02-13 00:30:00 EST 211 P +4 USGS 01491000 2013-02-13 00:45:00 EST 209 P +5 USGS 01491000 2013-02-13 01:00:00 EST 206 P +6 USGS 01491000 2013-02-13 01:15:00 EST 206 P \end{Soutput} \end{Schunk} @@ -326,7 +326,7 @@ A simple plotting example is shown in Figure 2: %------------------------------------------------------------ \subsection{USGS Water Quality Retrievals} %------------------------------------------------------------ -Finally, we can use the dataRetrieval package to get water quality data that is available on the water quality data portal: \url{http://www.waterqualitydata.us/}. The raw data us obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. 
The difference is in parameterCd, in this function multiple parameters can be queried using a ";" separator, and setting parameterCd <- "" will return all of the measured observations. The raw data can be overwelming (as will be demonstrated), a simplified version of the data can be obtained using getQWData. +Finally, we can use the dataRetrieval package to get USGS water quality data that is available on the water quality data portal: \url{http://www.waterqualitydata.us/}. The raw data is obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a ";" separator, and setting parameterCd <- "" will return all of the measured observations. The raw data can be overwhelming (as will be demonstrated), a simplified version of the data can be obtained using getQWData. \begin{Schunk} @@ -345,7 +345,7 @@ There is a large amount of data returned for each observation. The column names % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:22 2013 +% Thu Feb 14 17:09:20 2013 \begin{tabular}{rll} \hline & ColumnNames & ColumnNames.1 \\ @@ -393,8 +393,8 @@ To get a simplified dataframe that contains only datetime, value, and qualifier, > names(dissolvedNitrateSimple) \end{Sinput} \begin{Soutput} -[1] "dateTime" "qualifier.00618" "value.00618" "qualifier.71851" -[5] "value.71851" +[1] "dateTime" "qualifier.71851" "value.71851" "qualifier.00618" +[5] "value.00618" \end{Soutput} \end{Schunk} Note that in this dataframe, datatime is imported as Dates (no times are included), and the qualifier is either blank or \verb@"<"@ signifying a censored value. 
@@ -419,7 +419,7 @@ An example of plotting the above data (Figure 3): \end{figure} %------------------------------------------------------------ -\subsection{Water Quality Retrievals} +\subsection{Other Water Quality Retrievals} %------------------------------------------------------------ Additionally, there are additional data sets available on the Water Quality Portal (\url{http://www.waterqualitydata.us/}). These data sets can be housed in either the STORET or NWIS database. Since STORET does not use USGS parameter codes, a 'characteristic name' must be supplied. The following example retrieves specific conductance from a DNR site in Wisconsin. @@ -448,7 +448,10 @@ Rather than using the raw data as retrieved by the web, the dataRetrieval packag In this section, we use 3 dataRetrieval functions to get sufficient data to perform an EGRET analysis. We will continue analyzing the Choptank River. We will need essentially the same data that was retrieved in the previous section, but we will get the daily discharge values in a dataframe called Daily, the nitrate sample data in a dataframe called Sample, and the data about the station and parameters in a dataframe called INFO. These are the dataframes that were exclusively designed to work with the EGRET R package, however can be very useful for all hydrologic studies. -The funtion to obtain the daily values (discharge in this case) is getDVData. It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. Most of these arguments are described in the previous section, however 'convert' is a new argument, it's default is TRUE, and it tells the program to convert the values from cfs to cms. If you don't want this conversion, set convert=FALSE in the function call. 
+%------------------------------------------------------------ +\subsection{Daily Data} +%------------------------------------------------------------ +The function to obtain the daily values (discharge in this case) is getDVData. It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. Most of these arguments are described in the previous section, however 'convert' is a new argument, it's default is TRUE, and it tells the program to convert the values from cfs to cms. If you don't want this conversion, set convert=FALSE in the function call. \begin{Schunk} \begin{Sinput} @@ -487,8 +490,10 @@ The funtion to obtain the daily values (discharge in this case) is getDVData. I \end{Soutput} \end{Schunk} +Details of the Daily dataframe are listed below: + % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:28 2013 +% Thu Feb 14 17:09:24 2013 \begin{tabular}{rlll} \hline & ColumnName & Type & Description \\ @@ -507,8 +512,12 @@ The funtion to obtain the daily values (discharge in this case) is getDVData. I 12 & Q30 & number & 30 running average of Q \\ \hline \end{tabular}\\* + The code will shift the discharge values to 0.001 times the mean if there are zero values detected in order to perform the logarithm. Columns Q7 and Q30 are 7 and 30 day running averages. +%------------------------------------------------------------ +\subsection{Sample Data} +%------------------------------------------------------------ The function to obtain sample data from the water quality portal is getSampleData. The arguments for this function are also siteNumber, ParameterCd, StartDate, EndDate, interactive. These are the same inputs as getRawQWData or getQWData as described in the previous section. 
\begin{Schunk} @@ -544,8 +553,10 @@ The function to obtain sample data from the water quality portal is getSampleDat \end{Soutput} \end{Schunk} +Details of the Sample dataframe are listed below: + % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:29 2013 +% Thu Feb 14 17:09:25 2013 \begin{tabular}{rlll} \hline & ColumnName & Type & Description \\ @@ -566,7 +577,58 @@ The function to obtain sample data from the water quality portal is getSampleDat 14 & LogQ & number & Natural logarithm of flow \\ \hline \end{tabular} +In a more complex situation, the Sample data frame will combine all of the measured parameters. An example is provided to explain how the values are combined: + +%------------------------------------------------------------ +\subsection{Complex Sample Data Example} +%------------------------------------------------------------ +As an example, let us say that in 2004 and earlier, we computed a total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). Form 2005 and onward, we have direct measurements of total phosphorus (tp). A small subset of this fictional data looks like this: + +% latex table generated in R 2.15.2 by xtable 1.7-0 package +% Thu Feb 14 17:09:25 2013 +\begin{tabular}{rllrlrlr} + \hline + & cdate & rdp & dp & rpp & pp & rtp & tp \\ + \hline +1 & 2003-02-15 & & 0.02 & & 0.50 & & \\ + 2 & 2003-06-30 & $<$ & 0.01 & & 0.30 & & \\ + 3 & 2004-09-15 & $<$ & 0.00 & $<$ & 0.20 & & \\ + 4 & 2005-01-30 & & & & & & 0.43 \\ + 5 & 2005-05-30 & & & & & $<$ & 0.05 \\ + 6 & 2005-10-30 & & & & & $<$ & 0.02 \\ + \hline +\end{tabular}\\* +The dataRetrieval package will "add up" all the values in a given row to form the total for that sample. Thus, you only want to enter data that should be added together. 
For example, we might know the value for dp on 5/30/2005, but we don't want to put it in the table because under the rules of this data set, we are not supposed to add it in to the values in 2005. + +For every sample, the EGRET package requires a pair of numbers to define an interval in which the true value lies (ConcLow and ConcHigh). In a simple non-censored case (the reported value is above the detection limit), ConcLow equals ConcHigh and the interval collapses down to a single point. In a simple censored case, the value might be reported as <0.2, then ConcLow=NA and ConcHigh=0.2. We use NA instead of 0 as a way to elegantly handle future logarithm calculations. + +For the more complex example case, let us say dp is reported as <0.01 and pp is reported as 0.3. We know that the total must be at least 0.3 and could be as much as 0.31. Therefore, ConcLow=0.3 and ConcHigh=0.31. Another case would be if dp is reported as <0.005 and pp is reported <0.2. We know in this case that the true value could be as low as zero, but could be as high as 0.205. Therefore, in this case, ConcLow=NA and ConcHigh=0.205. 
The Sample dataframe for the example data is therefore: + +\begin{Schunk} +\begin{Soutput} + Date ConcLow ConcHigh Uncen ConcAve Julian Month Day DecYear MonthSeq +1 2003-02-15 0.52 0.520 1 0.5200 55927 2 46 2003.124 1838 +2 2003-06-30 0.30 0.310 0 0.3050 56062 6 181 2003.493 1842 +3 2004-09-15 NA 0.205 0 0.1025 56505 9 259 2004.706 1857 +4 2005-01-30 0.43 0.430 1 0.4300 56642 1 30 2005.081 1861 +5 2005-05-30 NA 0.050 0 0.0250 56762 5 150 2005.408 1865 +6 2005-10-30 NA 0.020 0 0.0100 56915 10 303 2005.827 1870 + SinDY CosDY +1 0.70406552 0.7101350 +2 0.04290476 -0.9990792 +3 -0.96251346 -0.2712339 +4 0.48505985 0.8744810 +5 0.54391895 -0.8391378 +6 -0.88668032 0.4623830 +\end{Soutput} +\end{Schunk} + + + +%------------------------------------------------------------ +\subsection{INFO Data} +%------------------------------------------------------------ The function to obtain "metadata", data about the gage station and measured parameters is getMetaData. This function essentially combines getSiteFileData and getParameterInfo, producing one dataframe called INFO. \begin{Schunk} @@ -575,8 +637,10 @@ The function to obtain "metadata", data about the gage station and measured para \end{Sinput} \end{Schunk} +Column names in the INFO dataframe are listed below: + % latex table generated in R 2.15.2 by xtable 1.7-0 package -% Tue Feb 05 15:37:30 2013 +% Thu Feb 14 17:09:26 2013 \begin{tabular}{rll} \hline & ColumnNames & ColumnNames.1 \\ @@ -660,29 +724,29 @@ Similarly to the previous section, getSampleDataFromFile will import a user-gene \begin{verbatim} cdate;remarkCode;Nitrate -10/7/1999;;1.4 -11/4/1999;<;0.99 -12/3/1999;;1.42 -1/4/2000;;1.59 -2/3/2000;;1.54 +10/7/1999,,1.4 +11/4/1999,<,0.99 +12/3/1999,,1.42 +1/4/2000,,1.59 +2/3/2000,,1.54 ... 
\end{verbatim} The call to open this file, and populate the Sample dataframe would be: \begin{Schunk} \begin{Sinput} > fileName <- "ChoptankRiverNitrate.csv" -> filePath <- "~/RData/" -> Sample <- getSampleDataFromFile(filePath,fileName,separator=";",interactive=FALSE) +> filePath <- "C:/RData/" +> Sample <- getSampleDataFromFile(filePath,fileName,separator=",",interactive=FALSE) > head(Sample) \end{Sinput} \begin{Soutput} Date ConcLow ConcHigh Uncen ConcAve Julian Month Day DecYear MonthSeq -1 1999-10-07 1.40 1.40 1 1.40 54700 10 280 1999.764 1798 -2 1999-11-04 0.99 0.99 1 0.99 54728 11 308 1999.840 1799 -3 1999-12-03 1.42 1.42 1 1.42 54757 12 337 1999.919 1800 -4 2000-01-04 1.59 1.59 1 1.59 54789 1 4 2000.010 1801 -5 2000-02-03 1.54 1.54 1 1.54 54819 2 34 2000.092 1802 -6 2000-02-15 1.37 1.37 1 1.37 54831 2 46 2000.124 1802 +1 1999-10-07 1.40 1.40 1 1.400 54700 10 280 1999.764 1798 +2 1999-11-04 NA 0.99 0 0.495 54728 11 308 1999.840 1799 +3 1999-12-03 1.42 1.42 1 1.420 54757 12 337 1999.919 1800 +4 2000-01-04 1.59 1.59 1 1.590 54789 1 4 2000.010 1801 +5 2000-02-03 1.54 1.54 1 1.540 54819 2 34 2000.092 1802 +6 2000-02-15 1.37 1.37 1 1.370 54831 2 46 2000.124 1802 SinDY CosDY 1 -0.99631836 0.0857305 2 -0.84377556 0.5366962 diff --git a/inst/doc/dataRetrieval.toc b/inst/doc/dataRetrieval.toc index 6c3e62ce3909b9d84abd03e8be628dc94534bd2f..2c9af635083c7d6250a1ce8983cc725f701b2268 100644 --- a/inst/doc/dataRetrieval.toc +++ b/inst/doc/dataRetrieval.toc @@ -7,12 +7,16 @@ \contentsline {subsection}{\numberline {2.4}USGS Daily Value Retrievals}{5}{subsection.2.4} \contentsline {subsection}{\numberline {2.5}USGS Unit Value Retrievals}{7}{subsection.2.5} \contentsline {subsection}{\numberline {2.6}USGS Water Quality Retrievals}{9}{subsection.2.6} -\contentsline {subsection}{\numberline {2.7}Water Quality Retrievals}{11}{subsection.2.7} +\contentsline {subsection}{\numberline {2.7}Other Water Quality Retrievals}{11}{subsection.2.7} \contentsline {section}{\numberline 
{3}Polished Data: USGS Web Retrieval Examples}{12}{section.3} -\contentsline {section}{\numberline {4}Retrieving User-Generated Data Files}{16}{section.4} -\contentsline {subsection}{\numberline {4.1}getDailyDataFromFile}{16}{subsection.4.1} -\contentsline {subsection}{\numberline {4.2}getSampleDataFromFile}{17}{subsection.4.2} -\contentsline {section}{\numberline {5}Appendix 1: Getting Started}{19}{section.5} -\contentsline {subsection}{\numberline {5.1}New to R?}{19}{subsection.5.1} -\contentsline {subsection}{\numberline {5.2}R User: Installing dataRetrieval from downloaded binary}{19}{subsection.5.2} -\contentsline {subsection}{\numberline {5.3}R Developers: Installing dataRetrieval from gitHub}{20}{subsection.5.3} +\contentsline {subsection}{\numberline {3.1}Daily Data}{12}{subsection.3.1} +\contentsline {subsection}{\numberline {3.2}Sample Data}{13}{subsection.3.2} +\contentsline {subsection}{\numberline {3.3}Complex Sample Data Example}{15}{subsection.3.3} +\contentsline {subsection}{\numberline {3.4}INFO Data}{16}{subsection.3.4} +\contentsline {section}{\numberline {4}Retrieving User-Generated Data Files}{18}{section.4} +\contentsline {subsection}{\numberline {4.1}getDailyDataFromFile}{18}{subsection.4.1} +\contentsline {subsection}{\numberline {4.2}getSampleDataFromFile}{19}{subsection.4.2} +\contentsline {section}{\numberline {5}Appendix 1: Getting Started}{21}{section.5} +\contentsline {subsection}{\numberline {5.1}New to R?}{21}{subsection.5.1} +\contentsline {subsection}{\numberline {5.2}R User: Installing dataRetrieval from downloaded binary}{21}{subsection.5.2} +\contentsline {subsection}{\numberline {5.3}R Developers: Installing dataRetrieval from gitHub}{22}{subsection.5.3}