From 0d90d8c0dde0ee4ae8c664d6db0ba1cdc2db3c39 Mon Sep 17 00:00:00 2001 From: Laura DeCicco <ldecicco@usgs.gov> Date: Wed, 9 Jul 2014 13:54:40 -0500 Subject: [PATCH] Latest updates to respond to SPN. --- vignettes/dataRetrieval.Rnw | 126 ++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 71 deletions(-) diff --git a/vignettes/dataRetrieval.Rnw b/vignettes/dataRetrieval.Rnw index ebb7c4f4..cebaca8f 100644 --- a/vignettes/dataRetrieval.Rnw +++ b/vignettes/dataRetrieval.Rnw @@ -163,9 +163,7 @@ addSpace <- function(x) ifelse(x != "1", "[5pt]","") \newpage \tableofcontents -% \cleardoublepage \listoffigures -% \cleardoublepage \listoftables \newpage @@ -180,7 +178,8 @@ The dataRetrieval package is designed to retrieve many of the major data types o For information on getting started in R and installing the package, see (\ref{sec:appendix1}): Getting Started. -Quick workflow for major dataRetrieval functions: +A quick workflow for major dataRetrieval functions: + <<workflow, echo=TRUE,eval=FALSE>>= library(dataRetrieval) # Site ID for Choptank River near Greensboro, MD @@ -227,7 +226,7 @@ Not every station will measure all parameters. A short list of commonly measured <<tableParameterCodes, echo=FALSE,results='asis'>>= pCode <- c('00060', '00065', '00010','00045','00400') -shortName <- c("Discharge [ft3/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") +shortName <- c("Discharge [ft$^3$/s]","Gage height [ft]","Temperature [C]", "Precipitation [in]", "pH") data.df <- data.frame(pCode, shortName, stringsAsFactors=FALSE) @@ -237,13 +236,14 @@ print(xtable(data.df, caption.placement="top", size = "\\footnotesize", latex.environment=NULL, + sanitize.text.function = function(x) {x}, sanitize.colnames.function = bold.colHeaders, sanitize.rownames.function = addSpace ) @ -A complete list (as of September 25, 2013) is available as data attached to the package. 
It can be accessed by the following: +A complete list (as of September 25, 2013) is available as data attached to the package. It is accessed by the following: <<tableParameterCodesDataRetrieval>>= library(dataRetrieval) @@ -301,7 +301,7 @@ siteNumber <- "01491000" ChoptankInfo <- getSiteFileData(siteNumber) @ -Pulling out a specific example piece of information, in this case station name can be done as follows: +A specific example piece of information can be retrieved, in this case a station name, as follows: <<siteNames2, echo=TRUE>>= ChoptankInfo$station.nm @@ -331,20 +331,25 @@ ChoptankDailyData <- subset(ChoptankDailyData, @ + <<tablegda, echo=FALSE,results='asis'>>= tableData <- with(ChoptankDailyData, data.frame( srsname=srsname, startDate=as.character(startDate), endDate=as.character(endDate), count=as.character(count), - units=parameter_units) + units=parameter_units, + stringsAsFactors=FALSE) ) +tableData$units[which(tableData$units == "ft3/s")] <- "ft$^3$/s" + print(xtable(tableData,label="tab:gda", - caption="Daily mean data availabile at the Choptank River near Greensboro, MD. Some columns deleted for space considerations."), + caption="Daily mean data available at the Choptank River near Greensboro, MD. 
[Some columns deleted for space considerations]"), caption.placement="top", size = "\\footnotesize", latex.environment=NULL, + sanitize.text.function = function(x) {x}, sanitize.colnames.function = bold.colHeaders, sanitize.rownames.function = addSpace ) @@ -360,6 +365,7 @@ See Section \ref{app:createWordTable} for instructions on converting an R datafr \label{sec:usgsParams} %------------------------------------------------------------ To obtain all of the available information concerning a measured parameter, use the \texttt{getParameterInfo} function: + <<label=getPCodeInfo, echo=TRUE>>= # Using defaults: parameterCd <- "00618" @@ -367,7 +373,8 @@ parameterINFO <- getParameterInfo(parameterCd) colnames(parameterINFO) @ -Pulling out a specific example piece of information, in this case parameter name can be done as follows: +A specific example piece of information, in this case parameter name, can be obtained as follows: + <<siteNames, echo=TRUE>>= parameterINFO$parameter_nm @ @@ -377,9 +384,9 @@ Parameter information is obtained from \url{http://nwis.waterdata.usgs.gov/nwis/ \subsection{Daily Values} \label{sec:usgsDaily} %------------------------------------------------------------ -To obtain daily records of USGS data, use the \texttt{retrieveNWISData} function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. Setting the \texttt{"}interactive\texttt{"} option to TRUE will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain daily records of USGS data, use the \texttt{retrieveNWISData} function. 
The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. By setting the \texttt{"}interactive\texttt{"} option to FALSE, the operation of the function will advance automatically. It might make more sense to run large batch collections with the interactive option set to FALSE. -The dates (start and end) need to be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user does need to include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will indicate to the program to ask for the earliest date, setting the end date to \texttt{"}\texttt{"} (no space) will ask for the latest available date. +The dates (start and end) must be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user must include the quotes). Setting the start date to \texttt{"}\texttt{"} (no space) will prompt the program to ask for the earliest date, and setting the end date to \texttt{"}\texttt{"} (no space) will prompt for the latest available date. <<label=getNWISDaily, echo=TRUE, eval=TRUE>>= @@ -450,7 +457,7 @@ There are occasions where NWIS values are not reported as numbers, instead there \subsection{Unit Values} \label{sec:usgsRT} %------------------------------------------------------------ -Any data that are collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values}. Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{retrieveUnitNWISData}. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. 
+Any data collected at regular time intervals (such as 15-minute or hourly) are known as \enquote{unit values}. Many of these are delivered on a real time basis and very recent data (even less than an hour old in many cases) are available through the function \texttt{retrieveUnitNWISData}. Some of these unit values are available for many years, and some are only available for a recent time period such as 120 days. Here is an example of a retrieval of such data. <<label=getNWISUnit, echo=TRUE>>= @@ -460,12 +467,14 @@ endDate <- "2012-05-13" dischargeToday <- retrieveUnitNWISData(siteNumber, parameterCd, startDate, endDate) @ -Which produces the following dataframe: + +The retrieval produces the following dataframe: + <<dischargeData, echo=FALSE>>= head(dischargeToday) @ -Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data is pulled from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice}. Any value that cannot be converted to a number will be reported as NA in this package. +Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data is retrieved from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \enquote{Ice}. Any value that cannot be converted to a number will be reported as NA in this package. 
\newpage @@ -476,7 +485,7 @@ Note that time now becomes important, so the variable datetime is a POSIXct, and \subsection{Water Quality Values} \label{sec:usgsWQP} %------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{retrieveNWISqwData}, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a vector, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. +To get USGS water quality data from water samples collected at the streamgage or other monitoring site (as distinct from unit values collected through some type of automatic monitor) we can use the function \texttt{retrieveNWISqwData}, with the input arguments: siteNumber, parameterCd, startDate, endDate, and interactive (similar to \texttt{retrieveUnitNWISData} and \texttt{retrieveNWISData}). <<label=getQW, echo=TRUE>>= @@ -523,7 +532,7 @@ There are additional water quality data sets available from the Water Quality Da \subsection{URL Construction} \label{sec:usgsURL} %------------------------------------------------------------ -There may be times when you might be interested in seeing the URL (web address) that was used to obtain the raw data. The \texttt{constructNWISURL} function returns the URL. Aside from input variables that have already been described, there is a new argument \texttt{"}service\texttt{"}. The service argument can be \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (NWIS water quality values), or \texttt{"}wqp\texttt{"} (general Water Quality Portal values). 
+There may be times when you might be interested in seeing the URL (web address) that was used to obtain the raw data. The \texttt{constructNWISURL} function returns the URL. In addition to input variables that have been described, there is a new argument \texttt{"}service\texttt{"}. The service argument can be \texttt{"}dv\texttt{"} (daily values), \texttt{"}uv\texttt{"} (unit values), \texttt{"}qw\texttt{"} (NWIS water quality values), or \texttt{"}wqp\texttt{"} (general Water Quality Portal values). <<label=geturl, echo=TRUE, eval=FALSE>>= @@ -551,7 +560,7 @@ In this section, we use 3 dataRetrieval functions to get sufficient data to perf \subsection{INFO Data} \label{INFOsubsection} %------------------------------------------------------------ -The function to obtain metadata, or data about the streamgage and measured parameters is \texttt{getMetaData}. This function combines \texttt{getSiteFileData} and \texttt{getParameterInfo}, producing one dataframe called INFO. +The \texttt{getMetaData} function obtains metadata, or data about the streamgage and measured parameters. This function combines \texttt{getSiteFileData} and \texttt{getParameterInfo}, producing one dataframe called INFO. <<ThirdExample>>= parameterCd <- "00618" @@ -565,7 +574,7 @@ INFO <-getMetaData(siteNumber,parameterCd, interactive=FALSE) \subsection{Daily Data} \label{Dailysubsection} %------------------------------------------------------------ -The function to obtain the daily values (discharge in this case) is \texttt{getDVData}. It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. Most of these arguments are described in the previous section, however \texttt{"}convert\texttt{"} is a new argument (defaults to TRUE). The convert argument tells the program to convert the values from cubic feet per second (ft\textsuperscript{3}/s) to cubic meters per second (m\textsuperscript{3}/s). 
For EGRET applications with NWIS web retrieval, do not use this argument (the default is TRUE), EGRET assumes that discharge is always stored in units of cubic meters per second. If you don't want this conversion and are not using EGRET, set convert=FALSE in the function call. +The \texttt{getDVData} function retrieves the daily values (discharge in this case). It requires the inputs siteNumber, ParameterCd, StartDate, EndDate, interactive, and convert. Most of these arguments are described in section \ref{sec:genRetrievals}, however \texttt{"}convert\texttt{"} is a new argument (that defaults to TRUE). The convert argument tells the program to convert the values from cubic feet per second (ft\textsuperscript{3}/s) to cubic meters per second (m\textsuperscript{3}/s) as shown in the example Daily data frame in Table \ref{tab:DailyDF1}. For EGRET applications with NWIS web retrieval, do not use this argument (the default is TRUE), EGRET assumes that discharge is always stored in units of cubic meters per second. If you don't want this conversion and are not using EGRET, set convert=FALSE in the function call. <<firstExample>>= siteNumber <- "01491000" @@ -575,7 +584,7 @@ endDate <- "2013-01-01" Daily <- getDVData(siteNumber, "00060", startDate, endDate) @ -Details of the Daily dataframe are listed in Table \ref{tab:DailyDF1}. + <<colNamesDaily, echo=FALSE,results='asis'>>= ColumnName <- c("Date", "Q", "Julian","Month","Day","DecYear","MonthSeq","Qualifier","i","LogQ","Q7","Q30") @@ -594,13 +603,13 @@ print(xtable(DF, caption="Daily dataframe",label="tab:DailyDF1"), sanitize.rownames.function = addSpace ) -# -# wanttex <- xtable(data.frame( label=paste("$m^{-",1:3,"}$",sep=""))) -# print(wanttex,sanitize.text.function=function(str)gsub("_","\\_",str,fixed=TRUE)) + @ -If there are negative discharge values or discharge values of zero, the code will set all of these to zero and then add a small constant to all of the daily discharge values. 
This constant is 0.001 times the mean discharge. The code will also report on the number of zero and negative values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days), and there are no negative discharge values. Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. +If discharge values are negative or zero, the code will set all of these values to zero and then add a small constant to all of the daily discharge values. This constant is 0.001 times the mean discharge. The code will also report on the number of zero and negative values and the size of the constant. Use EGRET analysis only if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days), and there are no negative discharge values. Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. Table \ref{tab:DailyDF1} lists details of the Daily data frame. + +Notice that the \enquote{Day of the year} column can span from 1 to 366. The 366 accounts for leap years. Every day has a consistent day of the year. This means, February 28\textsuperscript{th} is always the 59\textsuperscript{th} day of the year, Feb. 29\textsuperscript{th} is always the 60\textsuperscript{th} day of the year, and March 1\textsuperscript{st} is always the 61\textsuperscript{st} day of the year whether or not it is a leap year. \FloatBarrier @@ -608,7 +617,7 @@ If there are negative discharge values or discharge values of zero, the code wil \subsection{Sample Data} \label{Samplesubsection} %------------------------------------------------------------ -The function to obtain USGS sample data from the water quality portal is \texttt{getSampleData}. The arguments for this function are also siteNumber, ParameterCd, StartDate, EndDate, interactive. 
These are the same inputs as \texttt{getRawQWData} or \texttt{getQWData} as described in the previous section. +The \texttt{getSampleData} function retrieves USGS sample data from NWIS. The arguments for this function are also siteNumber, ParameterCd, StartDate, EndDate, interactive. These are the same inputs as \texttt{getRawQWData} or \texttt{getQWData} as described in the previous section. <<secondExample>>= parameterCd <- "00618" @@ -616,7 +625,7 @@ Sample <-getSampleData(siteNumber,parameterCd, startDate, endDate) @ -The function to obtain STORET sample data from the water quality portal is \texttt{getSTORETSampleData}. The arguments for this function are siteNumber, characteristicName, StartDate, EndDate, interactive. Details of the Sample dataframe are listed in Table \ref{tab:SampleDataframe}. +The \texttt{getSTORETSampleData} function retrieves STORET sample data (or other non-NWIS data) from the water quality portal. The arguments for this function are siteNumber, characteristicName, StartDate, EndDate, interactive. Table \ref{tab:SampleDataframe} lists details of the Sample data frame. 
<<STORET,echo=TRUE,eval=FALSE>>= site <- 'WIDNR_WQX-10032762' @@ -629,37 +638,6 @@ Sample <-getSTORETSampleData(site,characteristicName, \pagebreak - - -% \begin{table}[!ht] -% \begin{minipage}{\linewidth} -% \begin{center} -% \caption{Sample dataframe} -% \begin{tabular}{llll} -% \hline -% ColumnName & Type & Description & Units \\ -% \hline -% Date & Date & Date & date \\ -% ConcLow & number & Lower limit of concentration & mg/L \\ -% ConcHigh & number & Upper limit of concentration & mg/L \\ -% Uncen & integer & Uncensored data (1=true, 0=false) & integer \\ -% ConcAve & number & Average of ConcLow and ConcHigh & mg/L \\ -% Julian & number & Number of days since January 1, 1850 & days \\ -% Month & integer & Month of the year [1-12] & months \\ -% Day & integer & Day of the year [1-366] & days \\ -% DecYear & number & Decimal year & years \\ -% MonthSeq & integer & Number of months since January 1, 1850 & months \\ -% SinDY & number & Sine of DecYear & numeric \\ -% CosDY & number & Cosine of DecYear & numeric \\ -% Q \footnotemark[1] & number & Discharge & m3/s \\ -% LogQ \footnotemark[1] & number & Natural logarithm of discharge & numeric \\ -% \hline -% \end{tabular} -% \end{center} -% \footnotetext[1]{Discharge columns are populated from data in the Daily dataframe after calling the mergeReport function.} -% \end{minipage} -% \end{table} - \begin{table} {\footnotesize \begin{threeparttable}[b] @@ -696,17 +674,20 @@ Sample <-getSTORETSampleData(site,characteristicName, } \end{table} -The next section will talk about summing multiple constituents, including how interval censoring is used. Since the Sample data frame is structured to only contain one constituent, when more than one parameter codes are requested, the \texttt{getSampleData} function will sum the values of each constituent as described below. +Notice that the \enquote{Day of the year} column can span from 1 to 366. The 366 accounts for leap years. Every day has a consistent day of the year. 
This means, February 28\textsuperscript{th} is always the 59\textsuperscript{th} day of the year, Feb. 29\textsuperscript{th} is always the 60\textsuperscript{th} day of the year, and March 1\textsuperscript{st} is always the 61\textsuperscript{st} day of the year whether or not it is a leap year. + +Section \ref{sec:cenValues} will talk about summing multiple constituents, including how interval censoring is used. Since the Sample data frame is structured to only contain one constituent, when more than one parameter code is requested, the \texttt{getSampleData} function will sum the values of each constituent as described below. \FloatBarrier %------------------------------------------------------------ \subsection{Censored Values: Summation Explanation} +\label{sec:cenValues} %------------------------------------------------------------ -In the typical case where none of the data are censored (that is, no values are reported as \enquote{less-than} values) the ConcLow = ConcHigh = ConcAve all of which are equal to the reported value and Uncen=1. For the most common type of censoring, where a value is reported as less than the reporting limit, then ConcLow = NA, ConcHigh = reporting limit, ConcAve = 0.5 * reporting limit, and Uncen = 0. +In the typical case where none of the data are censored (that is, no values are reported as \enquote{less-than} values), ConcLow = ConcHigh = ConcAve, all of which equal the reported value, and Uncen = 1. For the most common type of censoring, where a value is reported as less than the reporting limit, then ConcLow = NA, ConcHigh = reporting limit, ConcAve = 0.5 * reporting limit, and Uncen = 0. -As an example to understand how the dataRetrieval package handles a more complex censoring problem, let us say that in 2004 and earlier, we computed total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). From 2005 and onward, we have direct measurements of total phosphorus (tp). 
A small subset of this fictional data looks like Table \ref{tab:exampleComplexQW}. +To illustrate how the dataRetrieval package handles a more complex censoring problem, let us say that in 2004 and earlier, we computed total phosphorus (tp) as the sum of dissolved phosphorus (dp) and particulate phosphorus (pp). From 2005 and onward, we have direct measurements of total phosphorus (tp). A small subset of this fictional data looks like Table \ref{tab:exampleComplexQW}. @@ -737,7 +718,7 @@ The dataRetrieval package will \enquote{add up} all the values in a given row to For example, we might know the value for dp on 5/30/2005, but we don't want to put it in the table because under the rules of this data set, we are not supposed to add it in to the values in 2005. -For every sample, the EGRET package requires a pair of numbers to define an interval in which the true value lies (ConcLow and ConcHigh). In a simple non-censored case (the reported value is above the detection limit), ConcLow equals ConcHigh and the interval collapses down to a single point. In a simple censored case, the value might be reported as \verb@<@0.2, then ConcLow=NA and ConcHigh=0.2. We use NA instead of 0 as a way to elegantly handle future logarithm calculations. +For every sample, the EGRET package requires a pair of numbers to define an interval in which the true value lies (ConcLow and ConcHigh). In a simple uncensored case (the reported value is above the detection limit), ConcLow equals ConcHigh and the interval collapses down to a single point. In a simple censored case, the value might be reported as \verb@<@0.2, then ConcLow=NA and ConcHigh=0.2. We use NA instead of 0 as a way to elegantly handle future logarithm calculations. For the more complex example case, let us say dp is reported as \verb@<@0.01 and pp is reported as 0.3. We know that the total must be at least 0.3 and could be as much as 0.31. Therefore, ConcLow=0.3 and ConcHigh=0.31. 
Another case would be if dp is reported as \verb@<@0.005 and pp is reported \verb@<@0.2. We know in this case that the true value could be as low as zero, but could be as high as 0.205. Therefore, in this case, ConcLow=NA and ConcHigh=0.205. The Sample dataframe for the example data would be: @@ -750,23 +731,25 @@ For the more complex example case, let us say dp is reported as \verb@<@0.01 and Sample @ -The next section will talk about inputting user-generated files. \texttt{getSampleDataFromFile} and \texttt{getSampleData} assume summation with interval censoring inputs, as will be discussed in those sections. +Section \ref{sec:userFiles} discusses inputting user-generated files. The functions \texttt{getSampleDataFromFile} and \texttt{getSampleData} assume summation with interval censoring inputs, and are discussed in sections \ref{sec:DailyFile} and \ref{sec:SampleFile}. \FloatBarrier %------------------------------------------------------------ \subsection{User-Generated Data Files} +\label{sec:userFiles} %------------------------------------------------------------ -Aside from retrieving data from the USGS web services, the dataRetrieval package also includes functions to generate the Daily and Sample data frame from local files. +In addition to retrieving data from the USGS web services, the dataRetrieval package also includes functions to generate the Daily and Sample data frame from local files. %------------------------------------------------------------ \subsubsection{getDailyDataFromFile} +\label{sec:DailyFile} %------------------------------------------------------------ -\texttt{getDailyDataFromFile} will load a user-supplied text file and convert it to the Daily dataframe. The file should have two columns, the first dates, the second values. The dates should be formatted either mm/dd/yyyy or yyyy-mm-dd. Using a 4-digit year is required. 
This function has the following inputs: filePath, fileName,hasHeader (TRUE/FALSE), separator, qUnit, and interactive (TRUE/FALSE). filePath is a string that defines the path to your file. This can either be a full path, or path relative to your R working directory. The input fileName is a string that defines the file name (including the extension). +The \texttt{getDailyDataFromFile} function will load a user-supplied text file and convert it to the Daily dataframe. The file should have two columns, the first dates, the second values. The dates are formatted either mm/dd/yyyy or yyyy-mm-dd. Using a 4-digit year is required. This function has the following inputs: filePath, fileName,hasHeader (TRUE/FALSE), separator, qUnit, and interactive (TRUE/FALSE). filePath is a string that defines the path to your file, and the string can either be a full path, or path relative to your R working directory. The input fileName is a string that defines the file name (including the extension). -Text files that contain this sort of data require some sort of a separator, for example, a \enquote{csv} file (comma-separated value) file uses a comma to separate the date and value column. A tab delimited file would use a tab (\verb@"\t"@) rather than the comma (\texttt{"},\texttt{"}). The type of separator you use can be defined in the function call in the \texttt{"}separator\texttt{"} argument, the default is \texttt{"},\texttt{"}. Another function input is a logical variable: hasHeader. The default is TRUE. If your data does not have column names, set this variable to FALSE. +Text files that contain this sort of data require some sort of a separator, for example, a \enquote{csv} file (comma-separated value) file uses a comma to separate the date and value column. A tab delimited file would use a tab (\verb@"\t"@) rather than the comma (\texttt{"},\texttt{"}). 
Define the type of separator you choose to use in the function call in the \texttt{"}separator\texttt{"} argument, the default is \texttt{"},\texttt{"}. Another function input is a logical variable: hasHeader. The default is TRUE. If your data does not have column names, set this variable to FALSE. -Finally, qUnit is a numeric argument that defines the discharge units used in the input file. The default is qUnit = 1 which assumes discharge is in cubic feet per second. If the discharge in the file is already in cubic meters per second then set qUnit = 2. If it is in some other units (like liters per second or acre-feet per day), the user will have to pre-process the data with a unit conversion that changes it to either cubic feet per second or cubic meters per second. +Finally, qUnit is a numeric argument that defines the discharge units used in the input file. The default is qUnit = 1 which assumes discharge is in cubic feet per second. If the discharge in the file is already in cubic meters per second then set qUnit = 2. If it is in some other units (like liters per second or acre-feet per day), the user must pre-process the data with a unit conversion that changes it to either cubic feet per second or cubic meters per second. So, if you have a file called \enquote{ChoptankRiverFlow.txt} located in a folder called \enquote{RData} on the C drive (this is a Windows example), and the file is structured as follows (tab-separated): @@ -798,10 +781,11 @@ Microsoft Excel files can be a bit tricky to import into R directly. The simples %------------------------------------------------------------ \subsubsection{getSampleDataFromFile} +\label{sec:SampleFile} %------------------------------------------------------------ \doublespacing -Similarly to the previous section, \texttt{getSampleDataFromFile} will import a user-generated file and populate the Sample dataframe. 
The difference between sample data and discharge data is that the code requires a third column that contains a remark code, either blank or \verb@"<"@, which will tell the program that the data was \enquote{left-censored} (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. An example of a comma-delimited file would be: +The \texttt{getSampleDataFromFile} function will import a user-generated file and populate the Sample dataframe. The difference between sample data and discharge data is that the code requires a third column that contains a remark code, either blank or \verb@"<"@, which will tell the program that the data was \enquote{left-censored} (or, below the detection limit of the sensor). Therefore, the data must be in the form: date, remark, value. An example of a comma-delimited file is: \singlespacing \begin{verbatim} @@ -815,7 +799,7 @@ cdate;remarkCode;Nitrate \end{verbatim} \doublespacing -The call to open this file, and populate the Sample dataframe would be: +The call to open this file, and populate the Sample dataframe is: <<openSample, eval = FALSE>>= fileName <- "ChoptankRiverNitrate.csv" filePath <- "C:/RData/" @@ -823,7 +807,7 @@ Sample <- getSampleDataFromFile(filePath,fileName, separator=",") @ -When multiple constituents are to be summed, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... A tab-separated example might look like this, where the columns are remark dissolved phosphate (rdp), dissolved phosphate (dp), remark particulate phosphorus (rpp), particulate phosphorus (pp), remark total phosphate (rtp), and total phosphate (tp): +When multiple constituents are to be summed, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... 
A tab-separated example might look like the file below, where the columns are date, remark dissolved phosphate (rdp), dissolved phosphate (dp), remark particulate phosphorus (rpp), particulate phosphorus (pp), remark total phosphate (rtp), and total phosphate (tp): \singlespacing \begin{verbatim} @@ -870,7 +854,7 @@ head(Sample) %------------------------------------------------------------ \subsection{EGRET Plots} %------------------------------------------------------------ -As has been mentioned, the Daily, Sample, and INFO data frames whose construction is described in Secs. \ref{INFOsubsection} - \ref{Samplesubsection} are specifically formatted to be used with the EGRET package. The EGRET package has powerful modeling capabilities using WRTDS, but also has a variety of graphing and tabular tools to explore the data without using the WRTDS algorithm. See the EGRET vignette, user guide, and/or wiki (\url{https://github.com/USGS-R/EGRET/wiki}) for detailed information. The following figure is an example of one of the plotting functions that can be used directly from the dataRetrieval dataframes. +The Daily, Sample, and INFO data frames (described in Secs. \ref{INFOsubsection} - \ref{Samplesubsection}) are specifically formatted to be used with the EGRET package. The EGRET package has powerful modeling capabilities that use WRTDS, but EGRET also has graphing and tabular tools for exploring the data without using the WRTDS algorithm. See the EGRET vignette, user guide, and/or wiki (\url{https://github.com/USGS-R/EGRET/wiki}) for detailed information. Figure \ref{fig:egretEx} shows one of the plotting functions that can be used directly from the dataRetrieval dataframes. 
<<egretEx, echo=TRUE, eval=TRUE, fig.cap="Default multiPlotDataOverview">>= # Continuing Choptank example from the previous sections @@ -1056,12 +1040,12 @@ Suspended sediment concentration (SSC) 1980-10-01 1991-09-30 3651 mg/l Suspended sediment discharge 1980-10-01 1991-09-30 3652 tons/day \end{verbatim} -To open this file in Excel: +Next, follow the steps below to open this file in Excel: \begin{enumerate} \item Open Excel \item Click on the File tab \item Click on the Open option -\item Browse to the working directory (as shown in the results of getwd()) +\item Navigate to the working directory (as shown in the results of getwd()) \item Next to the File name text box, change the dropdown type to All Files (*.*) \item Double click tableData.tsv \item A text import wizard will open up, in the first window, choose the Delimited radio button if it is not automatically picked, then click on Next. -- GitLab