diff --git a/NAMESPACE b/NAMESPACE index 673861ca65f78660da4ba26d94a1e3fc2e68b048..ddd6a5416a3a1a58231b89cd3a8b3f51495e1a03 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,10 +6,10 @@ export(dateFormatCheck) export(formatCheckDate) export(formatCheckParameterCd) export(formatCheckSiteNumber) +export(getDVData) export(getDailyDataFromFile) export(getDataAvailability) export(getDataFromFile) -export(getDVData) export(getMetaData) export(getMultipleParameterNames) export(getParameterInfo) @@ -18,14 +18,14 @@ export(getPreLoadedData) export(getPreLoadedSampleData) export(getQWData) export(getQWDataFromFile) -export(getRawQWData) export(getRDB1Data) +export(getRawQWData) +export(getSTORETSampleData) export(getSampleData) export(getSampleDataFromFile) export(getSiteFileData) -export(getSTORETSampleData) -export(getWaterML1Data) export(getWQPData) +export(getWaterML1Data) export(mergeReport) export(padVariable) export(populateConcentrations) diff --git a/inst/doc/dataRetrieval.pdf b/inst/doc/dataRetrieval.pdf index d6db7f3751bec57cc99ef3e24ef38b8e8923e57e..7eb6feaac178cf04d68409d085b4165d0943d9d6 100644 Binary files a/inst/doc/dataRetrieval.pdf and b/inst/doc/dataRetrieval.pdf differ diff --git a/man/checkStartEndDate.Rd b/man/checkStartEndDate.Rd index aa1aa87150fb5d96f1f4f053fd04972bebad9d9e..892f64cca9c0ad84ad4ad691484ae396ed210d2e 100644 --- a/man/checkStartEndDate.Rd +++ b/man/checkStartEndDate.Rd @@ -26,6 +26,6 @@ startDate <- '1985-01-01' endDate <- '1990-01-01' checkStartEndDate(startDate, endDate, interactive = FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/compressData.Rd b/man/compressData.Rd index 17d40a4d71f6d04dd298450d5fc90f0746fd515e..92d9134d44d9369c41a427c163d614c96601a8d5 100644 --- a/man/compressData.Rd +++ b/man/compressData.Rd @@ -36,6 +36,6 @@ value3 <- c(3,4,5) dataInput <- data.frame(dateTime, comment1, value1, comment2, value2, comment3, value3, stringsAsFactors=FALSE) compressData(dataInput, interactive=FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/constructNWISURL.Rd b/man/constructNWISURL.Rd index 33fd4f50b6a1f2c3ba47c4b873c2b0971000bed9..d27f4d78cc64771dabe636f541ce83bf84e2d358 100644 --- a/man/constructNWISURL.Rd +++ b/man/constructNWISURL.Rd @@ -67,9 +67,9 @@ url_qw <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDa url_wqp <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'wqp') url_daily_tsv <- constructNWISURL(siteNumber,pCode,startDate,endDate,'dv',statCd=c("00003","00001"),format="tsv") } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/dataOverview.Rd b/man/dataOverview.Rd index c99da9b50f9d0142972deef2188cd549ca498cfd..6fe56a3ac2930bf546840bcbdb75fea354dadd3f 100644 --- a/man/dataOverview.Rd +++ b/man/dataOverview.Rd @@ -21,8 +21,8 @@ dataOverview(localDaily = exDaily, localSample = exSample) \seealso{ \code{\link{mergeReport}} } -\keyword{data} -\keyword{import} \keyword{USGS} \keyword{WRTDS} +\keyword{data} +\keyword{import} diff --git a/man/dateFormatCheck.Rd b/man/dateFormatCheck.Rd index 99532817221093d2a64e3ce0e6ac8e3e3aabd07b..dd75fee619a69001adad54bba538e9d5980ee496 100644 --- a/man/dateFormatCheck.Rd +++ b/man/dateFormatCheck.Rd @@ -18,6 +18,6 @@ date <- '1985-01-01' dateFormatCheck(date) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/formatCheckDate.Rd b/man/formatCheckDate.Rd index 0bf8637101b5ac2a8f016bcc8faccdc3edac4dfe..64c54b65061785ecf5741d3f1c4eac8c9ac3af0d 
100644 --- a/man/formatCheckDate.Rd +++ b/man/formatCheckDate.Rd @@ -27,6 +27,6 @@ Date <- '1985-01-01' dateString <- 'StartDate' formatCheckDate(Date, dateString, interactive = FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/formatCheckParameterCd.Rd b/man/formatCheckParameterCd.Rd index 086bea0aa058edc05bb1775bf7025433c33b96ee..d1574546ed4940e46494b4c4bbd46bacce0da90e 100644 --- a/man/formatCheckParameterCd.Rd +++ b/man/formatCheckParameterCd.Rd @@ -23,6 +23,6 @@ pCode <- '01234' formatCheckParameterCd(pCode, interactive = FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/formatCheckSiteNumber.Rd b/man/formatCheckSiteNumber.Rd index 60a912a66d584e466306e309483b57e7a1f38d54..5d8b42cd9e9cb3a8d96bdf4d70ac01ec0fbe8a03 100644 --- a/man/formatCheckSiteNumber.Rd +++ b/man/formatCheckSiteNumber.Rd @@ -22,6 +22,6 @@ site<- '01234567' formatCheckSiteNumber(site, interactive = FALSE) } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/getDVData.Rd b/man/getDVData.Rd index 628b925997ac7b8ac0dd502b0cd41c6525d0525a..89c721c68e85843950c7fb0fe53685004d4a2264 100644 --- a/man/getDVData.Rd +++ b/man/getDVData.Rd @@ -57,8 +57,8 @@ Daily <- getDVData('01594440','00060', '1985-01-01', '1985-03-31', interactive=F \code{\link{retrieveNWISData}}, \code{\link{populateDaily}} } -\keyword{data} -\keyword{import} \keyword{USGS} \keyword{WRTDS} +\keyword{data} +\keyword{import} diff --git a/man/getDailyDataFromFile.Rd b/man/getDailyDataFromFile.Rd index 95f1ece41ac37dce3485e090992a4e76fbc70cc5..2c8ded37b9a18f130b42c18ecc9f4d3787a3c696 100644 --- a/man/getDailyDataFromFile.Rd +++ b/man/getDailyDataFromFile.Rd @@ -40,9 +40,9 @@ filePath <- '~/RData/' # Sample format fileName <- 'ChoptankRiverFlow.txt' \dontrun{getDailyDataFromFile(filePath,fileName,separator="\\t")} } +\keyword{USGS} +\keyword{WRTDS} \keyword{data} \keyword{file} \keyword{import} -\keyword{USGS} -\keyword{WRTDS} diff --git a/man/getDataAvailability.Rd b/man/getDataAvailability.Rd index fd9d46033e9e1e6165f538fcf14a54ab1c79bf69..06dbf8e02217726e0d4a9cc9e24f837a4ac1eb38 100644 --- a/man/getDataAvailability.Rd +++ b/man/getDataAvailability.Rd @@ -31,9 +31,9 @@ # These examples require an internet connection to run availableData <- getDataAvailability('05114000',interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getMetaData.Rd b/man/getMetaData.Rd index 77a11bcff1f726b0ecde6225df4d5f1b3b21a8fe..fb57f7d818c177f9756645238dada84569867e04 100644 --- a/man/getMetaData.Rd +++ b/man/getMetaData.Rd @@ -39,10 +39,10 @@ # Automatically gets information about site 05114000 and temperature, no interaction with user INFO <- getMetaData('05114000','00010',interactive=FALSE) } +\keyword{USGS} +\keyword{WRTDS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} -\keyword{WRTDS} diff --git a/man/getMultipleParameterNames.Rd b/man/getMultipleParameterNames.Rd index 3177e1b6c5b0b27172ec344fff2391cee52d5489..dc266131310a138046da77d2b30e5bf2c1c45482 100644 --- a/man/getMultipleParameterNames.Rd +++ b/man/getMultipleParameterNames.Rd @@ -28,9 +28,9 @@ # These examples require an internet connection to run availableData <- getMultipleParameterNames(c("00060", "00065", "00010"),interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getParameterInfo.Rd b/man/getParameterInfo.Rd index 
6344c8980314b462fbeb13a1be1a227e0e06a434..fb9df67602ba06d11c33ae51c97619337e355980 100644 --- a/man/getParameterInfo.Rd +++ b/man/getParameterInfo.Rd @@ -28,9 +28,9 @@ paramINFO <- getParameterInfo('01075') paramINFO2 <- getParameterInfo('00931',interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getPreLoadedDailyData.Rd b/man/getPreLoadedDailyData.Rd index a39fbbe6d2b72f71d83137208ac0c65157e98722..8be9941cee8a1e22b9c42a2f1e1783955a01990b 100644 --- a/man/getPreLoadedDailyData.Rd +++ b/man/getPreLoadedDailyData.Rd @@ -28,7 +28,7 @@ \examples{ Daily <- getPreLoadedDailyData(ChoptankRiverFlow, interactive=FALSE) } +\keyword{WRTDS} \keyword{data} \keyword{import} -\keyword{WRTDS} diff --git a/man/getPreLoadedSampleData.Rd b/man/getPreLoadedSampleData.Rd index 50708517441fb2c28f93936156fd4fd6c024f9c7..1e8834a400df76773c56f0d66f2f281f4bffabf2 100644 --- a/man/getPreLoadedSampleData.Rd +++ b/man/getPreLoadedSampleData.Rd @@ -23,7 +23,7 @@ \examples{ Sample <- getPreLoadedSampleData(ChoptankRiverNitrate, interactive=FALSE) } +\keyword{WRTDS} \keyword{data} \keyword{import} -\keyword{WRTDS} diff --git a/man/getQWData.Rd b/man/getQWData.Rd index f38d6a2322478fb8c29957201f772fe36d05034b..8348de8d5e2cd018928e1498e8d0ce37842488d7 100644 --- a/man/getQWData.Rd +++ b/man/getQWData.Rd @@ -46,9 +46,9 @@ rawProcessedSample <- getQWData('01594440','01075', '1985-01-01', '1985-03-31') rawProcessedSampleAll <- getQWData('05114000','', '1985-01-01', '1985-03-31') rawProcessedSampleSelect <- getQWData('05114000','00915;00931', '1985-01-01', '1985-04-30', interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getRawQWData.Rd b/man/getRawQWData.Rd index a77fc37ffda4adb1e1bc2c75b3a3a5b26ab57a98..738023e62faec1f5e19e3f7973eb04b875e5d7d0 100644 --- a/man/getRawQWData.Rd +++ b/man/getRawQWData.Rd @@ -45,9 +45,9 @@ rawSample <- getRawQWData('01594440','01075', '1985-01-01', '1985-03-31') rawSampleAll <- getRawQWData('05114000','', '1985-01-01', '1985-03-31') rawSampleSelect <- getRawQWData('05114000',c('00915','00931'), '1985-01-01', '1985-04-30', interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getSTORETSampleData.Rd b/man/getSTORETSampleData.Rd index bf115a32d65fe766b3623eb8288a6201789f0691..a1fc936e537dcb5235c44d9582da57eb0dad1fa3 100644 --- a/man/getSTORETSampleData.Rd +++ b/man/getSTORETSampleData.Rd @@ -44,8 +44,8 @@ Sample_All <- getSTORETSampleData('WIDNR_WQX-10032762','Specific conductance', ' \code{\link{compressData}}, \code{\link{populateSampleColumns}} } -\keyword{data} -\keyword{import} \keyword{USGS} \keyword{WRTDS} +\keyword{data} +\keyword{import} diff --git a/man/getSampleData.Rd b/man/getSampleData.Rd index 782b13c6c3bf602196a178982319a1be56c4f766..f8fc6db8dd5b2a7f164977fe2e1c287b8eefcb84 100644 --- a/man/getSampleData.Rd +++ b/man/getSampleData.Rd @@ -46,8 +46,8 @@ Sample_Select <- getSampleData('05114000','00915;00931', '', '', interactive=FAL \code{\link{compressData}}, \code{\link{populateSampleColumns}} } -\keyword{data} -\keyword{import} \keyword{USGS} \keyword{WRTDS} +\keyword{data} +\keyword{import} diff --git a/man/getSampleDataFromFile.Rd b/man/getSampleDataFromFile.Rd index 34a0e31df6f8ab4745627cbc5afc3f4ea08c175f..f295b54c36bbffe76ecd4934edde2f67b6531f51 100644 --- a/man/getSampleDataFromFile.Rd +++ b/man/getSampleDataFromFile.Rd @@ -35,9 +35,9 
@@ filePath <- '~/RData/' # Sample format fileName <- 'ChoptankRiverNitrate.csv' #Sample <- getSampleDataFromFile(filePath,fileName, separator=";",interactive=FALSE) } +\keyword{USGS} +\keyword{WRTDS} \keyword{data} \keyword{file} \keyword{import} -\keyword{USGS} -\keyword{WRTDS} diff --git a/man/getSiteFileData.Rd b/man/getSiteFileData.Rd index 53291b62e542ee7599124ff7866c16c95ed96e9a..37a7d14a54aa31799bcb74be2ebf6e8137323eac 100644 --- a/man/getSiteFileData.Rd +++ b/man/getSiteFileData.Rd @@ -24,9 +24,9 @@ # These examples require an internet connection to run siteINFO <- getSiteFileData('05114000',interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/getWQPData.Rd b/man/getWQPData.Rd index 3f5c8e143044f445ad8c2d9c19f888cfaba54b7e..a3b275d477aa664bc684d798eb3b33d64ddcf9f5 100644 --- a/man/getWQPData.Rd +++ b/man/getWQPData.Rd @@ -43,9 +43,9 @@ getWQPData('USGS-01594440','Chloride', '', '') getWQPData('WIDNR_WQX-10032762','Specific conductance', '', '') } +\keyword{WQP} \keyword{data} \keyword{import} \keyword{service} \keyword{web} -\keyword{WQP} diff --git a/man/mergeReport.Rd b/man/mergeReport.Rd index 800f34ba5ab6684f1af1975045caad09cf96f825..5efff9d3878cddc49ca7a82527a2ece4d08a4f52 100644 --- a/man/mergeReport.Rd +++ b/man/mergeReport.Rd @@ -33,8 +33,8 @@ Sample <- mergeReport(interactive=FALSE) \code{\link{getDVData}}, \code{\link{populateSampleColumns}} } -\keyword{data} -\keyword{import} \keyword{USGS} \keyword{WRTDS} +\keyword{data} +\keyword{import} diff --git a/man/padVariable.Rd b/man/padVariable.Rd index 90a166777c47ab504c3c3adc55d3c3197f6b284b..cf3aee545f2616faa1b17dcc20135c43ea3b97f8 100644 --- a/man/padVariable.Rd +++ b/man/padVariable.Rd @@ -20,9 +20,9 @@ pCode <- '10' correctPCode <- padVariable(pCode,5) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/populateDaily.Rd b/man/populateDaily.Rd index e59aafb215c75061b4ac028d2c5a414956f109af..5722f563c0cd546c3c1cb92f01026ffb4690c6a3 100644 --- a/man/populateDaily.Rd +++ b/man/populateDaily.Rd @@ -33,6 +33,6 @@ Daily <- populateDaily(dataInput, 2, interactive=FALSE) \author{ Robert M. 
Hirsch \email{rhirsch@usgs.gov} } -\keyword{flow} \keyword{WRTDS} +\keyword{flow} diff --git a/man/processQWData.Rd b/man/processQWData.Rd index d5cc07572769a86d41ddf0ae9eb1f8082b06b5f6..660c8bb571bcef093ad463a30d4d779727da1c13 100644 --- a/man/processQWData.Rd +++ b/man/processQWData.Rd @@ -25,9 +25,9 @@ rawSample <- getRawQWData('01594440','01075', '1985-01-01', '1985-03-31') rawSampleSelect <- processQWData(rawSample) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/retrieveNWISData.Rd b/man/retrieveNWISData.Rd index b89cdc3f54ea94944af390a7bc25324665d843d3..8d7ce45c0360d221dbd8763fce7246cdd8c34e95 100644 --- a/man/retrieveNWISData.Rd +++ b/man/retrieveNWISData.Rd @@ -60,9 +60,9 @@ rawDailyTemperature <- retrieveNWISData(siteNumber,'00010', startDate, endDate, rawDailyTemperatureTSV <- retrieveNWISData(siteNumber,'00010', startDate, endDate, StatCd='00001',format="tsv",interactive=FALSE) rawDailyQAndTempMeanMax <- retrieveNWISData(siteNumber,c('00010','00060'), startDate, endDate, StatCd=c('00001','00003'), interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/retrieveNWISqwData.Rd b/man/retrieveNWISqwData.Rd index 4e5a169e402d4b4af02344a248be66f16508e587..254045c748b2520c03b207a717b6a2034639f995 100644 --- a/man/retrieveNWISqwData.Rd +++ b/man/retrieveNWISqwData.Rd @@ -44,9 +44,9 @@ data$dateTime <- as.Date(data$dateTime) compressedData <- compressData(data, interactive=interactive) Sample <- populateSampleColumns(compressedData) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/man/retrieveUnitNWISData.Rd b/man/retrieveUnitNWISData.Rd index 5109fd5c8445500f8d94be6fc46c11ef87c2bbe2..f69c1294191c97249a6426057aab6a3daa315659 100644 --- a/man/retrieveUnitNWISData.Rd +++ b/man/retrieveUnitNWISData.Rd @@ -54,9 +54,9 @@ EndDate <- as.character(Sys.Date()) rawData <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,interactive=FALSE) rawData2 <- retrieveUnitNWISData(siteNumber,ParameterCd,StartDate,EndDate,"tsv",interactive=FALSE) } +\keyword{USGS} \keyword{data} \keyword{import} \keyword{service} -\keyword{USGS} \keyword{web} diff --git a/vignettes/dataRetrieval-concordance.tex b/vignettes/dataRetrieval-concordance.tex index 1c4366a41dd226833944f61227c270b445d18987..fe9fdbd77dd8e9cc51ed0ba557cb0cd2b2a7c46d 100644 --- a/vignettes/dataRetrieval-concordance.tex +++ b/vignettes/dataRetrieval-concordance.tex @@ -1,9 +1,9 @@ \Sconcordance{concordance:dataRetrieval.tex:dataRetrieval.Rnw:% 1 49 1 55 0 1 6 11 1 1 5 41 1 10 0 16 1 9 0 21 1 5 0 % -6 1 8 0 14 1 14 0 24 1 11 0 17 1 20 0 15 1 10 0 5 1 8 % -0 20 1 5 0 16 1 4 0 21 1 10 0 20 1 5 0 4 1 18 0 13 1 % -9 0 26 1 9 0 10 1 10 0 14 1 21 0 19 1 5 0 19 1 5 0 17 % -1 8 0 14 1 15 0 16 1 5 0 9 1 5 0 62 1 6 0 14 1 17 0 % -36 1 5 0 24 1 5 0 20 1 38 0 13 1 10 0 22 1 5 0 5 1 13 % -0 10 1 5 0 7 1 5 0 16 1 51 0 15 1 49 0 7 1 32 0 26 1 % -25 0 8 1 5 0 56 1} +6 1 8 0 14 1 14 0 24 1 11 0 15 1 6 0 16 1 10 0 5 1 8 % +0 20 1 5 0 16 1 4 0 21 1 10 0 20 1 5 0 4 1 18 0 29 1 % +9 0 10 1 10 0 14 1 21 0 19 1 5 0 19 1 5 0 17 1 8 0 14 % +1 15 0 16 1 5 0 9 1 5 0 62 1 6 0 14 1 17 0 36 1 5 0 % +24 1 5 0 20 1 38 0 13 1 10 0 22 1 5 0 5 1 14 0 10 1 5 % +0 7 1 5 0 16 1 51 0 15 1 49 0 7 1 32 0 26 1 25 0 8 1 % +5 0 56 1} diff --git a/vignettes/dataRetrieval.Rnw b/vignettes/dataRetrieval.Rnw index 
6152ee378a54791bbd614cf583cbbd869887859d..2169659dd6ab108f52b88574bda6c689cf7cbcf1 100644 --- a/vignettes/dataRetrieval.Rnw +++ b/vignettes/dataRetrieval.Rnw @@ -65,7 +65,7 @@ library(knitr) <<include=TRUE ,echo=FALSE,eval=TRUE>>= -opts_chunk$set(highlight=TRUE, tidy=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE) +opts_chunk$set(highlight=TRUE, tidy=TRUE, keep.space=TRUE, keep.blank.space=FALSE, keep.comment=TRUE, tidy=FALSE,comment="") knit_hooks$set(inline = function(x) { if (is.numeric(x)) round(x, 3)}) knit_hooks$set(crop = hook_pdfcrop) @@ -78,7 +78,7 @@ knit_hooks$set(crop = hook_pdfcrop) %------------------------------------------------------------ \section{Introduction to dataRetrieval} %------------------------------------------------------------ -The dataRetrieval package was created to simplify the process of getting hydrologic data in the R enviornment. It has been specifically designed to work seamlessly with the EGRET R package: Exploration and Graphics for RivEr Trends (EGRET). See: \url{https://github.com/USGS-R/EGRET/wiki} for information on EGRET. EGRET is designed to provide analysis of water quality data sets using the WRTDS method of data analysis (WRTDS is Weighted Regressions on Time, Discharge and Season) as well as analysis of streamflow trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets. +The dataRetrieval package was created to simplify the process of getting hydrologic data in the R enviornment. It has been specifically designed to work seamlessly with the EGRET R package: Exploration and Graphics for RivEr Trends (EGRET). See: \url{https://github.com/USGS-R/EGRET/wiki} for information on EGRET. EGRET is designed to provide analysis of water quality data sets using the WRTDS method of data analysis (WRTDS is Weighted Regressions on Time, Discharge and Season) as well as analysis of discharge trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets. The dataRetrieval package is designed to retrieve many of the major data types of USGS hydrologic data that are available on the web, but also allows users to make use of other data that they supply from spreadsheets. Section 2 provides examples of how one can obtain raw data from USGS sources on the web and ingest them into data frames within the R environment. The functionality described in section 2 is for general use and is not tailored for the specific uses of the EGRET package. The functionality described in section 3 is tailored specifically to obtaining input from the web and structuring them specifically for use in the EGRET package. The functionality described in section 4 is for converting hydrologic data from user-supplied spreadsheets and structuring them specifically for use in the EGRET package. @@ -94,7 +94,7 @@ In this section, we will run through 5 examples, documenting how to get raw data %------------------------------------------------------------ \subsection{Introduction} %------------------------------------------------------------ -The United States Geological Survey organizes their hydrological data in standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. 
One potential tool for discovering data is Environmental Data Discovery and Transformation (EnDDaT): \url{http://cida.usgs.gov/enddat/}. Follow the example on the EnDDaT web page to learn how to discover USGS stations and available data from any location in the United States. +The United States Geological Survey organizes their hydrological data in standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. There are many ways to do this, one is the National Water Information System: Mapper \url{http://maps.waterdata.usgs.gov/mapper/index.html}. Once the site-ID is known, the next required input for USGS data retrievals is the 'parameter code'. This is a 5-digit code that specifies what measured paramater is being requested. A complete list of possible USGS parameter codes can be found at \url{http://go.usa.gov/bVDz}. @@ -124,7 +124,7 @@ shortName <- c("Maximum","Minimum","Mean", "Median") data.df <- data.frame(StatCode, shortName, stringsAsFactors=FALSE) xtable(data.df,label="tab:stat", - caption="Commonly found USGS Stat Codes") + caption="Commonly used USGS Stat Codes") @ @@ -193,9 +193,7 @@ ChoptankDailyData <- merge(ChoptankDailyData, pCodeINFO,by="parameter_cd") @ -The daily data at the Choptank River site can be displayed in a \LaTeX table using the xtable package. See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a table in Microsoft Excel or Word. - -<<tablegda, echo=TRUE,results='asis'>>= +<<tablegda, echo=FALSE,results='asis'>>= tableData <- with(ChoptankDailyData, data.frame(shortName=srsname, Start=as.character(startDate), @@ -206,10 +204,11 @@ tableData <- with(ChoptankDailyData, xtable(tableData,label="tab:gda", - caption="Daily mean data availabile at the Choptank River") + caption="Daily mean data availabile at the Choptank River near Greensboro, MD") @ +See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a table in Microsoft Excel or Word. \FloatBarrier @@ -235,7 +234,7 @@ Parameter information is obtained from \url{http://nwis.waterdata.usgs.gov/nwis/ \subsection{Daily Values} \label{sec:usgsDaily} %------------------------------------------------------------ -To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (true/false) interactive. There are 2 default argument: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. Setting the \texttt{"}interactive\texttt{"} option to true will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. Setting the \texttt{"}interactive\texttt{"} option to TRUE will walk you through the function. 
It might make more sense to run large batch collections with the interactive option set to FALSE. The dates (start and end) need to be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user does need to include the quotes). Setting the start date to \texttt{"}\texttt{"} will indicate to the program to ask for the earliest date, setting the end date to \texttt{"}\texttt{"} will ask for the latest available date. @@ -259,7 +258,7 @@ Another example that doesn't use the defaults would be a request for mean and ma parameterCd <- c("00010","00060") # Temperature and discharge statCd <- c("00001","00003") # Mean and maximum startDate <- "2012-01-01" -endDate <- "2012-06-30" +endDate <- "2012-05-01" temperatureAndFlow <- retrieveNWISData(siteNumber, parameterCd, startDate, endDate, StatCd=statCd) @@ -270,11 +269,11 @@ Daily data is pulled from \url{http://waterservices.usgs.gov/rest/DV-Test-Tool.h An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}): -<<getNWIStemperaturePlot, echo=TRUE, fig.cap="Temperature and discharge plot of Choptank River in 2012.">>= +<<getNWIStemperaturePlot, echo=TRUE, fig.cap="Temperature and discharge plot of Choptank River in 2012.",out.width='1\\linewidth',out.height='1\\linewidth',fig.show='hold'>>= par(mar=c(5,5,5,5)) colnames <- names(temperatureAndFlow) with(temperatureAndFlow, plot( - get(colnames[3]), get(colnames[6]), + datetime, get(colnames[6]), xlab="Date",ylab="Temperature [C]" )) par(new=TRUE) @@ -313,16 +312,6 @@ head(dischargeToday) Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data is pulled from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \texttt{"}Ice\texttt{"}. Any value that cannot be converted to a number will be reported as NA in this package. -A simple plotting example is shown in Figure \ref{fig:getNWISUnitPlot}: -<<getNWISUnitPlot, echo=TRUE, fig.cap="Real-time discharge plot of Choptank River from May 12-13, 2012.">>= -colnames <- names(dischargeToday) -with(dischargeToday, plot( - dateTime, get(colnames[4]), - ylab="Discharge [cfs]",xlab="" - )) -title(ChoptankInfo$station.nm) - -@ \newpage @@ -332,7 +321,7 @@ title(ChoptankInfo$station.nm) \subsection{Water Quality Values} \label{sec:usgsWQP} %------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the Water Quality Data Portal: \url{http://www.waterqualitydata.us/}. The raw data are obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a \texttt{"};\texttt{"} separator, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwelming (see Appendix \ref{sec:appendix2WQP}), a simplified version of the data can be obtained using getQWData.There is a large amount of data returned for each observation. +To get USGS water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the Water Quality Data Portal: \url{http://www.waterqualitydata.us/}. 
The raw data are obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a vector, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwelming (see Appendix \ref{sec:appendix2WQP}), a simplified version of the data can be obtained using getQWData.There is a large amount of data returned for each observation. <<label=getQW, echo=TRUE>>= @@ -444,7 +433,7 @@ xtable(DF, caption="Daily dataframe") @ -If there are discharge values of zero, the code will add a small constant to all of the daily discharges. This constant is 0.001 times the mean discharge. The code will also report on the number of zero values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days). Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. +If there are discharge values of zero, the code will add a small constant to all of the daily discharges. This constant is 0.001 times the mean discharge. The code will also report on the number of zero and negative values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days), and there are no negative discharge values. Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. \FloatBarrier @@ -494,14 +483,14 @@ Date & Date & Date & date \\ SinDY & number & Sine of DecYear & numeric \\ CosDY & number & Cosine of DecYear & numeric \\ Q \footnotemark[1] & number & Discharge & cms \\ - LogQ \footnotemark[1] & number & Natural logarithm of flow & numeric \\ + LogQ \footnotemark[1] & number & Natural logarithm of discharge & numeric \\ \hline \end{tabular} \end{center} \end{minipage} \end{table} -\footnotetext[1]{Flow columns are populated from data in the Daily dataframe after calling the mergeReport function.} +\footnotetext[1]{Discharge columns are populated from data in the Daily dataframe after calling the mergeReport function.} \FloatBarrier @@ -572,7 +561,7 @@ date Qdaily ... \end{verbatim} -The call to open this file, convert the flow to cubic meters per second, and populate the Daily data frame would be: +The call to open this file, convert the discharge to cubic meters per second, and populate the Daily data frame would be: <<openDaily, eval = FALSE>>= fileName <- "ChoptankRiverFlow.txt" filePath <- "C:/RData/" @@ -585,7 +574,7 @@ Daily <- getDailyDataFromFile(filePath,fileName, %------------------------------------------------------------ \subsubsection{getSampleDataFromFile} %------------------------------------------------------------ -Similarly to the previous section, getSampleDataFromFile will import a user-generated file and populate the Sample dataframe. The difference between sample data and flow data is that the code requires a third column that contains a remark code, either blank or \texttt{"}\verb@<@\texttt{"}, which will tell the program that the data was 'left-censored' (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. 
If multiple constituents are going to be used, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... An example of a comma-delimited file would be: +Similarly to the previous section, getSampleDataFromFile will import a user-generated file and populate the Sample dataframe. The difference between sample data and discharge data is that the code requires a third column that contains a remark code, either blank or \texttt{"}\verb@<@\texttt{"}, which will tell the program that the data was 'left-censored' (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. If multiple constituents are going to be summed, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... An example of a comma-delimited file would be: \begin{verbatim} cdate;remarkCode;Nitrate diff --git a/vignettes/dataRetrieval.log b/vignettes/dataRetrieval.log index 4c618022d6808b402c297b0a1578dbdc9c3b8fd9..1092d83676d9dbbfc8164b7f374ceaee90041791 100644 --- a/vignettes/dataRetrieval.log +++ b/vignettes/dataRetrieval.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 27 JUN 2013 15:21 +This is pdfTeX, Version 3.1415926-2.3-1.40.12 (MiKTeX 2.9) (preloaded format=pdflatex 2012.1.6) 9 JUL 2013 17:05 entering extended mode **dataRetrieval.tex (D:\LADData\RCode\dataRetrieval\vignettes\dataRetrieval.tex @@ -549,22 +549,22 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [5] -Package color Info: Redefining color shadecolor on input line 345. -Package color Info: Redefining color shadecolor on input line 363. +Package color Info: Redefining color shadecolor on input line 330. +Package color Info: Redefining color shadecolor on input line 348. -Overfull \hbox (30.64148pt too wide) in paragraph at lines 373--374 +Overfull \hbox (30.64148pt too wide) in paragraph at lines 358--359 []\OT1/ptm/m/n/10.95 Parameter in-for-ma-tion is ob-tained from []$\OT1/pcr/m/n /10.95 http : / / nwis . waterdata . usgs . gov / nwis / pmcodes/$[] [] +Package color Info: Redefining color shadecolor on input line 369. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [6] -Package color Info: Redefining color shadecolor on input line 384. -Package color Info: Redefining color shadecolor on input line 404. -Package color Info: Redefining color shadecolor on input line 424. +Package color Info: Redefining color shadecolor on input line 389. +Package color Info: Redefining color shadecolor on input line 409. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] @@ -572,87 +572,67 @@ Overfull \vbox (21.68121pt too high) has occurred while \output is active [] [7] LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 441. + Default added (so using `tbp') on input line 426. -<figure/getNWIStemperaturePlot.pdf, id=212, 505.89pt x 505.89pt> +<figure/getNWIStemperaturePlot.pdf, id=213, 505.89pt x 505.89pt> File: figure/getNWIStemperaturePlot.pdf Graphic file (type pdf) <use figure/getNWIStemperaturePlot.pdf> Package pdftex.def Info: figure/getNWIStemperaturePlot.pdf used on input line 4 -43. -(pdftex.def) Requested size: 448.07378pt x 448.07928pt. - -Overfull \vbox (21.68121pt too high) has occurred while \output is active [] +28. +(pdftex.def) Requested size: 448.07928pt x 448.07928pt. 
- -[8] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[9 <D:/LADData/RCode/dataRetrieval/vignettes/figure/getNWIStemperaturePlot.pdf> +[8 <D:/LADData/RCode/dataRetrieval/vignettes/figure/getNWIStemperaturePlot.pdf> ] -Package color Info: Redefining color shadecolor on input line 462. -Package color Info: Redefining color shadecolor on input line 476. -Package color Info: Redefining color shadecolor on input line 501. - - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 510. - -<figure/getNWISUnitPlot.pdf, id=229, 505.89pt x 505.89pt> -File: figure/getNWISUnitPlot.pdf Graphic file (type pdf) - -<use figure/getNWISUnitPlot.pdf> -Package pdftex.def Info: figure/getNWISUnitPlot.pdf used on input line 512. -(pdftex.def) Requested size: 448.07378pt x 448.07928pt. +Package color Info: Redefining color shadecolor on input line 447. +Package color Info: Redefining color shadecolor on input line 461. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[10] -Overfull \vbox (21.68121pt too high) has occurred while \output is active [] - - -[11 <D:/LADData/RCode/dataRetrieval/vignettes/figure/getNWISUnitPlot.pdf>] -Package color Info: Redefining color shadecolor on input line 531. -Package color Info: Redefining color shadecolor on input line 556. +[9] +Package color Info: Redefining color shadecolor on input line 497. +Package color Info: Redefining color shadecolor on input line 522. LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 565. + Default added (so using `tbp') on input line 531. -<figure/getQWtemperaturePlot.pdf, id=249, 505.89pt x 505.89pt> +<figure/getQWtemperaturePlot.pdf, id=233, 505.89pt x 505.89pt> File: figure/getQWtemperaturePlot.pdf Graphic file (type pdf) <use figure/getQWtemperaturePlot.pdf> -Package pdftex.def Info: figure/getQWtemperaturePlot.pdf used on input line 567 +Package pdftex.def Info: figure/getQWtemperaturePlot.pdf used on input line 533 . (pdftex.def) Requested size: 448.07378pt x 448.07928pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[12] +[10] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[13 <D:/LADData/RCode/dataRetrieval/vignettes/figure/getQWtemperaturePlot.pdf>] +[11 <D:/LADData/RCode/dataRetrieval/vignettes/figure/getQWtemperaturePlot.pdf>] -Overfull \hbox (50.793pt too wide) in paragraph at lines 580--581 +Overfull \hbox (50.793pt too wide) in paragraph at lines 546--547 \OT1/ptm/m/n/10.95 There are ad-di-tional data sets avail-able on the Wa-ter Qu al-ity Data Por-tal ([]$\OT1/pcr/m/n/10.95 http : / / www . waterqualitydata .$ [] -Package color Info: Redefining color shadecolor on input line 583. -LaTeX Font Info: Try loading font information for TS1+pcr on input line 585. +Package color Info: Redefining color shadecolor on input line 549. +LaTeX Font Info: Try loading font information for TS1+pcr on input line 551. ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\psnfss\ts1pcr.fd" File: ts1pcr.fd 2001/06/04 font definitions for TS1/pcr. ) -Package color Info: Redefining color shadecolor on input line 618. +Package color Info: Redefining color shadecolor on input line 584. 
-Overfull \hbox (5.25568pt too wide) in paragraph at lines 627--627 +Overfull \hbox (5.25568pt too wide) in paragraph at lines 593--593 []\OT1/pcr/m/n/10.95 url_uv <- []\OT1/pcr/b/n/10.95 constructNWISURL[]\OT1/pcr/ m/n/10.95 (siteNumber,[]"00060"[],startDate,endDate,[]\TS1/pcr/m/n/10.95 '\OT1/ pcr/m/n/10.95 uv\TS1/pcr/m/n/10.95 '[]\OT1/pcr/m/n/10.95 )[] @@ -662,212 +642,207 @@ pcr/m/n/10.95 uv\TS1/pcr/m/n/10.95 '[]\OT1/pcr/m/n/10.95 )[] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[14] -Package color Info: Redefining color shadecolor on input line 648. -Package color Info: Redefining color shadecolor on input line 667. +[12] +Package color Info: Redefining color shadecolor on input line 614. +Package color Info: Redefining color shadecolor on input line 633. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[15] -Package color Info: Redefining color shadecolor on input line 722. -Package color Info: Redefining color shadecolor on input line 735. +[13] +Package color Info: Redefining color shadecolor on input line 688. +Package color Info: Redefining color shadecolor on input line 701. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[16] +[14] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[17] +[15] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[18] -Package color Info: Redefining color shadecolor on input line 824. +[16] +Package color Info: Redefining color shadecolor on input line 790. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[19] -Package color Info: Redefining color shadecolor on input line 875. -LaTeX Font Info: Try loading font information for OMS+pcr on input line 880. +[17] +Package color Info: Redefining color shadecolor on input line 841. +LaTeX Font Info: Try loading font information for OMS+pcr on input line 846. ("C:\Program Files (x86)\MiKTeX 2.9\tex\latex\psnfss\omspcr.fd" File: omspcr.fd ) LaTeX Font Info: Font shape `OMS/pcr/m/n' in size <10.95> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 880. +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 846. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[20] -Package color Info: Redefining color shadecolor on input line 904. -Package color Info: Redefining color shadecolor on input line 924. +[18] +Package color Info: Redefining color shadecolor on input line 870. +Package color Info: Redefining color shadecolor on input line 890. -Overfull \hbox (64.3856pt too wide) in paragraph at lines 949--949 -[]\OT1/pcr/m/n/10.95 ## First day of the discharge record is 2000-01-01 and la -st day is 2013-01-01[] +Overfull \hbox (44.67563pt too wide) in paragraph at lines 915--915 +[] \OT1/pcr/m/n/10.95 First day of the discharge record is 2000-01-01 and last +day is 2013-01-01[] [] -Overfull \hbox (64.3856pt too wide) in paragraph at lines 949--949 -[]\OT1/pcr/m/n/10.95 ## The first sample is from 2000-01-04 and the last sampl -e is from 2012-12-18[] +Overfull \hbox (44.67563pt too wide) in paragraph at lines 915--915 +[] \OT1/pcr/m/n/10.95 The first sample is from 2000-01-04 and the last sample i +s from 2012-12-18[] [] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[21] -Package color Info: Redefining color shadecolor on input line 981. +[19] +Package color Info: Redefining color shadecolor on input line 947. 
LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 987. + Default added (so using `tbp') on input line 953. -<figure/egretEx.pdf, id=310, 505.89pt x 505.89pt> +<figure/egretEx.pdf, id=294, 505.89pt x 505.89pt> File: figure/egretEx.pdf Graphic file (type pdf) <use figure/egretEx.pdf> -Package pdftex.def Info: figure/egretEx.pdf used on input line 989. +Package pdftex.def Info: figure/egretEx.pdf used on input line 955. (pdftex.def) Requested size: 448.07378pt x 448.07928pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[22] +[20] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[23 <D:/LADData/RCode/dataRetrieval/vignettes/figure/egretEx.pdf>] -Package color Info: Redefining color shadecolor on input line 1015. -Package color Info: Redefining color shadecolor on input line 1025. - -Overfull \hbox (70.9556pt too wide) in paragraph at lines 1036--1036 -[]\OT1/pcr/m/n/10.95 ## Sample1 <- localSample[!duplicated(localSample[c("Dec -Year","ConcHigh")]),][] - [] - -Package color Info: Redefining color shadecolor on input line 1047. +[21 <D:/LADData/RCode/dataRetrieval/vignettes/figure/egretEx.pdf>] +Package color Info: Redefining color shadecolor on input line 981. +Package color Info: Redefining color shadecolor on input line 991. +Package color Info: Redefining color shadecolor on input line 1014. -Overfull \hbox (90.66557pt too wide) in paragraph at lines 1050--1050 +Overfull \hbox (90.66557pt too wide) in paragraph at lines 1017--1017 [][]\OT1/pcr/b/n/10.95 install.packages[]\OT1/pcr/m/n/10.95 ([]"dataRetrieval"[ ], repos=[]"http://usgs-r.github.com"[], type=[]"source"[])[] [] -Overfull \hbox (157.60596pt too wide) in paragraph at lines 1056--1057 +Overfull \hbox (157.60596pt too wide) in paragraph at lines 1023--1024 \OT1/ptm/m/n/10.95 C:/Users/userA/Documents/R/win-library/2.15/dataRetrieval, a nd the de-fault for a Mac: /Users/userA/Library/R/2.15/library/dataRetrieval. [] -Package color Info: Redefining color shadecolor on input line 1060. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[24 +[22 ] +Package color Info: Redefining color shadecolor on input line 1027. + Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[25] +[23] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[26] +[24] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[27 +[25 ] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[28] +[26] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[29 +[27 ] -Package color Info: Redefining color shadecolor on input line 1247. -Package color Info: Redefining color shadecolor on input line 1293. +Package color Info: Redefining color shadecolor on input line 1214. +Package color Info: Redefining color shadecolor on input line 1260. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[30 +[28 ] -Overfull \hbox (11.82567pt too wide) in paragraph at lines 1311--1311 +Overfull \hbox (11.82567pt too wide) in paragraph at lines 1278--1278 []\OT1/pcr/m/n/10.95 Suspended sediment concentration (SSC) 1980-10-01 1991-09- 30 3651 mg/l[] [] -<table1.png, id=360, 554.07pt x 125.71968pt> +<table1.png, id=344, 554.07pt x 125.71968pt> File: table1.png Graphic file (type png) <use table1.png> -Package pdftex.def Info: table1.png used on input line 1330. 
+Package pdftex.def Info: table1.png used on input line 1297. (pdftex.def) Requested size: 554.06865pt x 125.71936pt. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[31] +[29] Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[32 <D:/LADData/RCode/dataRetrieval/vignettes/table1.png>] -Underfull \hbox (badness 1983) in paragraph at lines 1346--1347 +[30 <D:/LADData/RCode/dataRetrieval/vignettes/table1.png>] +Underfull \hbox (badness 1983) in paragraph at lines 1313--1314 []\OT1/ptm/m/n/10.95 Hirsch, R. M., Moyer, D. L. and Arch-field, S. A. (2010), Weighted Re-gres-sions on [] -Underfull \hbox (badness 1221) in paragraph at lines 1346--1347 +Underfull \hbox (badness 1221) in paragraph at lines 1313--1314 \OT1/ptm/m/n/10.95 Time, Dis-charge, and Sea-son (WRTDS), with an Ap-pli-ca-tio n to Chesa-peake Bay River [] -Underfull \hbox (badness 2443) in paragraph at lines 1346--1347 +Underfull \hbox (badness 2443) in paragraph at lines 1313--1314 \OT1/ptm/m/n/10.95 In-puts. JAWRA Jour-nal of the Amer-i-can Wa-ter Re-sources As-so-ci-a-tion, 46: 857-880. [] -Underfull \hbox (badness 3690) in paragraph at lines 1346--1347 +Underfull \hbox (badness 3690) in paragraph at lines 1313--1314 \OT1/ptm/m/n/10.95 doi: 10.1111/j.1752-1688.2010.00482.x []$\OT1/pcr/m/n/10.95 http : / / onlinelibrary . wiley . com / doi /$ [] -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1353. +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 1320. Overfull \vbox (21.68121pt too high) has occurred while \output is active [] -[33 +[31 ] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 1353. +Package atveryend Info: Empty hook `AfterLastShipout' on input line 1320. (D:\LADData\RCode\dataRetrieval\vignettes\dataRetrieval.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1353. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1353. +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 1320. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 1320. Package rerunfilecheck Info: File `dataRetrieval.out' has not changed. (rerunfilecheck) Checksum: E39EB3526BB75384BBF16131BFA2BB3D;2017. ) Here is how much of TeX's memory you used: - 8264 strings out of 494045 - 123482 string characters out of 3145961 - 215648 words of memory out of 3000000 - 11333 multiletter control sequences out of 15000+200000 + 8255 strings out of 494045 + 123288 string characters out of 3145961 + 215571 words of memory out of 3000000 + 11328 multiletter control sequences out of 15000+200000 30364 words of font info for 66 fonts, out of 3000000 for 9000 715 hyphenation exceptions out of 8191 44i,15n,42p,912b,451s stack positions out of 5000i,500n,10000p,200000b,50000s @@ -879,9 +854,9 @@ les (x86)/MiKTeX 2.9/fonts/type1/urw/courier/ucrb8a.pfb><C:/Program Files (x86) .9/fonts/type1/urw/times/utmb8a.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/ty pe1/urw/times/utmr8a.pfb><C:/Program Files (x86)/MiKTeX 2.9/fonts/type1/urw/tim es/utmri8a.pfb> -Output written on dataRetrieval.pdf (33 pages, 291772 bytes). +Output written on dataRetrieval.pdf (31 pages, 274927 bytes). PDF statistics: - 442 PDF objects out of 1000 (max. 8388607) - 91 named destinations out of 1000 (max. 500000) - 258 words of extra memory for PDF output out of 10000 (max. 10000000) + 425 PDF objects out of 1000 (max. 8388607) + 88 named destinations out of 1000 (max. 
500000) + 253 words of extra memory for PDF output out of 10000 (max. 10000000) diff --git a/vignettes/dataRetrieval.pdf b/vignettes/dataRetrieval.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7eb6feaac178cf04d68409d085b4165d0943d9d6 Binary files /dev/null and b/vignettes/dataRetrieval.pdf differ diff --git a/vignettes/dataRetrieval.synctex.gz b/vignettes/dataRetrieval.synctex.gz index 158227040e6954c75111acec0006686caa551c35..2b021e507f4a98141b5909267508569df1d907a4 100644 Binary files a/vignettes/dataRetrieval.synctex.gz and b/vignettes/dataRetrieval.synctex.gz differ diff --git a/vignettes/dataRetrieval.tex b/vignettes/dataRetrieval.tex index 43030a1bf25d08f6d815eb3395f26de03c8daf92..b0283212200c0638510bab29c163df71333b2ea7 100644 --- a/vignettes/dataRetrieval.tex +++ b/vignettes/dataRetrieval.tex @@ -124,7 +124,7 @@ %------------------------------------------------------------ \section{Introduction to dataRetrieval} %------------------------------------------------------------ -The dataRetrieval package was created to simplify the process of getting hydrologic data in the R enviornment. It has been specifically designed to work seamlessly with the EGRET R package: Exploration and Graphics for RivEr Trends (EGRET). See: \url{https://github.com/USGS-R/EGRET/wiki} for information on EGRET. EGRET is designed to provide analysis of water quality data sets using the WRTDS method of data analysis (WRTDS is Weighted Regressions on Time, Discharge and Season) as well as analysis of streamflow trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets. +The dataRetrieval package was created to simplify the process of getting hydrologic data in the R enviornment. It has been specifically designed to work seamlessly with the EGRET R package: Exploration and Graphics for RivEr Trends (EGRET). See: \url{https://github.com/USGS-R/EGRET/wiki} for information on EGRET. EGRET is designed to provide analysis of water quality data sets using the WRTDS method of data analysis (WRTDS is Weighted Regressions on Time, Discharge and Season) as well as analysis of discharge trends using robust time-series smoothing techniques. Both of these capabilities provide both tabular and graphical analyses of long-term data sets. The dataRetrieval package is designed to retrieve many of the major data types of USGS hydrologic data that are available on the web, but also allows users to make use of other data that they supply from spreadsheets. Section 2 provides examples of how one can obtain raw data from USGS sources on the web and ingest them into data frames within the R environment. The functionality described in section 2 is for general use and is not tailored for the specific uses of the EGRET package. The functionality described in section 3 is tailored specifically to obtaining input from the web and structuring them specifically for use in the EGRET package. The functionality described in section 4 is for converting hydrologic data from user-supplied spreadsheets and structuring them specifically for use in the EGRET package. @@ -140,7 +140,7 @@ In this section, we will run through 5 examples, documenting how to get raw data %------------------------------------------------------------ \subsection{Introduction} %------------------------------------------------------------ -The United States Geological Survey organizes their hydrological data in standard structure. 
Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. One potential tool for discovering data is Environmental Data Discovery and Transformation (EnDDaT): \url{http://cida.usgs.gov/enddat/}. Follow the example on the EnDDaT web page to learn how to discover USGS stations and available data from any location in the United States. +The United States Geological Survey organizes their hydrological data in standard structure. Streamgages are located throughout the United States, and each streamgage has a unique ID. Often (but not always), these ID's are 8 digits. The first step to finding data is discoving this 8-digit ID. There are many ways to do this, one is the National Water Information System: Mapper \url{http://maps.waterdata.usgs.gov/mapper/index.html}. Once the site-ID is known, the next required input for USGS data retrievals is the 'parameter code'. This is a 5-digit code that specifies what measured paramater is being requested. A complete list of possible USGS parameter codes can be found at \url{http://go.usa.gov/bVDz}. @@ -148,7 +148,7 @@ Not every station will measure all parameters. A short list of commonly measured % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:30 2013 +% Tue Jul 09 17:04:58 2013 \begin{table}[ht] \centering \begin{tabular}{rll} @@ -174,7 +174,7 @@ For real-time data, the parameter code and site ID will suffice. For most varia Some common stat codes are shown in Table \ref{tab:stat}. % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:30 2013 +% Tue Jul 09 17:04:58 2013 \begin{table}[ht] \centering \begin{tabular}{rll} @@ -187,7 +187,7 @@ Some common stat codes are shown in Table \ref{tab:stat}. 
4 & 00008 & Median \\ \hline \end{tabular} -\caption{Commonly found USGS Stat Codes} +\caption{Commonly used USGS Stat Codes} \label{tab:stat} \end{table} @@ -227,7 +227,7 @@ A list of the available columns are found in Appendix \ref{sec:appendix2INFO}: I ChoptankInfo$station.nm \end{alltt} \begin{verbatim} -## [1] "CHOPTANK RIVER NEAR GREENSBORO, MD" +[1] "CHOPTANK RIVER NEAR GREENSBORO, MD" \end{verbatim} \end{kframe} \end{knitrout} @@ -249,13 +249,13 @@ ChoptankAvailableData <- \hlfunctioncall{getDataAvailability}(siteNumber) \hlfunctioncall{head}(ChoptankAvailableData) \end{alltt} \begin{verbatim} -## parameter_cd statCd startDate endDate count service -## 2 00010 00001 1988-10-01 2012-05-09 894 dv -## 3 00010 00002 2010-10-01 2012-05-09 529 dv -## 4 00010 00003 2010-10-01 2012-05-09 529 dv -## 5 00060 00003 1948-01-01 2013-06-26 23918 dv -## 6 00095 00001 2010-10-01 2012-05-09 527 dv -## 7 00095 00002 2010-10-01 2012-05-09 527 dv + parameter_cd statCd startDate endDate count service +2 00010 00001 1988-10-01 2012-05-09 894 dv +3 00010 00002 2010-10-01 2012-05-09 529 dv +4 00010 00003 2010-10-01 2012-05-09 529 dv +5 00060 00003 1948-01-01 2013-07-08 23930 dv +6 00095 00001 2010-10-01 2012-05-09 527 dv +7 00095 00002 2010-10-01 2012-05-09 527 dv \end{verbatim} \end{kframe} \end{knitrout} @@ -283,8 +283,8 @@ pCodeINFO <- \hlfunctioncall{getMultipleParameterNames}( ChoptankDailyData$parameter_cd) \end{alltt} \begin{verbatim} -## Percent complete: -## 20 40 60 80 100 +Percent complete: +20 40 60 80 100 \end{verbatim} \begin{alltt} @@ -296,24 +296,8 @@ ChoptankDailyData <- \hlfunctioncall{merge}(ChoptankDailyData, \end{knitrout} -The daily data at the Choptank River site can be displayed in a \LaTeX table using the xtable package. See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a table in Microsoft Excel or Word. - -\begin{kframe} -\begin{alltt} -tableData <- \hlfunctioncall{with}(ChoptankDailyData, - \hlfunctioncall{data.frame}(shortName=srsname, - Start=\hlfunctioncall{as.character}(startDate), - End=\hlfunctioncall{as.character}(endDate), - Count=\hlfunctioncall{as.character}(count), - Units=parameter_units) - ) - - -\hlfunctioncall{xtable}(tableData,label=\hlstring{"tab:gda"}, - caption=\hlstring{"Daily mean data availabile at the Choptank River"}) -\end{alltt} -\end{kframe}% latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:36 2013 +% latex table generated in R 3.0.1 by xtable 1.7-1 package +% Tue Jul 09 17:05:04 2013 \begin{table}[ht] \centering \begin{tabular}{rlllll} @@ -321,18 +305,19 @@ tableData <- \hlfunctioncall{with}(ChoptankDailyData, & shortName & Start & End & Count & Units \\ \hline 1 & Temperature, water & 2010-10-01 & 2012-05-09 & 529 & deg C \\ - 2 & Stream flow, mean. daily & 1948-01-01 & 2013-06-26 & 23918 & cfs \\ + 2 & Stream flow, mean. daily & 1948-01-01 & 2013-07-08 & 23930 & cfs \\ 3 & Specific conductance & 2010-10-01 & 2012-05-09 & 527 & uS/cm @25C \\ 4 & Suspended sediment concentration (SSC) & 1980-10-01 & 1991-09-30 & 3651 & mg/l \\ 5 & Suspended sediment discharge & 1980-10-01 & 1991-09-30 & 3652 & tons/day \\ \hline \end{tabular} -\caption{Daily mean data availabile at the Choptank River} +\caption{Daily mean data availabile at the Choptank River near Greensboro, MD} \label{tab:gda} \end{table} +See Appendix \ref{app:createWordTable} for instructions on converting an R dataframe to a table in Microsoft Excel or Word. 
\FloatBarrier @@ -350,9 +335,9 @@ parameterINFO <- \hlfunctioncall{getParameterInfo}(parameterCd) \hlfunctioncall{colnames}(parameterINFO) \end{alltt} \begin{verbatim} -## [1] "parameter_cd" "parameter_group_nm" -## [3] "parameter_nm" "casrn" -## [5] "srsname" "parameter_units" +[1] "parameter_cd" "parameter_group_nm" +[3] "parameter_nm" "casrn" +[5] "srsname" "parameter_units" \end{verbatim} \end{kframe} \end{knitrout} @@ -365,7 +350,7 @@ Pulling out a specific example piece of information, in this case parameter name parameterINFO$parameter_nm \end{alltt} \begin{verbatim} -## [1] "Nitrate, water, filtered, milligrams per liter as nitrogen" +[1] "Nitrate, water, filtered, milligrams per liter as nitrogen" \end{verbatim} \end{kframe} \end{knitrout} @@ -376,7 +361,7 @@ Parameter information is obtained from \url{http://nwis.waterdata.usgs.gov/nwis/ \subsection{Daily Values} \label{sec:usgsDaily} %------------------------------------------------------------ -To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (true/false) interactive. There are 2 default argument: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. Setting the \texttt{"}interactive\texttt{"} option to true will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. +To obtain historic daily records of USGS data, use the retrieveNWISData function. The arguments for this function are siteNumber, parameterCd, startDate, endDate, statCd, and a logical (TRUE/FALSE) interactive. There are 2 default arguments: statCd (defaults to \texttt{"}00003\texttt{"}), and interactive (defaults to TRUE). If you want to use the default values, you do not need to list them in the function call. Setting the \texttt{"}interactive\texttt{"} option to TRUE will walk you through the function. It might make more sense to run large batch collections with the interactive option set to FALSE. The dates (start and end) need to be in the format \texttt{"}YYYY-MM-DD\texttt{"} (note: the user does need to include the quotes). Setting the start date to \texttt{"}\texttt{"} will indicate to the program to ask for the earliest date, setting the end date to \texttt{"}\texttt{"} will ask for the latest available date. 
@@ -407,7 +392,7 @@ Another example that doesn't use the defaults would be a request for mean and ma parameterCd <- \hlfunctioncall{c}(\hlstring{"00010"},\hlstring{"00060"}) # Temperature and discharge statCd <- \hlfunctioncall{c}(\hlstring{"00001"},\hlstring{"00003"}) # Mean and maximum startDate <- \hlstring{"2012-01-01"} -endDate <- \hlstring{"2012-06-30"} +endDate <- \hlstring{"2012-05-01"} temperatureAndFlow <- \hlfunctioncall{retrieveNWISData}(siteNumber, parameterCd, startDate, endDate, StatCd=statCd) @@ -426,7 +411,7 @@ An example of plotting the above data (Figure \ref{fig:getNWIStemperaturePlot}): \hlfunctioncall{par}(mar=\hlfunctioncall{c}(5,5,5,5)) colnames <- \hlfunctioncall{names}(temperatureAndFlow) \hlfunctioncall{with}(temperatureAndFlow, \hlfunctioncall{plot}( - \hlfunctioncall{get}(colnames[3]), \hlfunctioncall{get}(colnames[6]), + datetime, \hlfunctioncall{get}(colnames[6]), xlab=\hlstring{"Date"},ylab=\hlstring{"Temperature [C]"} )) \hlfunctioncall{par}(new=TRUE) @@ -440,7 +425,7 @@ colnames <- \hlfunctioncall{names}(temperatureAndFlow) \end{alltt} \end{kframe}\begin{figure}[] -\includegraphics[width=\maxwidth]{figure/getNWIStemperaturePlot} \caption[Temperature and discharge plot of Choptank River in 2012]{Temperature and discharge plot of Choptank River in 2012.\label{fig:getNWIStemperaturePlot}} +\includegraphics[width=1\linewidth,height=1\linewidth]{figure/getNWIStemperaturePlot} \caption[Temperature and discharge plot of Choptank River in 2012]{Temperature and discharge plot of Choptank River in 2012.\label{fig:getNWIStemperaturePlot}} \end{figure} @@ -475,20 +460,20 @@ Which produces the following dataframe: \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{verbatim} -## agency site dateTime X02_00060_00011 -## 1 USGS 01491000 2012-05-12 00:00:00 83 -## 2 USGS 01491000 2012-05-12 00:15:00 83 -## 3 USGS 01491000 2012-05-12 00:30:00 83 -## 4 USGS 01491000 2012-05-12 00:45:00 83 -## 5 USGS 01491000 2012-05-12 01:00:00 85 -## 6 USGS 01491000 2012-05-12 01:15:00 83 -## X02_00060_00011_cd -## 1 A -## 2 A -## 3 A -## 4 A -## 5 A -## 6 A + agency site dateTime X02_00060_00011 +1 USGS 01491000 2012-05-12 00:00:00 83 +2 USGS 01491000 2012-05-12 00:15:00 83 +3 USGS 01491000 2012-05-12 00:30:00 83 +4 USGS 01491000 2012-05-12 00:45:00 83 +5 USGS 01491000 2012-05-12 01:00:00 85 +6 USGS 01491000 2012-05-12 01:15:00 83 + X02_00060_00011_cd +1 A +2 A +3 A +4 A +5 A +6 A \end{verbatim} \end{kframe} \end{knitrout} @@ -496,25 +481,6 @@ Which produces the following dataframe: Note that time now becomes important, so the variable datetime is a POSIXct, and the time zone is included in a separate column. Data is pulled from \url{http://waterservices.usgs.gov/rest/IV-Test-Tool.html}. There are occasions where NWIS values are not reported as numbers, instead a common example is \texttt{"}Ice\texttt{"}. Any value that cannot be converted to a number will be reported as NA in this package. 
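Because any non-numeric report becomes NA, it can be worth screening the returned values before further analysis. The sketch below is illustrative only; dischargeToday and the column name X02_00060_00011 are taken from the example output above.

\begin{verbatim}
# Sketch: count and drop NA values produced by non-numeric reports such as "Ice".
sum(is.na(dischargeToday$X02_00060_00011))    # how many values could not be converted
goodRows <- !is.na(dischargeToday$X02_00060_00011)
dischargeClean <- dischargeToday[goodRows, ]
\end{verbatim}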
-A simple plotting example is shown in Figure \ref{fig:getNWISUnitPlot}: -\begin{knitrout} -\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} -\begin{alltt} -colnames <- \hlfunctioncall{names}(dischargeToday) -\hlfunctioncall{with}(dischargeToday, \hlfunctioncall{plot}( - dateTime, \hlfunctioncall{get}(colnames[4]), - ylab=\hlstring{"Discharge [cfs]"},xlab=\hlstring{""} - )) -\hlfunctioncall{title}(ChoptankInfo$station.nm) -\end{alltt} -\end{kframe}\begin{figure}[] - -\includegraphics[width=\maxwidth]{figure/getNWISUnitPlot} \caption[Real-time discharge plot of Choptank River from May 12-13, 2012]{Real-time discharge plot of Choptank River from May 12-13, 2012.\label{fig:getNWISUnitPlot}} -\end{figure} - - -\end{knitrout} - \newpage @@ -524,7 +490,7 @@ \subsection{Water Quality Values} \label{sec:usgsWQP} %------------------------------------------------------------ -To get USGS water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the Water Quality Data Portal: \url{http://www.waterqualitydata.us/}. The raw data are obtained from the function getRawQWData, with the similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd, in this function multiple parameters can be queried using a \texttt{"};\texttt{"} separator, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwelming (see Appendix \ref{sec:appendix2WQP}), a simplified version of the data can be obtained using getQWData.There is a large amount of data returned for each observation. +To get USGS water quality data from water samples collected at the streamgage (as distinct from unit values collected through some type of automatic monitor) we can use the Water Quality Data Portal: \url{http://www.waterqualitydata.us/}. The raw data are obtained from the function getRawQWData, with similar input arguments: siteNumber, parameterCd, startDate, endDate, and interactive. The difference is in parameterCd: in this function, multiple parameters can be queried using a vector, and setting parameterCd to \texttt{"}\texttt{"} will return all of the measured observations. The raw data can be overwhelming (see Appendix \ref{sec:appendix2WQP}); a simplified version of the data can be obtained using getQWData. There is a large amount of data returned for each observation.
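A minimal sketch of a raw retrieval follows; the two parameter codes are the dissolved nitrate codes (00618 and 71851) used in the simplified example below, and siteNumber, startDate, and endDate are assumed to be the values defined earlier in the vignette.

\begin{verbatim}
# Sketch: query two parameter codes at once by passing a vector;
# parameterCd = "" would instead return every measured observation.
parameterCd <- c("00618", "71851")   # dissolved nitrate parameter codes
rawNitrate <- getRawQWData(siteNumber, parameterCd,
                           startDate, endDate, interactive = FALSE)
\end{verbatim}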
\begin{knitrout} @@ -544,8 +510,8 @@ dissolvedNitrateSimple <- \hlfunctioncall{getQWData}(siteNumber, parameterCd, \hlfunctioncall{names}(dissolvedNitrateSimple) \end{alltt} \begin{verbatim} -## [1] "dateTime" "qualifier.00618" "value.00618" -## [4] "qualifier.71851" "value.71851" +[1] "dateTime" "qualifier.00618" "value.00618" +[4] "qualifier.71851" "value.71851" \end{verbatim} \end{kframe} \end{knitrout} @@ -587,20 +553,20 @@ specificCond <- \hlfunctioncall{getWQPData}(\hlstring{'WIDNR_WQX-10032762'}, \hlfunctioncall{head}(specificCond) \end{alltt} \begin{verbatim} -## dateTime qualifier.Specific conductance -## 1 2011-02-14 -## 2 2011-02-17 -## 3 2011-03-03 -## 4 2011-03-10 -## 5 2011-03-29 -## 6 2011-04-07 -## value.Specific conductance -## 1 1360 -## 2 1930 -## 3 1240 -## 4 1480 -## 5 1130 -## 6 1200 + dateTime qualifier.Specific conductance +1 2011-02-14 +2 2011-02-17 +3 2011-03-03 +4 2011-03-10 +5 2011-03-29 +6 2011-04-07 + value.Specific conductance +1 1360 +2 1930 +3 1240 +4 1480 +5 1130 +6 1200 \end{verbatim} \end{kframe} \end{knitrout} @@ -673,7 +639,7 @@ endDate <- \hlstring{"2013-01-01"} Daily <- \hlfunctioncall{getDVData}(siteNumber, \hlstring{"00060"}, startDate, endDate) \end{alltt} \begin{verbatim} -## There are 4750 data points, and 4750 days. +There are 4750 data points, and 4750 days. \end{verbatim} \end{kframe} \end{knitrout} @@ -682,7 +648,7 @@ Daily <- \hlfunctioncall{getDVData}(siteNumber, \hlstring{"00060"}, startDate, e Details of the Daily dataframe are listed below: % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:46 2013 +% Tue Jul 09 17:05:13 2013 \begin{table}[ht] \centering \begin{tabular}{rllll} @@ -709,7 +675,7 @@ Details of the Daily dataframe are listed below: -If there are discharge values of zero, the code will add a small constant to all of the daily discharges. This constant is 0.001 times the mean discharge. The code will also report on the number of zero values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days). Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. +If there are discharge values of zero, the code will add a small constant to all of the daily discharges. This constant is 0.001 times the mean discharge. The code will also report on the number of zero and negative values and the size of the constant. EGRET should only be used if the number of zero values is a very small fraction of the total days in the record (say less than 0.1\% of the days), and there are no negative discharge values. Columns Q7 and Q30 are the 7 and 30 day running averages for the 7 or 30 days ending on this specific date. 
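To make the zero-flow adjustment concrete, the illustration below shows the arithmetic described above. This is a sketch of the idea only, not the package's internal code, and it assumes the Q and LogQ columns of the Daily dataframe; in practice the running averages Q7 and Q30 would also need to be recomputed.

\begin{verbatim}
# Illustration only (not the code getDVData actually runs): when zero flows
# occur, add 0.1% of the mean discharge to every daily value and update LogQ.
if (any(Daily$Q == 0)) {
  shift <- 0.001 * mean(Daily$Q)
  Daily$Q    <- Daily$Q + shift
  Daily$LogQ <- log(Daily$Q)
}
\end{verbatim}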
\FloatBarrier @@ -769,14 +735,14 @@ Date & Date & Date & date \\ SinDY & number & Sine of DecYear & numeric \\ CosDY & number & Cosine of DecYear & numeric \\ Q \footnotemark[1] & number & Discharge & cms \\ - LogQ \footnotemark[1] & number & Natural logarithm of flow & numeric \\ + LogQ \footnotemark[1] & number & Natural logarithm of discharge & numeric \\ \hline \end{tabular} \end{center} \end{minipage} \end{table} -\footnotetext[1]{Flow columns are populated from data in the Daily dataframe after calling the mergeReport function.} +\footnotetext[1]{Discharge columns are populated from data in the Daily dataframe after calling the mergeReport function.} \FloatBarrier @@ -791,7 +757,7 @@ As an example to understand how the dataRetrieval package handles a more complex \begin{center} % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:47 2013 +% Tue Jul 09 17:05:14 2013 \begin{table}[ht] \centering \begin{tabular}{rllrlrlr} @@ -823,20 +789,20 @@ For the more complex example case, let us say dp is reported as \verb@<@0.01 and \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{verbatim} -## Date ConcLow ConcHigh Uncen ConcAve Julian Month -## 1 2003-02-15 0.520 0.520 1 0.520 55927 2 -## 2 2003-06-30 0.310 0.310 1 0.310 56062 6 -## 3 2004-09-15 0.205 0.205 1 0.205 56505 9 -## 4 2005-01-30 0.430 0.430 1 0.430 56642 1 -## 5 2005-05-30 0.050 0.050 1 0.050 56762 5 -## 6 2005-10-30 0.020 0.020 1 0.020 56915 10 -## Day DecYear MonthSeq SinDY CosDY -## 1 46 2003 1838 0.70407 0.7101 -## 2 182 2003 1842 0.02575 -0.9997 -## 3 259 2005 1857 -0.96251 -0.2712 -## 4 30 2005 1861 0.48506 0.8745 -## 5 151 2005 1865 0.52943 -0.8484 -## 6 304 2006 1870 -0.87861 0.4775 + Date ConcLow ConcHigh Uncen ConcAve Julian Month +1 2003-02-15 0.520 0.520 1 0.520 55927 2 +2 2003-06-30 0.310 0.310 1 0.310 56062 6 +3 2004-09-15 0.205 0.205 1 0.205 56505 9 +4 2005-01-30 0.430 0.430 1 0.430 56642 1 +5 2005-05-30 0.050 0.050 1 0.050 56762 5 +6 2005-10-30 0.020 0.020 1 0.020 56915 10 + Day DecYear MonthSeq SinDY CosDY +1 46 2003 1838 0.70407 0.7101 +2 182 2003 1842 0.02575 -0.9997 +3 259 2005 1857 -0.96251 -0.2712 +4 30 2005 1861 0.48506 0.8745 +5 151 2005 1865 0.52943 -0.8484 +6 304 2006 1870 -0.87861 0.4775 \end{verbatim} \end{kframe} \end{knitrout} @@ -870,7 +836,7 @@ date Qdaily ... \end{verbatim} -The call to open this file, convert the flow to cubic meters per second, and populate the Daily data frame would be: +The call to open this file, convert the discharge to cubic meters per second, and populate the Daily data frame would be: \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} @@ -888,7 +854,7 @@ Daily <- \hlfunctioncall{getDailyDataFromFile}(filePath,fileName, %------------------------------------------------------------ \subsubsection{getSampleDataFromFile} %------------------------------------------------------------ -Similarly to the previous section, getSampleDataFromFile will import a user-generated file and populate the Sample dataframe. The difference between sample data and flow data is that the code requires a third column that contains a remark code, either blank or \texttt{"}\verb@<@\texttt{"}, which will tell the program that the data was 'left-censored' (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. 
If multiple constituents are going to be used, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... An example of a comma-delimited file would be: +Similarly to the previous section, getSampleDataFromFile will import a user-generated file and populate the Sample dataframe. The difference between sample data and discharge data is that the code requires a third column that contains a remark code, either blank or \texttt{"}\verb@<@\texttt{"}, which will tell the program that the data was 'left-censored' (or, below the detection limit of the sensor). Therefore, the data is required to be in the form: date, remark, value. If multiple constituents are going to be summed, the format can be date, remark\_A, value\_A, remark\_b, value\_b, etc... An example of a comma-delimited file would be: \begin{verbatim} cdate;remarkCode;Nitrate @@ -931,40 +897,40 @@ endDate <- \hlstring{"2013-01-01"} Daily <- \hlfunctioncall{getDVData}(siteNumber, \hlstring{"00060"}, startDate, endDate) \end{alltt} \begin{verbatim} -## There are 4750 data points, and 4750 days. +There are 4750 data points, and 4750 days. \end{verbatim} \begin{alltt} Sample <- \hlfunctioncall{getSampleData}(siteNumber,parameterCd, startDate, endDate) Sample <- \hlfunctioncall{mergeReport}() \end{alltt} \begin{verbatim} -## -## Discharge Record is 4750 days long, which is 13 years -## First day of the discharge record is 2000-01-01 and last day is 2013-01-01 -## The water quality record has 220 samples -## The first sample is from 2000-01-04 and the last sample is from 2012-12-18 -## Discharge: Minimum, mean and maximum 0.00991 4.55 246 -## Concentration: Minimum, mean and maximum 0.2 1.3 2.4 -## Percentage of the sample values that are censored is 0 % + + Discharge Record is 4750 days long, which is 13 years + First day of the discharge record is 2000-01-01 and last day is 2013-01-01 + The water quality record has 220 samples + The first sample is from 2000-01-04 and the last sample is from 2012-12-18 + Discharge: Minimum, mean and maximum 0.00991 4.55 246 + Concentration: Minimum, mean and maximum 0.2 1.3 2.4 + Percentage of the sample values that are censored is 0 % \end{verbatim} \begin{alltt} \hlfunctioncall{head}(Sample) \end{alltt} \begin{verbatim} -## Date ConcLow ConcHigh Uncen ConcAve Julian Month -## 1 2000-01-04 1.59 1.59 1 1.59 54789 1 -## 2 2000-02-03 1.54 1.54 1 1.54 54819 2 -## 3 2000-02-15 1.37 1.37 1 1.37 54831 2 -## 4 2000-02-19 1.24 1.24 1 1.24 54835 2 -## 5 2000-03-23 0.52 0.52 1 0.52 54868 3 -## 6 2000-06-05 1.11 1.11 1 1.11 54942 6 -## Day DecYear MonthSeq SinDY CosDY Q LogQ -## 1 4 2000 1801 0.06005 0.9982 2.747 1.0104 -## 2 34 2000 1802 0.54392 0.8391 3.936 1.3702 -## 3 46 2000 1802 0.70407 0.7101 10.845 2.3837 -## 4 50 2000 1802 0.75113 0.6602 15.518 2.7420 -## 5 83 2000 1803 0.98809 0.1539 56.917 4.0416 -## 6 157 2000 1806 0.43940 -0.8983 1.812 0.5946 + Date ConcLow ConcHigh Uncen ConcAve Julian Month +1 2000-01-04 1.59 1.59 1 1.59 54789 1 +2 2000-02-03 1.54 1.54 1 1.54 54819 2 +3 2000-02-15 1.37 1.37 1 1.37 54831 2 +4 2000-02-19 1.24 1.24 1 1.24 54835 2 +5 2000-03-23 0.52 0.52 1 0.52 54868 3 +6 2000-06-05 1.11 1.11 1 1.11 54942 6 + Day DecYear MonthSeq SinDY CosDY Q LogQ +1 4 2000 1801 0.06005 0.9982 2.747 1.0104 +2 34 2000 1802 0.54392 0.8391 3.936 1.3702 +3 46 2000 1802 0.70407 0.7101 10.845 2.3837 +4 50 2000 1802 0.75113 0.6602 15.518 2.7420 +5 83 2000 1803 0.98809 0.1539 56.917 4.0416 +6 157 2000 1806 0.43940 -0.8983 1.812 0.5946 \end{verbatim} \end{kframe} \end{knitrout} @@ -1027,12 +993,13 @@ 
To see the raw code for a particular code, type the name of the function: removeDuplicates \end{alltt} \begin{verbatim} -## function(localSample=Sample) { -## Sample1 <- localSample[!duplicated(localSample[c("DecYear","ConcHigh")]),] -## -## return(Sample1) -## } -## <environment: namespace:dataRetrieval> +function (localSample = Sample) +{ + Sample1 <- localSample[!duplicated(localSample[c("DecYear", + "ConcHigh")]), ] + return(Sample1) +} +<environment: namespace:dataRetrieval> \end{verbatim} \end{kframe} \end{knitrout} @@ -1076,7 +1043,7 @@ After installing the package, you need to open the library each time you re-star %------------------------------------------------------------ % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:50 2013 +% Tue Jul 09 17:05:17 2013 \begin{table}[ht] \centering \begin{tabular}{rl} @@ -1142,7 +1109,7 @@ After installing the package, you need to open the library each time you re-star There are 62 columns returned from the water quality portal. % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:50 2013 +% Tue Jul 09 17:05:17 2013 \begin{table}[ht] \centering \begin{tabular}{rl} @@ -1199,7 +1166,7 @@ There are 62 columns returned from the water quality portal. \FloatBarrier % latex table generated in R 3.0.1 by xtable 1.7-1 package -% Thu Jun 27 15:21:50 2013 +% Tue Jul 09 17:05:17 2013 \begin{table}[ht] \centering \begin{tabular}{rl} @@ -1252,8 +1219,8 @@ dailyData <- dailyData[\hlstring{"00003"} == dailyData$statCd,] pCodeINFO <- \hlfunctioncall{getMultipleParameterNames}(dailyData$parameter_cd) \end{alltt} \begin{verbatim} -## Percent complete: -## 20 40 60 80 100 +Percent complete: +20 40 60 80 100 \end{verbatim} \begin{alltt} dailyData <- \hlfunctioncall{merge}(dailyData,pCodeINFO, by=\hlstring{"parameter_cd"}) @@ -1269,18 +1236,18 @@ tableData <- \hlfunctioncall{with}(dailyData, tableData \end{alltt} \begin{verbatim} -## shortName Start -## 1 Temperature, water 2010-10-01 -## 2 Stream flow, mean. daily 1948-01-01 -## 3 Specific conductance 2010-10-01 -## 4 Suspended sediment concentration (SSC) 1980-10-01 -## 5 Suspended sediment discharge 1980-10-01 -## End Count Units -## 1 2012-05-09 529 deg C -## 2 2013-06-26 23918 cfs -## 3 2012-05-09 527 uS/cm @25C -## 4 1991-09-30 3651 mg/l -## 5 1991-09-30 3652 tons/day + shortName Start +1 Temperature, water 2010-10-01 +2 Stream flow, mean. 
daily 1948-01-01 +3 Specific conductance 2010-10-01 +4 Suspended sediment concentration (SSC) 1980-10-01 +5 Suspended sediment discharge 1980-10-01 + End Count Units +1 2012-05-09 529 deg C +2 2013-07-08 23930 cfs +3 2012-05-09 527 uS/cm @25C +4 1991-09-30 3651 mg/l +5 1991-09-30 3652 tons/day \end{verbatim} \end{kframe} \end{knitrout} diff --git a/vignettes/dataRetrieval.toc b/vignettes/dataRetrieval.toc index 8c170c50cc89676b8136c95e7a76963b1962f309..593cd4b711193cdf22204a59c323b8df3be5b26f 100644 --- a/vignettes/dataRetrieval.toc +++ b/vignettes/dataRetrieval.toc @@ -6,25 +6,25 @@ \contentsline {subsubsection}{\numberline {2.2.1}getSiteFileData}{4}{subsubsection.2.2.1} \contentsline {subsubsection}{\numberline {2.2.2}getDataAvailability}{4}{subsubsection.2.2.2} \contentsline {subsection}{\numberline {2.3}Parameter Information}{6}{subsection.2.3} -\contentsline {subsection}{\numberline {2.4}Daily Values}{7}{subsection.2.4} -\contentsline {subsection}{\numberline {2.5}Unit Values}{10}{subsection.2.5} -\contentsline {subsection}{\numberline {2.6}Water Quality Values}{12}{subsection.2.6} -\contentsline {subsection}{\numberline {2.7}STORET Water Quality Retrievals}{14}{subsection.2.7} -\contentsline {subsection}{\numberline {2.8}URL Construction}{14}{subsection.2.8} -\contentsline {section}{\numberline {3}Data Retrievals Structured For Use In The EGRET Package}{15}{section.3} -\contentsline {subsection}{\numberline {3.1}INFO Data}{15}{subsection.3.1} -\contentsline {subsection}{\numberline {3.2}Daily Data}{15}{subsection.3.2} -\contentsline {subsection}{\numberline {3.3}Sample Data}{16}{subsection.3.3} -\contentsline {subsection}{\numberline {3.4}Censored Values: Summation Explanation}{18}{subsection.3.4} -\contentsline {subsection}{\numberline {3.5}User-Generated Data Files}{19}{subsection.3.5} -\contentsline {subsubsection}{\numberline {3.5.1}getDailyDataFromFile}{19}{subsubsection.3.5.1} -\contentsline {subsubsection}{\numberline {3.5.2}getSampleDataFromFile}{20}{subsubsection.3.5.2} -\contentsline {subsection}{\numberline {3.6}Merge Report}{21}{subsection.3.6} -\contentsline {subsection}{\numberline {3.7}EGRET Plots}{22}{subsection.3.7} -\contentsline {section}{\numberline {A}Getting Started in R}{24}{appendix.A} -\contentsline {subsection}{\numberline {A.1}New to R?}{24}{subsection.A.1} -\contentsline {subsection}{\numberline {A.2}R User: Installing dataRetrieval}{24}{subsection.A.2} -\contentsline {section}{\numberline {B}Columns Names}{25}{appendix.B} -\contentsline {subsection}{\numberline {B.1}INFO dataframe}{25}{subsection.B.1} -\contentsline {subsection}{\numberline {B.2}Water Quality Portal}{27}{subsection.B.2} -\contentsline {section}{\numberline {C}Creating tables in Microsoft from R}{30}{appendix.C} +\contentsline {subsection}{\numberline {2.4}Daily Values}{6}{subsection.2.4} +\contentsline {subsection}{\numberline {2.5}Unit Values}{9}{subsection.2.5} +\contentsline {subsection}{\numberline {2.6}Water Quality Values}{10}{subsection.2.6} +\contentsline {subsection}{\numberline {2.7}STORET Water Quality Retrievals}{12}{subsection.2.7} +\contentsline {subsection}{\numberline {2.8}URL Construction}{12}{subsection.2.8} +\contentsline {section}{\numberline {3}Data Retrievals Structured For Use In The EGRET Package}{13}{section.3} +\contentsline {subsection}{\numberline {3.1}INFO Data}{13}{subsection.3.1} +\contentsline {subsection}{\numberline {3.2}Daily Data}{13}{subsection.3.2} +\contentsline {subsection}{\numberline {3.3}Sample Data}{14}{subsection.3.3} +\contentsline 
{subsection}{\numberline {3.4}Censored Values: Summation Explanation}{16}{subsection.3.4} +\contentsline {subsection}{\numberline {3.5}User-Generated Data Files}{17}{subsection.3.5} +\contentsline {subsubsection}{\numberline {3.5.1}getDailyDataFromFile}{17}{subsubsection.3.5.1} +\contentsline {subsubsection}{\numberline {3.5.2}getSampleDataFromFile}{18}{subsubsection.3.5.2} +\contentsline {subsection}{\numberline {3.6}Merge Report}{19}{subsection.3.6} +\contentsline {subsection}{\numberline {3.7}EGRET Plots}{20}{subsection.3.7} +\contentsline {section}{\numberline {A}Getting Started in R}{22}{appendix.A} +\contentsline {subsection}{\numberline {A.1}New to R?}{22}{subsection.A.1} +\contentsline {subsection}{\numberline {A.2}R User: Installing dataRetrieval}{22}{subsection.A.2} +\contentsline {section}{\numberline {B}Columns Names}{23}{appendix.B} +\contentsline {subsection}{\numberline {B.1}INFO dataframe}{23}{subsection.B.1} +\contentsline {subsection}{\numberline {B.2}Water Quality Portal}{25}{subsection.B.2} +\contentsline {section}{\numberline {C}Creating tables in Microsoft from R}{28}{appendix.C} diff --git a/vignettes/figure/egretEx.pdf b/vignettes/figure/egretEx.pdf index 3c8c3e65e6a147a23a12fd057806b7dca992991d..efd8a55131e54b8ad4032d0e048ff3ec1ec506e5 100644 Binary files a/vignettes/figure/egretEx.pdf and b/vignettes/figure/egretEx.pdf differ diff --git a/vignettes/figure/getNWISUnitPlot.pdf b/vignettes/figure/getNWISUnitPlot.pdf index 32c39c921584d8f41417cb8a5d449238ebc0c861..710116d8c74a4b14e88149d07f6deed176e0f02b 100644 Binary files a/vignettes/figure/getNWISUnitPlot.pdf and b/vignettes/figure/getNWISUnitPlot.pdf differ diff --git a/vignettes/figure/getNWIStemperaturePlot.pdf b/vignettes/figure/getNWIStemperaturePlot.pdf index 81ea08e49c664108e26b7a2fcadd33e5785d1a09..ea0c96ca3deae2dc8253445af51022e597d89ecf 100644 Binary files a/vignettes/figure/getNWIStemperaturePlot.pdf and b/vignettes/figure/getNWIStemperaturePlot.pdf differ diff --git a/vignettes/figure/getQWtemperaturePlot.pdf b/vignettes/figure/getQWtemperaturePlot.pdf index 568f9df9299d379b62ef43fffeb4b4e0d020061a..8bc9e080752cb35d76d42e58c28e67b563fd854d 100644 Binary files a/vignettes/figure/getQWtemperaturePlot.pdf and b/vignettes/figure/getQWtemperaturePlot.pdf differ