Commit f3905a97 authored by Laura A DeCicco, committed by GitHub

Merge pull request #258 from ldecicco-USGS/master

Bug fix and added message
parents edeff72b 68d178aa
Package: dataRetrieval
Type: Package
Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data
Version: 2.5.10
Date: 2016-06-14
Version: 2.5.11
Date: 2016-08-02
Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "rhirsch@usgs.gov"),
person("Laura", "DeCicco", role = c("aut","cre"),
......
......@@ -35,6 +35,7 @@ export(whatNWISdata)
export(whatNWISsites)
export(whatWQPsites)
export(zeroPad)
import(lubridate)
import(stats)
import(utils)
importFrom(XML,xmlAttrs)
......
......@@ -12,7 +12,8 @@
#' retrieval for the latest possible record.
#' @param statCd string or vector USGS statistic code only used for daily value service. This is usually 5 digits. Daily mean (00003) is the default.
#' @param service string USGS service to call. Possible values are "dv" (daily values), "uv" (unit/instantaneous values),
#' "qw" (water quality data), "gwlevels" (groundwater),and "rating" (rating curve), "peak", "meas" (discrete streamflow measurements).
#' "qw" (water quality data), "gwlevels" (groundwater),and "rating" (rating curve), "peak", "meas" (discrete streamflow measurements),
#' "stat" (statistics web service BETA).
#' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possibility that an incomplete file is returned without warning. XML is slower,
#' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option,
#' but the user must carefully check the results to see if the data returned matches what is expected. The default is therefore "xml".
......@@ -22,7 +23,7 @@
#' Note that daily provides statistics for each calendar day over the specified range of water years, i.e. no more than 366
#' data points will be returned for each site/parameter. Use readNWISdata or readNWISdv for daily averages.
#' Also note that 'annual' returns statistics for the calendar year. Use readNWISdata for water years. Monthly and yearly
#' provide statistics for each month and year within the range indivually.
#' provide statistics for each month and year within the range individually.
#' @param statType character Only used for statistics service requests. Type(s) of statistics to output for daily values. Default is mean, which is the only
#' option for monthly and yearly report types. See the statistics service documentation
#' at \url{http://waterservices.usgs.gov/rest/Statistics-Service.html#statType} for a full list of codes.
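For reference, a hedged sketch of building a statistics-service URL with the new "stat" option documented above; the site number and parameter code are illustrative only, and the other arguments follow the defaults in the function signature shown in the next hunk.
library(dataRetrieval)

# Build (but do not request) a statistics-service URL; inputs are placeholders.
statURL <- constructNWISURL(siteNumber = "01646500",
                            parameterCd = "00060",
                            service = "stat",
                            statReportType = "annual",
                            statType = "mean")
statURL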
......@@ -152,6 +153,10 @@ constructNWISURL <- function(siteNumber,parameterCd="00060",startDate="",endDate
},
stat = { #for statistics service
message("Please be aware the NWIS data service feeding this function is in BETA.\n
Data formatting could be changed at any time, and is not guaranteed")
#make sure only statTypes allowed for the statReportType are being requested
if(!grepl("(?i)daily",statReportType) && !all(grepl("(?i)mean",statType)) && !all(grepl("(?i)all",statType))){
stop("Monthly and annual report types can only provide means")
......
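A standalone sketch of the guard added above: outside the daily report type, only mean statistics are allowed, so anything else stops early. The inputs here are hypothetical.
# Hypothetical inputs that would trip the new check.
statReportType <- "monthly"
statType <- "max"

if(!grepl("(?i)daily", statReportType) && !all(grepl("(?i)mean", statType)) &&
   !all(grepl("(?i)all", statType))){
  stop("Monthly and annual report types can only provide means")
}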
......@@ -117,10 +117,15 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(convertType){
readr.data <- suppressWarnings(read_delim(doc, skip = (meta.rows+2),delim="\t",col_names = FALSE))
#defaults to time in seconds in readr 0.2.2.9??
if(data.class(readr.data$X4)=="hms"){
td <- seconds_to_period(readr.data$X4[!is.na(readr.data$X4)])
readr.data$X4[!is.na(readr.data$X4)] <- sprintf('%02d:%02d:%02d', hour(td), minute(td), second(td))
if(length(grep("hms",lapply(readr.data, class))) > 0){
colHMS <- grep("hms",lapply(readr.data, class))
colList <- list(rep("c", length(colHMS)))
names(colList) <- paste0("X",colHMS)
readr.data <- suppressWarnings(read_delim(doc, skip = (meta.rows+2),delim="\t",
col_names = FALSE,
col_types = colList))
}
} else {
readr.data <- read_delim(doc, skip = (meta.rows+2),delim="\t",col_names = FALSE, col_types = cols(.default = "c"))
}
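The hunk above replaces the single-column hms fix with a second read_delim pass that forces every hms-guessed column back to character. A rough standalone sketch of the same idea, assuming a tab-delimited file path in rdb_file and a header skip count in skip_rows (both hypothetical):
library(readr)

first_pass <- suppressWarnings(read_delim(rdb_file, delim = "\t",
                                          skip = skip_rows, col_names = FALSE))

# Find columns readr guessed as "hms" and re-read them as character ("c").
hms_cols <- grep("hms", vapply(first_pass, function(x) class(x)[1], character(1)))
if(length(hms_cols) > 0){
  col_list <- as.list(rep("c", length(hms_cols)))
  names(col_list) <- paste0("X", hms_cols)   # default readr names: X1, X2, ...
  first_pass <- suppressWarnings(read_delim(rdb_file, delim = "\t",
                                            skip = skip_rows, col_names = FALSE,
                                            col_types = do.call(cols, col_list)))
}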
......@@ -151,15 +156,28 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
}
badCols <- attr(readr.data, "problems")[["col"]]
readr.data <- as.data.frame(readr.data)
if(length(badCols) > 0){
readr.data <- fixErrors(readr.data, readr.data.char, "no trailing characters", as.numeric)
readr.data <- fixErrors(readr.data, readr.data.char, "date like", parse_date_time, c("%Y-%m-%d %H:%M:%S","%Y-%m-%d","%Y"))
}
if(length(grep("_va", names(readr.data))) > 0 &&
any(lapply(readr.data[,grep("_va", names(readr.data))], class) %in% "integer")){
#note... if we simply convert any _va to numeric...we lose some QW censoring information from some formats
vaCols <- grep("_va", names(readr.data))
if(length(vaCols) > 1){
vaCols <- vaCols[lapply(readr.data[,vaCols], class) %in% "integer"]
}
readr.data[,vaCols] <- sapply(readr.data[,vaCols], as.numeric)
}
comment(readr.data) <- readr.meta
problems.orig <- problems(readr.data)
readr.data <- as.data.frame(readr.data)
if (asDateTime & convertType){
......@@ -170,7 +188,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")
varval <- parse_date_time(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), c("%Y-%m-%d %H:%M:%S","%Y-%m-%d %H:%M"), tz = "UTC")
varval <- suppressWarnings(parse_date_time(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), c("%Y-%m-%d %H:%M:%S","%Y-%m-%d %H:%M"), tz = "UTC"))
if(!all(is.na(varval))){
readr.data[,varname] <- varval
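The suppressWarnings added above quietly tolerates date/time strings that match neither format; they parse to NA instead of raising a warning. A small illustration with made-up values:
library(lubridate)

dt <- c("2015-06-01 12:30:00", "2015-06-01 12:30", "not a date")
# The first two strings match one of the two orders; the third becomes NA.
suppressWarnings(parse_date_time(dt, c("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M"),
                                 tz = "UTC"))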
......@@ -291,7 +309,11 @@ fixErrors <- function(readr.data, readr.data.char, message.text, FUN, ...){
index.col <- as.integer(gsub("X","",unique.bad.cols))
for(i in index.col){
readr.data[,i] <- FUN(readr.data.char[[i]], ...)
readr.data[,i] <- tryCatch({
FUN(readr.data.char[[i]], ...)
}, warning=function(cond){
readr.data.char[[i]]
})
attr(readr.data, "problems") <- attr(readr.data, "problems")[attr(readr.data, "problems")[["col"]] != paste0("X",i),]
}
}
......
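The tryCatch added above keeps the original character column whenever the conversion function warns, for example when censored water-quality values cannot be coerced by as.numeric. A minimal sketch of that fall-back, with a made-up column:
raw_col <- c("1.5", "2.3", "< 0.5")    # "< 0.5" triggers a coercion warning

converted <- tryCatch({
  as.numeric(raw_col)
}, warning = function(cond){
  raw_col                              # fall back to the untouched character values
})
converted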
......@@ -188,6 +188,12 @@ readNWISdata <- function(service="dv", ..., asDateTime=TRUE,convertType=TRUE){
format.default <- "rdb"
}
if(service == "stat"){
message("Please be aware the NWIS data service feeding this function is in BETA.\n
Data formatting could be changed at any time, and is not guaranteed")
}
if(!("format" %in% names(values))){
values["format"] <- format.default
}
......
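A hedged example of reaching the statistics service through readNWISdata; the site and parameter code are placeholders, the argument names follow the web-service parameters and may need adjusting, and the BETA message added above will be printed:
library(dataRetrieval)

dailyStat <- readNWISdata(service = "stat",
                          sites = "02319394",
                          parameterCd = "00060",
                          statReportType = "daily",
                          statType = "mean")
head(dailyStat)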
......@@ -306,7 +306,7 @@ readNWISmeas <- function (siteNumbers,startDate="",endDate="", tz="", expanded=F
if(convertType){
data$measurement_dateTime <- data$measurement_dt
data$measurement_dt <- as.Date(data$measurement_dateTime)
data$measurement_dt <- suppressWarnings(as.Date(data$measurement_dateTime))
data$measurement_tm <- strftime(data$measurement_dateTime, "%H:%M")
data$measurement_tm[is.na(data$tz_cd_reported)] <- ""
indexDT <- which("measurement_dt" == names(data))
......@@ -465,6 +465,7 @@ readNWISgwl <- function (siteNumbers,startDate="",endDate="", convertType = TRUE
#' }
readNWISstat <- function(siteNumbers, parameterCd, startDate = "", endDate = "", convertType = TRUE,
statReportType = "daily", statType = "mean"){
#check for NAs in site numbers
if(any(is.na(siteNumbers))){
siteNumbers <- siteNumbers[!is.na(siteNumbers)]
......
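For context, a hedged example of readNWISstat showing the change above: NA entries in siteNumbers are filtered out before the request is built (the hunk shows the start of that check). Sites and parameter code are illustrative only.
library(dataRetrieval)

sites <- c("02319394", NA, "02171500")
annualFlow <- readNWISstat(siteNumbers = sites,
                           parameterCd = "00060",
                           statReportType = "annual")
head(annualFlow)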
......@@ -21,7 +21,8 @@ retrieval for the earliest possible record.}
retrieval for the latest possible record.}
\item{service}{string USGS service to call. Possible values are "dv" (daily values), "uv" (unit/instantaneous values),
"qw" (water quality data), "gwlevels" (groundwater),and "rating" (rating curve), "peak", "meas" (discrete streamflow measurements).}
"qw" (water quality data), "gwlevels" (groundwater),and "rating" (rating curve), "peak", "meas" (discrete streamflow measurements),
"stat" (statistics web service BETA).}
\item{statCd}{string or vector USGS statistic code only used for daily value service. This is usually 5 digits. Daily mean (00003) is the default.}
......@@ -37,7 +38,7 @@ but the user must carefully check the results to see if the data returned matches
Note that daily provides statistics for each calendar day over the specified range of water years, i.e. no more than 366
data points will be returned for each site/parameter. Use readNWISdata or readNWISdv for daily averages.
Also note that 'annual' returns statistics for the calendar year. Use readNWISdata for water years. Monthly and yearly
provide statistics for each month and year within the range indivually.}
provide statistics for each month and year within the range individually.}
\item{statType}{character Only used for statistics service requests. Type(s) of statistics to output for daily values. Default is mean, which is the only
option for monthly and yearly report types. See the statistics service documentation
......
......@@ -56,10 +56,11 @@ test_that("General WQP retrievals working", {
# characteristicName=nameToUse, querySummary = TRUE)
# expect_is(pHDataExpanded2, 'list')
startDate <- as.Date("2013-01-01")
nutrientDaneCounty <- readWQPdata(countycode="US:55:025",startDate=startDate,
characteristicType="Nutrient")
expect_is(nutrientDaneCounty$ActivityStartDateTime, 'POSIXct')
# Super slow:
# startDate <- as.Date("2013-01-01")
# nutrientDaneCounty <- readWQPdata(countycode="US:55:025",startDate=startDate,
# characteristicType="Nutrient")
# expect_is(nutrientDaneCounty$ActivityStartDateTime, 'POSIXct')
})
test_that("WQP head query retrievals working", {
......
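The Dane County nutrient query commented out above (flagged as slow in the test) can still be run interactively; a hedged sketch of the same call:
library(dataRetrieval)

startDate <- as.Date("2013-01-01")
nutrientDaneCounty <- readWQPdata(countycode = "US:55:025",
                                  startDate = startDate,
                                  characteristicType = "Nutrient")
# The test expected POSIXct date-times in the result:
class(nutrientDaneCounty$ActivityStartDateTime)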