From d292e988499ab32ff42c88c143f1c3288224d3ff Mon Sep 17 00:00:00 2001 From: unknown <ldecicco@usgs.gov> Date: Mon, 15 Dec 2014 16:24:53 -0600 Subject: [PATCH] Improving error handling. --- R/constructNWISURL.r | 2 +- R/importRDB1.r | 285 +++++++++++++++++++++---------------------- R/importWQP.R | 55 +++++---- R/importWaterML1.r | 28 ++--- R/importWaterML2.r | 33 +++-- R/readNWISqw.r | 2 +- R/readNWISunit.r | 1 - R/whatWQPsites.R | 13 +- man/importWQP.Rd | 3 +- man/readNWISpCode.Rd | 2 +- 10 files changed, 212 insertions(+), 212 deletions(-) diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r index c484f598..8cac7a93 100644 --- a/R/constructNWISURL.r +++ b/R/constructNWISURL.r @@ -58,7 +58,7 @@ constructNWISURL <- function(siteNumber,parameterCd="00060",startDate="",endDate } else { badPcode <- parameterCd } - message("The following pCodes may be unavailable:",paste(badPcode,collapse=",")) + warning("The following pCodes may be unavailable:",paste(badPcode,collapse=",")) } } diff --git a/R/importRDB1.r b/R/importRDB1.r index b07f2328..8ea4af3b 100644 --- a/R/importRDB1.r +++ b/R/importRDB1.r @@ -39,6 +39,7 @@ #' comment \tab character \tab Header comments from the RDB file \cr #' } #' @export +#' @import RCurl #' @examples #' siteNumber <- "02177000" #' startDate <- "2012-09-01" @@ -81,184 +82,180 @@ importRDB1 <- function(obs_url, asDateTime=FALSE, qw=FALSE, convertType = TRUE, } if(file.exists(obs_url)){ - doc <- obs_url - fileVecChar <- scan(obs_url, what = "", sep = "\n", quiet=TRUE) - pndIndx<-regexpr("^#", fileVecChar) - hdr <- fileVecChar[pndIndx > 0L] - + headerInfo <- setNames("200", "status") } else { # 400 bad site id # 404 outside date range, wrong pcode # 200 cool - - - retval = tryCatch({ + possibleError = tryCatch({ h <- basicHeaderGatherer() doc <- getURL(obs_url, headerfunction = h$update) - - fileVecChar <- scan(obs_url, what = "", sep = "\n", quiet=TRUE) - pndIndx<-regexpr("^#", fileVecChar) - hdr <- fileVecChar[pndIndx > 0L] - - if(!(as.character(h$value()["Content-Type"]) == "text/plain;charset=UTF-8" | - as.character(h$value()["Content-Type"]) == "text/plain")){ - message(paste("URL caused an error:", obs_url)) - message("Content-Type=",h$value()["Content-Type"]) - } - doc <- textConnection(doc) }, warning = function(w) { - message(paste("URL caused a warning:", obs_url)) - message(w) + warning(w, "with url:", obs_url) }, error = function(e) { - message(paste("URL does not seem to exist:", obs_url)) - message(e) - return(NA) + stop(e, "with url:", obs_url) }) + + headerInfo <- h$value() } - tmp <- read.delim( - doc, - header = TRUE, - quote="\"", - dec=".", - sep='\t', - colClasses=c('character'), - fill = TRUE, - comment.char="#") - dataType <- tmp[1,] - data <- tmp[-1,] + if (file.exists(obs_url) | headerInfo['status'] == "200"){ - if(convertType){ - - #This will break if the 2nd (or greater) site has more columns than the first - #Therefore, using RDB is not recommended for multi-site queries. 
- #This correction will work if each site has the same number of columns - multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2]))))) + fileVecChar <- scan(obs_url, what = "", sep = "\n", quiet=TRUE) + pndIndx<-regexpr("^#", fileVecChar) + hdr <- fileVecChar[pndIndx > 0L] - if(length(multiSiteCorrections) > 0){ - data <- data[multiSiteCorrections,] - - findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0)) - findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0) - data <- data[findRowsWithHeaderInfo,] + if(!file.exists(obs_url)){ + doc <- textConnection(doc) } - offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), - c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) - - # The suppressed warning occurs when there is text (such as ice) in the numeric coluym - data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x))) - - numberColumns <- grep("_va",names(data)) - data[,numberColumns] <- sapply(data[,numberColumns],as.numeric) + tmp <- read.delim( + doc, + header = TRUE, + quote="\"", + dec=".", + sep='\t', + colClasses=c('character'), + fill = TRUE, + comment.char="#") - intColumns <- grep("_nu",names(data)) - - if("current_rating_nu" %in% names(data)){ - intColumns <- intColumns[!("current_rating_nu" %in% names(data)[intColumns])] - data$current_rating_nu <- gsub(" ", "", data$current_rating_nu) - } - data[,intColumns] <- sapply(data[,intColumns],as.integer) + dataType <- tmp[1,] + data <- tmp[-1,] - if(length(grep('d$', dataType)) > 0){ - if (asDateTime & !qw){ - - if("tz_cd" %in% names(data)){ - offset <- offsetLibrary[data$tz_cd] - } else { - offset <- 0 - } - offset[is.na(offset)] <- 0 - - data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC") - data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60 - data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0]) - - if(tz != ""){ - attr(data[,regexpr('d$', dataType) > 0], "tzone") <- tz - } else { - attr(data[,regexpr('d$', dataType) > 0], "tzone") <- "UTC" - } - - } else if (qw){ - - if("sample_start_time_datum_cd" %in% names(data)){ - timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd] - timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0 - } else { - timeZoneStartOffset <- 0 - } + if(convertType){ + + #This will break if the 2nd (or greater) site has more columns than the first + #Therefore, using RDB is not recommended for multi-site queries. 
+ #This correction will work if each site has the same number of columns + multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2]))))) + + if(length(multiSiteCorrections) > 0){ + data <- data[multiSiteCorrections,] - if("sample_end_time_datum_cd" %in% names(data)){ - timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd] - timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0 - composite <- TRUE - } else { - composite <- FALSE - if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){ - if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){ - composite <- TRUE + findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0)) + findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0) + data <- data[findRowsWithHeaderInfo,] + } + + offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), + c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + + # The suppressed warning occurs when there is text (such as ice) in the numeric coluym + data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x))) + + numberColumns <- grep("_va",names(data)) + data[,numberColumns] <- sapply(data[,numberColumns],as.numeric) + + intColumns <- grep("_nu",names(data)) + + if("current_rating_nu" %in% names(data)){ + intColumns <- intColumns[!("current_rating_nu" %in% names(data)[intColumns])] + data$current_rating_nu <- gsub(" ", "", data$current_rating_nu) + } + data[,intColumns] <- sapply(data[,intColumns],as.integer) + + if(length(grep('d$', dataType)) > 0){ + if (asDateTime & !qw){ + + if("tz_cd" %in% names(data)){ + offset <- offsetLibrary[data$tz_cd] + } else { + offset <- 0 + } + offset[is.na(offset)] <- 0 + + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC") + data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60 + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0]) + + if(tz != ""){ + attr(data[,regexpr('d$', dataType) > 0], "tzone") <- tz + } else { + attr(data[,regexpr('d$', dataType) > 0], "tzone") <- "UTC" + } + + } else if (qw){ + + if("sample_start_time_datum_cd" %in% names(data)){ + timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd] + timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0 + } else { + timeZoneStartOffset <- 0 + } + + if("sample_end_time_datum_cd" %in% names(data)){ + timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd] + timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0 + composite <- TRUE + } else { + composite <- FALSE + if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){ + if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){ + composite <- TRUE + } } + timeZoneEndOffset <- 0 } - timeZoneEndOffset <- 0 - } - - if("sample_dt" %in% names(data)){ - if(any(data$sample_dt != "")){ - suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY")))) + + if("sample_dt" %in% names(data)){ + if(any(data$sample_dt != "")){ + suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY")))) + } } - } - - if("sample_end_dt" %in% names(data)){ - if(any(data$sample_end_dt != "")){ - suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY")))) - } - } - - 
data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) - data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60 - data$startDateTime <- as.POSIXct(data$startDateTime) - - if(tz != ""){ - attr(data$startDateTime, "tzone") <- tz - } else { - attr(data$startDateTime, "tzone") <- "UTC" - } - - if(composite){ - data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) - data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60 - data$endDateTime <- as.POSIXct(data$endDateTime) + + if("sample_end_dt" %in% names(data)){ + if(any(data$sample_end_dt != "")){ + suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY")))) + } + } + + data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60 + data$startDateTime <- as.POSIXct(data$startDateTime) if(tz != ""){ - attr(data$endDateTime, "tzone") <- tz + attr(data$startDateTime, "tzone") <- tz } else { - attr(data$endDateTime, "tzone") <- "UTC" + attr(data$startDateTime, "tzone") <- "UTC" } - } - - } else { - for (i in grep('d$', dataType)){ - if (all(data[,i] != "")){ - data[,i] <- as.character(data[,i]) + + if(composite){ + data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60 + data$endDateTime <- as.POSIXct(data$endDateTime) + + if(tz != ""){ + attr(data$endDateTime, "tzone") <- tz + } else { + attr(data$endDateTime, "tzone") <- "UTC" + } + } + + } else { + for (i in grep('d$', dataType)){ + if (all(data[,i] != "")){ + data[,i] <- as.character(data[,i]) + } } } } + + row.names(data) <- NULL } + + names(data) <- make.names(names(data)) + + comment(data) <- hdr + attr(data, "url") <- obs_url + attr(data, "queryTime") <- Sys.time() - row.names(data) <- NULL + return(data) + } else { + stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) } - - names(data) <- make.names(names(data)) - - comment(data) <- hdr - attr(data, "url") <- obs_url - attr(data, "queryTime") <- Sys.time() - - return(data) } diff --git a/R/importWQP.R b/R/importWQP.R index 89d6d707..ede143b1 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -13,46 +13,65 @@ #' @export #' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}} #' @import RCurl -#' @import httr #' @import lubridate #' @examples #' # These examples require an internet connection to run -#' \dontrun{ +#' #' ## Examples take longer than 5 seconds: #' rawSampleURL <- constructWQPURL('USGS-01594440','01075', '', '') +#' \dontrun{ #' rawSample <- importWQP(rawSampleURL) #' url2 <- paste0(rawSampleURL,"&zip=yes") #' rawSample2 <- importWQP(url2, TRUE) +#' #' STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '') #' STORETdata <- importWQP(STORETex) #' } importWQP <- function(url, zip=FALSE, tz=""){ + h <- basicHeaderGatherer() + if(zip){ - headerInfo <- HEAD(url)$headers + httpHEAD(url, headerfunction = h$update) + temp <- tempfile() options(timeout = 120) - possibleError <- tryCatch( - download.file(url,temp, quiet=TRUE, mode='wb'), - error = function(e) e + possibleError <- tryCatch({ + download.file(url,temp, quiet=TRUE, mode='wb') + }, + error = function(e) { + stop(e, "with url:", url) + } ) } else { - h <- 
basicHeaderGatherer() - possibleError <- tryCatch( - doc <- getURL(url, headerfunction = h$update), - error = function(e) e + + possibleError <- tryCatch({ + doc <- getURL(url, headerfunction = h$update) + }, + error = function(e) { + stop(e, "with url:", url) + } ) + } - if(!inherits(possibleError, "error")){ + headerInfo <- h$value() + + if(headerInfo['status'] != "200"){ + + if(zip){ + unlink(temp) + } + + stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", url) + + } else { if(zip){ doc <- unzip(temp) unlink(temp) - } else { - headerInfo <- h$value() } if(tz != ""){ @@ -130,14 +149,8 @@ importWQP <- function(url, zip=FALSE, tz=""){ return(retval) } else { - warning("No data to retrieve") + return(NA) } - } else { - if(zip){ - unlink(temp) - } - message(e) - } - + } } \ No newline at end of file diff --git a/R/importWaterML1.r b/R/importWaterML1.r index 0bd52435..6e42cdb8 100644 --- a/R/importWaterML1.r +++ b/R/importWaterML1.r @@ -87,29 +87,21 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(file.exists(obs_url)){ - doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE) + returnedDoc <- obs_url } else { - doc = tryCatch({ + + possibleError <- tryCatch({ h <- basicHeaderGatherer() - returnedDoc <- getURI(obs_url, headerfunction = h$update) - if(h$value()["Content-Type"] == "text/xml;charset=UTF-8"){ - xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) - } else { - message(paste("URL caused an error:", obs_url)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } - + returnedDoc <- getURI(obs_url, headerfunction = h$update) }, warning = function(w) { - message(paste("URL caused a warning:", obs_url)) - message(w) + warning(w, "with url:", obs_url) }, error = function(e) { - message(paste("URL does not seem to exist:", obs_url)) - message(e) - return(NA) + stop(e, "with url:", obs_url) }) } + returnedDoc <- xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) + if(tz != ""){ tz <- match.arg(tz, c("America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", @@ -118,7 +110,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ "America/Phoenix","America/Metlakatla")) } - doc <- xmlRoot(doc) + doc <- xmlRoot(returnedDoc) ns <- xmlNamespaceDefinitions(doc, simplify = TRUE) queryInfo <- xmlToList(xmlRoot(xmlDoc(doc[["queryInfo"]]))) names(queryInfo) <- make.unique(names(queryInfo)) @@ -133,9 +125,9 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){ if(0 == length(timeSeries)){ - message("Returning an empty dataset") df <- data.frame() attr(df, "queryInfo") <- queryInfo + attr(df, "url") <- obs_url return(df) } diff --git a/R/importWaterML2.r b/R/importWaterML2.r index 3d4c5baf..0103fbe1 100644 --- a/R/importWaterML2.r +++ b/R/importWaterML2.r @@ -43,30 +43,23 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){ if(file.exists(obs_url)){ - doc <- xmlTreeParse(obs_url, getDTD = FALSE, useInternalNodes = TRUE) + returnedDoc <- obs_url } else { - doc = tryCatch({ - h <- basicHeaderGatherer() - returnedDoc <- getURL(obs_url, headerfunction = h$update) - if(h$value()["Content-Type"] == "text/xml;charset=UTF-8" | - h$value()["Content-Type"] == "text/xml; subtype=gml/3.1.1;charset=UTF-8"){ - xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) - } else { - message(paste("URL caused an error:", obs_url)) - message("Content-Type=",h$value()["Content-Type"]) - return(NA) - } + h <- basicHeaderGatherer() + + possibleError = 
tryCatch({ + + returnedDoc <- getURL(obs_url, headerfunction = h$update) }, warning = function(w) { - message(paste("URL caused a warning:", obs_url)) - message(w) + warning(w, "with url:", obs_url) }, error = function(e) { - message(paste("URL does not seem to exist:", obs_url)) - message(e) - return(NA) + stop(e, "with url:", obs_url) }) } + doc <- xmlTreeParse(returnedDoc, getDTD = FALSE, useInternalNodes = TRUE) + if(tz != ""){ tz <- match.arg(tz, c("America/New_York","America/Chicago", "America/Denver","America/Los_Angeles", @@ -81,9 +74,11 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){ timeSeries <- xpathApply(doc, "//wml2:Collection", namespaces = ns) - + if(0 == length(timeSeries)){ - stop("No data to return for URL:", obs_url) + df <- data.frame() + attr(df, "url") <- obs_url + return(df) } for (i in 1:length(timeSeries)){ diff --git a/R/readNWISqw.r b/R/readNWISqw.r index 7af883a3..6787dd62 100644 --- a/R/readNWISqw.r +++ b/R/readNWISqw.r @@ -81,7 +81,7 @@ readNWISqw <- function (siteNumbers,parameterCd,startDate="",endDate="", } if(reshape & !expanded){ - message("Reshape can only be used with expanded data. Reshape request will be ignored.") + warning("Reshape can only be used with expanded data. Reshape request will be ignored.") } siteInfo <- readNWISsite(siteNumbers) diff --git a/R/readNWISunit.r b/R/readNWISunit.r index 99203ea4..9acf1dc3 100644 --- a/R/readNWISunit.r +++ b/R/readNWISunit.r @@ -54,7 +54,6 @@ #' #' timeZoneChange <- readNWISuv(c('04024430','04024000'),parameterCd, #' "2013-11-03","2013-11-03") -#' firstSite <- timeZoneChange[timeZoneChange$site_no == '04024430',] readNWISuv <- function (siteNumbers,parameterCd,startDate="",endDate="", tz=""){ url <- constructNWISURL(siteNumbers,parameterCd,startDate,endDate,"uv",format="xml") diff --git a/R/whatWQPsites.R b/R/whatWQPsites.R index e370e415..35687f1f 100644 --- a/R/whatWQPsites.R +++ b/R/whatWQPsites.R @@ -80,11 +80,14 @@ whatWQPsites <- function(...){ doc <- getURL(urlCall, headerfunction = h$update) }, warning = function(w) { - message(paste("URL caused a warning:", urlCall)) - message(w) - }, error = function(e) e) + warning(w, "with url:", urlCall ) + }, error = function(e) { + stop(e, "with url:", urlCall) + }) - if(!inherits(possibleError, "error")){ + headerInfo <- h$value() + + if(headerInfo['status'] == "200"){ numToBeReturned <- as.numeric(h$value()["Total-Site-Count"]) @@ -115,6 +118,6 @@ whatWQPsites <- function(...){ return(NA) } } else { - message(e) + stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", urlCall) } } diff --git a/man/importWQP.Rd b/man/importWQP.Rd index c5880afd..e0e9e915 100644 --- a/man/importWQP.Rd +++ b/man/importWQP.Rd @@ -24,12 +24,13 @@ Imports data from the Water Quality Portal based on a specified url. } \examples{ # These examples require an internet connection to run -\dontrun{ + ## Examples take longer than 5 seconds: rawSampleURL <- constructWQPURL('USGS-01594440','01075', '', '') rawSample <- importWQP(rawSampleURL) url2 <- paste0(rawSampleURL,"&zip=yes") rawSample2 <- importWQP(url2, TRUE) +\dontrun{ STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '') STORETdata <- importWQP(STORETex) } diff --git a/man/readNWISpCode.Rd b/man/readNWISpCode.Rd index 85542045..e1147982 100644 --- a/man/readNWISpCode.Rd +++ b/man/readNWISpCode.Rd @@ -7,7 +7,7 @@ readNWISpCode(parameterCd) } \arguments{ \item{parameterCd}{character of USGS parameter codes (or multiple parameter codes). 
These are 5 digit number codes -that can be found here: \link{http://help.waterdata.usgs.gov/codes-and-parameters/parameters}. To get a +that can be found here: \url{http://help.waterdata.usgs.gov/codes-and-parameters/parameters}. To get a complete list of all current parameter codes in the USGS, use "all" as the input.} } \value{ -- GitLab
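Note on using the error-handling changes (illustrative only, not part of the patch): the hunks above replace message() calls with warning()/stop() and check the HTTP status collected by RCurl::basicHeaderGatherer(), so a failed web-service request now signals a catchable R condition instead of just printing a message (and, in some importers, returning NA). A minimal sketch of how calling code might respond follows; it reuses the constructWQPURL()/importWQP() example values already shown in this patch, and assumes the patched dataRetrieval package is installed.

# Minimal sketch only; assumes the patched importWQP() from this patch.
# The site and parameter code are the example values used above.
library(dataRetrieval)

rawSampleURL <- constructWQPURL('USGS-01594440', '01075', '', '')

rawSample <- tryCatch(
  importWQP(rawSampleURL),
  error = function(e) {
    # Non-200 responses now stop() with a message built as
    # "Status:<status>: <statusMessage> For: <url>"; download/RCurl errors
    # are re-raised with the requested url appended ("with url:").
    message("WQP request failed: ", conditionMessage(e))
    NULL
  }
)

Informational notices such as the unavailable-pCode check in constructNWISURL() and the reshape/expanded check in readNWISqw() become warning()s in this patch, so they can be caught the same way with a warning handler if callers need to react to them programmatically.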