diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R
index 8977039d19fa0adb8ef9d0b8ce3f556a12a95240..e016adcb3526ee584151d6c394186de4a3098297 100644
--- a/R/getWebServiceData.R
+++ b/R/getWebServiceData.R
@@ -4,6 +4,7 @@
 #' \code{\link[RCurl]{getURI}} with more informative error messages.
 #'
 #' @param obs_url character containing the url for the retrieval
+#' @param \dots information to pass to header request
 #' @import RCurl
 #' @export
 #' @return raw data from web services
@@ -17,10 +18,10 @@
 #' \dontrun{
 #' rawData <- getWebServiceData(obs_url)
 #' }
-getWebServiceData <- function(obs_url){
+getWebServiceData <- function(obs_url, ...){
   possibleError <- tryCatch({
     h <- basicHeaderGatherer()
-    returnedDoc <- getURI(obs_url, headerfunction = h$update, encoding='gzip')
+    returnedDoc <- getURI(obs_url, headerfunction = h$update, ...)
   }, warning = function(w) {
     warning(w, "with url:", obs_url)
   }, error = function(e) {
diff --git a/R/importRDB1.r b/R/importRDB1.r
index 57f7561ce0b9e507d15c12529a0c7b07ad4e2300..d584f096ffb72bc6053d95b10d51525eba0c43a1 100644
--- a/R/importRDB1.r
+++ b/R/importRDB1.r
@@ -98,7 +98,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
   if(file.exists(obs_url)){
     doc <- obs_url
   } else {
-    doc <- getWebServiceData(obs_url)
+    doc <- getWebServiceData(obs_url, encoding='gzip')
     if("warn" %in% names(attr(doc,"header"))){
       data <- data.frame()
       attr(data, "header") <- attr(doc,"header")
diff --git a/R/importWQP.R b/R/importWQP.R
index ebff7ebd42f5ef932e9ff8084ce545663f899f41..98ef7e26a83f3c631415f853de4dc623f902916f 100644
--- a/R/importWQP.R
+++ b/R/importWQP.R
@@ -50,107 +50,106 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
                           "America/Jamaica","America/Managua",
                           "America/Phoenix","America/Metlakatla"))
   }
+  
   if(!file.exists(obs_url)){
-    h <- basicHeaderGatherer()
-    httpHEAD(obs_url, headerfunction = h$update)
-    
-    headerInfo <- h$value()
-    
-    if(headerInfo['status'] == "200"){
+    if(zip){
+      temp <- tempfile()
+      options(timeout = 120)
+      
+      possibleError <- tryCatch({
+        suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
+      },
+      error = function(e) {
+        stop(e, "with url:", obs_url)
+      }
+      )
+      doc <- temp
+      
+    } else {
+      doc <- getWebServiceData(obs_url)
+      headerInfo <- attr(doc, "headerInfo")
       numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
+      sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
+      
+      totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE)
       
-      if(is.na(numToBeReturned) | numToBeReturned == 0){
+      if(is.na(totalReturned) | totalReturned == 0){
        for(i in grep("Warning",names(headerInfo))){
          warning(headerInfo[i])
        }
        return(data.frame())
-      }
-      
-      if(zip){
-        temp <- tempfile()
-        options(timeout = 120)
-        
-        possibleError <- tryCatch({
-          suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
-        },
-        error = function(e) {
-          stop(e, "with url:", obs_url)
-        }
-        )
-        doc <- unzip(temp)
-        retval <- read_delim(doc,
-                             col_types = cols(`ActivityStartTime/Time` = col_character(),
                                              `ActivityEndTime/Time` = col_character(),
-                                              USGSPCode = col_character(),
-                                              ResultCommentText=col_character(),
-                                              `ActivityDepthHeightMeasure/MeasureValue` = col_number(),
-                                              `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
-                                              ResultMeasureValue = col_number()),
-                             quote = "", delim = "\t")
-        unlink(doc)
-      } else {
-        retval <- read_delim(obs_url,
-                             col_types = cols(`ActivityStartTime/Time` = col_character(),
-                                              `ActivityEndTime/Time` = col_character(),
-                                              USGSPCode = col_character(),
-                                              ResultCommentText=col_character(),
-                                              `ActivityDepthHeightMeasure/MeasureValue` = col_number(),
-                                              `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
-                                              ResultMeasureValue = col_number()),
-                             quote = "", delim = "\t")
-      }
-    } else {
-      stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
+      }
     }
+    
+  } else {
+    doc <- obs_url
+  }
+  
+  if(zip){
+    doc <- unzip(doc)
+    retval <- suppressWarnings(read_delim(doc,
+                    col_types = cols(`ActivityStartTime/Time` = col_character(),
+                                     `ActivityEndTime/Time` = col_character(),
+                                     USGSPCode = col_character(),
+                                     ResultCommentText=col_character(),
+                                     `ActivityDepthHeightMeasure/MeasureValue` = col_number(),
+                                     `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
+                                     ResultMeasureValue = col_number(),
+                                     `WellDepthMeasure/MeasureValue` = col_number(),
+                                     `WellHoleDepthMeasure/MeasureValue` = col_number(),
+                                     `HUCEightDigitCode` = col_character()),
+                    quote = "", delim = "\t"))
+    unlink(doc)
+  } else {
+    retval <- suppressWarnings(read_delim(doc,
+                    col_types = cols(`ActivityStartTime/Time` = col_character(),
+                                     `ActivityEndTime/Time` = col_character(),
+                                     USGSPCode = col_character(),
+                                     ResultCommentText=col_character(),
+                                     `ActivityDepthHeightMeasure/MeasureValue` = col_number(),
+                                     `DetectionQuantitationLimitMeasure/MeasureValue` = col_number(),
+                                     ResultMeasureValue = col_number(),
+                                     `WellDepthMeasure/MeasureValue` = col_number(),
+                                     `WellHoleDepthMeasure/MeasureValue` = col_number(),
+                                     `HUCEightDigitCode` = col_character()),
+                    quote = "", delim = "\t"))
+  }
+  if(!file.exists(obs_url) & !zip){
     actualNumReturned <- nrow(retval)
     
-    if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
-  } else {
+    if(actualNumReturned != numToBeReturned & actualNumReturned != sitesToBeReturned){
+      warning(totalReturned, " sample results were expected, ", actualNumReturned, " were returned")
+    }
+  }
+  
+  if(length(grep("ActivityStartTime",names(retval))) > 0){
    
-    if(zip){
-      doc <- unzip(obs_url)
-      retval <- read_delim(obs_url,
-                           col_types = cols(`ActivityStartTime/Time` = col_character(),
-                                            `ActivityEndTime/Time` = col_character(),
-                                            USGSPCode = col_character(),
-                                            ResultCommentText=col_character()),
-                           quote = "", delim = "\t")
-      unlink(doc)
-    } else {
-      retval <- read_delim(obs_url,
-                           col_types = cols(`ActivityStartTime/Time` = col_character(),
-                                            `ActivityEndTime/Time` = col_character(),
-                                            USGSPCode = col_character(),
-                                            ResultCommentText=col_character()),
-                           quote = "", delim = "\t")
-    }
+    
+    offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
+                                code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
+                                stringsAsFactors = FALSE)
+    
+    retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
+    names(retval)[names(retval) == "offset"] <- "timeZoneStart"
+    retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
+    names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
+    
+    dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
+    
+    retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))
+    
+    retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
+    retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))
+    retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
+    retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
+    
+    retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
   }
-  
-  offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
-                              code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
-                              stringsAsFactors = FALSE)
-  
-  retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
-  names(retval)[names(retval) == "offset"] <- "timeZoneStart"
-  retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
-  names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
-  
-  dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
-  
-  retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))
-  
-  retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
-  retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))
-  
-  retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
-  retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
-  
-  retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
 
   names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))])
 
   return(retval)
diff --git a/R/importWaterML1.r b/R/importWaterML1.r
index 1765808f116df718f221dc873cc48ba01719987f..8faa75d7e457dcb49e874862fc45e111a6869add 100644
--- a/R/importWaterML1.r
+++ b/R/importWaterML1.r
@@ -100,7 +100,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
   if(file.exists(obs_url)){
     rawData <- obs_url
   } else {
-    rawData <- getWebServiceData(obs_url)
+    rawData <- getWebServiceData(obs_url, encoding='gzip')
   }
   
   returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
diff --git a/R/whatNWISsites.R b/R/whatNWISsites.R
index c7b864aed875a4466876972aba4488f8c8fb8935..79efc7c6793aa8dad6daeb0ea4c164c7df3b11e4 100644
--- a/R/whatNWISsites.R
+++ b/R/whatNWISsites.R
@@ -41,7 +41,7 @@ whatNWISsites <- function(...){
   urlCall <- drURL('waterservices',Access=pkg.env$access, format="mapper",
                    arg.list = values)
   
-  rawData <- getWebServiceData(urlCall)
+  rawData <- getWebServiceData(urlCall, encoding='gzip')
   
   doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
   
diff --git a/R/whatWQPsites.R b/R/whatWQPsites.R
index ab97431a95983592e9b967fe55f92d1481cf633e..b7738261a1a602e11d1cb762659d715321e355ac 100644
--- a/R/whatWQPsites.R
+++ b/R/whatWQPsites.R
@@ -98,41 +98,10 @@ whatWQPsites <- function(...){
                    urlCall,
                    "&mimeType=tsv&sorted=no",sep = "")
   
-  doc <- getWebServiceData(urlCall)
-  headerInfo <- attr(doc, "headerInfo")
-  
-  numToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
+  retval <- importWQP(urlCall)
+  retval$queryTime <- Sys.time()
+  
+  return(retval)
   
-  if (!is.na(numToBeReturned) & numToBeReturned != 0){
-    
-    retval <- read.delim(textConnection(doc), header = TRUE,
-                         dec=".", sep='\t', quote="",
-                         colClasses=c('character'),
-                         fill = TRUE)
-    actualNumReturned <- nrow(retval)
-    
-    if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sites were expected, ", actualNumReturned, " were returned")
-    
-    if("LatitudeMeasure" %in% names(retval)){
-      retval$LatitudeMeasure <- as.numeric(retval$LatitudeMeasure)
-    }
-    
-    if("LongitudeMeasure" %in% names(retval)){
-      retval$LongitudeMeasure <- as.numeric(retval$LongitudeMeasure)
-    }
-    
-    retval$queryTime <- Sys.time()
-    
-    return(retval)
-    
-  } else {
-    if(headerInfo['Total-Site-Count'] == "0"){
-      warning("No data returned")
-    }
-    
-    for(i in grep("Warning",names(headerInfo))){
-      warning(headerInfo[i])
-    }
-  }
 }
 
diff --git a/man/getWebServiceData.Rd b/man/getWebServiceData.Rd
index fba5749ba94aedc26ec593446b0284e7fa56c14c..9d049704568d141704066431276f41ce212813f6 100644
--- a/man/getWebServiceData.Rd
+++ b/man/getWebServiceData.Rd
@@ -4,10 +4,12 @@
 \alias{getWebServiceData}
 \title{Function to return data from web services}
 \usage{
-getWebServiceData(obs_url)
+getWebServiceData(obs_url, ...)
 }
 \arguments{
 \item{obs_url}{character containing the url for the retrieval}
+
+\item{\dots}{information to pass to header request}
 }
 \value{
 raw data from web services