Commit 65e66a60 authored by Laura A DeCicco's avatar Laura A DeCicco

Merge pull request #179 from ldecicco-USGS/master

Second shot at httr
parents 83dc1c6b c103a720
Package: dataRetrieval
Type: Package
Title: Retrieval Functions for USGS and EPA Hydrologic and Water Quality Data
-Version: 2.4.9
-Date: 2016-02-12
+Version: 2.5.0
+Date: 2016-02-24
Authors@R: c( person("Robert", "Hirsch", role = c("aut"),
email = "rhirsch@usgs.gov"),
person("Laura", "DeCicco", role = c("aut","cre"),
@@ -28,7 +28,8 @@ Depends:
R (>= 3.0)
Imports:
XML,
-RCurl,
+httr,
+curl,
reshape2,
lubridate,
stats,
......
@@ -33,15 +33,11 @@ export(whatWQPsites)
export(zeroPad)
import(stats)
import(utils)
-importFrom(RCurl,basicHeaderGatherer)
-importFrom(RCurl,curlOptions)
-importFrom(RCurl,curlVersion)
-importFrom(RCurl,getBinaryURL)
-importFrom(RCurl,getURI)
importFrom(XML,xmlAttrs)
importFrom(XML,xmlDoc)
importFrom(XML,xmlName)
importFrom(XML,xmlNamespaceDefinitions)
importFrom(XML,xmlParse)
importFrom(XML,xmlRoot)
importFrom(XML,xmlSize)
importFrom(XML,xmlToList)
@@ -49,11 +45,20 @@ importFrom(XML,xmlTreeParse)
importFrom(XML,xmlValue)
importFrom(XML,xpathApply)
importFrom(XML,xpathSApply)
+importFrom(curl,curl_version)
+importFrom(dplyr,full_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,rbind_all)
importFrom(dplyr,select_)
+importFrom(httr,GET)
+importFrom(httr,content)
+importFrom(httr,headers)
+importFrom(httr,status_code)
+importFrom(httr,stop_for_status)
+importFrom(httr,user_agent)
+importFrom(httr,write_disk)
importFrom(lubridate,fast_strptime)
importFrom(lubridate,parse_date_time)
importFrom(readr,col_character)
......
@@ -5,9 +5,13 @@
#'
#' @param obs_url character containing the url for the retrieval
#' @param \dots information to pass to header request
-#' @importFrom RCurl basicHeaderGatherer
-#' @importFrom RCurl getURI
-#' @importFrom RCurl curlVersion
+#' @importFrom httr GET
+#' @importFrom httr user_agent
+#' @importFrom httr stop_for_status
+#' @importFrom httr status_code
+#' @importFrom httr headers
+#' @importFrom httr content
+#' @importFrom curl curl_version
#' @export
#' @return raw data from web services
#' @examples
@@ -22,36 +26,51 @@
#' }
getWebServiceData <- function(obs_url, ...){
-possibleError <- tryCatch({
-h <- basicHeaderGatherer()
-returnedDoc <- getURI(obs_url, headerfunction = h$update,
-useragent = default_ua(), ...)
-}, warning = function(w) {
-warning(w, "with url:", obs_url)
-}, error = function(e) {
-stop(e, "with url:", obs_url)
-})
+returnedList <- GET(obs_url, ..., user_agent(default_ua()))
-headerInfo <- h$value()
-if(headerInfo['status'] != "200"){
-stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
-} else {
-if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
-message(returnedDoc)
-headerInfo['warn'] <- returnedDoc
+if(status_code(returnedList) != 200){
+message("For: ", obs_url,"\n")
+stop_for_status(returnedList)
+} else {
+headerInfo <- headers(returnedList)
+if(headerInfo$`content-type` == "text/tab-separated-values;charset=UTF-8"){
+returnedDoc <- content(returnedList, type="text",encoding = "UTF-8")
+} else if (headerInfo$`content-type` == "text/xml;charset=UTF-8"){
+returnedDoc <- xmlcontent(returnedList)
+} else {
+returnedDoc <- content(returnedList)
+if(grepl("No sites/data found using the selection criteria specified", returnedDoc)){
+message(returnedDoc)
+}
+}
+attr(returnedDoc, "headerInfo") <- headerInfo
+return(returnedDoc)
}
-attr(returnedDoc, "headerInfo") <- headerInfo
-return(returnedDoc)
}
}
default_ua <- function() {
versions <- c(
-libcurl = RCurl::curlVersion()$version,
-RCurl = as.character(packageVersion("RCurl")),
+libcurl = curl_version()$version,
+httr = as.character(packageVersion("httr")),
dataRetrieval = as.character(packageVersion("dataRetrieval"))
)
paste0(names(versions), "/", versions, collapse = " ")
}
+#' drop-in replacement for httr's switch to xml2 from XML
+#'
+#' reverts to the pre-v1.1.0 httr parsing (XML package rather than xml2)
+#'
+#' @param response the result of httr::GET(url)
+#' @keywords internal
+#' @importFrom XML xmlParse
+xmlcontent <- function(response){
+XML::xmlTreeParse(iconv(readBin(response$content, character()), from = "UTF-8", to = "UTF-8"),
+useInternalNodes=TRUE,getDTD = FALSE)
+}
\ No newline at end of file
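For reference, the request pattern this commit adopts boils down to the following minimal standalone sketch (not package code; the NWIS URL and user-agent string are only illustrative):

library(httr)

# build a name/version user agent, as default_ua() does
ua <- paste0("dataRetrieval-sketch/0.1 httr/", packageVersion("httr"))

resp <- GET("https://waterservices.usgs.gov/nwis/dv/?format=waterml,1.1&sites=02177000&period=P7D",
            user_agent(ua))

if(status_code(resp) != 200){
  stop_for_status(resp)  # raises an R error carrying the HTTP status
}

# branch on the content-type header, as getWebServiceData() now does
if(grepl("xml", headers(resp)$`content-type`)){
  doc <- XML::xmlTreeParse(content(resp, as = "text", encoding = "UTF-8"),
                           asText = TRUE, useInternalNodes = TRUE, getDTD = FALSE)
} else {
  doc <- content(resp, as = "text", encoding = "UTF-8")
}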
@@ -48,6 +48,7 @@
#' @importFrom readr read_delim
#' @importFrom readr problems
#' @importFrom readr parse_number
+#' @importFrom lubridate fast_strptime
#' @examples
#' siteNumber <- "02177000"
#' startDate <- "2012-09-01"
@@ -166,7 +167,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")
-varval <- as.POSIXct(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")
+varval <- fast_strptime(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")
if(!all(is.na(varval))){
readr.data[,varname] <- varval
@@ -198,7 +199,7 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(all(c("DATE","TIME","TZCD") %in% header.names)){
varname <- "DATETIME"
-varval <- as.POSIXct(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
+varval <- fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC")
readr.data[,varname] <- varval
readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
}
......
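Both timestamp conversions above swap as.POSIXct() for lubridate's fast_strptime(). A hedged sketch of the difference (invented timestamps):

library(lubridate)

x <- c("2012-09-01 12:30", "2012-09-02 06:15")

# base-R parser
as.POSIXct(x, format = "%Y-%m-%d %H:%M", tz = "UTC")

# C-level parser with a fixed format: same instants, much faster on long
# vectors; note it returns POSIXlt rather than POSIXct
fast_strptime(x, "%Y-%m-%d %H:%M", tz = "UTC")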
@@ -26,9 +26,9 @@
#' @importFrom dplyr left_join
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate fast_strptime
-#' @importFrom RCurl basicHeaderGatherer
-#' @importFrom RCurl getBinaryURL
-#' @importFrom RCurl curlOptions
+#' @importFrom httr GET
+#' @importFrom httr user_agent
+#' @importFrom httr write_disk
#' @examples
#' # These examples require an internet connection to run
#'
@@ -38,8 +38,8 @@
#'
#' rawSample <- importWQP(rawSampleURL)
#'
-#' rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE)
-#' rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE)
+#' rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE)
+#' rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE)
#'
#' STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '')
#' STORETdata <- importWQP(STORETex)
@@ -59,22 +59,28 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
if(zip){
message("zip encoding access still in development")
temp <- tempfile()
-options(timeout = 120)
-h <- basicHeaderGatherer()
-myOpts = curlOptions(verbose = FALSE,
-header = FALSE,
-useragent = default_ua())
+temp <- paste0(temp,".zip")
+doc <- GET(obs_url, user_agent(default_ua()),
+write_disk(temp))
-doc <- getBinaryURL(obs_url, .opts=myOpts, headerfunction = h$update)
-headerInfo <- h$value()
+headerInfo <- headers(doc)
} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
}
-numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
-sitesToBeReturned <- as.numeric(headerInfo["Total-Site-Count"])
+numToBeReturned <- 0
+sitesToBeReturned <- 0
+if("total-result-count" %in% names(headerInfo)){
+numToBeReturned <- as.numeric(headerInfo["total-result-count"])
+}
+if("total-site-count" %in% names(headerInfo)){
+sitesToBeReturned <- as.numeric(headerInfo["total-site-count"])
+}
totalReturned <- sum(numToBeReturned, sitesToBeReturned,na.rm = TRUE)
@@ -88,11 +94,6 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
}
if(zip){
-temp <- paste0(temp,".zip")
-f <- file(temp, "wb")
-writeBin(doc, con = f)
-close(f)
doc <- unzip(temp)
}
......
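A self-contained sketch of the new zip branch, using the package's own constructWQPURL() with the site and parameter code from the examples above (the user-agent string is illustrative):

library(httr)
library(dataRetrieval)

obs_url <- constructWQPURL('USGS-01594440', '01075', '', '', TRUE)

temp <- paste0(tempfile(), ".zip")
# write_disk() streams the body straight to a file, replacing
# getBinaryURL() + writeBin()
resp <- GET(obs_url, user_agent("dataRetrieval-sketch"), write_disk(temp))

# WQP count headers are now lower-case, so guard before reading them
if("total-result-count" %in% names(headers(resp))){
  numToBeReturned <- as.numeric(headers(resp)[["total-result-count"]])
}

doc <- unzip(temp)  # paths of the extracted file(s)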
@@ -51,6 +51,8 @@
#' @import stats
#' @importFrom reshape2 melt
#' @importFrom reshape2 dcast
+#' @importFrom lubridate parse_date_time
+#' @importFrom dplyr full_join
#' @examples
#' siteNumber <- "02177000"
#' startDate <- "2012-09-01"
@@ -59,7 +61,7 @@
#' property <- '00060'
#' obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv')
#' \dontrun{
-#' data <- importWaterML1(obs_url)
+#' data <- importWaterML1(obs_url, asDateTime=TRUE)
#'
#' groundWaterSite <- "431049071324301"
#' startGW <- "2013-10-01"
@@ -106,12 +108,11 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
if(file.exists(obs_url)){
rawData <- obs_url
+returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
} else {
-rawData <- getWebServiceData(obs_url, encoding='gzip')
+returnedDoc <- getWebServiceData(obs_url, encoding='gzip')
}
-returnedDoc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
@@ -181,9 +182,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
value <- as.numeric(xpathSApply(subChunk, "ns1:value",namespaces = chunkNS, xmlValue))
if(length(value)!=0){
-# value[value == noValue] <- NA
attNames <- xpathSApply(subChunk, "ns1:value/@*",namespaces = chunkNS)
attributeNames <- unique(names(attNames))
@@ -197,7 +196,6 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
if(length(methodDescription) > 0 && methodDescription != ""){
valueName <- paste("X",methodDescription,pCode,statCd,sep="_")
}
assign(valueName,value)
@@ -235,64 +233,32 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
if("dateTime" %in% attributeNames){
datetime <- xpathSApply(subChunk, "ns1:value/@dateTime",namespaces = chunkNS)
-numChar <- nchar(datetime)
if(asDateTime){
-# Common options:
-# YYYY numChar=4
-# YYYY-MM-DD numChar=10
-# YYYY-MM-DDTHH:MM numChar=16
-# YYYY-MM-DDTHH:MM:SS numChar=19
-# YYYY-MM-DDTHH:MM:SSZ numChar=20
-# YYYY-MM-DDTHH:MM:SS.000 numChar=23
-# YYYY-MM-DDTHH:MM:SS.000-XX:00 numChar=29
-if(abs(max(numChar) - min(numChar)) != 0){
-warning("Mixed date types, not converted to POSIXct")
-} else {
-numChar <- numChar[1]
-if(numChar == 4){
-datetime <- as.POSIXct(datetime, "%Y", tz = "UTC")
-} else if(numChar == 10){
-datetime <- as.POSIXct(datetime, "%Y-%m-%d", tz = "UTC")
-} else if(numChar == 16){
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M", tz = "UTC")
-} else if(numChar == 19){
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC")
-} else if(numChar == 20){
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%S", tz = "UTC")
-} else if(numChar == 23){
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
-} else if(numChar == 24){
-datetime <- substr(datetime,1,23)
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
-df$tz_cd <- rep(zoneAbbrievs[1], nrow(df))
-} else if(numChar == 29){
-tzOffset <- as.character(substr(datetime,24,numChar))
-tzHours <- as.numeric(substr(tzOffset,1,3))
-datetime <- substr(datetime,1,23)
-datetime <- as.POSIXct(datetime, "%Y-%m-%dT%H:%M:%OS", tz = "UTC")
-datetime <- datetime - tzHours*60*60
-df$tz_cd <- as.character(zoneAbbrievs[tzOffset])
-}
+numChar <- nchar(datetime)
+datetime <- parse_date_time(datetime, c("%Y","%Y-%m-%d","%Y-%m-%dT%H:%M",
+"%Y-%m-%dT%H:%M:%S","%Y-%m-%dT%H:%M:%OS",
+"%Y-%m-%dT%H:%M:%OS%z"), exact = TRUE)
+if(any(numChar < 20) & any(numChar > 16)){
+offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0),
+code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST",""),
+stringsAsFactors = FALSE)
+if(!("tz_cd" %in% names(df))){
+df$tz_cd <- zoneAbbrievs[1]
+tzHours <- as.numeric(substr(names(zoneAbbrievs[1]),1,3))
+datetime <- datetime - tzHours*60*60
+}
+datetime[numChar < 20 & numChar > 16] <- datetime[numChar < 20 & numChar > 16] + offsetLibrary[offsetLibrary$code == zoneAbbrievs[1],"offset"]*60*60
}
} else {
datetime <- as.character(datetime)
numChar <- nchar(datetime)
if(any(numChar == 29)){
tzOffset <- as.character(substr(datetime,24,numChar))
df$tz_cd <- as.character(zoneAbbrievs[tzOffset])
+df$tz_cd[is.na(df$tz_cd)] <- zoneAbbrievs[1]
} else {
df$tz_cd <- zoneAbbrievs[1]
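The parse_date_time() call above replaces the whole nchar()-based if/else chain: given a vector of candidate formats and exact = TRUE (the formats are taken as literal strptime patterns, not lubridate orders), it parses mixed-width WaterML timestamps in one pass. A toy illustration with invented values:

library(lubridate)

datetime <- c("2015", "2015-10-01", "2015-10-01T08:15", "2015-10-01T08:15:30")

# each element is matched against the format list; the real call also
# includes the %OS (fractional seconds) and %z (UTC offset) variants
parse_date_time(datetime, c("%Y", "%Y-%m-%d", "%Y-%m-%dT%H:%M",
                            "%Y-%m-%dT%H:%M:%S"), exact = TRUE)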
@@ -319,7 +285,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
mergedDF <- df
} else {
similarNames <- intersect(names(mergedDF), names(df))
-mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE)
+# mergedDF <- merge(mergedDF, df,by=similarNames,all=TRUE)
+mergedDF <- full_join(mergedDF, df, by=similarNames)
}
} else {
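The merge() to full_join() swap above keeps the same outer-join semantics. A minimal illustration with invented frames:

library(dplyr)

a <- data.frame(site_no = c("A", "B"), flow = c(1.2, 3.4), stringsAsFactors = FALSE)
b <- data.frame(site_no = c("B", "C"), stage = c(0.5, 0.7), stringsAsFactors = FALSE)

merge(a, b, by = "site_no", all = TRUE)  # base-R full outer join
full_join(a, b, by = "site_no")          # dplyr equivalent, faster on large frames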
@@ -386,7 +353,8 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
} else {
similarSites <- intersect(names(siteInformation), names(siteInfo))
-siteInformation <- merge(siteInformation, siteInfo, by=similarSites, all=TRUE)
+# siteInformation <- merge(siteInformation, siteInfo, by=similarSites, all=TRUE)
+siteInformation <- full_join(siteInformation, siteInfo, by=similarSites)
similarVariables <- intersect(names(variableInformation),names(variableInfo))
variableInformation <- merge(variableInformation, variableInfo, by=similarVariables, all=TRUE)
@@ -431,7 +399,7 @@ importWaterML1 <- function(obs_url,asDateTime=FALSE, tz=""){
newRows <- rbind(meltedmergedDF[indexDups, ], valDF[matchIndexes,])
-mergedDF3 <- dcast(newRows, castFormula, drop=FALSE, value.var = "value",)
+mergedDF3 <- dcast(newRows, castFormula, drop=FALSE, value.var = "value")
mergedDF2 <- rbind(mergedDF2, mergedDF3)
mergedDF2 <- mergedDF2[order(mergedDF2$dateTime),]
......
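For context, the dcast() call above (the edit itself only drops a stray trailing comma) pivots melted value rows back to one column per variable. A toy version with invented data:

library(reshape2)

long <- data.frame(dateTime = c("2012-09-01", "2012-09-01", "2012-09-02"),
                   variable = c("flow", "stage", "flow"),
                   value = c(1.2, 0.5, 3.4))

# drop = FALSE keeps combinations with no observations as NA
dcast(long, dateTime ~ variable, drop = FALSE, value.var = "value")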
@@ -51,12 +51,11 @@ importWaterML2 <- function(obs_url, asDateTime=FALSE, tz=""){
if(file.exists(obs_url)){
rawData <- obs_url
+doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
} else {
-rawData <- getWebServiceData(obs_url)
+doc <- getWebServiceData(obs_url)
}
-doc <- xmlTreeParse(rawData, getDTD = FALSE, useInternalNodes = TRUE)
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
......
@@ -178,7 +178,6 @@ readNWISdata <- function(service="dv", ...){
if(length(grep("rdb",values["format"])) >0){
retval <- importRDB1(urlCall, asDateTime = TRUE, tz = tz)
-# retval <- importRDB1(urlCall, asDateTime = (service == "qwdata"), tz = tz)
} else {
retval <- importWaterML1(urlCall, asDateTime = ("iv" == service), tz= tz)
}
......
@@ -32,8 +32,8 @@ rawSampleURL <- constructWQPURL('USGS-01594440','01075', '', '')
rawSample <- importWQP(rawSampleURL)
-rawSampleURL_noZip <- constructWQPURL('USGS-01594440','01075', '', '', FALSE)
-rawSample2 <- importWQP(rawSampleURL_noZip, zip=FALSE)
+rawSampleURL_Zip <- constructWQPURL('USGS-01594440','01075', '', '', TRUE)
+rawSample2 <- importWQP(rawSampleURL_Zip, zip=TRUE)
STORETex <- constructWQPURL('WIDNR_WQX-10032762','Specific conductance', '', '')
STORETdata <- importWQP(STORETex)
......
@@ -56,7 +56,7 @@ offering <- '00003'
property <- '00060'
obs_url <- constructNWISURL(siteNumber,property,startDate,endDate,'dv')
\dontrun{
-data <- importWaterML1(obs_url)
+data <- importWaterML1(obs_url, asDateTime=TRUE)
groundWaterSite <- "431049071324301"
startGW <- "2013-10-01"
......