Skip to content
Snippets Groups Projects
Commit 95af9ed0 authored by Laura A DeCicco's avatar Laura A DeCicco
Browse files

Switching to dplyr....saving minutes per medium long query.

parent a180c765
No related branches found
No related tags found
1 merge request!144Better encoding and starting switch to dplyr
......@@ -29,7 +29,8 @@ Imports:
lubridate,
plyr,
stats,
utils
utils,
dplyr
Suggests:
xtable,
knitr,
......
......@@ -16,6 +16,7 @@
#' @import lubridate
#' @import utils
#' @import stats
#' @importFrom dplyr left_join
#' @examples
#' # These examples require an internet connection to run
#'
......@@ -40,123 +41,151 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
"America/Phoenix","America/Metlakatla"))
}
if(zip){
h <- basicHeaderGatherer()
httpHEAD(obs_url, headerfunction = h$update)
headerInfo <- h$value()
if(file.exists(obs_url)){
if(zip){
obs_url <- unzip(obs_url)
# retval <- read.csv(doc, header = TRUE, quote="", dec=".", colClasses="character",stringsAsFactors = FALSE)
# unlink(doc)
}
suppressWarnings(namesData <- read.delim(obs_url , header = TRUE, quote="",
dec=".", sep='\t', colClasses='character',nrow=1))
temp <- tempfile()
options(timeout = 120)
classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
possibleError <- tryCatch({
download.file(obs_url,temp, quiet=TRUE, mode='wb')
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
classColumns[grep("MeasureValue",names(classColumns))] <- NA
if(headerInfo['status'] == "200"){
doc <- unzip(temp)
} else {
unlink(temp)
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
suppressWarnings(retval <- read.delim(obs_url, header = TRUE, quote="",
dec=".", sep='\t', colClasses=as.character(classColumns)))
if(zip){
unlink(obs_url)
}
} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
}
numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
if (!is.na(numToBeReturned) & numToBeReturned != 0){
if(zip){
h <- basicHeaderGatherer()
httpHEAD(obs_url, headerfunction = h$update)
headerInfo <- h$value()
temp <- tempfile()
options(timeout = 120)
possibleError <- tryCatch({
download.file(obs_url,temp, quiet=TRUE, mode='wb')
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
if(headerInfo['status'] == "200"){
doc <- unzip(temp)
} else {
unlink(temp)
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
}
# retval <- read.csv(doc, header = TRUE, quote="", dec=".", colClasses="character",stringsAsFactors = FALSE)
# unlink(doc)
} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
# retval <- read.csv(textConnection(doc), header = TRUE, quote="", dec=".", colClasses="character",stringsAsFactors = FALSE)
# unlink(doc)
}
suppressWarnings(namesData <- read.delim(if(zip) doc else textConnection(doc) , header = TRUE, quote="",
dec=".", sep='\t', colClasses='character',nrow=1))
classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
classColumns[grep("MeasureValue",names(classColumns))] <- NA
suppressWarnings(retval <- read.delim(if(zip) doc else textConnection(doc), header = TRUE, quote="",
dec=".", sep='\t', colClasses=as.character(classColumns)))
actualNumReturned <- nrow(retval)
retval[,names(which(sapply(retval[,grep("MeasureValue",names(retval))], function(x)all(is.na(x)))))] <- ""
if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
timeZoneStart <- offsetLibrary[retval$ActivityStartTime.TimeZoneCode]
timeZoneEnd <- offsetLibrary[retval$ActivityEndTime.TimeZoneCode]
timeZoneStart[is.na(timeZoneStart)] <- 0
timeZoneEnd[is.na(timeZoneEnd)] <- 0
if("ActivityStartDate" %in% names(retval)){
if(any(retval$ActivityStartDate != "")){
suppressWarnings(retval$ActivityStartDate <- as.Date(parse_date_time(retval$ActivityStartDate, c("Ymd", "mdY"))))
}
}
if("ActivityEndDate" %in% names(retval)){
if(any(retval$ActivityEndDate != "")){
suppressWarnings(retval$ActivityEndDate <- as.Date(parse_date_time(retval$ActivityEndDate, c("Ymd", "mdY"))))
}
}
if(any(!is.na(timeZoneStart))){
dec=".", sep='\t', colClasses=as.character(classColumns)))
unlink(doc)
retval$ActivityStartDateTime <- with(retval, as.POSIXct(paste(ActivityStartDate, ActivityStartTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityStartDateTime <- retval$ActivityStartDateTime + timeZoneStart*60*60
retval$ActivityStartDateTime <- as.POSIXct(retval$ActivityStartDateTime)
if(tz != ""){
attr(retval$ActivityStartDateTime, "tzone") <- tz
} else {
attr(retval$ActivityStartDateTime, "tzone") <- "UTC"
}
}
if(any(!is.na(timeZoneEnd))){
retval$ActivityEndDateTime <- with(retval, as.POSIXct(paste(ActivityEndDate, ActivityEndTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityEndDateTime <- retval$ActivityEndDateTime + timeZoneEnd*60*60
retval$ActivityEndDateTime <- as.POSIXct(retval$ActivityEndDateTime)
if(tz != ""){
attr(retval$ActivityEndDateTime, "tzone") <- tz
} else {
attr(retval$ActivityEndDateTime, "tzone") <- "UTC"
}
}
if(all(is.na(retval$ActivityEndDateTime))){
retval$ActivityEndDateTime <- NULL
}
if(zip){
unlink(doc)
}
retval <- retval[order(retval$OrganizationIdentifier,
retval$MonitoringLocationIdentifier,
retval$ActivityStartDateTime, decreasing = FALSE),]
return(retval)
numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
} else {
if(headerInfo['Total-Result-Count'] == "0"){
warning("No data returned")
return(data.frame())
}
for(i in grep("Warning",names(headerInfo))){
warning(headerInfo[i])
if(is.na(numToBeReturned) | numToBeReturned == 0){
for(i in grep("Warning",names(headerInfo))){
warning(headerInfo[i])
}
return(data.frame())
}
actualNumReturned <- nrow(retval)
if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
}
retval[,names(which(sapply(retval[,grep("MeasureValue",names(retval))], function(x)all(is.na(x)))))] <- ""
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"),
stringsAsFactors = FALSE)
retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime.TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneStart"
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime.TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
retval$timeZoneStart[is.na(retval$timeZoneStart)] <- 0
retval$timeZoneEnd[is.na(retval$timeZoneEnd)] <- 0
if("ActivityStartDate" %in% names(retval)){
if(any(retval$ActivityStartDate != "")){
suppressWarnings(retval$ActivityStartDate <- as.Date(parse_date_time(retval$ActivityStartDate, c("Ymd", "mdY"))))
}
}
if("ActivityEndDate" %in% names(retval)){
if(any(retval$ActivityEndDate != "")){
suppressWarnings(retval$ActivityEndDate <- as.Date(parse_date_time(retval$ActivityEndDate, c("Ymd", "mdY"))))
}
}
if(any(!is.na(retval$timeZoneStart))){
retval$ActivityStartDateTime <- with(retval, as.POSIXct(paste(ActivityStartDate, ActivityStartTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityStartDateTime <- retval$ActivityStartDateTime + retval$timeZoneStart*60*60
retval$ActivityStartDateTime <- as.POSIXct(retval$ActivityStartDateTime)
if(tz != ""){
attr(retval$ActivityStartDateTime, "tzone") <- tz
} else {
attr(retval$ActivityStartDateTime, "tzone") <- "UTC"
}
}
if(any(!is.na(retval$timeZoneEnd))){
retval$ActivityEndDateTime <- with(retval, as.POSIXct(paste(ActivityEndDate, ActivityEndTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityEndDateTime <- retval$ActivityEndDateTime + retval$timeZoneEnd*60*60
retval$ActivityEndDateTime <- as.POSIXct(retval$ActivityEndDateTime)
if(tz != ""){
attr(retval$ActivityEndDateTime, "tzone") <- tz
} else {
attr(retval$ActivityEndDateTime, "tzone") <- "UTC"
}
}
if(all(is.na(retval$ActivityEndDateTime))){
retval$ActivityEndDateTime <- NULL
}
retval <- retval[order(retval$OrganizationIdentifier,
retval$MonitoringLocationIdentifier,
retval$ActivityStartDateTime, decreasing = FALSE),]
return(retval)
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment