Skip to content
Snippets Groups Projects

Moving to dplyr for real

Merged Laura A DeCicco requested to merge ldecicco-USGS:master into master
5 files
+ 64
49
Compare changes
  • Side-by-side
  • Inline
Files
5
+ 54
43
 
 
#' Basic Water Quality Portal Data parser
#' Basic Water Quality Portal Data parser
#'
#'
#' Imports data from the Water Quality Portal based on a specified url.
#' Imports data from the Water Quality Portal based on a specified url.
@@ -15,7 +17,13 @@
@@ -15,7 +17,13 @@
#' @import RCurl
#' @import RCurl
#' @import utils
#' @import utils
#' @import stats
#' @import stats
#' @import data.table
#' @importFrom readr read_delim
 
#' @importFrom readr col_character
 
#' @importFrom readr cols
 
#' @importFrom dplyr mutate_
 
#' @importFrom dplyr mutate_each_
 
#' @importFrom dplyr select_
 
#' @importFrom dplyr left_join
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate fast_strptime
#' @importFrom lubridate fast_strptime
#' @examples
#' @examples
@@ -62,7 +70,7 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
@@ -62,7 +70,7 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
}
}
if(headerInfo['status'] == "200"){
if(headerInfo['status'] == "200"){
if(zip){
if(zip){
temp <- tempfile()
temp <- tempfile()
options(timeout = 120)
options(timeout = 120)
@@ -74,16 +82,21 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
@@ -74,16 +82,21 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
stop(e, "with url:", obs_url)
stop(e, "with url:", obs_url)
}
}
)
)
doc <- unzip(temp)
if(headerInfo['status'] == "200"){
retval <- read_delim(doc,
doc <- unzip(temp)
col_types = cols(`ActivityStartTime/Time` = col_character(),
retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE)
`ActivityEndTime/Time` = col_character(),
} else {
USGSPCode = col_character(),
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
ResultCommentText=col_character()),
}
quote = "", delim = "\t")
unlink(doc)
unlink(doc)
} else {
} else {
retval <- suppressWarnings(fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE))
retval <- read_delim(obs_url,
 
col_types = cols(`ActivityStartTime/Time` = col_character(),
 
`ActivityEndTime/Time` = col_character(),
 
USGSPCode = col_character(),
 
ResultCommentText=col_character()),
 
quote = "", delim = "\t")
}
}
} else {
} else {
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
@@ -96,49 +109,47 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
@@ -96,49 +109,47 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
if(zip){
if(zip){
doc <- unzip(obs_url)
doc <- unzip(obs_url)
retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE)
retval <- read_delim(obs_url,
 
col_types = cols(`ActivityStartTime/Time` = col_character(),
 
`ActivityEndTime/Time` = col_character(),
 
USGSPCode = col_character(),
 
ResultCommentText=col_character()),
 
quote = "", delim = "\t")
unlink(doc)
unlink(doc)
} else {
} else {
retval <- fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE)
retval <- read_delim(obs_url,
 
col_types = cols(`ActivityStartTime/Time` = col_character(),
 
`ActivityEndTime/Time` = col_character(),
 
USGSPCode = col_character(),
 
ResultCommentText=col_character()),
 
quote = "", delim = "\t")
}
}
}
}
dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
retval <- suppressWarnings(retval[, (dateCols) := lapply(.SD, function(x) as.Date(parse_date_time(x, c("Ymd", "mdY")))),
.SDcols = dateCols])
numTmp <- names(retval)[grep("Value",names(retval))]
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
retval <- suppressWarnings(retval[, (numTmp) := lapply(.SD, as.numeric), .SDcols = numTmp])
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
 
stringsAsFactors = FALSE)
offsetLibrary <- data.table(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"),
names(retval)[names(retval) == "offset"] <- "timeZoneStart"
key = "code")
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
retval <- setkey(retval, ActivityStartTime/TimeZoneCode)
names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
retval <- retval[,timeZoneStart:=offsetLibrary[SJ(retval$`ActivityStartTime/TimeZoneCode`)]$offset]
retval <- setkey(retval, ActivityEndTime/TimeZoneCode)
retval <- retval[,timeZoneEnd:=offsetLibrary[SJ(retval$`ActivityEndTime/TimeZoneCode`)]$offset]
retval <- retval[,ActivityStartDateTime:=paste(ActivityStartDate, `ActivityStartTime/Time`)]
dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
retval <- retval[,ActivityStartDateTime:=fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart]
retval <- retval[,ActivityEndDateTime:=paste(ActivityEndDate, `ActivityEndTime/Time`)]
retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))
retval <- retval[,ActivityEndDateTime:=fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneEnd]
retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))
# if(all(is.na(retval$ActivityEndDateTime))){
# retval$ActivityEndDateTime <- NULL
retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
# }
retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
retval <- retval[order(OrganizationIdentifier,
retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
MonitoringLocationIdentifier,
names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))])
ActivityStartDateTime)]
retval <- setDF(retval)
names(retval) <- gsub("/",".",names(retval))
return(retval)
return(retval)
}
}
\ No newline at end of file
Loading