From ea29c81fb6dd592cfaa0f676f53aba5898f79513 Mon Sep 17 00:00:00 2001 From: Laura DeCicco <ldecicco@usgs.gov> Date: Thu, 19 Nov 2015 16:08:28 -0600 Subject: [PATCH] BACK to dplyr and readr. --- DESCRIPTION | 4 +- NAMESPACE | 8 +++- R/importWQP.R | 97 +++++++++++++++++++++++++++--------------------- R/readWQPqw.r | 2 +- R/whatNWISData.r | 2 +- 5 files changed, 64 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1213829a..2a9a3222 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,12 +26,12 @@ Imports: XML, RCurl, reshape2, - data.table, lubridate, plyr, stats, utils, - dplyr + dplyr, + readr Suggests: xtable, knitr, diff --git a/NAMESPACE b/NAMESPACE index 132e8260..f5888a0b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,13 +33,17 @@ export(whatWQPsites) export(zeroPad) import(RCurl) import(XML) -import(data.table) -import(lubridate) import(stats) import(utils) importFrom(dplyr,left_join) +importFrom(dplyr,mutate_) +importFrom(dplyr,mutate_each_) +importFrom(dplyr,select_) importFrom(lubridate,fast_strptime) importFrom(lubridate,parse_date_time) importFrom(plyr,rbind.fill.matrix) +importFrom(readr,col_character) +importFrom(readr,cols) +importFrom(readr,read_delim) importFrom(reshape2,dcast) importFrom(reshape2,melt) diff --git a/R/importWQP.R b/R/importWQP.R index 97fb6321..00d304eb 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -1,3 +1,5 @@ + + #' Basic Water Quality Portal Data parser #' #' Imports data from the Water Quality Portal based on a specified url. @@ -15,7 +17,13 @@ #' @import RCurl #' @import utils #' @import stats -#' @import data.table +#' @importFrom readr read_delim +#' @importFrom readr col_character +#' @importFrom readr cols +#' @importFrom dplyr mutate_ +#' @importFrom dplyr mutate_each_ +#' @importFrom dplyr select_ +#' @importFrom dplyr left_join #' @importFrom lubridate parse_date_time #' @importFrom lubridate fast_strptime #' @examples @@ -62,7 +70,7 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ } if(headerInfo['status'] == "200"){ - + if(zip){ temp <- tempfile() options(timeout = 120) @@ -74,16 +82,21 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ stop(e, "with url:", obs_url) } ) - - if(headerInfo['status'] == "200"){ - doc <- unzip(temp) - retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE) - } else { - stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) - } + doc <- unzip(temp) + retval <- read_delim(doc, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t") unlink(doc) } else { - retval <- suppressWarnings(fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE)) + retval <- read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t") } } else { stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) @@ -96,49 +109,47 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ if(zip){ doc <- unzip(obs_url) - retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE) + retval <- read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t") unlink(doc) } else { - retval <- fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE) + retval <- read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t") } - + } - - dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate") - retval <- suppressWarnings(retval[, (dateCols) := lapply(.SD, function(x) as.Date(parse_date_time(x, c("Ymd", "mdY")))), - .SDcols = dateCols]) - numTmp <- names(retval)[grep("Value",names(retval))] - retval <- suppressWarnings(retval[, (numTmp) := lapply(.SD, as.numeric), .SDcols = numTmp]) + offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0), + code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA), + stringsAsFactors = FALSE) - offsetLibrary <- data.table(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), - code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"), - key = "code") - retval <- setkey(retval, ActivityStartTime/TimeZoneCode) - retval <- retval[,timeZoneStart:=offsetLibrary[SJ(retval$`ActivityStartTime/TimeZoneCode`)]$offset] - retval <- setkey(retval, ActivityEndTime/TimeZoneCode) - retval <- retval[,timeZoneEnd:=offsetLibrary[SJ(retval$`ActivityEndTime/TimeZoneCode`)]$offset] + retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code")) + names(retval)[names(retval) == "offset"] <- "timeZoneStart" + retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code")) + names(retval)[names(retval) == "offset"] <- "timeZoneEnd" - retval <- retval[,ActivityStartDateTime:=paste(ActivityStartDate, `ActivityStartTime/Time`)] - retval <- retval[,ActivityStartDateTime:=fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart] + dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate") - retval <- retval[,ActivityEndDateTime:=paste(ActivityEndDate, `ActivityEndTime/Time`)] - retval <- retval[,ActivityEndDateTime:=fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneEnd] + retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols)) - - -# if(all(is.na(retval$ActivityEndDateTime))){ -# retval$ActivityEndDateTime <- NULL -# } - - retval <- retval[order(OrganizationIdentifier, - MonitoringLocationIdentifier, - ActivityStartDateTime)] + retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`)) + retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`)) + + retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + + retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart) + names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))]) - retval <- setDF(retval) - names(retval) <- gsub("/",".",names(retval)) return(retval) - -} \ No newline at end of file +} diff --git a/R/readWQPqw.r b/R/readWQPqw.r index 3ca39727..684fd1a0 100644 --- a/R/readWQPqw.r +++ b/R/readWQPqw.r @@ -151,7 +151,7 @@ readWQPqw <- function(siteNumbers,parameterCd,startDate="",endDate="",tz=""){ stringsAsFactors=FALSE) variableInfo <- unique(variableInfo) - if(any(variableInfo$parameterCd != "")){ + if(any(!is.na(variableInfo$parameterCd))){ pcodes <- unique(variableInfo$parameterCd[!is.na(variableInfo$parameterCd)]) pcodes <- pcodes["" != pcodes] paramINFO <- readNWISpCode(pcodes) diff --git a/R/whatNWISData.r b/R/whatNWISData.r index 50067b22..1b9dbe4e 100644 --- a/R/whatNWISData.r +++ b/R/whatNWISData.r @@ -54,7 +54,7 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' @export -#' @import lubridate +#' @importFrom lubridate parse_date_time #' @examples #' \dontrun{ #' availableData <- whatNWISdata('05114000') -- GitLab