diff --git a/DESCRIPTION b/DESCRIPTION index 5bc6ef5235d70c18e857f25e6f53de652c9d874b..2a9a32221c2c3031b9b694456501db1a82000097 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,8 @@ Imports: plyr, stats, utils, - dplyr + dplyr, + readr Suggests: xtable, knitr, @@ -39,4 +40,4 @@ VignetteBuilder: knitr BuildVignettes: true BugReports: https://github.com/USGS-R/dataRetrieval/issues URL: https://github.com/USGS-R/dataRetrieval, http://pubs.usgs.gov/tm/04/a10/ -RoxygenNote: 5.0.0 +RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 8109f84198c681459fa01e11a48f1c73b23615a0..f5888a0b325a5bdf0e92e0af15a94446af366091 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,10 +33,17 @@ export(whatWQPsites) export(zeroPad) import(RCurl) import(XML) -import(lubridate) import(stats) import(utils) importFrom(dplyr,left_join) +importFrom(dplyr,mutate_) +importFrom(dplyr,mutate_each_) +importFrom(dplyr,select_) +importFrom(lubridate,fast_strptime) +importFrom(lubridate,parse_date_time) importFrom(plyr,rbind.fill.matrix) +importFrom(readr,col_character) +importFrom(readr,cols) +importFrom(readr,read_delim) importFrom(reshape2,dcast) importFrom(reshape2,melt) diff --git a/R/importWQP.R b/R/importWQP.R index f3ba012c170605af6a6569dcc441c99118df96fc..1132fcd6e24987609d52acd7ed6bf03e755b54f9 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -1,3 +1,5 @@ + + #' Basic Water Quality Portal Data parser #' #' Imports data from the Water Quality Portal based on a specified url. @@ -13,10 +15,17 @@ #' @export #' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}} #' @import RCurl -#' @import lubridate #' @import utils #' @import stats +#' @importFrom readr read_delim +#' @importFrom readr col_character +#' @importFrom readr cols +#' @importFrom dplyr mutate_ +#' @importFrom dplyr mutate_each_ +#' @importFrom dplyr select_ #' @importFrom dplyr left_join +#' @importFrom lubridate parse_date_time +#' @importFrom lubridate fast_strptime #' @examples #' # These examples require an internet connection to run #' @@ -41,68 +50,12 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ "America/Phoenix","America/Metlakatla")) } - if(file.exists(obs_url)){ - if(zip){ - obs_url <- unzip(obs_url) - } - suppressWarnings(namesData <- read.delim(obs_url , header = TRUE, quote="", - dec=".", sep='\t', colClasses='character',nrow=1)) - - classColumns <- setNames(rep('character',ncol(namesData)),names(namesData)) - - classColumns[grep("MeasureValue",names(classColumns))] <- NA - - suppressWarnings(retval <- read.delim(obs_url, header = TRUE, quote="", - dec=".", sep='\t', colClasses=as.character(classColumns))) - if(zip){ - unlink(obs_url) - } + if(!file.exists(obs_url)){ + h <- basicHeaderGatherer() + httpHEAD(obs_url, headerfunction = h$update) - } else { - - if(zip){ - h <- basicHeaderGatherer() - httpHEAD(obs_url, headerfunction = h$update) - - headerInfo <- h$value() - - temp <- tempfile() - options(timeout = 120) - - possibleError <- tryCatch({ - download.file(obs_url,temp, quiet=TRUE, mode='wb') - }, - error = function(e) { - stop(e, "with url:", obs_url) - } - ) - - if(headerInfo['status'] == "200"){ - doc <- unzip(temp) - } else { - unlink(temp) - - stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) - } - - } else { - doc <- getWebServiceData(obs_url) - headerInfo <- attr(doc, "headerInfo") - } - - suppressWarnings(namesData <- read.delim(if(zip) doc else textConnection(doc) , header = TRUE, quote="", - dec=".", sep='\t', 
colClasses='character',nrow=1)) - - classColumns <- setNames(rep('character',ncol(namesData)),names(namesData)) - - classColumns[grep("MeasureValue",names(classColumns))] <- NA - - suppressWarnings(retval <- read.delim(if(zip) doc else textConnection(doc), header = TRUE, quote="", - dec=".", sep='\t', colClasses=as.character(classColumns))) - if(zip) unlink(doc) - + headerInfo <- h$value() numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"]) - if(headerInfo['Total-Result-Count'] == "0"){ warning("No data returned") @@ -116,69 +69,87 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){ return(data.frame()) } + if(headerInfo['status'] == "200"){ + + if(zip){ + temp <- tempfile() + options(timeout = 120) + + possibleError <- tryCatch({ + suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb')) + }, + error = function(e) { + stop(e, "with url:", obs_url) + } + ) + doc <- unzip(temp) + retval <- suppressWarnings(read_delim(doc, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t")) + unlink(doc) + } else { + retval <- suppressWarnings(read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t")) + } + } else { + stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url) + } + actualNumReturned <- nrow(retval) if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned") + } else { + + if(zip){ + doc <- unzip(obs_url) + retval <- suppressWarnings(read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t")) + unlink(doc) + } else { + retval <- suppressWarnings(read_delim(obs_url, + col_types = cols(`ActivityStartTime/Time` = col_character(), + `ActivityEndTime/Time` = col_character(), + USGSPCode = col_character(), + ResultCommentText=col_character()), + quote = "", delim = "\t")) + } + } - retval[,names(which(sapply(retval[,grep("MeasureValue",names(retval))], function(x)all(is.na(x)))))] <- "" + offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0), + code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA), + stringsAsFactors = FALSE) - offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), - code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"), - stringsAsFactors = FALSE) - - retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime.TimeZoneCode"="code")) + retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code")) names(retval)[names(retval) == "offset"] <- "timeZoneStart" - retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime.TimeZoneCode"="code")) + retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code")) names(retval)[names(retval) == "offset"] <- "timeZoneEnd" - - retval$timeZoneStart[is.na(retval$timeZoneStart)] <- 0 - retval$timeZoneEnd[is.na(retval$timeZoneEnd)] <- 0 - if("ActivityStartDate" %in% names(retval)){ - if(any(retval$ActivityStartDate != "")){ - 
suppressWarnings(retval$ActivityStartDate <- as.Date(parse_date_time(retval$ActivityStartDate, c("Ymd", "mdY")))) - } - } - - if("ActivityEndDate" %in% names(retval)){ - if(any(retval$ActivityEndDate != "")){ - suppressWarnings(retval$ActivityEndDate <- as.Date(parse_date_time(retval$ActivityEndDate, c("Ymd", "mdY")))) - } - } - - if(any(!is.na(retval$timeZoneStart))){ - retval$ActivityStartDateTime <- with(retval, as.POSIXct(paste(ActivityStartDate, ActivityStartTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC")) - retval$ActivityStartDateTime <- retval$ActivityStartDateTime + retval$timeZoneStart*60*60 - retval$ActivityStartDateTime <- as.POSIXct(retval$ActivityStartDateTime) - if(tz != ""){ - attr(retval$ActivityStartDateTime, "tzone") <- tz - } else { - attr(retval$ActivityStartDateTime, "tzone") <- "UTC" - } - } + dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate") - if(any(!is.na(retval$timeZoneEnd))){ - retval$ActivityEndDateTime <- with(retval, as.POSIXct(paste(ActivityEndDate, ActivityEndTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC")) - retval$ActivityEndDateTime <- retval$ActivityEndDateTime + retval$timeZoneEnd*60*60 - retval$ActivityEndDateTime <- as.POSIXct(retval$ActivityEndDateTime) - if(tz != ""){ - attr(retval$ActivityEndDateTime, "tzone") <- tz - } else { - attr(retval$ActivityEndDateTime, "tzone") <- "UTC" - } - } + retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols)) - if(all(is.na(retval$ActivityEndDateTime))){ - retval$ActivityEndDateTime <- NULL - } - - retval <- retval[order(retval$OrganizationIdentifier, - retval$MonitoringLocationIdentifier, - retval$ActivityStartDateTime, decreasing = FALSE),] + retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`)) + retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`)) + + retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart) + + retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart) + names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))]) return(retval) - -} \ No newline at end of file +} diff --git a/R/readWQPqw.r b/R/readWQPqw.r index 3ca39727d4382491e8f89452b328a28390d4bf2b..684fd1a0b7358c86eb478e91f720b66a16ad05f9 100644 --- a/R/readWQPqw.r +++ b/R/readWQPqw.r @@ -151,7 +151,7 @@ readWQPqw <- function(siteNumbers,parameterCd,startDate="",endDate="",tz=""){ stringsAsFactors=FALSE) variableInfo <- unique(variableInfo) - if(any(variableInfo$parameterCd != "")){ + if(any(!is.na(variableInfo$parameterCd))){ pcodes <- unique(variableInfo$parameterCd[!is.na(variableInfo$parameterCd)]) pcodes <- pcodes["" != pcodes] paramINFO <- readNWISpCode(pcodes) diff --git a/R/whatNWISData.r b/R/whatNWISData.r index 50067b22ebb23b2ce00d3b994f33d49b69df3871..1b9dbe4ee81d820dd2e4208dcc271220711ffb68 100644 --- a/R/whatNWISData.r +++ b/R/whatNWISData.r @@ -54,7 +54,7 @@ #' queryTime \tab POSIXct \tab The time the data was returned \cr #' } #' @export -#' @import lubridate +#' @importFrom lubridate parse_date_time #' @examples #' \dontrun{ #' availableData <- whatNWISdata('05114000')
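
A minimal, self-contained sketch of the parsing pattern this patch adopts in importWQP: read the tab-delimited WQP payload with readr::read_delim, pin the clock-time column to character via col_types, join a timezone-offset lookup, and build UTC datetimes with lubridate::fast_strptime plus the offset. It is not part of the patch itself: the two data rows and the temp file are made up for illustration, the offset table is a subset of the diff's offsetLibrary, and it uses plain dplyr::mutate rather than the underscore verbs used above.

library(readr)
library(dplyr)
library(lubridate)

# Fake two-row WQP-style payload written to a temp file; in the package this
# text comes from the Water Quality Portal URL (or an unzipped download).
txt <- paste(
  "ActivityStartDate\tActivityStartTime/Time\tActivityStartTime/TimeZoneCode",
  "2015-06-01\t09:30:00\tEDT",
  "2015-06-01\t14:00:00\tPST",
  sep = "\n")
tf <- tempfile(fileext = ".tsv")
writeLines(txt, tf)

# Force the clock-time column to character so readr does not guess a class.
raw <- read_delim(tf, delim = "\t", quote = "",
                  col_types = cols(`ActivityStartTime/Time` = col_character()))

# Hours to add to local clock time to reach UTC (subset of offsetLibrary).
offsetLibrary <- data.frame(
  offset = c(5, 4, 6, 5, 7, 6, 8, 7),
  code   = c("EST", "EDT", "CST", "CDT", "MST", "MDT", "PST", "PDT"),
  stringsAsFactors = FALSE)

wqp <- raw %>%
  left_join(offsetLibrary, by = c("ActivityStartTime/TimeZoneCode" = "code")) %>%
  mutate(ActivityStartDateTime =
           fast_strptime(paste(ActivityStartDate, `ActivityStartTime/Time`),
                         "%Y-%m-%d %H:%M:%S") + 60 * 60 * offset)

wqp$ActivityStartDateTime  # 2015-06-01 13:30:00 UTC and 2015-06-01 22:00:00 UTC
unlink(tf)

fast_strptime is a fixed-format, C-level parser, so it is considerably faster than strptime or parse_date_time on large WQP returns, and the offset join converts local clock times to UTC arithmetically rather than relying on per-row timezone lookups.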