Skip to content
Snippets Groups Projects
Commit 9cf524cc authored by Laura A DeCicco's avatar Laura A DeCicco
Browse files

Merge pull request #150 from ldecicco-USGS/master

Moving to dplyr for real
parents 6e0fd790 1568de18
No related branches found
No related tags found
No related merge requests found
......@@ -30,7 +30,8 @@ Imports:
plyr,
stats,
utils,
dplyr
dplyr,
readr
Suggests:
xtable,
knitr,
......@@ -39,4 +40,4 @@ VignetteBuilder: knitr
BuildVignettes: true
BugReports: https://github.com/USGS-R/dataRetrieval/issues
URL: https://github.com/USGS-R/dataRetrieval, http://pubs.usgs.gov/tm/04/a10/
RoxygenNote: 5.0.0
RoxygenNote: 5.0.1
......@@ -33,10 +33,17 @@ export(whatWQPsites)
export(zeroPad)
import(RCurl)
import(XML)
import(lubridate)
import(stats)
import(utils)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate_)
importFrom(dplyr,mutate_each_)
importFrom(dplyr,select_)
importFrom(lubridate,fast_strptime)
importFrom(lubridate,parse_date_time)
importFrom(plyr,rbind.fill.matrix)
importFrom(readr,col_character)
importFrom(readr,cols)
importFrom(readr,read_delim)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
#' Basic Water Quality Portal Data parser
#'
#' Imports data from the Water Quality Portal based on a specified url.
......@@ -13,10 +15,17 @@
#' @export
#' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}}
#' @import RCurl
#' @import lubridate
#' @import utils
#' @import stats
#' @importFrom readr read_delim
#' @importFrom readr col_character
#' @importFrom readr cols
#' @importFrom dplyr mutate_
#' @importFrom dplyr mutate_each_
#' @importFrom dplyr select_
#' @importFrom dplyr left_join
#' @importFrom lubridate parse_date_time
#' @importFrom lubridate fast_strptime
#' @examples
#' # These examples require an internet connection to run
#'
......@@ -41,68 +50,12 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
"America/Phoenix","America/Metlakatla"))
}
if(file.exists(obs_url)){
if(zip){
obs_url <- unzip(obs_url)
}
suppressWarnings(namesData <- read.delim(obs_url , header = TRUE, quote="",
dec=".", sep='\t', colClasses='character',nrow=1))
classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
classColumns[grep("MeasureValue",names(classColumns))] <- NA
suppressWarnings(retval <- read.delim(obs_url, header = TRUE, quote="",
dec=".", sep='\t', colClasses=as.character(classColumns)))
if(zip){
unlink(obs_url)
}
if(!file.exists(obs_url)){
h <- basicHeaderGatherer()
httpHEAD(obs_url, headerfunction = h$update)
} else {
if(zip){
h <- basicHeaderGatherer()
httpHEAD(obs_url, headerfunction = h$update)
headerInfo <- h$value()
temp <- tempfile()
options(timeout = 120)
possibleError <- tryCatch({
download.file(obs_url,temp, quiet=TRUE, mode='wb')
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
if(headerInfo['status'] == "200"){
doc <- unzip(temp)
} else {
unlink(temp)
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
}
} else {
doc <- getWebServiceData(obs_url)
headerInfo <- attr(doc, "headerInfo")
}
suppressWarnings(namesData <- read.delim(if(zip) doc else textConnection(doc) , header = TRUE, quote="",
dec=".", sep='\t', colClasses='character',nrow=1))
classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
classColumns[grep("MeasureValue",names(classColumns))] <- NA
suppressWarnings(retval <- read.delim(if(zip) doc else textConnection(doc), header = TRUE, quote="",
dec=".", sep='\t', colClasses=as.character(classColumns)))
if(zip) unlink(doc)
headerInfo <- h$value()
numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
if(headerInfo['Total-Result-Count'] == "0"){
warning("No data returned")
......@@ -116,69 +69,87 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
return(data.frame())
}
if(headerInfo['status'] == "200"){
if(zip){
temp <- tempfile()
options(timeout = 120)
possibleError <- tryCatch({
suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
},
error = function(e) {
stop(e, "with url:", obs_url)
}
)
doc <- unzip(temp)
retval <- suppressWarnings(read_delim(doc,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t"))
unlink(doc)
} else {
retval <- suppressWarnings(read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t"))
}
} else {
stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
}
actualNumReturned <- nrow(retval)
if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
} else {
if(zip){
doc <- unzip(obs_url)
retval <- suppressWarnings(read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t"))
unlink(doc)
} else {
retval <- suppressWarnings(read_delim(obs_url,
col_types = cols(`ActivityStartTime/Time` = col_character(),
`ActivityEndTime/Time` = col_character(),
USGSPCode = col_character(),
ResultCommentText=col_character()),
quote = "", delim = "\t"))
}
}
retval[,names(which(sapply(retval[,grep("MeasureValue",names(retval))], function(x)all(is.na(x)))))] <- ""
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
stringsAsFactors = FALSE)
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"),
stringsAsFactors = FALSE)
retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime.TimeZoneCode"="code"))
retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneStart"
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime.TimeZoneCode"="code"))
retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
retval$timeZoneStart[is.na(retval$timeZoneStart)] <- 0
retval$timeZoneEnd[is.na(retval$timeZoneEnd)] <- 0
if("ActivityStartDate" %in% names(retval)){
if(any(retval$ActivityStartDate != "")){
suppressWarnings(retval$ActivityStartDate <- as.Date(parse_date_time(retval$ActivityStartDate, c("Ymd", "mdY"))))
}
}
if("ActivityEndDate" %in% names(retval)){
if(any(retval$ActivityEndDate != "")){
suppressWarnings(retval$ActivityEndDate <- as.Date(parse_date_time(retval$ActivityEndDate, c("Ymd", "mdY"))))
}
}
if(any(!is.na(retval$timeZoneStart))){
retval$ActivityStartDateTime <- with(retval, as.POSIXct(paste(ActivityStartDate, ActivityStartTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityStartDateTime <- retval$ActivityStartDateTime + retval$timeZoneStart*60*60
retval$ActivityStartDateTime <- as.POSIXct(retval$ActivityStartDateTime)
if(tz != ""){
attr(retval$ActivityStartDateTime, "tzone") <- tz
} else {
attr(retval$ActivityStartDateTime, "tzone") <- "UTC"
}
}
dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
if(any(!is.na(retval$timeZoneEnd))){
retval$ActivityEndDateTime <- with(retval, as.POSIXct(paste(ActivityEndDate, ActivityEndTime.Time),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
retval$ActivityEndDateTime <- retval$ActivityEndDateTime + retval$timeZoneEnd*60*60
retval$ActivityEndDateTime <- as.POSIXct(retval$ActivityEndDateTime)
if(tz != ""){
attr(retval$ActivityEndDateTime, "tzone") <- tz
} else {
attr(retval$ActivityEndDateTime, "tzone") <- "UTC"
}
}
retval <- suppressWarnings(mutate_each_(retval, ~as.Date(parse_date_time(., c("Ymd", "mdY"))), dateCols))
if(all(is.na(retval$ActivityEndDateTime))){
retval$ActivityEndDateTime <- NULL
}
retval <- retval[order(retval$OrganizationIdentifier,
retval$MonitoringLocationIdentifier,
retval$ActivityStartDateTime, decreasing = FALSE),]
retval <- mutate_(retval, ActivityStartDateTime=~paste(ActivityStartDate, `ActivityStartTime/Time`))
retval <- mutate_(retval, ActivityEndDateTime=~paste(ActivityEndDate, `ActivityEndTime/Time`))
retval <- mutate_(retval, ActivityStartDateTime=~fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
retval <- mutate_(retval, ActivityEndDateTime=~fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart)
retval <- select_(retval, ~-timeZoneEnd, ~-timeZoneStart)
names(retval)[grep("/",names(retval))] <- gsub("/",".",names(retval)[grep("/",names(retval))])
return(retval)
}
\ No newline at end of file
}
......@@ -151,7 +151,7 @@ readWQPqw <- function(siteNumbers,parameterCd,startDate="",endDate="",tz=""){
stringsAsFactors=FALSE)
variableInfo <- unique(variableInfo)
if(any(variableInfo$parameterCd != "")){
if(any(!is.na(variableInfo$parameterCd))){
pcodes <- unique(variableInfo$parameterCd[!is.na(variableInfo$parameterCd)])
pcodes <- pcodes["" != pcodes]
paramINFO <- readNWISpCode(pcodes)
......
......@@ -54,7 +54,7 @@
#' queryTime \tab POSIXct \tab The time the data was returned \cr
#' }
#' @export
#' @import lubridate
#' @importFrom lubridate parse_date_time
#' @examples
#' \dontrun{
#' availableData <- whatNWISdata('05114000')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment