Newer
Older
#' Function to return data from the NWIS RDB 1.0 format
#'
#' This function accepts a url parameter that already contains the desired
#' NWIS site, parameter code, statistic, startdate and enddate. It is not
#' recommended to use the RDB format for importing multi-site data.
#' @param obs_url character containing the url for the retrieval or a file path to the data file.
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date
#' @param tz character to set timezone attribute of datetime. Default is an empty quote, which converts the
#' datetimes to UTC (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates, datetimes,
#' numerics based on a standard algorithm. If false, everything is returned as a character
#' @return A data frame with the following columns:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr
#' site_no \tab character \tab The USGS site number \cr
#' datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = \code{TRUE}), \cr
#' \tab character \tab or raw character string (if asDateTime = FALSE) \cr
#' tz_cd \tab character \tab The time zone code for datetime \cr
#' code \tab character \tab Any codes that qualify the corresponding value\cr
#' value \tab numeric \tab The numeric value for the parameter \cr
#' tz_cd_reported \tab The originally reported time zone \cr
#' }
#' Note that code and value are repeated for the parameters requested. The names are of the form
#' XD_P_S, where X is literal,
#' D is an option description of the parameter,
#' P is the parameter code,
#' and S is the statistic code (if applicable).
#' If a date/time (dt) column contained incomplete date and times, a new column of dates and time was inserted. This could happen
Laura A DeCicco
committed
#' when older data was reported as dates, and newer data was reported as a date/time.
#'
#' There are also several useful attributes attached to the data frame:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' url \tab character \tab The url used to generate the data \cr
#' queryTime \tab POSIXct \tab The time the data was returned \cr
#' comment \tab character \tab Header comments from the RDB file \cr
#' }
#' @importFrom readr read_lines
#' @importFrom readr read_delim
#' startDate <- "2012-09-01"
#' endDate <- "2012-10-01"
Laura A DeCicco
committed
#' offering <- "00003"
#' property <- "00060"
#' obs_url <- constructNWISURL(siteNumber,property,
Laura A DeCicco
committed
#' startDate,endDate,"dv",format="tsv")
#' \dontrun{
#' data <- importRDB1(obs_url)
#'
#' urlMultiPcodes <- constructNWISURL("04085427",c("00060","00010"),
Laura A DeCicco
committed
#' startDate,endDate,"dv",statCd=c("00003","00001"),"tsv")
#' multiData <- importRDB1(urlMultiPcodes)
#' unitDataURL <- constructNWISURL(siteNumber,property,
#' "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch
#' unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
#' qwURL <- constructNWISURL(c('04024430','04024000'),
#' c('34247','30234','32104','34220'),
#' "2010-11-03","","qw",format="rdb")
#' qwData <- importRDB1(qwURL, asDateTime=TRUE, tz="America/Chicago")
Laura A DeCicco
committed
#' iceSite <- '04024000'
#' start <- "2015-11-09"
#' end <- "2015-11-24"
#' urlIce <- constructNWISURL(iceSite,"00060",start, end,"uv",format="tsv")
#' iceNoConvert <- importRDB1(urlIce, convertType=FALSE)
#' }
#' filePath <- system.file("extdata", package="dataRetrieval")
#' fileName <- "RDB1Example.txt"
#' fullPath <- file.path(filePath, fileName)
#' importUserRDB <- importRDB1(fullPath)
importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz=""){
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
if(file.exists(obs_url)){
doc <- obs_url
} else {
doc <- getWebServiceData(obs_url)
if("warn" %in% names(attr(doc,"header"))){
attr(data, "url") <- obs_url
attr(data, "queryTime") <- Sys.time()
return(data)
}
readr.total <- read_lines(doc)
total.rows <- length(readr.total)
readr.meta <- readr.total[grep("^#", readr.total)]
meta.rows <- length(readr.meta)
header.names <- strsplit(readr.total[meta.rows+1],"\t")[[1]]
if(convertType){
readr.data <- suppressWarnings(read_delim(doc, skip = (meta.rows+2),delim="\t",col_names = FALSE))
} else {
readr.data <- suppressWarnings(read_delim(doc, skip = (meta.rows+2),delim="\t",col_names = FALSE, col_types = cols(.default = "c")))
}
names(readr.data) <- header.names
comment(readr.data) <- readr.meta
readr.data <- as.data.frame(readr.data)
header.suffix <- sapply(strsplit(header.names,"_"), function(x)x[length(x)])
header.base <- substr(header.names,1,nchar(header.names)-3)
for(i in unique(header.base[header.suffix %in% c("dt","tm")])){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")
varval <- as.POSIXct(paste(readr.data[,paste0(i,"_dt")],readr.data[,paste0(i,"_tm")]), "%Y-%m-%d %H:%M", tz = "UTC")
readr.data[,varname] <- varval
if(tz.name %in% header.names){
readr.data <- convertTZ(readr.data,tz.name,varname,tz)
}
if(tz.name %in% header.names){
readr.data <- convertTZ(readr.data,tz.name,varname,tz)
if("tz_cd" %in% header.names){
date.time.cols <- which(sapply(readr.data, function(x) inherits(x, "POSIXct")))
readr.data <- convertTZ(readr.data,"tz_cd",date.time.cols,tz)
}
if("sample_start_time_datum_cd" %in% header.names){
readr.data <- convertTZ(readr.data,"sample_start_time_datum_cd","sample_dateTime",tz)
if(!("sample_end_time_datum_cd" %in% header.names) & "sample_end_dateTime" %in% names(readr.data)){
readr.data <- convertTZ(readr.data,"sample_start_time_datum_cd_reported","sample_end_dateTime",tz)
readr.data$sample_start_time_datum_cd_reported<- readr.data$sample_start_time_datum_cd_reported_reported
readr.data$sample_start_time_datum_cd_reported_reported <- NULL
}
}
}
names(readr.data)[names(readr.data) == "sample_dateTime"] <- "startDateTime"
names(readr.data)[names(readr.data) == "sample_end_dateTime"] <- "endDateTime"
if("site_no" %in% header.names){
if(class(readr.data$site_no) != "character"){
readr.data$site_no <- as.character(readr.data$site_no)
}
row.names(readr.data) <- NULL
names(readr.data) <- make.names(names(readr.data))
attr(readr.data, "url") <- obs_url
attr(readr.data, "queryTime") <- Sys.time()
attr(readr.data, "header") <- attr(doc, "header")
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
convertTZ <- function(df, tz.name, date.time.cols, tz){
offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0),
code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","", NA),
stringsAsFactors = FALSE)
offset <- left_join(df[,tz.name,drop=FALSE],offsetLibrary, by=setNames("code",tz.name))
offset <- offset$offset
df[,paste0(tz.name,"_reported")] <- df[,tz.name,drop=FALSE]
df[,date.time.cols] <- df[,date.time.cols] + offset*60*60
df[,date.time.cols] <- as.POSIXct(df[,date.time.cols])
if(tz != ""){
attr(df[,date.time.cols], "tzone") <- tz
df[,tz.name] <- tz
} else {
attr(df[,date.time.cols], "tzone") <- "UTC"
df[!is.na(df[,date.time.cols]),tz.name] <- "UTC"
}
reported.col <- which(names(df) %in% paste0(tz.name,"_reported"))
orig.col <- which(names(df) %in% tz.name)
new.order <- 1:ncol(df)
new.order[orig.col] <- reported.col
new.order[reported.col] <- orig.col
df <- df[,new.order]
return(df)