#' Function to return data from the NWIS RDB 1.0 format
#'
#' This function accepts a url parameter that already contains the desired
#' NWIS site, parameter code, statistic, start date, and end date. Using the
#' RDB format to import multi-site data is not recommended.
#'
#' @param obs_url string containing the url for the retrieval
#' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, as Date
#' @param qw logical, if TRUE parses the data as water quality results (dates and times are reported as sample start and end times)
#' @param tz string to set the timezone attribute of the datetimes. Default is an empty string, which converts the
#' datetimes to UTC (properly accounting for daylight saving time based on the data's provided tz_cd column).
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
#' @param convertType logical, defaults to TRUE. If TRUE, the function converts the data to dates, datetimes, and
#' numerics based on a standard algorithm. If FALSE, everything is returned as character strings.
#' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
#' @export
#' @import RCurl
#' @importFrom lubridate parse_date_time
#' @examples
#' siteNumber <- "02177000"
#' startDate <- "2012-09-01"
#' endDate <- "2012-10-01"
#' offering <- "00003"
#' property <- "00060"
#' \dontrun{
#' obs_url <- constructNWISURL(siteNumber,property,
#' startDate,endDate,"dv",format="tsv")
#' urlMultiPcodes <- constructNWISURL("04085427",c("00060","00010"),
#' startDate,endDate,"dv",statCd=c("00003","00001"),"tsv")
#' multiData <- importRDB1(urlMultiPcodes)
#' unitDataURL <- constructNWISURL(siteNumber,property,
#' "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch
#' unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
#' qwURL <- constructNWISURL(c('04024430','04024000'),
#' c('34247','30234','32104','34220'),
#' "2010-11-03","","qw",format="rdb")
#' qwData <- importRDB1(qwURL, qw=TRUE, tz="America/Chicago")
#' iceSite <- '04024430'
#' start <- "2013-11-09"
#' end <- "2013-11-28"
#' urlIce <- constructNWISURL(iceSite,"00060",start, end,"uv",format="tsv")
#' ice <- importRDB1(urlIce, asDateTime=TRUE)
#'
#' filePath <- system.file("extdata", package="dataRetrieval")
#' fileName <- "RDB1Example.txt"
#' fullPath <- file.path(filePath, fileName)
#' importUserRDB <- importRDB1(fullPath)
#' }
importRDB1 <- function(obs_url, asDateTime=FALSE, qw=FALSE, convertType = TRUE, tz=""){
if(tz != ""){
tz <- match.arg(tz, c("America/New_York","America/Chicago",
"America/Denver","America/Los_Angeles",
"America/Anchorage","America/Honolulu",
"America/Jamaica","America/Managua",
"America/Phoenix","America/Metlakatla"))
}
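  # tz = "" (the default) leaves the converted datetimes in UTC; a validated
  # Olson name above is used to re-localize the datetimes later.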
if(url.exists(obs_url)){
# 400 bad site id
# 404 outside date range, wrong pcode
# 200 cool
    retval <- tryCatch({
h <- basicHeaderGatherer()
doc <- getURL(obs_url, headerfunction = h$update)
fileVecChar <- scan(obs_url, what = "", sep = "\n", quiet=TRUE)
      pndIndx <- regexpr("^#", fileVecChar)
hdr <- fileVecChar[pndIndx > 0L]
if(!(as.character(h$value()["Content-Type"]) == "text/plain;charset=UTF-8" |
as.character(h$value()["Content-Type"]) == "text/plain")){
message(paste("URL caused an error:", obs_url))
message("Content-Type=",h$value()["Content-Type"])
}
doc <- textConnection(doc)
}, warning = function(w) {
message(paste("URL caused a warning:", obs_url))
message(w)
}, error = function(e) {
message(paste("URL does not seem to exist:", obs_url))
message(e)
return(NA)
})
} else {
doc <- obs_url
fileVecChar <- scan(obs_url, what = "", sep = "\n", quiet=TRUE)
    pndIndx <- regexpr("^#", fileVecChar)
    hdr <- fileVecChar[pndIndx > 0L]
  }
  
  tmp <- read.delim(
doc,
header = TRUE,
quote="\"",
dec=".",
sep='\t',
colClasses=c('character'),
fill = TRUE,
comment.char="#")
dataType <- tmp[1,]
data <- tmp[-1,]
#This will break if the 2nd (or greater) site has more columns than the first
#Therefore, using RDB is not recommended for multi-site queries.
#This correction will work if each site has the same number of columns
multiSiteCorrections <- -which(as.logical(apply(data[,1:2], 1, FUN=function(x) all(x %in% as.character(dataType[,1:2])))))
if(length(multiSiteCorrections) > 0){
data <- data[multiSiteCorrections,]
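    # Multi-site responses repeat the header row before each site's block;
    # find and drop those repeated header rows.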
findRowsWithHeaderInfo <- as.integer(apply(data[,1:2], 1, FUN = function(x) if(x[1] == names(data)[1] & x[2] == names(data)[2]) 1 else 0))
findRowsWithHeaderInfo <- which(findRowsWithHeaderInfo == 0)
data <- data[findRowsWithHeaderInfo,]
}
offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
  # The suppressed warning occurs when there is text (such as "Ice") in a numeric column
data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x)))
numberColumns <- grep("_va",names(data))
data[,numberColumns] <- sapply(data[,numberColumns],as.numeric)
intColumns <- grep("_nu",names(data))
if("current_rating_nu" %in% names(data)){
intColumns <- intColumns[!("current_rating_nu" %in% names(data)[intColumns])]
data$current_rating_nu <- gsub(" ", "", data$current_rating_nu)
}
data[,intColumns] <- sapply(data[,intColumns],as.integer)
if(length(grep('d$', dataType)) > 0){
if (asDateTime & !qw){
if("tz_cd" %in% names(data)){
offset <- offsetLibrary[data$tz_cd]
} else {
offset <- 0
}
offset[is.na(offset)] <- 0
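      # Times are local clock times: parse them as if UTC, then add the
      # offset (in hours) to shift them to true UTC instants.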
data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC")
data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60
data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0])
if(tz != ""){
attr(data[,regexpr('d$', dataType) > 0], "tzone") <- tz
}
} else if (qw){
if("sample_start_time_datum_cd" %in% names(data)){
timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd]
timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0
} else {
timeZoneStartOffset <- 0
}
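      # Composite (time-integrated) samples report both start and end times;
      # detect them so an endDateTime column is built below.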
if("sample_end_time_datum_cd" %in% names(data)){
timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd]
timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0
composite <- TRUE
} else {
composite <- FALSE
        if(any(data$sample_end_dt != "") & any(data$sample_end_tm != "")){
          if(identical(which(data$sample_end_dt != ""), which(data$sample_end_tm != ""))){
            composite <- TRUE
          }
        }
        timeZoneEndOffset <- 0
      }
      
      if("sample_dt" %in% names(data)){
if(any(data$sample_dt != "")){
suppressWarnings(data$sample_dt <- as.Date(parse_date_time(data$sample_dt, c("Ymd", "mdY"))))
}
      }
      
if("sample_end_dt" %in% names(data)){
if(any(data$sample_end_dt != "")){
suppressWarnings(data$sample_end_dt <- as.Date(parse_date_time(data$sample_end_dt, c("Ymd", "mdY"))))
}
}
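      # Combine the date and time columns, parse as if UTC, then add the
      # start offset (in hours) so startDateTime is a true UTC instant.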
data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60
data$startDateTime <- as.POSIXct(data$startDateTime)
if(tz != ""){
attr(data$startDateTime, "tzone") <- tz
}
if(composite){
data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60
data$endDateTime <- as.POSIXct(data$endDateTime)
if(tz != ""){
attr(data$endDateTime, "tzone") <- tz
}
}
} else {
for (i in grep('d$', dataType)){
if (all(data[,i] != "")){
          data[,i] <- as.Date(data[,i])
row.names(data) <- NULL
}