Commit f3c426a6 authored by Laura A DeCicco's avatar Laura A DeCicco
Browse files

Getting ready for readr 2.0

parent 2e2282d4
dataRetrieval 2.7.8
==================
* Fixed missing comments in readNWISqw
* Added Stable Isotopes, Habitat, and Other water quality groupings
* Added option to not check for headers on WQP requests.
dataRetrieval 2.7.7
==================
* The NLDI service is now available through the `findNLDI` function.
......
......@@ -106,162 +106,116 @@ importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz="UTC"){
header.names <- strsplit(readr.total[meta.rows+1],"\t")[[1]]
types.names <- strsplit(readr.total[meta.rows+2],"\t")[[1]]
data.rows <- total.rows - meta.rows - 2
if(convertType){
readr.data <- read_delim_check_quote(file = doc, skip = (meta.rows+2),delim="\t",col_names = FALSE, total.rows = data.rows)
#defaults to time in seconds in readr 0.2.2.9??
if(length(grep("hms",lapply(readr.data, class))) > 0){
colHMS <- grep("hms",lapply(readr.data, class))
colList <- as.list(rep("c",length(colHMS)))
names(colList) <- paste0("X",colHMS)
readr.data <- read_delim_check_quote(file = doc, skip = (meta.rows+2),delim="\t",
col_names = FALSE,
col_types = colList,
total.rows = data.rows)
}
} else {
readr.data <- read_delim_check_quote(file = doc,skip = (meta.rows+2),delim="\t",col_names = FALSE, col_types = readr::cols(.default = "c"), total.rows = data.rows)
}
if(nrow(readr.data) > 0){
names(readr.data) <- header.names
char.names <- c(header.names[grep("_cd",header.names)],
header.names[grep("_id",header.names)],
header.names[grep("_tx",header.names)],
header.names[grep("_tm",header.names)],
header.names[header.names == "site_no"],
header.names[header.names == "project_no"])
if(length(char.names) > 0){
char.names.true <- char.names[sapply(readr.data[,char.names], is.character)]
char.names <- char.names[!(char.names %in% char.names.true)]
if(data.rows > 0){
if(convertType){
readr.data <- readr::read_delim(file = doc, delim = "\t",
skip = meta.rows + 2,
guess_max = data.rows,
show_col_types = FALSE,
col_names = FALSE)
} else {
char.names <- NULL
}
if(nrow(readr::problems(readr.data)) > 0 | length(char.names) > 0){
readr.data.char <- read_delim_check_quote(file = doc, skip = (meta.rows+2),delim="\t",col_names = FALSE,
col_types = readr::cols(.default = "c"), total.rows = data.rows)
names(readr.data.char) <- header.names
}
for(j in char.names){
readr.data[,j] <- readr.data.char[[j]]
attr(readr.data, "problems") <- attr(readr.data, "problems")[attr(readr.data, "problems")[["col"]] != paste0("X",j),]
}
badCols <- attr(readr.data, "problems")[["col"]]
readr.data <- as.data.frame(readr.data)
if(length(badCols) > 0){
readr.data <- fixErrors(readr.data, readr.data.char, "no trailing characters", as.numeric)
readr.data <- fixErrors(readr.data, readr.data.char, "date like", lubridate::parse_date_time, c("%Y-%m-%d %H:%M:%S","%Y-%m-%d","%Y"))
readr.data <- readr::read_delim(file = doc, delim = "\t",
skip = meta.rows + 2,
col_types = readr::cols(.default = "c"),
show_col_types = FALSE,
col_names = FALSE)
}
if(length(grep("_va", names(readr.data))) > 0 &&
any(lapply(readr.data[,grep("_va", names(readr.data))], class) %in% "integer")){
#note... if we simply convert any _va to numeric...we lose some QW censoring information from some formats
vaCols <- grep("_va", names(readr.data))
if(length(vaCols) > 1){
vaCols <- vaCols[lapply(readr.data[,vaCols], class) %in% "integer"]
}
readr.data[,vaCols] <- sapply(readr.data[,vaCols], as.numeric)
}
columnTypes <- sapply(readr.data, typeof)
columnsThatMayBeWrong <- grep("n",types.names)[which(!(columnTypes[grep("n",types.names)] %in% c("double","integer")))]
if(nrow(readr.data) > 0){
names(readr.data) <- header.names
for(i in columnsThatMayBeWrong){
readr.data[[i]] <- tryCatch({
as.numeric(readr.data[[i]])
},
warning=function(cond) {
message(paste("Column",i,"contains characters that cannot be automatically converted to numeric."))
return(readr.data[[i]])
}
)
}
problems.orig <- readr::problems(readr.data)
if (asDateTime & convertType){
header.suffix <- sapply(strsplit(header.names,"_"), function(x)x[length(x)])
header.base <- substr(header.names,1,nchar(header.names)-3)
dt_cols <- unique(header.base[header.suffix %in% c("dt","tm")])
readr.data <- as.data.frame(readr.data)
if(all(c("sample","sample_end") %in% dt_cols)){
if("sample_start_time_datum_cd" %in% header.names){
readr.data[,"tz_cd"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data[,"sample_start_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data[,"sample_end_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data <- readr.data[,names(readr.data)[names(readr.data) != "sample_start_time_datum_cd"]]
}
}
for(i in dt_cols){
if(convertType){
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")
if(length(grep("_va", names(readr.data))) > 0 ){
#note... if we simply convert any _va to numeric...we lose some QW censoring information from some formats
vaCols <- grep("_va", names(readr.data))
for(i in vaCols){
readr.data[[i]] <- tryCatch({
as.numeric(readr.data[[i]])
},
warning=function(cond) {
message(paste("Column",i,"contains characters that cannot be automatically converted to numeric."))
return(readr.data[[i]])
}
)
}
}
varval <- suppressWarnings(lubridate::parse_date_time(paste(readr.data[,paste0(i,"_dt")],
readr.data[,paste0(i,"_tm")]),
c("%Y-%m-%d %H:%M:%S","%Y-%m-%d %H:%M"),
tz = "UTC"))
if(!all(is.na(varval))){
readr.data[,varname] <- varval
if(paste0(i, "_tz_cd") %in% names(readr.data)){
tz.name <- paste0(i, "_tz_cd")
} else {
tz.name <- paste0(i,"_time_datum_cd")
if (asDateTime & convertType){
header.suffix <- sapply(strsplit(header.names,"_"), function(x)x[length(x)])
header.base <- substr(header.names,1,nchar(header.names)-3)
dt_cols <- unique(header.base[header.suffix %in% c("dt","tm")])
if(all(c("sample","sample_end") %in% dt_cols)){
if("sample_start_time_datum_cd" %in% header.names){
readr.data[,"tz_cd"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data[,"sample_start_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data[,"sample_end_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
readr.data <- readr.data[,names(readr.data)[names(readr.data) != "sample_start_time_datum_cd"]]
}
}
for(i in dt_cols){
if(tz.name %in% names(readr.data)){
readr.data <- convertTZ(readr.data,tz.name,varname,tz)
if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
varname <- paste0(i,"_dateTime")
varval <- suppressWarnings(lubridate::parse_date_time(paste(readr.data[,paste0(i,"_dt")],
readr.data[,paste0(i,"_tm")]),
c("%Y-%m-%d %H:%M:%S","%Y-%m-%d %H:%M"),
tz = "UTC"))
if(!all(is.na(varval))){
readr.data[,varname] <- varval
if(paste0(i, "_tz_cd") %in% names(readr.data)){
tz.name <- paste0(i, "_tz_cd")
} else {
tz.name <- paste0(i,"_time_datum_cd")
}
if(tz.name %in% names(readr.data)){
readr.data <- convertTZ(readr.data,tz.name,varname,tz)
}
}
}
}
if("tz_cd" %in% names(readr.data)){
date.time.cols <- which(sapply(readr.data, function(x) inherits(x, "POSIXct")))
if(length(date.time.cols) > 0){
readr.data <- convertTZ(readr.data,"tz_cd",date.time.cols,tz, flip.cols=FALSE)
}
}
if("DATE" %in% header.names){
readr.data[,"DATE"] <- lubridate::parse_date_time(readr.data[,"DATE"], "Ymd")
}
if(all(c("DATE","TIME","TZCD") %in% header.names)){
varname <- "DATETIME"
varval <- as.POSIXct(lubridate::fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC"))
readr.data[,varname] <- varval
readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
}
if("sample_dateTime" %in% names(readr.data)){
names(readr.data)[names(readr.data) == "sample_dateTime"] <- "startDateTime"
}
}
row.names(readr.data) <- NULL
}
if("tz_cd" %in% names(readr.data)){
date.time.cols <- which(sapply(readr.data, function(x) inherits(x, "POSIXct")))
if(length(date.time.cols) > 0){
readr.data <- convertTZ(readr.data,"tz_cd",date.time.cols,tz, flip.cols=FALSE)
}
}
if("DATE" %in% header.names){
readr.data[,"DATE"] <- lubridate::parse_date_time(readr.data[,"DATE"], "Ymd")
}
if(all(c("DATE","TIME","TZCD") %in% header.names)){
varname <- "DATETIME"
varval <- as.POSIXct(lubridate::fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC"))
readr.data[,varname] <- varval
readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
}
if("sample_dateTime" %in% names(readr.data)){
names(readr.data)[names(readr.data) == "sample_dateTime"] <- "startDateTime"
}
}
row.names(readr.data) <- NULL
if(nrow(problems.orig) > 0){
attr(readr.data, "problems") <- problems.orig
}
} else {
readr.data <- data.frame(matrix(vector(), 0, length(header.names),
......@@ -363,10 +317,14 @@ read_delim_check_quote <- function(..., total.rows){
if(total.rows <= 0){
total.rows <- 1
}
rdb.data <- suppressWarnings(readr::read_delim(..., guess_max = total.rows))
rdb.data <- suppressWarnings(readr::read_delim(...,
show_col_types = FALSE,
guess_max = total.rows))
if(nrow(rdb.data) < total.rows){
rdb.data <- suppressWarnings(readr::read_delim(..., quote = "", guess_max = total.rows))
rdb.data <- suppressWarnings(readr::read_delim(..., quote = "",
show_col_types = FALSE,
guess_max = total.rows))
}
return(rdb.data)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment