From 33926ec4d6fb5c169ed113af4ab40a92fe6c7198 Mon Sep 17 00:00:00 2001 From: unknown <ldecicco@usgs.gov> Date: Wed, 29 Oct 2014 14:26:20 -0500 Subject: [PATCH] Fixing timezone issue by converting everything to UTC. --- R/importRDB1.r | 99 ++++++++++++++++++----------------------------- R/readNWISqw.r | 11 ++++-- man/importRDB1.Rd | 8 +++- man/readNWISqw.Rd | 10 ++++- 4 files changed, 60 insertions(+), 68 deletions(-) diff --git a/R/importRDB1.r b/R/importRDB1.r index 2d5cf6c9..0709775f 100644 --- a/R/importRDB1.r +++ b/R/importRDB1.r @@ -7,7 +7,7 @@ #' @param obs_url string containing the url for the retrieval #' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date #' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times) -#' @return data a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations +#' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations #' @export #' @examples #' siteNumber <- "02177000" @@ -22,8 +22,12 @@ #' startDate,endDate,"dv",statCd=c("00003","00001"),"tsv") #' multiData <- importRDB1(urlMulti) #' unitDataURL <- constructNWISURL(siteNumber,property, -#' "2014-10-10","2014-10-10","uv",format="tsv") +#' "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch #' unitData <- importRDB1(unitDataURL, asDateTime=TRUE) +#' qwURL <- constructNWISURL(c('04024430','04024000'), +#' c('34247','30234','32104','34220'), +#' "2010-11-03","","qw",format="rdb") +#' qwData <- importRDB1(qwURL, qw=TRUE) importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){ retval = tryCatch({ @@ -72,46 +76,48 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){ "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"), c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + + offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10), + c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST")) + data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x))) if(length(grep('d$', dataType)) > 0){ if (asDateTime & !qw){ if("tz_cd" %in% names(data)){ - timeZone <- as.character(timeZoneLibrary[data$tz_cd]) + offset <- offsetLibrary[data$tz_cd] } else { - timeZone <- NULL + offset <- 0 } + offset[is.na(offset)] <- 0 - - if(length(unique(timeZone)) == 1){ - data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = unique(timeZone)) - } else { - - mostCommonTZ <- names(sort(summary(as.factor(timeZone)),decreasing = TRUE)[1]) - - data[,grep('d$', dataType)] <- as.POSIXct(data[,grep('d$', dataType)], "%Y-%m-%d %H:%M", tz = mostCommonTZ) - additionalTZs <- names(sort(summary(as.factor(timeZone)),decreasing = TRUE)[-1]) - for(i in additionalTZs){ - data[timeZone == i,grep('d$', dataType)] <- as.POSIXct(data[,grep('d$', dataType)], "%Y-%m-%d %H:%M", tz = i) - } - } + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC") + data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60 + data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0]) } else if (qw){ if("sample_start_time_datum_cd" %in% names(data)){ - timeZoneStart <- as.character(timeZoneLibrary[data$sample_start_time_datum_cd]) + timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd] + timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0 } else { - timeZoneStart <- NA + timeZoneStartOffset <- 0 } if("sample_end_time_datum_cd" %in% names(data)){ - timeZoneEnd <- as.character(timeZoneLibrary[data$sample_end_time_datum_cd]) + timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd] + timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0 + composite <- TRUE } else { - timeZoneEnd <- NA + composite <- FALSE + if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){ + if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){ + composite <- TRUE + } + } + timeZoneEndOffset <- 0 } - timeZoneStart[is.na(timeZoneStart)] <- "" - timeZoneEnd[is.na(timeZoneEnd)] <- "" if("sample_dt" %in% names(data)){ if(any(data$sample_dt != "")){ @@ -125,44 +131,16 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){ } } - if(any(!is.na(timeZoneStart))){ - if(length(unique(timeZoneStart)) == 1){ - data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz=unique(timeZoneStart))) - } else { - - mostCommonTZ <- names(sort(summary(as.factor(timeZoneStart)),decreasing = TRUE)[1]) - - data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm), - format="%Y-%m-%d %H:%M", - tz=mostCommonTZ)) - additionalTZs <- names(sort(summary(as.factor(timeZoneStart)),decreasing = TRUE)[-1]) - for(i in additionalTZs){ - data$startDateTime[timeZoneStart == i] <- with(data[timeZoneStart == i,], - as.POSIXct(paste(sample_dt, sample_tm), - format="%Y-%m-%d %H:%M", - tz=i)) - } - } - } +# if(any(!is.na(timeZoneStartOffset))){ + data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60 + data$startDateTime <- as.POSIXct(data$startDateTime) +# } - if(any(!is.na(timeZoneEnd))){ - if(length(unique(timeZoneEnd)) == 1){ - data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz=unique(timeZoneEnd))) - } else { - - mostCommonTZ <- names(sort(summary(as.factor(timeZoneEnd)),decreasing = TRUE)[1]) - - data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm), - format="%Y-%m-%d %H:%M", - tz=mostCommonTZ)) - additionalTZs <- names(sort(summary(as.factor(timeZoneEnd)),decreasing = TRUE)[-1]) - for(i in additionalTZs){ - data$endDateTime[timeZoneEnd == i] <- with(data[timeZoneStart == i,], - as.POSIXct(paste(sample_end_dt, sample_end_tm), - format="%Y-%m-%d %H:%M", - tz=i)) - } - } + if(composite){ + data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC")) + data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60 + data$endDateTime <- as.POSIXct(data$endDateTime) } } else { @@ -170,7 +148,6 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){ if (all(data[,i] != "")){ data[,i] <- as.Date(data[,i]) } - } } } diff --git a/R/readNWISqw.r b/R/readNWISqw.r index d7e8841b..26f42176 100644 --- a/R/readNWISqw.r +++ b/R/readNWISqw.r @@ -11,6 +11,7 @@ #' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information. Expanded data includes #' remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code), #' dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type). +#' @param reshape logical. Will reshape the data if TRUE (default) #' @keywords data import USGS web service #' @return data dataframe with agency, site, dateTime, value, and code columns #' @export @@ -23,17 +24,21 @@ #' endDate <- '' #' pCodes <- c('34247','30234','32104','34220') #' rawNWISqwData <- readNWISqw(siteNumber,pCodes,startDate,endDate) -#' rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,startDate,endDate,expanded=TRUE) -readNWISqw <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE){ +#' rawNWISqwDataExpandReshaped <- readNWISqw(siteNumber,pCodes, +#' startDate,endDate,expanded=TRUE) +#' rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes, +#' startDate,endDate,expanded=TRUE,reshape=FALSE) +readNWISqw <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE,reshape=TRUE){ url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",expanded=expanded) data <- importRDB1(url,asDateTime=TRUE, qw=TRUE) - if(expanded){ + if(reshape & expanded){ columnsToMelt <- c("agency_cd","site_no","sample_dt","sample_tm", "sample_end_dt","sample_end_tm","sample_start_time_datum_cd","tm_datum_rlbty_cd", "parm_cd","startDateTime","endDateTime") + columnsToMelt <- columnsToMelt[columnsToMelt %in% names(data)] longDF <- melt(data, columnsToMelt) wideDF <- dcast(longDF, ... ~ variable + parm_cd ) wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x)) diff --git a/man/importRDB1.Rd b/man/importRDB1.Rd index 00df135a..3865aab0 100644 --- a/man/importRDB1.Rd +++ b/man/importRDB1.Rd @@ -13,7 +13,7 @@ importRDB1(obs_url, asDateTime = FALSE, qw = FALSE) \item{qw}{logical, if TRUE parses as water quality data (where dates/times are in start and end times)} } \value{ -data a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations +data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations } \description{ This function accepts a url parameter that already contains the desired @@ -32,7 +32,11 @@ urlMulti <- constructNWISURL("04085427",c("00060","00010"), startDate,endDate,"dv",statCd=c("00003","00001"),"tsv") multiData <- importRDB1(urlMulti) unitDataURL <- constructNWISURL(siteNumber,property, - "2014-10-10","2014-10-10","uv",format="tsv") + "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch unitData <- importRDB1(unitDataURL, asDateTime=TRUE) +qwURL <- constructNWISURL(c('04024430','04024000'), + c('34247','30234','32104','34220'), + "2010-11-03","","qw",format="rdb") +qwData <- importRDB1(qwURL, qw=TRUE) } diff --git a/man/readNWISqw.Rd b/man/readNWISqw.Rd index 9e8ab380..0d16d618 100644 --- a/man/readNWISqw.Rd +++ b/man/readNWISqw.Rd @@ -3,7 +3,8 @@ \alias{readNWISqw} \title{Raw Data Import for USGS NWIS QW Data} \usage{ -readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE) +readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE, + reshape = TRUE) } \arguments{ \item{siteNumber}{string or vector of of USGS site numbers. This is usually an 8 digit number} @@ -17,6 +18,8 @@ readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE) \item{expanded}{logical defaults to FALSE. If TRUE, retrieves additional information. Expanded data includes remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code), dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type).} + +\item{reshape}{logical. Will reshape the data if TRUE (default)} } \value{ data dataframe with agency, site, dateTime, value, and code columns @@ -32,7 +35,10 @@ startDate <- '2010-01-01' endDate <- '' pCodes <- c('34247','30234','32104','34220') rawNWISqwData <- readNWISqw(siteNumber,pCodes,startDate,endDate) -rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,startDate,endDate,expanded=TRUE) +rawNWISqwDataExpandReshaped <- readNWISqw(siteNumber,pCodes, + startDate,endDate,expanded=TRUE) +rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes, + startDate,endDate,expanded=TRUE,reshape=FALSE) } \seealso{ \code{\link{readWQPdata}}, \code{\link{whatWQPsites}}, -- GitLab