From 33926ec4d6fb5c169ed113af4ab40a92fe6c7198 Mon Sep 17 00:00:00 2001
From: unknown <ldecicco@usgs.gov>
Date: Wed, 29 Oct 2014 14:26:20 -0500
Subject: [PATCH] Fixing timezone issue by converting everything to UTC.

---
 R/importRDB1.r    | 99 ++++++++++++++++++-----------------------------
 R/readNWISqw.r    | 11 ++++--
 man/importRDB1.Rd |  8 +++-
 man/readNWISqw.Rd | 10 ++++-
 4 files changed, 60 insertions(+), 68 deletions(-)

diff --git a/R/importRDB1.r b/R/importRDB1.r
index 2d5cf6c9..0709775f 100644
--- a/R/importRDB1.r
+++ b/R/importRDB1.r
@@ -7,7 +7,7 @@
 #' @param obs_url string containing the url for the retrieval
 #' @param asDateTime logical, if TRUE returns date and time as POSIXct, if FALSE, Date
 #' @param qw logical, if TRUE parses as water quality data (where dates/times are in start and end times)
-#' @return data a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
+#' @return data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
 #' @export
 #' @examples
 #' siteNumber <- "02177000"
@@ -22,8 +22,12 @@
 #'          startDate,endDate,"dv",statCd=c("00003","00001"),"tsv")
 #' multiData <- importRDB1(urlMulti)
 #' unitDataURL <- constructNWISURL(siteNumber,property,
-#'          "2014-10-10","2014-10-10","uv",format="tsv")
+#'          "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch
 #' unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
+#' qwURL <- constructNWISURL(c('04024430','04024000'),
+#'           c('34247','30234','32104','34220'),
+#'          "2010-11-03","","qw",format="rdb") 
+#' qwData <- importRDB1(qwURL, qw=TRUE)
 importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
   
   retval = tryCatch({
@@ -72,46 +76,48 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
                                   "America/Anchorage","America/Anchorage","America/Honolulu","America/Honolulu"),
                                 c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
     
+    
+    offsetLibrary <- setNames(c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
+                                c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"))
+    
     data[,grep('n$', dataType)] <- suppressWarnings(sapply(data[,grep('n$', dataType)], function(x) as.numeric(x)))
     
     if(length(grep('d$', dataType)) > 0){
       if (asDateTime & !qw){
         
         if("tz_cd" %in% names(data)){
-          timeZone <- as.character(timeZoneLibrary[data$tz_cd])
+          offset <- offsetLibrary[data$tz_cd]
         } else {
-          timeZone <- NULL
+          offset <- 0
         }
+        offset[is.na(offset)] <- 0
         
-        
-        if(length(unique(timeZone)) == 1){
-          data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = unique(timeZone))
-        } else {
-          
-          mostCommonTZ <- names(sort(summary(as.factor(timeZone)),decreasing = TRUE)[1])
-
-          data[,grep('d$', dataType)] <- as.POSIXct(data[,grep('d$', dataType)], "%Y-%m-%d %H:%M", tz = mostCommonTZ)
-          additionalTZs <- names(sort(summary(as.factor(timeZone)),decreasing = TRUE)[-1])
-          for(i in additionalTZs){
-            data[timeZone == i,grep('d$', dataType)] <-  as.POSIXct(data[,grep('d$', dataType)], "%Y-%m-%d %H:%M", tz = i)
-          }
-        }
+        data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0], "%Y-%m-%d %H:%M", tz = "UTC")
+        data[,regexpr('d$', dataType) > 0] <- data[,regexpr('d$', dataType) > 0] + offset*60*60
+        data[,regexpr('d$', dataType) > 0] <- as.POSIXct(data[,regexpr('d$', dataType) > 0])
        
       } else if (qw){
         
         if("sample_start_time_datum_cd" %in% names(data)){
-          timeZoneStart <- as.character(timeZoneLibrary[data$sample_start_time_datum_cd])
+          timeZoneStartOffset <- offsetLibrary[data$sample_start_time_datum_cd]
+          timeZoneStartOffset[is.na(timeZoneStartOffset)] <- 0
         } else {
-          timeZoneStart <- NA
+          timeZoneStartOffset <- 0
         }
         
         if("sample_end_time_datum_cd" %in% names(data)){
-          timeZoneEnd <- as.character(timeZoneLibrary[data$sample_end_time_datum_cd])
+          timeZoneEndOffset <- offsetLibrary[data$sample_end_time_datum_cd]
+          timeZoneEndOffset[is.na(timeZoneEndOffset)] <- 0
+          composite <- TRUE
         } else {
-          timeZoneEnd <- NA
+          composite <- FALSE
+          if(any(data$sample_end_dt != "") & any(data$sample_end_dm != "")){
+            if(which(data$sample_end_dt != "") == which(data$sample_end_dm != "")){
+              composite <- TRUE
+            }
+          }
+          timeZoneEndOffset <- 0
         }
-        timeZoneStart[is.na(timeZoneStart)] <- ""
-        timeZoneEnd[is.na(timeZoneEnd)] <- ""
         
         if("sample_dt" %in% names(data)){
           if(any(data$sample_dt != "")){
@@ -125,44 +131,16 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
           }        
         }
         
-        if(any(!is.na(timeZoneStart))){
-          if(length(unique(timeZoneStart)) == 1){
-            data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz=unique(timeZoneStart)))
-          } else {
-            
-            mostCommonTZ <- names(sort(summary(as.factor(timeZoneStart)),decreasing = TRUE)[1])
-            
-            data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),
-                                            format="%Y-%m-%d %H:%M", 
-                                            tz=mostCommonTZ))
-            additionalTZs <- names(sort(summary(as.factor(timeZoneStart)),decreasing = TRUE)[-1])
-            for(i in additionalTZs){
-              data$startDateTime[timeZoneStart == i] <-  with(data[timeZoneStart == i,], 
-                                  as.POSIXct(paste(sample_dt, sample_tm),
-                                             format="%Y-%m-%d %H:%M", 
-                                             tz=i))
-            }
-          }
-        }
+#         if(any(!is.na(timeZoneStartOffset))){
+        data$startDateTime <- with(data, as.POSIXct(paste(sample_dt, sample_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
+        data$startDateTime <- data$startDateTime + timeZoneStartOffset*60*60
+        data$startDateTime <- as.POSIXct(data$startDateTime)
+#         }
         
-        if(any(!is.na(timeZoneEnd))){
-          if(length(unique(timeZoneEnd)) == 1){
-            data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz=unique(timeZoneEnd)))
-          } else {
-            
-            mostCommonTZ <- names(sort(summary(as.factor(timeZoneEnd)),decreasing = TRUE)[1])
-            
-            data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),
-                                format="%Y-%m-%d %H:%M", 
-                                tz=mostCommonTZ))
-            additionalTZs <- names(sort(summary(as.factor(timeZoneEnd)),decreasing = TRUE)[-1])
-            for(i in additionalTZs){
-              data$endDateTime[timeZoneEnd == i] <-  with(data[timeZoneStart == i,], 
-                                as.POSIXct(paste(sample_end_dt, sample_end_tm),
-                                           format="%Y-%m-%d %H:%M", 
-                                           tz=i))
-            }
-          }
+        if(composite){
+          data$endDateTime <- with(data, as.POSIXct(paste(sample_end_dt, sample_end_tm),format="%Y-%m-%d %H:%M", tz = "UTC"))
+          data$endDateTime <- data$endDateTime + timeZoneEndOffset*60*60
+          data$endDateTime <- as.POSIXct(data$endDateTime)
         }
         
       } else {
@@ -170,7 +148,6 @@ importRDB1 <- function(obs_url,asDateTime=FALSE, qw=FALSE){
           if (all(data[,i] != "")){
             data[,i] <- as.Date(data[,i])
           }
-          
         }
       }
     }
diff --git a/R/readNWISqw.r b/R/readNWISqw.r
index d7e8841b..26f42176 100644
--- a/R/readNWISqw.r
+++ b/R/readNWISqw.r
@@ -11,6 +11,7 @@
 #' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information. Expanded data includes
 #' remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code),
 #' dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type).
+#' @param reshape logical. Will reshape the data if TRUE (default)
 #' @keywords data import USGS web service
 #' @return data dataframe with agency, site, dateTime, value, and code columns
 #' @export
@@ -23,17 +24,21 @@
 #' endDate <- ''
 #' pCodes <- c('34247','30234','32104','34220')
 #' rawNWISqwData <- readNWISqw(siteNumber,pCodes,startDate,endDate)
-#' rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,startDate,endDate,expanded=TRUE)
-readNWISqw <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE){  
+#' rawNWISqwDataExpandReshaped <- readNWISqw(siteNumber,pCodes,
+#'           startDate,endDate,expanded=TRUE)
+#' rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,
+#'           startDate,endDate,expanded=TRUE,reshape=FALSE)
+readNWISqw <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE,reshape=TRUE){  
   
   url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",expanded=expanded)
   
   data <- importRDB1(url,asDateTime=TRUE, qw=TRUE)
   
-  if(expanded){
+  if(reshape & expanded){
     columnsToMelt <- c("agency_cd","site_no","sample_dt","sample_tm",
                        "sample_end_dt","sample_end_tm","sample_start_time_datum_cd","tm_datum_rlbty_cd",
                        "parm_cd","startDateTime","endDateTime")
+    columnsToMelt <- columnsToMelt[columnsToMelt %in% names(data)]
     longDF <- melt(data, columnsToMelt)
     wideDF <- dcast(longDF, ... ~ variable + parm_cd )
     wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x))
diff --git a/man/importRDB1.Rd b/man/importRDB1.Rd
index 00df135a..3865aab0 100644
--- a/man/importRDB1.Rd
+++ b/man/importRDB1.Rd
@@ -13,7 +13,7 @@ importRDB1(obs_url, asDateTime = FALSE, qw = FALSE)
 \item{qw}{logical, if TRUE parses as water quality data (where dates/times are in start and end times)}
 }
 \value{
-data a data frame containing columns agency, site, dateTime, values, and remark codes for all requested combinations
+data a data frame containing columns agency, site, dateTime (converted to UTC), values, and remark codes for all requested combinations
 }
 \description{
 This function accepts a url parameter that already contains the desired
@@ -32,7 +32,11 @@ urlMulti <- constructNWISURL("04085427",c("00060","00010"),
          startDate,endDate,"dv",statCd=c("00003","00001"),"tsv")
 multiData <- importRDB1(urlMulti)
 unitDataURL <- constructNWISURL(siteNumber,property,
-         "2014-10-10","2014-10-10","uv",format="tsv")
+         "2013-11-03","2013-11-03","uv",format="tsv") #includes timezone switch
 unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
+qwURL <- constructNWISURL(c('04024430','04024000'),
+          c('34247','30234','32104','34220'),
+         "2010-11-03","","qw",format="rdb")
+qwData <- importRDB1(qwURL, qw=TRUE)
 }
 
diff --git a/man/readNWISqw.Rd b/man/readNWISqw.Rd
index 9e8ab380..0d16d618 100644
--- a/man/readNWISqw.Rd
+++ b/man/readNWISqw.Rd
@@ -3,7 +3,8 @@
 \alias{readNWISqw}
 \title{Raw Data Import for USGS NWIS QW Data}
 \usage{
-readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE)
+readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE,
+  reshape = TRUE)
 }
 \arguments{
 \item{siteNumber}{string or vector of of USGS site numbers.  This is usually an 8 digit number}
@@ -17,6 +18,8 @@ readNWISqw(siteNumber, pCodes, startDate, endDate, expanded = FALSE)
 \item{expanded}{logical defaults to FALSE. If TRUE, retrieves additional information. Expanded data includes
 remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code),
 dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type).}
+
+\item{reshape}{logical. Will reshape the data if TRUE (default)}
 }
 \value{
 data dataframe with agency, site, dateTime, value, and code columns
@@ -32,7 +35,10 @@ startDate <- '2010-01-01'
 endDate <- ''
 pCodes <- c('34247','30234','32104','34220')
 rawNWISqwData <- readNWISqw(siteNumber,pCodes,startDate,endDate)
-rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,startDate,endDate,expanded=TRUE)
+rawNWISqwDataExpandReshaped <- readNWISqw(siteNumber,pCodes,
+          startDate,endDate,expanded=TRUE)
+rawNWISqwDataExpand <- readNWISqw(siteNumber,pCodes,
+          startDate,endDate,expanded=TRUE,reshape=FALSE)
 }
 \seealso{
 \code{\link{readWQPdata}}, \code{\link{whatWQPsites}},
-- 
GitLab