From 3ea4a421397f530c9d37de8a1b18d99ee6e52a82 Mon Sep 17 00:00:00 2001
From: Laura DeCicco <ldecicco@usgs.gov>
Date: Wed, 18 Nov 2015 17:11:17 -0600
Subject: [PATCH] data.table

---
 DESCRIPTION   |   3 +-
 NAMESPACE     |   3 +
 R/importWQP.R | 189 +++++++++++++++++++-------------------------------
 3 files changed, 77 insertions(+), 118 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5bc6ef52..1213829a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,6 +26,7 @@ Imports:
     XML,
     RCurl,
     reshape2,
+    data.table,
     lubridate,
     plyr,
     stats,
@@ -39,4 +40,4 @@ VignetteBuilder: knitr
 BuildVignettes: true
 BugReports: https://github.com/USGS-R/dataRetrieval/issues
 URL: https://github.com/USGS-R/dataRetrieval, http://pubs.usgs.gov/tm/04/a10/
-RoxygenNote: 5.0.0
+RoxygenNote: 5.0.1
diff --git a/NAMESPACE b/NAMESPACE
index 8109f841..132e8260 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,10 +33,13 @@ export(whatWQPsites)
 export(zeroPad)
 import(RCurl)
 import(XML)
+import(data.table)
 import(lubridate)
 import(stats)
 import(utils)
 importFrom(dplyr,left_join)
+importFrom(lubridate,fast_strptime)
+importFrom(lubridate,parse_date_time)
 importFrom(plyr,rbind.fill.matrix)
 importFrom(reshape2,dcast)
 importFrom(reshape2,melt)
diff --git a/R/importWQP.R b/R/importWQP.R
index af9154a2..97fb6321 100644
--- a/R/importWQP.R
+++ b/R/importWQP.R
@@ -13,10 +13,11 @@
 #' @export
 #' @seealso \code{\link{readWQPdata}}, \code{\link{readWQPqw}}, \code{\link{whatWQPsites}}
 #' @import RCurl
-#' @import lubridate
 #' @import utils
 #' @import stats
-#' @importFrom dplyr left_join
+#' @import data.table 
+#' @importFrom lubridate parse_date_time
+#' @importFrom lubridate fast_strptime
 #' @examples
 #' # These examples require an internet connection to run
 #' 
@@ -41,73 +42,12 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
                           "America/Phoenix","America/Metlakatla"))
   }
   
-  if(file.exists(obs_url)){
-    if(zip){
-      obs_url <- unzip(obs_url)
-    } 
-    suppressWarnings(namesData <- read.delim(obs_url , header = TRUE, quote="",
-                                             dec=".", sep='\t', colClasses='character',nrow=1))
-    
-    classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
-    
-    classColumns[grep("MeasureValue",names(classColumns))] <- NA
-    
-    suppressWarnings(retval <- read.delim(obs_url, header = TRUE, quote="", 
-                                          dec=".", sep='\t', colClasses=as.character(classColumns)))
-    if(zip){
-      unlink(obs_url)
-    }
+  if(!file.exists(obs_url)){
+    h <- basicHeaderGatherer()
+    httpHEAD(obs_url, headerfunction = h$update)
     
-  } else {
-  
-    if(zip){
-      h <- basicHeaderGatherer()
-      httpHEAD(obs_url, headerfunction = h$update)
-      
-      headerInfo <- h$value()
-      
-      temp <- tempfile()
-      options(timeout = 120)
-      
-      possibleError <- tryCatch({
-        download.file(obs_url,temp, quiet=TRUE, mode='wb')
-        },
-        error = function(e)  {
-          stop(e, "with url:", obs_url)
-        }
-      )
-      
-      if(headerInfo['status'] == "200"){
-        doc <- unzip(temp)
-      } else {
-        unlink(temp)
-  
-        stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
-      }
-
-    } else {
-      doc <- getWebServiceData(obs_url)
-      headerInfo <- attr(doc, "headerInfo")
-    }
-    
-    library(readr)
-    retval <- read_tsv(doc, col_types = cols(`ActivityStartTime/Time` = col_character(),
-                                             `ActivityEndTime/Time` = col_character(),
-                                             USGSPCode = col_character()))
-    
-#     suppressWarnings(namesData <- read.delim(if(zip) doc else textConnection(doc) , header = TRUE, quote="",
-#                                              dec=".", sep='\t', colClasses='character',nrow=1))
-#     
-#     classColumns <- setNames(rep('character',ncol(namesData)),names(namesData))
-#     
-#     classColumns[grep("MeasureValue",names(classColumns))] <- NA
-#     
-#     suppressWarnings(retval <- read.delim(if(zip) doc else textConnection(doc), header = TRUE, quote="", 
-#                                           dec=".", sep='\t', colClasses=as.character(classColumns)))
-    if(zip) unlink(doc)
-      
+    headerInfo <- h$value()
     numToBeReturned <- as.numeric(headerInfo["Total-Result-Count"])
-  
     
     if(headerInfo['Total-Result-Count'] == "0"){
       warning("No data returned")
@@ -121,67 +61,82 @@ importWQP <- function(obs_url, zip=FALSE, tz=""){
       return(data.frame())
     }
     
+    if(headerInfo['status'] == "200"){
+
+      if(zip){
+        temp <- tempfile()
+        options(timeout = 120)
+        
+        possibleError <- tryCatch({
+            suppressWarnings(download.file(obs_url,temp, quiet=TRUE, mode='wb'))
+          },
+          error = function(e)  {
+            stop(e, "with url:", obs_url)
+          }
+        )
+        
+        if(headerInfo['status'] == "200"){
+          doc <- unzip(temp)
+          retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE)
+        } else {
+          stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
+        }
+        unlink(doc)
+      } else {
+        retval <- suppressWarnings(fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE))
+      }
+    } else {
+      stop("Status:", headerInfo['status'], ": ", headerInfo['statusMessage'], "\nFor: ", obs_url)
+    }
+    
     actualNumReturned <- nrow(retval)
     if(actualNumReturned != numToBeReturned) warning(numToBeReturned, " sample results were expected, ", actualNumReturned, " were returned")
     
+  } else {
+    
+    if(zip){
+      doc <- unzip(obs_url)
+      retval <- fread(doc,colClasses = "character",verbose = FALSE,showProgress = FALSE)
+      unlink(doc)
+    }  else {
+      retval <- fread(obs_url,colClasses = "character",verbose = FALSE,showProgress = FALSE)
+    }
+
   }
+
+  dateCols <- c("ActivityStartDate","ActivityEndDate","AnalysisStartDate","PreparationStartDate")
+  retval <- suppressWarnings(retval[, (dateCols) := lapply(.SD, function(x) as.Date(parse_date_time(x, c("Ymd", "mdY")))),
+                                      .SDcols = dateCols])
   
-  retval[,names(which(sapply(retval[,grep("MeasureValue",names(retval))], function(x)all(is.na(x)))))] <- ""
+  numTmp <- names(retval)[grep("Value",names(retval))]
+  retval <- suppressWarnings(retval[, (numTmp) := lapply(.SD, as.numeric), .SDcols = numTmp])
   
-  offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
-                            code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"),
-                            stringsAsFactors = FALSE)
+  offsetLibrary <- data.table(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10),
+                              code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST"),
+                              key = "code")
+  retval <- setkey(retval, ActivityStartTime/TimeZoneCode)
+  retval <- retval[,timeZoneStart:=offsetLibrary[SJ(retval$`ActivityStartTime/TimeZoneCode`)]$offset]
+  retval <- setkey(retval, ActivityEndTime/TimeZoneCode)
+  retval <- retval[,timeZoneEnd:=offsetLibrary[SJ(retval$`ActivityEndTime/TimeZoneCode`)]$offset]
   
-  retval <- left_join(retval, offsetLibrary, by=c("ActivityStartTime/TimeZoneCode"="code"))
-  names(retval)[names(retval) == "offset"] <- "timeZoneStart"
-  retval <- left_join(retval, offsetLibrary, by=c("ActivityEndTime/TimeZoneCode"="code"))
-  names(retval)[names(retval) == "offset"] <- "timeZoneEnd"
-
-  retval$timeZoneStart[is.na(retval$timeZoneStart)] <- 0
-  retval$timeZoneEnd[is.na(retval$timeZoneEnd)] <- 0
+  retval <- retval[,ActivityStartDateTime:=paste(ActivityStartDate, `ActivityStartTime/Time`)]
+  retval <- retval[,ActivityStartDateTime:=fast_strptime(ActivityStartDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneStart]
   
-#   if("ActivityStartDate" %in% names(retval)){
-#     if(any(retval$ActivityStartDate != "")){
-#       suppressWarnings(retval$ActivityStartDate <- as.Date(parse_date_time(retval$ActivityStartDate, c("Ymd", "mdY"))))
-#     }
-#   }
-# 
-#   if("ActivityEndDate" %in% names(retval)){
-#     if(any(retval$ActivityEndDate != "")){
-#       suppressWarnings(retval$ActivityEndDate <- as.Date(parse_date_time(retval$ActivityEndDate, c("Ymd", "mdY"))))
-#     }        
-#   }
-
-  if(any(!is.na(retval$timeZoneStart))){
-    retval$ActivityStartDateTime <- with(retval, as.POSIXct(paste(ActivityStartDate, `ActivityStartTime/Time`),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
-    retval$ActivityStartDateTime <- retval$ActivityStartDateTime + retval$timeZoneStart*60*60
-    retval$ActivityStartDateTime <- as.POSIXct(retval$ActivityStartDateTime)
-    if(tz != ""){
-      attr(retval$ActivityStartDateTime, "tzone") <- tz
-    } else {
-      attr(retval$ActivityStartDateTime, "tzone") <- "UTC"
-    }      
-  }
+  retval <- retval[,ActivityEndDateTime:=paste(ActivityEndDate, `ActivityEndTime/Time`)]
+  retval <- retval[,ActivityEndDateTime:=fast_strptime(ActivityEndDateTime, '%Y-%m-%d %H:%M:%S')+60*60*timeZoneEnd]
   
-  if(any(!is.na(retval$timeZoneEnd))){      
-    retval$ActivityEndDateTime <- with(retval, as.POSIXct(paste(ActivityEndDate, `ActivityEndTime/Time`),format="%Y-%m-%d %H:%M:%S", tz = "UTC"))
-    retval$ActivityEndDateTime <- retval$ActivityEndDateTime + retval$timeZoneEnd*60*60
-    retval$ActivityEndDateTime <- as.POSIXct(retval$ActivityEndDateTime)
-    if(tz != ""){
-      attr(retval$ActivityEndDateTime, "tzone") <- tz
-    } else {
-      attr(retval$ActivityEndDateTime, "tzone") <- "UTC"
-    }
-  }
-  
-  if(all(is.na(retval$ActivityEndDateTime))){
-    retval$ActivityEndDateTime <- NULL
-  }
 
-  retval <- retval[order(retval$OrganizationIdentifier, 
-                         retval$MonitoringLocationIdentifier, 
-                         retval$ActivityStartDateTime, decreasing = FALSE),]
+
+#   if(all(is.na(retval$ActivityEndDateTime))){
+#     retval$ActivityEndDateTime <- NULL
+#   }
+
+  retval <- retval[order(OrganizationIdentifier, 
+                         MonitoringLocationIdentifier, 
+                         ActivityStartDateTime)]
   
+  retval <- setDF(retval)
+  names(retval) <- gsub("/",".",names(retval))
   return(retval)
   
   
-- 
GitLab