From 980fc32c97575dbe1158aa3896622b8dfba1a5c5 Mon Sep 17 00:00:00 2001
From: Laura DeCicco <ldecicco@usgs.gov>
Date: Wed, 8 Jan 2014 13:50:15 -0600
Subject: [PATCH] Added option to get extended qw results from NWIS.

---
 R/constructNWISURL.r      | 10 +++++--
 R/retrieveNWISqwData.r    | 56 ++++++++++++++++++++++++++++++---------
 man/constructNWISURL.Rd   |  7 ++++-
 man/retrieveNWISqwData.Rd |  7 ++++-
 4 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r
index f9695c6c..b57c557a 100644
--- a/R/constructNWISURL.r
+++ b/R/constructNWISURL.r
@@ -13,6 +13,7 @@
 #' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests.  "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, 
 #' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, 
 #' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". 
+#' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information, only applicable for qw data.
 #' @param interactive logical Option for interactive mode.  If TRUE, there is user interaction for error handling and data checks.
 #' @keywords data import USGS web service
 #' @return url string
@@ -28,7 +29,7 @@
 #' url_qw <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'qw')
 #' url_wqp <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'wqp')
 #' url_daily_tsv <- constructNWISURL(siteNumber,pCode,startDate,endDate,'dv',statCd=c("00003","00001"),format="tsv")
-constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003", format="xml",interactive=TRUE){
+constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003", format="xml",expanded=FALSE,interactive=TRUE){
 
   startDate <- formatCheckDate(startDate, "StartDate", interactive=interactive)
   endDate <- formatCheckDate(endDate, "EndDate", interactive=interactive)
@@ -67,8 +68,13 @@ constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,st
                           "group_key=NONE&sitefile_output_format=html_table&column_name=agency_cd",
                           "column_name=site_no&column_name=station_nm&inventory_output=0&rdb_inventory_output=file",
                           "TZoutput=0&pm_cd_compare=Greater%20than&radio_parm_cds=previous_parm_cds&qw_attributes=0",
-                          "format=rdb&qw_sample_wide=separated_wide&rdb_qw_attributes=0&date_format=YYYY-MM-DD",
+                          "format=rdb&rdb_qw_attributes=0&date_format=YYYY-MM-DD",
                           "rdb_compression=value", sep = "&")
+             if(expanded){
+               url <- paste(url,"&qw_sample_wide=0",sep="")
+             } else {
+               url <- paste(url,"&qw_sample_wide=separated_wide",sep="")
+             }
              
              if (nzchar(startDate)) {
                url <- paste(url,"&begin_date=",startDate,sep="")
diff --git a/R/retrieveNWISqwData.r b/R/retrieveNWISqwData.r
index a099157a..4530b01a 100644
--- a/R/retrieveNWISqwData.r
+++ b/R/retrieveNWISqwData.r
@@ -8,10 +8,12 @@
 #' @param pCodes string or vector of USGS parameter code.  This is usually an 5 digit number.
 #' @param startDate string starting date for data retrieval in the form YYYY-MM-DD.
 #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD.
+#' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information.
 #' @param interactive logical Option for interactive mode.  If true, there is user interaction for error handling and data checks.
 #' @keywords data import USGS web service
 #' @return data dataframe with agency, site, dateTime, value, and code columns
 #' @export
+#' @import reshape2
 #' @examples
 #' # These examples require an internet connection to run
 #' siteNumber <- c('04024430','04024000')
@@ -19,14 +21,15 @@
 #' endDate <- ''
 #' pCodes <- c('34247','30234','32104','34220')
 #' rawNWISqwData <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate)
+#' rawNWISqwDataExpand <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate,expanded=TRUE)
 #' # To get data in Sample dataframe format:
 #' data <- rawNWISqwData[,names(rawNWISqwData) != "site"]
 #' data$dateTime <- as.Date(data$dateTime)
 #' compressedData <- compressData(data)
 #' Sample <- populateSampleColumns(compressedData)
-retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,interactive=TRUE){  
+retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE,interactive=TRUE){  
   
-  url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",interactive=interactive)
+  url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",expanded=expanded,interactive=interactive)
   
   tmp <- read.delim(  
     url, 
@@ -41,20 +44,47 @@ retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,interactive=
   dataType <- tmp[1,]
   data <- tmp[-1,]
   row.names(data) <- NULL
-  data$site <- with(data,paste(agency_cd,site_no,sep="-"))
-  data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC"))
   
-  rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd",
-             "coll_ent_cd","medium_cd","tu_id","body_part_id",
-             "sample_end_dt","sample_end_tm","sample_dt","sample_tm","sample_start_time_datum_cd")
-  data <- data[,!(names(data) %in% rmCol)]
   
-  names(data) <- c(gsub("r", "qualifier_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))])
-  names(data) <- c(gsub("p", "value_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))])
   
-  data[,grep("value",names(data))] <- sapply( data[,grep("value",names(data))], function(x) as.numeric(x))
-  
-  data <- data[,c(ncol(data):(ncol(data)-1),(1:(ncol(data)-2)))]
+  if(expanded){
+    data$site <- with(data,paste(agency_cd,site_no,sep="-"))
+    data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC"))
+    
+    if (any("" != data[["sample_end_dt"]])){
+      data$dateTimeEnd <- with(data, as.POSIXct(paste(sample_end_dt,sample_end_tm,sep=" "),tz="UTC"))
+    } 
+    
+    data$result_va <- as.numeric(data$result_va)
+    data$rpt_lev_va <- as.numeric(data$rpt_lev_va)
+    rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd",
+               "coll_ent_cd","medium_cd","tu_id","body_part_id",
+               "sample_end_dt","sample_end_tm","sample_dt","sample_tm",
+               "sample_start_time_datum_cd","anl_ent_cd","lab_std_va")
+    data <- data[,!(names(data) %in% rmCol)]
+    
+    longDF <- melt(data, c("parm_cd","dateTime","site"))
+    wideDF <- dcast(longDF, ... ~ variable + parm_cd )
+    wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x))
+    order(sapply(strsplit(names(wideDF)[c(-1:-2)],"_"), function(x) x[length(x)]))
+    
+    data <- wideDF[,c(1,2,(2+order(sapply(strsplit(names(wideDF)[c(-1:-2)],"_"), function(x) x[length(x)]))))]
+    
+  } else {
+    data$site <- with(data,paste(agency_cd,site_no,sep="-"))
+    data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC"))
+    rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd",
+               "coll_ent_cd","medium_cd","tu_id","body_part_id",
+               "sample_end_dt","sample_end_tm","sample_dt","sample_tm","sample_start_time_datum_cd")
+    data <- data[,!(names(data) %in% rmCol)]
+    
+    names(data) <- c(gsub("r", "qualifier_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))])
+    names(data) <- c(gsub("p", "value_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))])
+    
+    data[,grep("value",names(data))] <- sapply( data[,grep("value",names(data))], function(x) as.numeric(x))
+    
+    data <- data[,c(ncol(data):(ncol(data)-1),(1:(ncol(data)-2)))]
+  }
   
   return (data)
 }
diff --git a/man/constructNWISURL.Rd b/man/constructNWISURL.Rd
index 6d55f6e3..981b295f 100644
--- a/man/constructNWISURL.Rd
+++ b/man/constructNWISURL.Rd
@@ -3,7 +3,8 @@
 \title{Construct NWIS url for data retrieval}
 \usage{
 constructNWISURL(siteNumber, parameterCd, startDate, endDate, service,
-  statCd = "00003", format = "xml", interactive = TRUE)
+  statCd = "00003", format = "xml", expanded = FALSE,
+  interactive = TRUE)
 }
 \arguments{
   \item{siteNumber}{string or vector of strings USGS site
@@ -38,6 +39,10 @@ constructNWISURL(siteNumber, parameterCd, startDate, endDate, service,
   the results to see if the data returns matches what is
   expected. The default is therefore "xml".}
 
+  \item{expanded}{logical defaults to FALSE. If TRUE,
+  retrieves additional information, only applicable for qw
+  data.}
+
   \item{interactive}{logical Option for interactive mode.
   If TRUE, there is user interaction for error handling and
   data checks.}
diff --git a/man/retrieveNWISqwData.Rd b/man/retrieveNWISqwData.Rd
index 714521c4..42d48a71 100644
--- a/man/retrieveNWISqwData.Rd
+++ b/man/retrieveNWISqwData.Rd
@@ -2,7 +2,8 @@
 \alias{retrieveNWISqwData}
 \title{Raw Data Import for USGS NWIS QW Data}
 \usage{
-retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, interactive = TRUE)
+retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, expanded = FALSE,
+  interactive = TRUE)
 }
 \arguments{
   \item{siteNumber}{string or vector of strings USGS site
@@ -17,6 +18,9 @@ retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, interactive = TRUE)
   \item{endDate}{string ending date for data retrieval in
   the form YYYY-MM-DD.}
 
+  \item{expanded}{logical defaults to FALSE. If TRUE,
+  retrieves additional information.}
+
   \item{interactive}{logical Option for interactive mode.
   If true, there is user interaction for error handling and
   data checks.}
@@ -41,6 +45,7 @@ startDate <- '2010-01-01'
 endDate <- ''
 pCodes <- c('34247','30234','32104','34220')
 rawNWISqwData <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate)
+rawNWISqwDataExpand <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate,expanded=TRUE)
 # To get data in Sample dataframe format:
 data <- rawNWISqwData[,names(rawNWISqwData) != "site"]
 data$dateTime <- as.Date(data$dateTime)
-- 
GitLab