From 980fc32c97575dbe1158aa3896622b8dfba1a5c5 Mon Sep 17 00:00:00 2001 From: Laura DeCicco <ldecicco@usgs.gov> Date: Wed, 8 Jan 2014 13:50:15 -0600 Subject: [PATCH] Added option to get extended qw results from NWIS. --- R/constructNWISURL.r | 10 +++++-- R/retrieveNWISqwData.r | 56 ++++++++++++++++++++++++++++++--------- man/constructNWISURL.Rd | 7 ++++- man/retrieveNWISqwData.Rd | 7 ++++- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/R/constructNWISURL.r b/R/constructNWISURL.r index f9695c6c..b57c557a 100644 --- a/R/constructNWISURL.r +++ b/R/constructNWISURL.r @@ -13,6 +13,7 @@ #' @param format string, can be "tsv" or "xml", and is only applicable for daily and unit value requests. "tsv" returns results faster, but there is a possiblitiy that an incomplete file is returned without warning. XML is slower, #' but will offer a warning if the file was incomplete (for example, if there was a momentary problem with the internet connection). It is possible to safely use the "tsv" option, #' but the user must carefully check the results to see if the data returns matches what is expected. The default is therefore "xml". +#' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information, only applicable for qw data. #' @param interactive logical Option for interactive mode. If TRUE, there is user interaction for error handling and data checks. #' @keywords data import USGS web service #' @return url string @@ -28,7 +29,7 @@ #' url_qw <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'qw') #' url_wqp <- constructNWISURL(siteNumber,c('01075','00029','00453'),startDate,endDate,'wqp') #' url_daily_tsv <- constructNWISURL(siteNumber,pCode,startDate,endDate,'dv',statCd=c("00003","00001"),format="tsv") -constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003", format="xml",interactive=TRUE){ +constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,statCd="00003", format="xml",expanded=FALSE,interactive=TRUE){ startDate <- formatCheckDate(startDate, "StartDate", interactive=interactive) endDate <- formatCheckDate(endDate, "EndDate", interactive=interactive) @@ -67,8 +68,13 @@ constructNWISURL <- function(siteNumber,parameterCd,startDate,endDate,service,st "group_key=NONE&sitefile_output_format=html_table&column_name=agency_cd", "column_name=site_no&column_name=station_nm&inventory_output=0&rdb_inventory_output=file", "TZoutput=0&pm_cd_compare=Greater%20than&radio_parm_cds=previous_parm_cds&qw_attributes=0", - "format=rdb&qw_sample_wide=separated_wide&rdb_qw_attributes=0&date_format=YYYY-MM-DD", + "format=rdb&rdb_qw_attributes=0&date_format=YYYY-MM-DD", "rdb_compression=value", sep = "&") + if(expanded){ + url <- paste(url,"&qw_sample_wide=0",sep="") + } else { + url <- paste(url,"&qw_sample_wide=separated_wide",sep="") + } if (nzchar(startDate)) { url <- paste(url,"&begin_date=",startDate,sep="") diff --git a/R/retrieveNWISqwData.r b/R/retrieveNWISqwData.r index a099157a..4530b01a 100644 --- a/R/retrieveNWISqwData.r +++ b/R/retrieveNWISqwData.r @@ -8,10 +8,12 @@ #' @param pCodes string or vector of USGS parameter code. This is usually an 5 digit number. #' @param startDate string starting date for data retrieval in the form YYYY-MM-DD. #' @param endDate string ending date for data retrieval in the form YYYY-MM-DD. +#' @param expanded logical defaults to FALSE. If TRUE, retrieves additional information. #' @param interactive logical Option for interactive mode. If true, there is user interaction for error handling and data checks. #' @keywords data import USGS web service #' @return data dataframe with agency, site, dateTime, value, and code columns #' @export +#' @import reshape2 #' @examples #' # These examples require an internet connection to run #' siteNumber <- c('04024430','04024000') @@ -19,14 +21,15 @@ #' endDate <- '' #' pCodes <- c('34247','30234','32104','34220') #' rawNWISqwData <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate) +#' rawNWISqwDataExpand <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate,expanded=TRUE) #' # To get data in Sample dataframe format: #' data <- rawNWISqwData[,names(rawNWISqwData) != "site"] #' data$dateTime <- as.Date(data$dateTime) #' compressedData <- compressData(data) #' Sample <- populateSampleColumns(compressedData) -retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,interactive=TRUE){ +retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,expanded=FALSE,interactive=TRUE){ - url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",interactive=interactive) + url <- constructNWISURL(siteNumber,pCodes,startDate,endDate,"qw",expanded=expanded,interactive=interactive) tmp <- read.delim( url, @@ -41,20 +44,47 @@ retrieveNWISqwData <- function (siteNumber,pCodes,startDate,endDate,interactive= dataType <- tmp[1,] data <- tmp[-1,] row.names(data) <- NULL - data$site <- with(data,paste(agency_cd,site_no,sep="-")) - data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC")) - rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd", - "coll_ent_cd","medium_cd","tu_id","body_part_id", - "sample_end_dt","sample_end_tm","sample_dt","sample_tm","sample_start_time_datum_cd") - data <- data[,!(names(data) %in% rmCol)] - names(data) <- c(gsub("r", "qualifier_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))]) - names(data) <- c(gsub("p", "value_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))]) - data[,grep("value",names(data))] <- sapply( data[,grep("value",names(data))], function(x) as.numeric(x)) - - data <- data[,c(ncol(data):(ncol(data)-1),(1:(ncol(data)-2)))] + if(expanded){ + data$site <- with(data,paste(agency_cd,site_no,sep="-")) + data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC")) + + if (any("" != data[["sample_end_dt"]])){ + data$dateTimeEnd <- with(data, as.POSIXct(paste(sample_end_dt,sample_end_tm,sep=" "),tz="UTC")) + } + + data$result_va <- as.numeric(data$result_va) + data$rpt_lev_va <- as.numeric(data$rpt_lev_va) + rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd", + "coll_ent_cd","medium_cd","tu_id","body_part_id", + "sample_end_dt","sample_end_tm","sample_dt","sample_tm", + "sample_start_time_datum_cd","anl_ent_cd","lab_std_va") + data <- data[,!(names(data) %in% rmCol)] + + longDF <- melt(data, c("parm_cd","dateTime","site")) + wideDF <- dcast(longDF, ... ~ variable + parm_cd ) + wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x)) + order(sapply(strsplit(names(wideDF)[c(-1:-2)],"_"), function(x) x[length(x)])) + + data <- wideDF[,c(1,2,(2+order(sapply(strsplit(names(wideDF)[c(-1:-2)],"_"), function(x) x[length(x)]))))] + + } else { + data$site <- with(data,paste(agency_cd,site_no,sep="-")) + data$dateTime <- with(data, as.POSIXct(paste(sample_dt,sample_tm,sep=" "),tz="UTC")) + rmCol <- c("agency_cd","site_no","tm_datum_rlbty_cd", + "coll_ent_cd","medium_cd","tu_id","body_part_id", + "sample_end_dt","sample_end_tm","sample_dt","sample_tm","sample_start_time_datum_cd") + data <- data[,!(names(data) %in% rmCol)] + + names(data) <- c(gsub("r", "qualifier_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))]) + names(data) <- c(gsub("p", "value_",names(data)[1:(length(names(data))-2)]),names(data)[(length(names(data))-1):length(names(data))]) + + data[,grep("value",names(data))] <- sapply( data[,grep("value",names(data))], function(x) as.numeric(x)) + + data <- data[,c(ncol(data):(ncol(data)-1),(1:(ncol(data)-2)))] + } return (data) } diff --git a/man/constructNWISURL.Rd b/man/constructNWISURL.Rd index 6d55f6e3..981b295f 100644 --- a/man/constructNWISURL.Rd +++ b/man/constructNWISURL.Rd @@ -3,7 +3,8 @@ \title{Construct NWIS url for data retrieval} \usage{ constructNWISURL(siteNumber, parameterCd, startDate, endDate, service, - statCd = "00003", format = "xml", interactive = TRUE) + statCd = "00003", format = "xml", expanded = FALSE, + interactive = TRUE) } \arguments{ \item{siteNumber}{string or vector of strings USGS site @@ -38,6 +39,10 @@ constructNWISURL(siteNumber, parameterCd, startDate, endDate, service, the results to see if the data returns matches what is expected. The default is therefore "xml".} + \item{expanded}{logical defaults to FALSE. If TRUE, + retrieves additional information, only applicable for qw + data.} + \item{interactive}{logical Option for interactive mode. If TRUE, there is user interaction for error handling and data checks.} diff --git a/man/retrieveNWISqwData.Rd b/man/retrieveNWISqwData.Rd index 714521c4..42d48a71 100644 --- a/man/retrieveNWISqwData.Rd +++ b/man/retrieveNWISqwData.Rd @@ -2,7 +2,8 @@ \alias{retrieveNWISqwData} \title{Raw Data Import for USGS NWIS QW Data} \usage{ -retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, interactive = TRUE) +retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, expanded = FALSE, + interactive = TRUE) } \arguments{ \item{siteNumber}{string or vector of strings USGS site @@ -17,6 +18,9 @@ retrieveNWISqwData(siteNumber, pCodes, startDate, endDate, interactive = TRUE) \item{endDate}{string ending date for data retrieval in the form YYYY-MM-DD.} + \item{expanded}{logical defaults to FALSE. If TRUE, + retrieves additional information.} + \item{interactive}{logical Option for interactive mode. If true, there is user interaction for error handling and data checks.} @@ -41,6 +45,7 @@ startDate <- '2010-01-01' endDate <- '' pCodes <- c('34247','30234','32104','34220') rawNWISqwData <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate) +rawNWISqwDataExpand <- retrieveNWISqwData(siteNumber,pCodes,startDate,endDate,expanded=TRUE) # To get data in Sample dataframe format: data <- rawNWISqwData[,names(rawNWISqwData) != "site"] data$dateTime <- as.Date(data$dateTime) -- GitLab