readNWISqw.r 9.76 KB
Newer Older
1
2
#' Raw Data Import for USGS NWIS QW Data
#'
Laura A DeCicco's avatar
Laura A DeCicco committed
3
4
5
#' Imports data from NWIS web service. This function gets the data from here: \url{https://nwis.waterdata.usgs.gov/nwis/qwdata}
#' A list of parameter codes can be found here: \url{https://nwis.waterdata.usgs.gov/nwis/pmcodes/}
#' A list of statistic codes can be found here: \url{https://nwis.waterdata.usgs.gov/nwis/help/?read_file=stat&format=table}
6
#'
Laura A DeCicco's avatar
Laura A DeCicco committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#' @details Valid parameter code groups are "All," or group codes:
#'\tabular{ll}{
#'Code \tab Description\cr
#'INF \tab Information \cr
#'PHY \tab Physical \cr
#'INM \tab Inorganics, Major, Metals (major cations) \cr
#'INN \tab Inorganics, Major, Non-metals (major anions) \cr
#'NUT \tab Nutrient \cr
#'MBI \tab Microbiological \cr
#'BIO \tab Biological \cr
#'IMN \tab Inorganics, Minor, Non-metals \cr
#'IMM \tab Inorganics, Minor, Metals \cr
#'TOX \tab Toxicity \cr
#'OPE \tab Organics, pesticide \cr
#'OPC \tab Organics, PCBs \cr
#'OOT \tab Organics, other \cr
#'RAD \tab Radiochemical \cr
#'SED \tab Sediment \cr
#'POP \tab Population/community \cr
#'}
27
#'If more than one parameter group is requested, only sites that data for all requested groups are returned.
Laura A DeCicco's avatar
Laura A DeCicco committed
28
#'
29
#' @param siteNumbers character of USGS site numbers.  This is usually an 8 digit number
Laura A DeCicco's avatar
Laura A DeCicco committed
30
31
#' @param parameterCd character that contains the code for a parameter
#' group, or a character vector of 5-digit parameter codes. See \bold{Details}.
32
#' @param startDate character starting date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates
33
#' retrieval for the earliest possible record. Date arguments are always specified in local time.
34
#' @param endDate character ending date for data retrieval in the form YYYY-MM-DD. Default is "" which indicates
35
#' retrieval for the latest possible record. Date arguments are always specified in local time.
Laura A DeCicco's avatar
Laura A DeCicco committed
36
#' @param expanded logical defaults to \code{TRUE}. If \code{TRUE}, retrieves additional information. Expanded data includes
37
#' remark_cd (remark code), result_va (result value), val_qual_tx (result value qualifier code), meth_cd (method code),
Laura A DeCicco's avatar
Laura A DeCicco committed
38
39
40
#' dqi_cd (data-quality indicator code), rpt_lev_va (reporting level), and rpt_lev_cd (reporting level type). If \code{FALSE},
#' only returns remark_cd (remark code) and result_va (result value). Expanded = \code{FALSE} will not give
#' sufficient information for unbiased statistical analysis.
Laura A DeCicco's avatar
Laura A DeCicco committed
41
42
43
#' @param reshape logical, reshape the expanded data. If \code{TRUE}, then return a wide data frame with all water-quality in a single row for each sample. 
#' If \code{FALSE} (default), then return a long data frame with each water-quality result in a single row. This
#' argument is only applicable to expanded data. Data requested using \code{expanded=FALSE} is always returned in the wide format.
44
45
#' @param tz character to set timezone attribute of output columns: startDateTime and endDateTime. Default is an empty quote, which converts the 
#' datetimes to UTC (properly accounting for daylight savings times).
Laura A DeCicco's avatar
Laura A DeCicco committed
46
47
#' Possible values to provide are "America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla"
48
#' @keywords data import USGS web service
49
#' @return A data frame with at least the following columns:
50
51
52
53
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr
#' site_no \tab character \tab The USGS site number \cr
54
55
56
57
58
#' sample_dt \tab Date \tab The date the sample was collected \cr 
#' sample_tm \tab character \tab The reported sample collection time \cr
#' startDateTime \tab POSIXct \tab Combining sample_dt and sample_tm, a date/time column is created, and converted into UTC 
#' (unless the tz argument specifies a different time zone)\cr
#' endDateTime \tab POSIXct \tab If any sample_end_dt and sample_end_dt exist, this column is created similar to startDateTime\cr
59
60
#' }
#' 
61
62
63
#' Further columns will be included depending on the requested output format (expanded = TRUE or FALSE).
#' 
#' 
64
65
66
67
68
69
#' There are also several useful attributes attached to the data frame:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' url \tab character \tab The url used to generate the data \cr
#' queryTime \tab POSIXct \tab The time the data was returned \cr
#' comment \tab character \tab Header comments from the RDB file \cr
70
71
#' siteInfo \tab data frame \tab A data frame containing information on the requested sites \cr
#' variableInfo \tab data frame \tab A data frame containing information on the requested parameters \cr
72
#' }
73
#' @export
Laura A DeCicco's avatar
Laura A DeCicco committed
74
75
#' @importFrom reshape2 melt
#' @importFrom reshape2 dcast
76
#' @importFrom dplyr left_join
Laura A DeCicco's avatar
Laura A DeCicco committed
77
78
#' @seealso \code{\link{readWQPdata}}, \code{\link{whatWQPsites}}, 
#' \code{\link{readWQPqw}}, \code{\link{constructNWISURL}}
79
#' @examples
80
#' siteNumbers <- c('04024430','04024000')
81
82
#' startDate <- '2010-01-01'
#' endDate <- ''
Laura A DeCicco's avatar
Laura A DeCicco committed
83
#' parameterCd <- c('34247','30234','32104','34220')
84
#' \dontrun{
Laura A DeCicco's avatar
Laura A DeCicco committed
85
86
#' rawNWISqwData <- readNWISqw(siteNumbers,parameterCd,startDate,endDate)
#' rawNWISqwDataReshaped <- readNWISqw(siteNumbers,parameterCd,
87
#'           startDate,endDate,reshape=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
88
89
#' parameterCd <- "all"
#' rawNWISall <- readNWISqw(siteNumbers,parameterCd,
Laura A DeCicco's avatar
Laura A DeCicco committed
90
91
92
93
94
95
96
97
98
#'           startDate,endDate)
#' pgroup <- c("NUT")
#' rawNWISNutrients <- readNWISqw(siteNumbers,pgroup,
#'           startDate,endDate)
#' groups <- c("NUT","OPE")
#' rawNWISNutOpe <- readNWISqw(siteNumbers,groups,
#'           startDate,endDate) 
#' rawNWISOpe <- readNWISqw(siteNumbers,"OPE",
#'           startDate,endDate) 
99
#'          } 
Laura A DeCicco's avatar
Laura A DeCicco committed
100
readNWISqw <- function (siteNumbers,parameterCd,startDate="",endDate="",
101
                        expanded=TRUE,reshape=FALSE,tz=""){  
102
  
Laura A DeCicco's avatar
Laura A DeCicco committed
103
104
105
106
  pgrp <- c("INF", "PHY", "INM", "INN", "NUT", "MBI", "BIO", "IMM", "IMN", "TOX",
                           "OPE", "OPC", "OOT", "RAD", "XXX", "SED", "POP")

  if(any(parameterCd == "all") | any(parameterCd == "All") ){
Laura A DeCicco's avatar
Laura A DeCicco committed
107
    
Laura A DeCicco's avatar
Laura A DeCicco committed
108
    siteNumbers <- paste(siteNumbers, collapse=",")
Laura A DeCicco's avatar
Laura A DeCicco committed
109
    url <- paste0("https://nwis.waterdata.usgs.gov/nwis/qwdata?multiple_site_no=", siteNumbers,
Laura A DeCicco's avatar
Laura A DeCicco committed
110
111
112
113
114
115
           "&sort_key=site_no&group_key=NONE&inventory_output=0",
           "&begin_date=", startDate, "&end_date=", endDate,
           "&TZoutput=0",
           "&radio_parm_cds=all_parm_cds&qw_attributes=0&format=rdb",
           "&qw_sample_wide=0&rdb_qw_attributes=expanded&date_format=YYYY-MM-DD",
           "&rdb_compression=value&list_of_search_criteria=multiple_site_no")
Laura A DeCicco's avatar
Laura A DeCicco committed
116
117
118
  } else if (all(parameterCd %in% pgrp)){
    siteNumbers <- paste(siteNumbers, collapse=",")
    groups <- paste(parameterCd, collapse=",")
Laura A DeCicco's avatar
Laura A DeCicco committed
119
    url <- paste0("https://nwis.waterdata.usgs.gov/nwis/qwdata?multiple_site_no=", siteNumbers,
Laura A DeCicco's avatar
Laura A DeCicco committed
120
121
122
123
124
125
                  "&sort_key=site_no&group_key=NONE&inventory_output=0",
                  "&begin_date=", startDate, "&end_date=", endDate,
                  "&TZoutput=0&param_group=", groups,
                  "&qw_attributes=0&format=rdb",
                  "&qw_sample_wide=0&rdb_qw_attributes=expanded&date_format=YYYY-MM-DD",
                  "&rdb_compression=value&list_of_search_criteria=multiple_site_no")
Laura A DeCicco's avatar
Laura A DeCicco committed
126
127
128
129
130
131
132

  } else {
    url <- constructNWISURL(siteNumbers,
                            parameterCd,
                            startDate,
                            endDate,"qw",expanded=expanded)    
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
133

Laura A DeCicco's avatar
Laura A DeCicco committed
134
135
136
137
138
139
140
  data <- importRDB1(url,asDateTime=TRUE, tz = tz)
  
  url <- attr(data, "url")
  comment <- attr(data, "comment")
  queryTime <- attr(data, "queryTime")
  header <- attr(data, "header")

Laura A DeCicco's avatar
Laura A DeCicco committed
141
  
142
  
Laura A DeCicco's avatar
Laura A DeCicco committed
143
144
145
146
147
148
  if(reshape){
    if(expanded){
      columnsToMelt <- c("agency_cd","site_no","sample_dt","sample_tm",
                         "sample_end_dt","sample_end_tm","sample_start_time_datum_cd","tm_datum_rlbty_cd",
                         "parm_cd","startDateTime","endDateTime","coll_ent_cd", "medium_cd","project_cd",
                         "aqfr_cd","tu_id","body_part_id", "hyd_cond_cd", "samp_type_cd",
Laura A DeCicco's avatar
Laura A DeCicco committed
149
150
                         "hyd_event_cd","sample_lab_cm_tx","tz_cd","startDateTime","endDateTime",
                         "sample_start_time_datum_cd_reported","sample_end_time_datum_cd_reported")
Laura A DeCicco's avatar
Laura A DeCicco committed
151
152
      measureCols <- names(data)[!(names(data) %in% columnsToMelt)]
      columnsToMelt <- names(data)[(names(data) %in% columnsToMelt)]
Laura A DeCicco's avatar
Laura A DeCicco committed
153
      dataWithPcodes <- data[data$parm_cd != "",]
Laura A DeCicco's avatar
Laura A DeCicco committed
154
      if(sum(data$parm_cd == "") > 0){
Laura A DeCicco's avatar
Laura A DeCicco committed
155
156
        warning("Some or all data returned without pCodes, those data will not be included in reshape")
      }
Laura A DeCicco's avatar
Laura A DeCicco committed
157

Laura A DeCicco's avatar
Laura A DeCicco committed
158
      longDF <- reshape2::melt(dataWithPcodes, measure.vars =  measureCols,
Laura A DeCicco's avatar
Laura A DeCicco committed
159
                     variable.name = "variable", value.name = "value", na.rm = FALSE)
Laura A DeCicco's avatar
Laura A DeCicco committed
160
      wideDF <- reshape2::dcast(longDF, ... ~ variable + parm_cd )
Laura A DeCicco's avatar
Laura A DeCicco committed
161
162
163
      wideDF[,grep("_va_",names(wideDF))] <- sapply(wideDF[,grep("_va_",names(wideDF))], function(x) as.numeric(x))
      pCodesReturned <- unique(dataWithPcodes$parm_cd)
      groupByPCode <- as.vector(sapply(pCodesReturned, function(x) grep(x, names(wideDF)) ))
Laura A DeCicco's avatar
Laura A DeCicco committed
164
      data <- wideDF[,c(which(names(wideDF) %in% columnsToMelt),groupByPCode)]
Laura A DeCicco's avatar
Laura A DeCicco committed
165

Laura A DeCicco's avatar
Laura A DeCicco committed
166
167
    } else {
      warning("Reshape can only be used with expanded data. Reshape request will be ignored.")
Laura A DeCicco's avatar
Laura A DeCicco committed
168
    }
169
170
  }
  
Laura A DeCicco's avatar
Laura A DeCicco committed
171
172
173
174
175
176
  if( !(is.null(siteNumbers)) && !(is.na(siteNumbers)) & length(siteNumbers) > 0){
    siteInfo <- readNWISsite(siteNumbers)
    if(nrow(data) > 0){
      siteInfo <- left_join(unique(data[,c("agency_cd","site_no")]),siteInfo, by=c("agency_cd","site_no"))
    }
    attr(data, "siteInfo") <- siteInfo    
177
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
178
179

  parameterCd <- unique(data$parm_cd)
180
  
Laura A DeCicco's avatar
Laura A DeCicco committed
181
182
183
184
185
  if(!(is.null(parameterCd)) && !is.na(parameterCd) & length(parameterCd) > 0){
    parameterCd <- parameterCd[parameterCd != ""]
    varInfo <- readNWISpCode(parameterCd)
    attr(data, "variableInfo") <- varInfo
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
186
187
  
  attr(data, "statisticInfo") <- NULL
Laura A DeCicco's avatar
Laura A DeCicco committed
188
  
189
  attr(data, "url") <- url
Laura A DeCicco's avatar
Laura A DeCicco committed
190
191
192
193
  attr(data, "comment") <- comment
  attr(data, "queryTime") <- queryTime
  attr(data, "header") <- header
  
194
195
  return (data)

Laura A DeCicco's avatar
Laura A DeCicco committed
196
}