importRDB1.r 12.3 KB
Newer Older
1

2
3
4
#' Function to return data from the NWIS RDB 1.0 format
#'
#' This function accepts a url parameter that already contains the desired
5
6
#' NWIS site, parameter code, statistic, startdate and enddate. It is not
#' recommended to use the RDB format for importing multi-site data. 
7
#'
Laura A DeCicco's avatar
Laura A DeCicco committed
8
#' @param obs_url character containing the url for the retrieval or a file path to the data file.
Laura A DeCicco's avatar
Laura A DeCicco committed
9
#' @param asDateTime logical, if \code{TRUE} returns date and time as POSIXct, if \code{FALSE}, Date
Laura A DeCicco's avatar
Laura A DeCicco committed
10
11
12
13
14
#' @param tz character to set timezone attribute of datetime. Default converts the datetimes to UTC 
#' (properly accounting for daylight savings times based on the data's provided tz_cd column).
#' Recommended US values include "UTC","America/New_York","America/Chicago", "America/Denver","America/Los_Angeles",
#' "America/Anchorage","America/Honolulu","America/Jamaica","America/Managua","America/Phoenix", and "America/Metlakatla".
#' For a complete list, see \url{https://en.wikipedia.org/wiki/List_of_tz_database_time_zones}
Laura A DeCicco's avatar
Laura A DeCicco committed
15
#' @param convertType logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates, datetimes,
16
17
18
19
20
21
#' numerics based on a standard algorithm. If false, everything is returned as a character
#' @return A data frame with the following columns:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' agency_cd \tab character \tab The NWIS code for the agency reporting the data\cr
#' site_no \tab character \tab The USGS site number \cr
Laura A DeCicco's avatar
Laura A DeCicco committed
22
#' datetime \tab POSIXct \tab The date and time of the value converted to UTC (if asDateTime = \code{TRUE}), \cr 
23
24
25
26
#' \tab character \tab or raw character string (if asDateTime = FALSE) \cr
#' tz_cd \tab character \tab The time zone code for datetime \cr
#' code \tab character \tab Any codes that qualify the corresponding value\cr
#' value \tab numeric \tab The numeric value for the parameter \cr
27
#' tz_cd_reported \tab The originally reported time zone \cr
28
29
30
31
32
33
#' }
#' Note that code and value are repeated for the parameters requested. The names are of the form 
#' XD_P_S, where X is literal, 
#' D is an option description of the parameter, 
#' P is the parameter code, 
#' and S is the statistic code (if applicable).
34
#' If a date/time (dt) column contained incomplete date and times, a new column of dates and time was inserted. This could happen
35
#' when older data was reported as dates, and newer data was reported as a date/time.
36
37
38
39
40
41
42
43
#' 
#' There are also several useful attributes attached to the data frame:
#' \tabular{lll}{
#' Name \tab Type \tab Description \cr
#' url \tab character \tab The url used to generate the data \cr
#' queryTime \tab POSIXct \tab The time the data was returned \cr
#' comment \tab character \tab Header comments from the RDB file \cr
#' }
44
#' @export
Laura A DeCicco's avatar
Laura A DeCicco committed
45
#' @examplesIf is_dataRetrieval_user()
46
#' site_id <- "02177000"
47
48
#' startDate <- "2012-09-01"
#' endDate <- "2012-10-01"
49
50
#' offering <- "00003"
#' property <- "00060"
51
#' 
52
#' obs_url <- constructNWISURL(site_id,property,
53
#'          startDate,endDate,"dv",format="tsv")
54
#' \donttest{
Laura A DeCicco's avatar
Laura A DeCicco committed
55
56
#' data <- importRDB1(obs_url)
#' 
57
#' 
58
#' urlMultiPcodes <- constructNWISURL("04085427",c("00060","00010"),
59
#'          startDate,endDate,"dv",statCd=c("00003","00001"),"tsv")
Laura A DeCicco's avatar
Laura A DeCicco committed
60
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
61
#'   multiData <- importRDB1(urlMultiPcodes)
Laura A DeCicco's avatar
Laura A DeCicco committed
62
#' 
63
#' unitDataURL <- constructNWISURL(site_id,property,
64
#'          "2020-10-30","2020-11-01","uv",format="tsv") #includes timezone switch
Laura A DeCicco's avatar
Laura A DeCicco committed
65
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
66
#'   unitData <- importRDB1(unitDataURL, asDateTime=TRUE)
Laura A DeCicco's avatar
Laura A DeCicco committed
67
#' 
68
69
#' qwURL <- constructNWISURL(c('04024430','04024000'),
#'           c('34247','30234','32104','34220'),
Laura A DeCicco's avatar
Laura A DeCicco committed
70
#'          "2010-11-03","","qw",format="rdb")
Laura A DeCicco's avatar
Laura A DeCicco committed
71
72
73
#' 
#' qwData <- importRDB1(qwURL, asDateTime=TRUE, tz="America/Chicago")
#' 
74
#' iceSite <- '04024000'
Laura A DeCicco's avatar
Laura A DeCicco committed
75
76
#' start <- "2015-11-09"
#' end <- "2015-11-24"
77
#' urlIce <- constructNWISURL(iceSite,"00060",start, end,"uv",format="tsv")
Laura A DeCicco's avatar
Laura A DeCicco committed
78
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
79
80
#'   ice <- importRDB1(urlIce, asDateTime=TRUE)
#'   iceNoConvert <- importRDB1(urlIce, convertType=FALSE)
Laura A DeCicco's avatar
Laura A DeCicco committed
81
#' 
82
#' }
83
#' # User file:
Laura A DeCicco's avatar
Laura A DeCicco committed
84
#' filePath <- system.file("extdata", package="dataRetrieval")
85
86
87
#' fileName <- "RDB1Example.txt"
#' fullPath <- file.path(filePath, fileName)
#' importUserRDB <- importRDB1(fullPath)
88
#' 
Laura A DeCicco's avatar
Laura A DeCicco committed
89
importRDB1 <- function(obs_url, asDateTime=TRUE, convertType = TRUE, tz="UTC"){
90
  
Laura A DeCicco's avatar
Laura A DeCicco committed
91
92
  if(tz == ""){
    tz <- "UTC" 
93
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
94
  
Laura A DeCicco's avatar
Laura A DeCicco committed
95
  tz <- match.arg(tz, OlsonNames())
96

Laura A DeCicco's avatar
Laura A DeCicco committed
97
  if(file.exists(obs_url)){
Laura A DeCicco's avatar
Laura A DeCicco committed
98
    f <- obs_url
Laura A DeCicco's avatar
Laura A DeCicco committed
99
  } else {
Laura A DeCicco's avatar
Laura A DeCicco committed
100
101
102
103
104
105
    f <- tempfile()
    on.exit(unlink(f))
    
    doc <- getWebServiceData(obs_url,
                             httr::write_disk(f),
                             encoding='gzip')
106
107
108
    if(is.null(doc)){
      return(invisible(NULL))
    }
109
    if("warn" %in% names(attr(doc, "headerInfo"))){
Laura A DeCicco's avatar
Laura A DeCicco committed
110
      data <- data.frame()
111
      attr(data, "headerInfo") <- attr(doc,"headerInfo")
Laura A DeCicco's avatar
Laura A DeCicco committed
112
113
114
115
116
      attr(data, "url") <- obs_url
      attr(data, "queryTime") <- Sys.time()
      
      return(data)
    }
117
  }
118
  
Laura A DeCicco's avatar
Laura A DeCicco committed
119
120
  readr.total <- readLines(f)

Laura A DeCicco's avatar
Laura A DeCicco committed
121
122
123
124
  total.rows <- length(readr.total)
  readr.meta <- readr.total[grep("^#", readr.total)]
  meta.rows <- length(readr.meta)
  header.names <- strsplit(readr.total[meta.rows+1],"\t")[[1]]
Laura A DeCicco's avatar
Laura A DeCicco committed
125
  types.names <- strsplit(readr.total[meta.rows+2],"\t")[[1]]
126
  data.rows <- total.rows - meta.rows - 2
Laura A DeCicco's avatar
Laura A DeCicco committed
127
128
129
130
131
132
133
134
  
  char.names <- c(header.names[grep("_cd",header.names)],
                  header.names[grep("_id",header.names)],
                  header.names[grep("_tx",header.names)],
                  header.names[grep("_tm",header.names)],
                  header.names[header.names == "site_no"],
                  header.names[header.names == "project_no"])
  
Laura A DeCicco's avatar
Laura A DeCicco committed
135
  if(data.rows > 0){
136
137
    args_list <- list(file = f,
                 delim = "\t",
138
                 quote = "",
139
140
                 skip = meta.rows + 2,
                 col_names = FALSE)
Laura A DeCicco's avatar
Laura A DeCicco committed
141
    if(utils::packageVersion("readr") > 1.4){
142
143
      args_list[["show_col_types"]] <- FALSE
    }
Laura A DeCicco's avatar
Laura A DeCicco committed
144
    if(convertType){
Laura A DeCicco's avatar
Laura A DeCicco committed
145
146
      args_list[["guess_max"]] <- data.rows 
      args_list[["col_types"]] <- readr::cols()
147
    } else {
148
      args_list[["col_types"]] <- readr::cols(.default = "c")
149
    } 
150
    
151
152
    readr.data <- do.call(readr::read_delim, args = args_list)

153
    readr.problems <- readr::problems(readr.data)
Laura A DeCicco's avatar
Laura A DeCicco committed
154

155
    readr.data <- as.data.frame(readr.data)
Laura A DeCicco's avatar
Laura A DeCicco committed
156

Laura A DeCicco's avatar
Laura A DeCicco committed
157
158
    if(nrow(readr.data) > 0){
      names(readr.data) <- header.names
Laura A DeCicco's avatar
Laura A DeCicco committed
159

Laura A DeCicco's avatar
Laura A DeCicco committed
160
      readr.data <- as.data.frame(readr.data)
161
      
Laura A DeCicco's avatar
Laura A DeCicco committed
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
      if(length(char.names) > 0){
        char.names.true <- char.names[sapply(readr.data[,char.names], is.character)]
        char.names <- char.names[!(char.names %in% char.names.true)]
      }
      
      if(length(char.names) > 0){
        args_list[["col_types"]] <- readr::cols(.default = "c")
        readr.data.char <- do.call(readr::read_delim, args = args_list)
        names(readr.data.char) <- header.names   
        
        for(j in char.names){
          readr.data[,j] <- readr.data.char[[j]]
        }
      }
      
Laura A DeCicco's avatar
Laura A DeCicco committed
177
      if(convertType){
178
        
Laura A DeCicco's avatar
Laura A DeCicco committed
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
        if(length(grep("_va", names(readr.data))) > 0 ){ 
          #note... if we simply convert any _va to numeric...we lose some QW censoring information from some formats
          vaCols <- grep("_va", names(readr.data))
          
          for(i in vaCols){
            readr.data[[i]] <- tryCatch({
              as.numeric(readr.data[[i]])
            },
            warning=function(cond) {
              message(paste("Column",i,"contains characters that cannot be automatically converted to numeric."))
              return(readr.data[[i]])
            }
            )
          }
    
        }
Laura A DeCicco's avatar
Laura A DeCicco committed
195

Laura A DeCicco's avatar
Laura A DeCicco committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
        if (asDateTime & convertType){
      
          header.suffix <- sapply(strsplit(header.names,"_"), function(x)x[length(x)])
          header.base <- substr(header.names,1,nchar(header.names)-3)
          
          dt_cols <- unique(header.base[header.suffix %in% c("dt","tm")])
          
          if(all(c("sample","sample_end") %in% dt_cols)){
            if("sample_start_time_datum_cd" %in% header.names){
              readr.data[,"tz_cd"] <- readr.data[,"sample_start_time_datum_cd"]
    
              readr.data[,"sample_start_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
              readr.data[,"sample_end_time_datum_cd_reported"] <- readr.data[,"sample_start_time_datum_cd"]
              readr.data <- readr.data[,names(readr.data)[names(readr.data) != "sample_start_time_datum_cd"]]
Laura A DeCicco's avatar
Laura A DeCicco committed
210
            }
Laura A DeCicco's avatar
Laura A DeCicco committed
211
212
213
          }
          
          for(i in dt_cols){
Laura A DeCicco's avatar
Laura A DeCicco committed
214
            
Laura A DeCicco's avatar
Laura A DeCicco committed
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
            if(all(c(paste0(i,"_dt"),paste0(i,"_tm")) %in% header.names)){
              varname <- paste0(i,"_dateTime")
    
              varval <- suppressWarnings(lubridate::parse_date_time(paste(readr.data[,paste0(i,"_dt")],
                                                                          readr.data[,paste0(i,"_tm")]), 
                                                                    c("%Y-%m-%d %H:%M:%S","%Y-%m-%d %H:%M"),
                                                                    tz = "UTC"))
            
              if(!all(is.na(varval))){
                readr.data[,varname] <- varval
                if(paste0(i, "_tz_cd") %in% names(readr.data)){
                  tz.name <- paste0(i, "_tz_cd") 
                } else {
                  tz.name <- paste0(i,"_time_datum_cd")
                }
                
                if(tz.name %in% names(readr.data)){
                  readr.data <- convertTZ(readr.data,tz.name,varname,tz)
                }
              }
Laura A DeCicco's avatar
Laura A DeCicco committed
235
            }
236
          }
Laura A DeCicco's avatar
Laura A DeCicco committed
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
    
          if("tz_cd" %in% names(readr.data)){
            date.time.cols <- which(sapply(readr.data, function(x) inherits(x, "POSIXct")))
            if(length(date.time.cols) > 0){
                readr.data <- convertTZ(readr.data,"tz_cd",date.time.cols,tz, flip.cols=FALSE)
            }
          }
          
          if("DATE" %in% header.names){
            readr.data[,"DATE"] <- lubridate::parse_date_time(readr.data[,"DATE"], "Ymd")
          }
          
          if(all(c("DATE","TIME","TZCD") %in% header.names)){
            varname <- "DATETIME"
            varval <- as.POSIXct(lubridate::fast_strptime(paste(readr.data[,"DATE"],readr.data[,"TIME"]), "%Y-%m-%d %H%M%S", tz = "UTC"))
            readr.data[,varname] <- varval
            readr.data <- convertTZ(readr.data,"TZCD",varname,tz, flip.cols=TRUE)
          }
          
          if("sample_dateTime" %in% names(readr.data)){
            names(readr.data)[names(readr.data) == "sample_dateTime"] <- "startDateTime"
          }
          
260
        }
Laura A DeCicco's avatar
Laura A DeCicco committed
261
262
        row.names(readr.data) <- NULL
        
263
      }
Laura A DeCicco's avatar
Laura A DeCicco committed
264
    }
265
266
267
268
  } else {
    readr.data <- data.frame(matrix(vector(), 0, length(header.names),
                                    dimnames=list(c(), header.names)),
                             stringsAsFactors=FALSE)
269
  }
270

Laura A DeCicco's avatar
Laura A DeCicco committed
271
  names(readr.data) <- make.names(names(readr.data))
272
  
Laura A DeCicco's avatar
Laura A DeCicco committed
273
274
275
276
277
278
279
280
  # People get confused having the tz_cd_reported next to the POSIXs:
  
  if("tz_cd_reported" %in% names(readr.data)){
    new_order <- names(readr.data)
    new_order <- c(new_order[!new_order %in% c("tz_cd_reported","tz_cd")], "tz_cd")
    readr.data <- readr.data[,new_order]
  }
  
Laura A DeCicco's avatar
Laura A DeCicco committed
281
  attr(readr.data, "queryTime") <- Sys.time()
Laura A DeCicco's avatar
Laura A DeCicco committed
282
  if(!file.exists(obs_url)){
283
    attr(readr.data, "url") <- obs_url
284
    attr(readr.data, "headerInfo") <- attr(doc, "headerInfo")
Laura A DeCicco's avatar
Laura A DeCicco committed
285
  }
286

Laura A DeCicco's avatar
Laura A DeCicco committed
287
288
289
  if("spec" %in% names(attributes(readr.data))){
    attr(readr.data, "spec") <- NULL
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
290
  
Laura A DeCicco's avatar
Laura A DeCicco committed
291
  attr(readr.data, "comment") <- readr.meta
292
  
Laura A DeCicco's avatar
Laura A DeCicco committed
293
  return(readr.data)
294
  
Laura A DeCicco's avatar
Laura A DeCicco committed
295
}
296

Laura A DeCicco's avatar
Laura A DeCicco committed
297
convertTZ <- function(df, tz.name, date.time.cols, tz, flip.cols=TRUE){
Laura A DeCicco's avatar
Laura A DeCicco committed
298
  
299
300
  offsetLibrary <- data.frame(offset=c(5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 10, 0, 0, 0, 0),
                              code=c("EST","EDT","CST","CDT","MST","MDT","PST","PDT","AKST","AKDT","HAST","HST","UTC","", NA, "GMT"),
Laura A DeCicco's avatar
Laura A DeCicco committed
301
                              stringsAsFactors = FALSE)
302
303
304

  offset <- offsetLibrary$offset[match(df[,tz.name], offsetLibrary$code)]

Laura A DeCicco's avatar
Laura A DeCicco committed
305
306
307
308
  df[,paste0(tz.name,"_reported")] <- df[,tz.name,drop=FALSE]
  
  df[,date.time.cols] <- df[,date.time.cols] + offset*60*60
  
309
310
311
312
313
314
315
316
317
  for(i in date.time.cols){
    df[,i] <- as.POSIXct(df[,i])
    if(tz != ""){
      attr(df[,i], "tzone") <- tz
      df[,tz.name] <- tz
    } else {
      attr(df[,i], "tzone") <- "UTC"
      df[!is.na(df[,i]),tz.name] <- "UTC"
    }
Laura A DeCicco's avatar
Laura A DeCicco committed
318
  }
319
320


Laura A DeCicco's avatar
Laura A DeCicco committed
321
  
Laura A DeCicco's avatar
Laura A DeCicco committed
322
323
324
325
326
327
328
329
330
331
  if(flip.cols){
    reported.col <- which(names(df) %in% paste0(tz.name,"_reported"))
    orig.col <- which(names(df) %in% tz.name)
    
    new.order <- 1:ncol(df)
    new.order[orig.col] <- reported.col
    new.order[reported.col] <- orig.col
    
    df <- df[,new.order]
  }
Laura A DeCicco's avatar
Laura A DeCicco committed
332
333
334
335
336
  
  if(all(is.na(df[,date.time.cols]))){
    df[,date.time.cols] <- NULL
  }
  
Laura A DeCicco's avatar
Laura A DeCicco committed
337
  return(df)
338
}