findNLDI.R 13.2 KB
Newer Older
mike's avatar
mike committed
1
#' @title Trim and Cull NULLs
mike's avatar
mike committed
2
3
4
5
#' @description Remove NULL arguments from a named list
#' @param x a list
#' @keywords nldi internal
#' @return a list
mike's avatar
mike committed
6
#' @noRd
mike's avatar
mike committed
7
8
9
10
11

tc <- function(x) {
  Filter(Negate(is.null), x)
}

12
13
14
15
16
17
18
19
20
21
22
23
24
25
#' @title Find Good Names
#' @description minimize names of returned features
#' @param input data returned from NLDI
#' @param type type of return
#' @keywords nldi internal
#' @return a list
#' @noRd

find_good_names = function(input, type) {
  # The features names are different across features, navigation, and basin returns
  # This sets the environment for what to expect
  if (type == "nav") {
    grep("comid", names(input), value = TRUE)
  } else if (type == "feature") {
26
27
    c("sourceName", "identifier", "comid",
      names(input)[names(input) %in% c("name", "reachcode", "measure")])
28
29
30
31
32
  } else {
    NULL
  }
}

mike's avatar
mike committed
33
34
35
36
37
38
39
40
#' @title Get current NLDI offerings
#' @description Used to query the current resources available through the NLDI
#' @return data.frame
#' @export
#' @keywords nldi
#' @importFrom httr RETRY content
#' @importFrom jsonlite fromJSON
#' @examples
41
#' \donttest{
mike's avatar
mike committed
42
#' get_nldi_sources()
43
#' }
mike's avatar
mike committed
44
get_nldi_sources <- function() {
mike's avatar
mike committed
45
46
47
48
49
  res <-
    httr::RETRY("GET",
                pkg.env$nldi_base,
                times = 3,
                pause_cap = 60)
50
  
mike's avatar
mike committed
51
  if (res$status_code == 200) {
52
53
54
55
    jsonlite::fromJSON(httr::content(res, "text",
                                     encoding = "UTF8"),
                       simplifyDataFrame = TRUE)
    
mike's avatar
mike committed
56
57
58
59
60
61
  } else {
    message("Error in: ",  url)
  }
}

#' @title Query NLDI
62
63
64
65
#' @description Queries the NLDI for a given URL. If local sf install is
#' available the function returns a sf data.frame.
#' If the object requested is a POINT object, the XY coordinates are added
#' as columns. Otherwise the columns returned are "sourceName" and
66
#' "identifier" for features, and "nhdplus_comid".
mike's avatar
mike committed
67
68
69
70
#' @param url the URL to retrieve
#' @param type the type of data being returned (nav or feature)
#' @param use_sf should a local sf install be usedto parse data
#' @keywords nldi internal
mike's avatar
mike committed
71
#' @noRd
mike's avatar
mike committed
72
73
74
75
#' @return a data.frame
#' @importFrom httr content RETRY
#' @importFrom jsonlite fromJSON
#' @examples
76
#' \donttest{
mike's avatar
mike committed
77
78
79
#'  base = "https://labs.waterdata.usgs.gov/api/nldi/linked-data/"
#'  get_nldi(paste0(base, "comid/101"), type = "feature", use_sf = FALSE)
#'  get_nldi(paste0(base, "comid/101"), type = "feature", use_sf = TRUE)
mike's avatar
mike committed
80
#'  get_nldi(url = paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
mike's avatar
mike committed
81
82
#'  get_nldi(paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
#'  }
83
get_nldi = function(url, type = "", use_sf = FALSE) {
mike's avatar
mike committed
84
85
  # Query
  res <- httr::RETRY("GET", url, times = 3, pause_cap = 60)
86
  
mike's avatar
mike committed
87
  # If successful ...
88
  if (res$status_code == 200) {
mike's avatar
mike committed
89
90
    # Interpret as text
    d <- httr::content(res, "text", encoding = "UTF8")
91
92
    
    if (d == "") {
mike's avatar
mike committed
93
94
      message("No data returned for: ", url, call. = FALSE)
      return(NULL)
mike's avatar
mike committed
95
    }
96
97
    
    if (use_sf) {
mike's avatar
mike committed
98
      #Parse with sf
99
100
101
      tmp <- sf::read_sf(d)
      good_name = find_good_names(tmp, type)
      
mike's avatar
mike committed
102
      # if of type POINT at the X,Y coordinates as columns
103
104
105
106
107
108
      if (sf::st_geometry_type(tmp)[1] == "POINT") {
        tmp$X <- sf::st_coordinates(tmp)[, 1]
        tmp$Y <- sf::st_coordinates(tmp)[, 2]
        
        tmp <- tmp[, c(good_name, "X", "Y")]
        
mike's avatar
mike committed
109
110
      } else {
        # If line/polygon then keep geometry but don't expand
111
112
        tmp <- tmp[, c(good_name, attr(tmp, "sf_column"))]
        
mike's avatar
mike committed
113
      }
114
115
116
      # Returning as data.frame drops the geometry column ...
      return(tmp)
      
mike's avatar
mike committed
117
118
119
    } else {
      # Parse as simplified JSON
      d <- jsonlite::fromJSON(d, simplifyDataFrame = TRUE)
120
      
mikejohnson51's avatar
mikejohnson51 committed
121
122
123
      input <-  d$features$properties
      
      good_name = find_good_names(input, type)
124
      
mike's avatar
mike committed
125
      # if of type POINT at the X,Y coordinates as columns
126
      if (d$features$geometry$type[1] == "Point") {
mike's avatar
mike committed
127
        geom = d$features$geometry$coordinates
128
        
mikejohnson51's avatar
mikejohnson51 committed
129
        tmp = cbind(input[, good_name], do.call(rbind, geom))
130
        
mike's avatar
mike committed
131
        names(tmp) <- c(good_name, "X", "Y")
132
        
mike's avatar
mike committed
133
        return(tmp)
134
        
mike's avatar
mike committed
135
136
      } else {
        # If line/polygon then keep geometry but don't expand
mikejohnson51's avatar
mikejohnson51 committed
137
        return(input[, good_name])
mike's avatar
mike committed
138
139
      }
    }
140
141
142
143
    
  } else {
    message("Error in: ",  url)
  }
mike's avatar
mike committed
144
145
146
}

#' Clean NWIS NLDI ids
147
148
#' @description The NWIS ids come as "USGS-XXXXXXXX". This is not suitable for 
#' passing to other package functions like readNWISdv.
mike's avatar
mike committed
149
#' This function strips the "USGS-" from these ids.
mike's avatar
mike committed
150
151
#' @param tmp a data.frame retrieved from get_nldi()
#' @return the input object with potentially modified identifiers
mike's avatar
mike committed
152
#' @keywords nldi internal
mike's avatar
mike committed
153
#' @noRd
mike's avatar
mike committed
154
clean_nwis_ids = function(tmp) {
mike's avatar
mike committed
155
  # If data.frame, and of type NWIS, then strip "USGS-" from identifiers
156
  if (is.data.frame(tmp)) {
157
    if ("sourceName" %in% names(tmp) &&
158
        tmp$sourceName[1] == "NWIS Sites") {
mike's avatar
mike committed
159
      tmp$identifier = gsub("USGS-", "", tmp$identifier)
160
      
mike's avatar
mike committed
161
162
163
164
165
166
    }
  }
  tmp
}

#' @title NLDI Validity Check
mike's avatar
mike committed
167
168
169
#' @description tests if NLDI feature is available. Is vectorized and works with partial string matching.
#' @param all a data.frame of available features (see get_nldi_sources)
#' @param type type(s) to check (character)
mike's avatar
mike committed
170
171
#' @return a list with good and bad entries
#' @keywords nldi internal
mike's avatar
mike committed
172
#' @noRd
mike's avatar
mike committed
173
#' @examples
174
#' \donttest{
175
#' valid_ask(all = get_nldi_sources(), "nwis")
mike's avatar
mike committed
176
#' }
177
valid_ask = function(all, type) {
mike's avatar
mike committed
178
  # those where the requested pattern is included in a nldi_source ...
179
180
181
  # means we will catch nwis - not just nwissite ...
  # means we will catch both wqp and WQP ...
  
182
  ### WOW! This is hacky and will hopefully be unneeded latter on....
183
184
185
186
187
188
189
190
191
192
193
  type = ifelse(type == "nwis", "nwissite", type)
  all = rbind(all, c("flowlines", "NHDPlus comid", NA))
  
  good  <-
    grepl(paste0(tolower(type), collapse = "|"), tolower(c(all$source)))
  
  bad <-
    grepl(paste0(tolower(all$source), collapse = "|"), tolower(c(all$source)))
  
  list(good = all[good, ], bad = type[!bad])
  
mike's avatar
mike committed
194
195
}

196
197
#' @title R Client for the Network Linked Data Index
#' @description Provides a formal client to the USGS
mike's avatar
mike committed
198
#' \href{https://labs.waterdata.usgs.gov/about-nldi/index.html}{Network Linked Data Index}.
199
200
201
202
203
204
#' @details The function is useful for topology and location based
#' feature discovery. A user must specify an origin feature, optional navigation
#' direction(s) along the network, as well as features to identify along the
#' navigated paths. Valid starting options can be given by one of the following
#' arguments: comid, nwis, huc12, wqp, location, and start.
#' @param comid numeric or character. An NHDPlusV2 COMID
205
#' @param nwis  numeric or character. A USGS NWIS surface water siteID
206
207
208
209
210
211
212
213
214
215
216
217
218
#' @param wqp   numeric or character. A water quality point ID
#' @param huc12 numeric or character. A WBD HUC12 unit ID
#' @param location numeric vector. Coordinate pair in WGS84
#' SRS ordered lng/lat (X,Y)
#' @param origin named list. Specifying a feature type and ID
#' (e.g. list("comid" = 101))
#' @param nav character vector. where to navigate from the starting point.
#' Options include along the upper mainsteam (UM), upstream tributary (UT),
#' downstream mainstem (DM) and downstream divergences (DD). You may select
#' one or more of the abbreviations ("UM", "UT", DM", "DD").
#' @param find character vector. Define what resources to find along the
#' navigation path(s) (see get_nldi_sources()$source). Can also include 'basin'
#' or 'flowline', which will return the upstream basin of the starting feature
219
220
221
#' or flowlines along the navigation respectively. The default is "flowlines". 
#' If you provide any other resource, AND want flowlines, then flowlines must 
#' be explicitly requested.
222
223
224
225
226
#' @param distance_km numeric. Define how far to look along the navigation path in
#' kilometers (default = 100)
#' @param no_sf if available, should `sf` be used for parsing,
#' defaults to `TRUE` if `sf` is locally installed
#' @return a list of data.frames if sf is not installed, a list of sf objects if it is
mike's avatar
mike committed
227
228
#' @export
#' @keywords nldi
Laura A DeCicco's avatar
Laura A DeCicco committed
229
#' @examplesIf is_dataRetrieval_user()
230
#' \donttest{
mike's avatar
mike committed
231
232
233
234
#' # Find Features / Define origin features
#'
#' ## Find feature by COMID
#'  findNLDI(comid = 101)
Laura A DeCicco's avatar
Laura A DeCicco committed
235
#'  
mike's avatar
mike committed
236
#' ## Find feature by NWIS ID
Laura A DeCicco's avatar
Laura A DeCicco committed
237
238
239
#'   findNLDI(nwis = '11120000')
#' 
#' ## Find feature by WQP ID 
mike's avatar
mike committed
240
241
242
243
244
245
246
247
248
249
250
#'  findNLDI(wqp = 'USGS-04024315')
#'
#' ## Find feature by LOCATION
#'  findNLDI(location = c(-115,40))
#'
#' ## GENERAL ORIGIN: COMID
#'  findNLDI(origin = list("comid" = 101))
#'
#' ## GENERAL ORIGIN: WaDE
#'  findNLDI(origin = list("wade" = 'CA_45206'))
#'
251
#' # Navigation (flowlines will be returned if find is unspecified)
mike's avatar
mike committed
252
#' # UPPER MAINSTEM of USGS-11120000
253
#'  findNLDI(nwis = '11120000', nav = "UM")
mike's avatar
mike committed
254
255
256
#'
#' # MULTI-REQUEST
#' # UPPER MAINSTEM and TRIBUTARY of USGS-11120000
257
#'  findNLDI(nwis = '11120000', nav = c("UT", "UM"))
mike's avatar
mike committed
258
#'
259
#' # Discover Features(flowlines will not be returned unless included in find)
mike's avatar
mike committed
260
261
#'
#' ## Find feature(s) on the upper tributary of USGS-11120000
mike's avatar
mike committed
262
#'  findNLDI(nwis = '11120000', nav = "UT", find = c("nwis", "wqp"))
mike's avatar
mike committed
263
#'
264
265
#' ## Find upstream basin boundary and  of USGS-11120000
#'  findNLDI(nwis = '11120000',  find = "basin")
mike's avatar
mike committed
266
267
#'
#' # Control Distance
268
269
#' ## Limit search to 50 km
#'  findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp", "flowlines"), distance_km = 50)
270
#'}
mike's avatar
mike committed
271
findNLDI <- function(comid = NULL,
272
273
274
275
276
277
278
279
280
                     nwis = NULL,
                     wqp = NULL,
                     huc12 = NULL,
                     location = NULL,
                     origin = NULL,
                     nav = NULL,
                     find = c("flowlines"),
                     distance_km = 100,
                     no_sf = FALSE) {
mike's avatar
mike committed
281
282
  # Should sf be used? Both no_sf and pkg.env must agree
  use_sf = all(pkg.env$local_sf, !no_sf)
283
  
mike's avatar
mike committed
284
  # Should the basin be identified?
mike's avatar
mike committed
285
  getBasin = ("basin" %in% find)
286
  
287
  # From the collection of possible origins, pick 1 and remove NULLS
mike's avatar
mike committed
288
289
290
291
292
293
294
295
296
297
  starter <- tc(c(
    list(
      comid = comid,
      nwis = nwis,
      wqp = wqp,
      huc12 = huc12,
      location = location
    ),
    origin
  ))
298
  
mike's avatar
mike committed
299
300
301
302
  # a single starting location must be given ...
  if (is.null(starter) | length(starter) > 1) {
    stop("Define a single starting point. Use `find` to identify other resources.")
  }
303
  
mike's avatar
mike committed
304
  # Ensure nav types are valid
305
  bad_nav <- !nav %in% c("UM", "UT", "DD", "DM")
306
  
mike's avatar
mike committed
307
  if (any(bad_nav)) {
308
    stop(nav[bad_nav], " not a valid navigation. Chose from: UM, UT, DD, DM")
mike's avatar
mike committed
309
  }
310
  
mike's avatar
mike committed
311
312
  # name of starter
  start_type = names(starter)
313
  
mike's avatar
mike committed
314
  # If location, ensure lng is first argument (hack for USA features)
mike's avatar
mike committed
315
  if (start_type == 'location') {
316
317
    if (any(grepl("sfc$|sf$", class(location))) & use_sf) {
      if (sf::st_geometry_type(location) != "POINT") {
318
319
        stop("Only POINT objects can be passed to location")
      }
320
      
321
322
      location  = sf::st_coordinates(location)
    } else {
323
324
325
      if (location[1] > 0) {
        stop("Provide location in the form c(lng,lat)")
      }
326
    }
327
328
    
    # Must convert location to COMID for tracing and discovery ...
329
    tmp_url <- paste0(
mike's avatar
mike committed
330
331
      pkg.env$nldi_base,
      "comid/position?coords=POINT%28",
332
333
334
335
      location[1] ,
      "%20",
      location[2] ,
      "%29"
mike's avatar
mike committed
336
    )
337
    
338
    tmp_return <- get_nldi(tmp_url, "feature", use_sf = use_sf)
339
    
mike's avatar
mike committed
340
    # Override starter with location based COMID
341
    starter <- list("comid" = tmp_return$identifier)
mike's avatar
mike committed
342
  }
343
  
mike's avatar
mike committed
344
  # Reset (if needed)
345
  start_type <- names(starter)
346
347
  
  if (is.null(pkg.env$current_nldi)) {
348
349
    pkg.env$current_nldi <- get_nldi_sources()
  }
350
  
mike's avatar
mike committed
351
  # Defining the origin URL.
352
353
  #  Align request with formal name from sources
  #  If NWIS, add "USGS-" prefix
mike's avatar
mike committed
354
  start_url = paste0(
355
356
357
358
    valid_ask(all = pkg.env$current_nldi, type = start_type)$good$features,
    "/",
    ifelse(start_type == "nwis", paste0("USGS-", starter), starter),
    "/"
mike's avatar
mike committed
359
  )
360
  
mike's avatar
mike committed
361
  # Makes sure that all requested features to `find` are valid and name-aligned
mike's avatar
mike committed
362
363
364
  if (!is.null(find)) {
    find = valid_ask(pkg.env$current_nldi, type = find)$good$source
  }
365
  
mike's avatar
mike committed
366
  # Set empty lists to store origin, navigation, features, and basin requests ...
367
  navigate <- features  <- list()
368
  
mike's avatar
mike committed
369
  # Build navigation URLs
mike's avatar
mike committed
370
  for (i in seq_along(nav)) {
371
    navigate[[nav[i]]] = paste0(start_url, "navigation/", nav[i])
mike's avatar
mike committed
372
  }
373
  
mike's avatar
mike committed
374
375
  # Build find URLs
  if (length(find) > 0) {
376
377
378
379
    features = lapply(navigate,
                      paste0,
                      paste0("/", find),
                      paste0("?distance=", distance_km))
mike's avatar
mike committed
380
  }
381
  
382
  names  <- unlist(lapply(nav, paste0, paste0("_", find)))
383
  
384
385
  search <- data.frame(
    url = unlist(c(start_url,
386
387
388
                   if (getBasin) {
                     paste0(start_url, "basin")
                   },
389
                   features)),
390
    
391
    type = c("feature",
392
393
394
395
396
397
398
             if (getBasin) {
               'basin'
             },
             ifelse(
               rep(find, length(nav)) == "flowlines", "nav", "feature"
             )),
    
399
    name = c("origin",
400
401
402
             if (getBasin) {
               'basin'
             },
403
404
             names[!names %in%  c("UM_", "UT_", "DD_", "DM_")])
  )
405
  
mike's avatar
mike committed
406
  # Send NLDI queries ...
407
  shp <- lapply(1:nrow(search), function(x) {
408
409
410
    get_nldi(url = search$url[x],
             type = search$type[x],
             use_sf = use_sf)
mike's avatar
mike committed
411
  })
412
  
mike's avatar
mike committed
413
  # Set the names of the parsed URL list
414
  names(shp) <- search$name
415
  
mike's avatar
mike committed
416
  # Clean up NWIS ids, trim NULLs, and return ...
417
  shp = tc(lapply(shp, clean_nwis_ids))
418
419
420
  
  # dont return list for one length elements
  if (length(shp) == 1) {
421
422
    shp = shp[[1]]
  }
423
  
424
  return(shp)
mike's avatar
mike committed
425
}