findNLDI.R 12.1 KB
Newer Older
mike's avatar
mike committed
1
#' @title Trim and Cull NULLs
mike's avatar
mike committed
2
3
4
5
#' @description Remove NULL arguments from a named list
#' @param x a list
#' @keywords nldi internal
#' @return a list
mike's avatar
mike committed
6
#' @noRd
mike's avatar
mike committed
7
8
9
10
11
12
13
14
15
16
17
18
19

tc <- function(x) {
  Filter(Negate(is.null), x)
}

#' @title Get current NLDI offerings
#' @description Used to query the current resources available through the NLDI
#' @return data.frame
#' @export
#' @keywords nldi
#' @importFrom httr RETRY content
#' @importFrom jsonlite fromJSON
#' @examples
mike's avatar
mike committed
20
#' get_nldi_sources()
mike's avatar
mike committed
21

mike's avatar
mike committed
22
get_nldi_sources <- function() {
mike's avatar
mike committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  res <-
    httr::RETRY("GET",
                pkg.env$nldi_base,
                times = 3,
                pause_cap = 60)

  if (res$status_code == 200) {

    d <- httr::content(res, "text", encoding = "UTF8")

    d <- jsonlite::fromJSON(d, simplifyDataFrame = TRUE)

    return(d)
  } else {
    message("Error in: ",  url)
  }
}

#' @title Query NLDI
#' @description Queries the NLDI for a given URL. If local sf install is available the function returns a data.frame with the sfc geometry column listed. Such an object can be converted to sf with `sf::st_as_sf()`. If the object requested is a POINT object, the XY coordinates are added as columns. Otherwise the columns returned are "sourceName" and "identifier" for features, and "nhdplus_comid" for navigated paths.
#' @param url the URL to retrieve
#' @param type the type of data being returned (nav or feature)
#' @param use_sf should a local sf install be usedto parse data
#' @keywords nldi internal
mike's avatar
mike committed
47
#' @noRd
mike's avatar
mike committed
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#' @return a data.frame
#' @importFrom httr content RETRY
#' @importFrom jsonlite fromJSON
#' @examples
#' \dontrun{
#'  base = "https://labs.waterdata.usgs.gov/api/nldi/linked-data/"
#'  get_nldi(paste0(base, "comid/101"), type = "feature", use_sf = FALSE)
#'  get_nldi(paste0(base, "comid/101"), type = "feature", use_sf = TRUE)
#'  get_nldi(paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
#'  get_nldi(paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
#'  }

get_nldi = function(url, type = "", use_sf = FALSE){

  # The features names are different across features, navigation, and basin returns
  # This sets the environment for what to expect

  if(type == "nav"){
mike's avatar
mike committed
66
    good_name = c("nhdplus_comid")
mike's avatar
mike committed
67
  } else if(type == "feature") {
mike's avatar
mike committed
68
    good_name = c("sourceName", "identifier")
mike's avatar
mike committed
69
  } else {
mike's avatar
mike committed
70
    good_name = NULL
mike's avatar
mike committed
71
72
73
74
75
76
77
78
79
80
81
82
  }

  # Query
  res <- httr::RETRY("GET", url, times = 3, pause_cap = 60)

  # If successful ...
  if(res$status_code == 200){

    # Interpret as text
    d <- httr::content(res, "text", encoding = "UTF8")

    if(d == ""){
mike's avatar
mike committed
83
84
      message("No data returned for: ", url, call. = FALSE)
      return(NULL)
mike's avatar
mike committed
85
86
    }

mike's avatar
mike committed
87
    if(use_sf){
mike's avatar
mike committed
88

mike's avatar
mike committed
89
90
      #Parse with sf
       tmp <- sf::read_sf(d)
mike's avatar
mike committed
91

mike's avatar
mike committed
92
93
      # if of type POINT at the X,Y coordinates as columns
      if(sf::st_geometry_type(tmp)[1] =="POINT"){
mike's avatar
mike committed
94

mike's avatar
mike committed
95
        tmp$X = sf::st_coordinates(tmp)[,1]
mike's avatar
mike committed
96
        tmp$Y = sf::st_coordinates(tmp)[,2]
mike's avatar
mike committed
97

mike's avatar
mike committed
98
        tmp = tmp[,c(good_name, "X", "Y")]
mike's avatar
mike committed
99

mike's avatar
mike committed
100
101
102
      } else {
        # If line/polygon then keep geometry but don't expand
        tmp = tmp[,c(good_name, "geometry")]
mike's avatar
mike committed
103

mike's avatar
mike committed
104
105
106
      }
       # Returning as data.frame drops the geometry column ...
        return(data.frame(tmp))
mike's avatar
mike committed
107

mike's avatar
mike committed
108
    } else {
mike's avatar
mike committed
109

mike's avatar
mike committed
110
111
      # Parse as simplified JSON
      d <- jsonlite::fromJSON(d, simplifyDataFrame = TRUE)
mike's avatar
mike committed
112

mike's avatar
mike committed
113
114
      # if of type POINT at the X,Y coordinates as columns
      if(d$features$geometry$type[1] == "Point"){
mike's avatar
mike committed
115

mike's avatar
mike committed
116
        geom = d$features$geometry$coordinates
mike's avatar
mike committed
117

mike's avatar
mike committed
118
        tmp = cbind(d$features$properties[,good_name], do.call(rbind, geom))
mike's avatar
mike committed
119

mike's avatar
mike committed
120
        names(tmp) <- c(good_name, "X", "Y")
mike's avatar
mike committed
121

mike's avatar
mike committed
122
123
124
125
126
127
128
        return(tmp)

      } else {
        # If line/polygon then keep geometry but don't expand
        return(d$features$properties[,c(good_name)])
      }
    }
mike's avatar
mike committed
129
130

    } else {
mike's avatar
mike committed
131
      message("Error in: ",  url)
mike's avatar
mike committed
132
133
134
135
136
137
138
    }

}

#' Clean NWIS NLDI ids
#' @description The NWIS ids come as "USGS-XXXXXXXX". This is not suitable for passing to other package functions like readNWISdv.
#' This function strips the "USGS-" from these ids.
mike's avatar
mike committed
139
140
#' @param tmp a data.frame retrieved from get_nldi()
#' @return the input object with potentially modified identifiers
mike's avatar
mike committed
141
#' @keywords nldi internal
mike's avatar
mike committed
142
#' @noRd
mike's avatar
mike committed
143
144

clean_nwis_ids = function(tmp) {
mike's avatar
mike committed
145
  # If data.frame, and of type NWIS, then strip "USGS-" from identifiers
mike's avatar
mike committed
146
147
148
149
150
151
152
153
154
155
156
157
  if (class(tmp) == 'data.frame') {

    if (sum(tmp$sourceName[1] == "NWIS Sites") == 1) {

      tmp$identifier = gsub("USGS-", "", tmp$identifier)

    }
  }
  tmp
}

#' @title NLDI Validity Check
mike's avatar
mike committed
158
159
160
#' @description tests if NLDI feature is available. Is vectorized and works with partial string matching.
#' @param all a data.frame of available features (see get_nldi_sources)
#' @param type type(s) to check (character)
mike's avatar
mike committed
161
162
#' @return a list with good and bad entries
#' @keywords nldi internal
mike's avatar
mike committed
163
#' @noRd
mike's avatar
mike committed
164
165
#' @examples
#' \dontrun{
mike's avatar
mike committed
166
#' valid_ask(get_nldi_sources(), "nwis")
mike's avatar
mike committed
167
168
169
#' }

valid_ask = function(all, type){
mike's avatar
mike committed
170
171
  # those where the requested pattern is included in a nldi_source ...
    # means we will catch nwis - not just nwissite ...
mike's avatar
mike committed
172
173
174
175
176
177
178
179
180
181
    # means we will catch both wqp and WQP ...
  cond  <- grepl(paste0(tolower(type), collapse = "|"), tolower(all$source))

  cond2 <- grepl(paste0(tolower(all$source), collapse = "|"), tolower(type))

  list(good = all[cond,], bad = type[!cond2])

}

#' @title 	Retrieve features from the \href{https://labs.waterdata.usgs.gov/api/nldi/swagger-ui/index.html?configUrl=/api/nldi/v3/api-docs/swagger-config}{NLDI}
mike's avatar
mike committed
182
183
184
185
186
187
188
#' @description Provides a formal query to the
#' \href{https://labs.waterdata.usgs.gov/about-nldi/index.html}{Network Linked Data Index}.
#' The function is useful for topology and location based feature discovery.
#' A user must supply a starting feature, and can add optional navigation direction(s),
#' and features to identify on the navigated network.
#' Valid starting options can be given by one of the following arguments: comid, nwis, huc12,
#'  wqp, location, and start.
mike's avatar
mike committed
189
190
191
192
193
194
195
#' @param comid an NHDPlusV2 COMID
#' @param nwis  a USGS NWIS siteID
#' @param wqp a water quality point ID
#' @param huc12 a HUC12 ID
#' @param location Coordinate pair in WGS84 GCS provided as a numeric vector ordered lng/lat
#' @param origin a named list specifying a feature type and ID (e.g. list("comid" = 101))
#' @param nav where to navigate from the starting point ("UM", "UT", DM", "DD")
mike's avatar
mike committed
196
197
#' @param find what resources to find along the navigation path(s) (see get_nldi_sources()$source). Can also include 'basin', which will return the upstream basin of the starting feature
#' @param distance_km how far to look along the navigation path in kilometers (default = 100)
mike's avatar
mike committed
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#' @param no_sf if available, should `sf` be used for parsing, defaults to `TRUE` if `sf` is locally installed
#' @return a list of data.frames
#' @export
#' @keywords nldi
#' @examples
#'
#' # Find Features / Define origin features
#'
#' ## Find feature by COMID
#'  findNLDI(comid = 101)
#'
#' ## Find feature by NWIS ID
#'  findNLDI(nwis = '11120000')
#'
#' ## Find feature by WQP ID
#'  findNLDI(wqp = 'USGS-04024315')
#'
#' ## Find feature by LOCATION
#'  findNLDI(location = c(-115,40))
#'
#' ## GENERAL ORIGIN: COMID
#'  findNLDI(origin = list("comid" = 101))
#'
#' ## GENERAL ORIGIN: WaDE
#'  findNLDI(origin = list("wade" = 'CA_45206'))
#'
#' # Navigation
#' # UPPER MAINSTEM of USGS-11120000
#'  str(findNLDI(nwis = '11120000', nav = "UM"), max.level = 1)
#'
#' # MULTI-REQUEST
#' # UPPER MAINSTEM and TRIBUTARY of USGS-11120000
#'  str(findNLDI(nwis = '11120000', nav = c("UT", "UM")), max.level = 1)
#'
#' # Discover Features
#'
#' ## Find feature(s) on the upper tributary of USGS-11120000
#'  str(findNLDI(nwis = '11120000', nav = "UT", find = c("nwis", "wqp")), max.level = 1)
#'
#' ## Find upstream basin boundary of USGS-11120000
#'  str(findNLDI(nwis = '11120000',  find = "basin"), max.level = 1)
#'
#' # Control Distance
#'
#' ## Limit search to 100 km
#'  str(findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp"), distance_km = 100), max.level = 1)
#'
#' ## Convert returned list of data.frames to list of spatial features
#' \donttest{
#'  nldi = findNLDI(nwis = '11120000', nav = "UT", find = c("nwis", "wqp"))
#'  str(lapply(nldi, sf::st_as_sf), max.level = 1)
#'  }

mike's avatar
mike committed
251
252

findNLDI <- function(comid = NULL,
mike's avatar
mike committed
253
254
255
256
257
258
259
                    nwis = NULL,
                    wqp = NULL,
                    huc12 = NULL,
                    location = NULL,
                    origin = NULL,
                    nav = NULL,
                    find = NULL,
mike's avatar
mike committed
260
                    distance_km = 100,
mike's avatar
mike committed
261
262
263
264
265
                    no_sf = FALSE) {

  # Should sf be used? Both no_sf and pkg.env must agree
  use_sf = all(pkg.env$local_sf, !no_sf)

mike's avatar
mike committed
266
  # Should the basin be identified?
mike's avatar
mike committed
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
  getBasin = ("basin" %in% find)

  # From the collection of all possible origins, pick 1 and remove NULLS
  starter <- tc(c(
    list(
      comid = comid,
      nwis = nwis,
      wqp = wqp,
      huc12 = huc12,
      location = location
    ),
    origin
  ))

  # a single starting location must be given ...
  if (is.null(starter) | length(starter) > 1) {
    stop("Define a single starting point. Use `find` to identify other resources.")
  }

  # Ensure nav types are valid
mike's avatar
mike committed
287
  bad_nav = !nav %in% c("UM", "UT", "DD", "DM")
mike's avatar
mike committed
288

mike's avatar
mike committed
289
290
  if (any(bad_nav)) {
    stop(nav[bad_nav], " not a valid navigation. Use one of: UM, UT, DD, DM")
mike's avatar
mike committed
291
292
293
294
295
  }

  # name of starter
  start_type = names(starter)

mike's avatar
mike committed
296
  # If location, ensure lng is first argument (hack for USA features)
mike's avatar
mike committed
297
298
299
300
301
  if (start_type == 'location') {
    if (location[1] > 0) {
      stop("Please provide location in the form c(lng,lat)")
    }

mike's avatar
mike committed
302
  # Must convert location to COMID for tracing and discovery ...
mike's avatar
mike committed
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
    tmp_url = paste0(
      pkg.env$nldi_base,
      "comid/position?coords=POINT%28",
      location[1] ,
      "%20",
      location[2] ,
      "%29"
    )

    tmp_return = get_nldi(tmp_url, "feature", use_sf = use_sf)

    # Override starter with location based COMID
    starter = list("comid" = tmp_return$identifier)

  }

  # Reset (if needed)
  start_type = names(starter)

  # Defining the origin URL.
mike's avatar
mike committed
323
    #  Align request with formal name from offerings
mike's avatar
mike committed
324
325
326
327
328
329
330
331
    # If NWIS, add "USGS-" prefix
  start_url = paste0(
    valid_ask(pkg.env$current_nldi, type = start_type)$good$feature,
    "/",
    ifelse(start_type == "nwis", paste0("USGS-", starter), starter),
    "/"
  )

mike's avatar
mike committed
332
  # Makes sure that all requested features to `find` are valid and name-aligned
mike's avatar
mike committed
333
334
335
336
  if (!is.null(find)) {
    find = valid_ask(pkg.env$current_nldi, type = find)$good$source
  }

mike's avatar
mike committed
337
  # Set empty lists to store origin, navigation, features, and basin requests ...
mike's avatar
mike committed
338
339
340
341
342
  start <- navigate <- features <- basin <- list()

  # Set origin url
  start[["start"]] <- start_url

mike's avatar
mike committed
343
  # Build navigation URLs
mike's avatar
mike committed
344
  for (i in seq_along(nav)) {
mike's avatar
mike committed
345
      navigate[[nav[i]]] = paste0(start_url, "navigation/", nav[i])
mike's avatar
mike committed
346
347
348
349
350
351
352
353
354
355
356
357
  }

  # Build basin URL
  if (getBasin) {
    basin[["basin"]] = paste0(start_url, "basin")
  }

  # Build find URLs
  if (length(find) > 0) {
    features = lapply(navigate, paste0, paste0("/", find))
  }

mike's avatar
mike committed
358
359
  # Add distance constraints to features
  features = lapply(features, paste0, paste0("?distance=", distance_km))
mike's avatar
mike committed
360

mike's avatar
mike committed
361
362
  # Add distance constraints to navigations flowpaths
  navigate = lapply(navigate, paste0, paste0("/flowlines?distance=", distance_km))
mike's avatar
mike committed
363
364
365
366

  # combine, unlist, relist
  ll = as.list(unlist(c(start, basin, navigate, features)))

mike's avatar
mike committed
367
368
  # define the type of each URL (feature, basin, or nav)
  # This is needed as the attribute names for each varies
mike's avatar
mike committed
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
  types = c("feature",
            if (getBasin) { 'basin' },
            rep("nav", length(nav)),
            rep("feature", length(find) * length(nav)))

  # Send NLDI queries ...
  shp = lapply(seq_along(ll), function(x) {
    get_nldi(ll[[x]], type = types[x], use_sf = use_sf)
  })

  # Remove basin from find list ...
  find = find[find != 'basin']

  # if no features (aside from basin) were requested then names are NULL
  # else, the names are the combination of the navigation direction
mike's avatar
mike committed
384
  # and the feature discovered ...
mike's avatar
mike committed
385
386
387
388
389
390
391
392

  feats = if (is.null(find) | length(find) == 0) {
    NULL
  } else {
    unlist(lapply(nav, paste0, paste0("_", find)))
  }

  # Set the names of the parsed URL list
mike's avatar
mike committed
393
394
395
396
  names(shp) = c("origin",
                 if (getBasin){'basin'},
                 nav,
                 feats)
mike's avatar
mike committed
397

mike's avatar
mike committed
398
399
  # Clean up NWIS ids, trim NULLs, and return ...
  tc(lapply(shp, clean_nwis_ids))
mike's avatar
mike committed
400
}
mike's avatar
mike committed
401