Unverified Commit b40130e2 authored by Laura A DeCicco's avatar Laura A DeCicco Committed by GitHub
Browse files

Merge pull request #577 from mikejohnson51/master

Updates for NLDI changes
parents 23dbe84f b5ef0976
......@@ -9,6 +9,26 @@ tc <- function(x) {
Filter(Negate(is.null), x)
}
#' @title Find Good Names
#' @description minimize names of returned features
#' @param input data returned from NLDI
#' @param type type of return
#' @keywords nldi internal
#' @return a list
#' @noRd
find_good_names = function(input, type) {
# The features names are different across features, navigation, and basin returns
# This sets the environment for what to expect
if (type == "nav") {
grep("comid", names(input), value = TRUE)
} else if (type == "feature") {
c("sourceName", "identifier", "comid")
} else {
NULL
}
}
#' @title Get current NLDI offerings
#' @description Used to query the current resources available through the NLDI
#' @return data.frame
......@@ -26,14 +46,12 @@ get_nldi_sources <- function() {
pkg.env$nldi_base,
times = 3,
pause_cap = 60)
if (res$status_code == 200) {
d <- httr::content(res, "text", encoding = "UTF8")
d <- jsonlite::fromJSON(d, simplifyDataFrame = TRUE)
return(d)
jsonlite::fromJSON(httr::content(res, "text",
encoding = "UTF8"),
simplifyDataFrame = TRUE)
} else {
message("Error in: ", url)
}
......@@ -61,84 +79,72 @@ get_nldi_sources <- function() {
#' get_nldi(url = paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
#' get_nldi(paste0(base, "nwissite/USGS-11120000"), type = "feature", use_sf = TRUE)
#' }
get_nldi = function(url, type = "", use_sf = FALSE){
# The features names are different across features, navigation, and basin returns
# This sets the environment for what to expect
if(type == "nav"){
good_name = c("nhdplus_comid")
} else if(type == "feature") {
good_name = c("sourceName", "identifier", "comid")
} else {
good_name = NULL
}
get_nldi = function(url, type = "", use_sf = FALSE) {
# Query
res <- httr::RETRY("GET", url, times = 3, pause_cap = 60)
# If successful ...
if(res$status_code == 200){
if (res$status_code == 200) {
# Interpret as text
d <- httr::content(res, "text", encoding = "UTF8")
if(d == ""){
if (d == "") {
message("No data returned for: ", url, call. = FALSE)
return(NULL)
}
if(use_sf){
if (use_sf) {
#Parse with sf
tmp <- sf::read_sf(d)
tmp <- sf::read_sf(d)
good_name = find_good_names(tmp, type)
# if of type POINT at the X,Y coordinates as columns
if(sf::st_geometry_type(tmp)[1] =="POINT"){
tmp$X <- sf::st_coordinates(tmp)[,1]
tmp$Y <- sf::st_coordinates(tmp)[,2]
tmp <- tmp[,c(good_name, "X", "Y")]
if (sf::st_geometry_type(tmp)[1] == "POINT") {
tmp$X <- sf::st_coordinates(tmp)[, 1]
tmp$Y <- sf::st_coordinates(tmp)[, 2]
tmp <- tmp[, c(good_name, "X", "Y")]
} else {
# If line/polygon then keep geometry but don't expand
tmp <- tmp[,c(good_name, attr(tmp, "sf_column"))]
tmp <- tmp[, c(good_name, attr(tmp, "sf_column"))]
}
# Returning as data.frame drops the geometry column ...
return(tmp)
# Returning as data.frame drops the geometry column ...
return(tmp)
} else {
# Parse as simplified JSON
d <- jsonlite::fromJSON(d, simplifyDataFrame = TRUE)
input <- d$features$properties
good_name = find_good_names(input, type)
# if of type POINT at the X,Y coordinates as columns
if(d$features$geometry$type[1] == "Point"){
if (d$features$geometry$type[1] == "Point") {
geom = d$features$geometry$coordinates
tmp = cbind(d$features$properties[,good_name], do.call(rbind, geom))
tmp = cbind(input[, good_name], do.call(rbind, geom))
names(tmp) <- c(good_name, "X", "Y")
return(tmp)
} else {
# If line/polygon then keep geometry but don't expand
return(d$features$properties[,c(good_name)])
return(input[, good_name])
}
}
} else {
message("Error in: ", url)
}
} else {
message("Error in: ", url)
}
}
#' Clean NWIS NLDI ids
#' @description The NWIS ids come as "USGS-XXXXXXXX". This is not suitable for passing to other package functions like readNWISdv.
#' @description The NWIS ids come as "USGS-XXXXXXXX". This is not suitable for
#' passing to other package functions like readNWISdv.
#' This function strips the "USGS-" from these ids.
#' @param tmp a data.frame retrieved from get_nldi()
#' @return the input object with potentially modified identifiers
......@@ -147,12 +153,10 @@ get_nldi = function(url, type = "", use_sf = FALSE){
clean_nwis_ids = function(tmp) {
# If data.frame, and of type NWIS, then strip "USGS-" from identifiers
if (is.data.frame(tmp)) {
if ("sourceName" %in% names(tmp) &&
tmp$sourceName[1] == "NWIS Sites") {
tmp$identifier = gsub("USGS-", "", tmp$identifier)
}
}
tmp
......@@ -169,20 +173,23 @@ clean_nwis_ids = function(tmp) {
#' \donttest{
#' valid_ask(all = get_nldi_sources(), "nwis")
#' }
valid_ask = function(all, type){
valid_ask = function(all, type) {
# those where the requested pattern is included in a nldi_source ...
# means we will catch nwis - not just nwissite ...
# means we will catch both wqp and WQP ...
# means we will catch nwis - not just nwissite ...
# means we will catch both wqp and WQP ...
### WOW! This is hacky and will hopefully be unneeded latter on....
all = rbind(all, c("flowlines", "NHDPlus", NA))
cond <- grepl(paste0(tolower(type), collapse = "|"), tolower(c(all$source)))
cond2 <- grepl(paste0(tolower(all$source), collapse = "|"), tolower(c(all$source)))
list(good = all[cond,], bad = type[!cond2])
type = ifelse(type == "nwis", "nwissite", type)
all = rbind(all, c("flowlines", "NHDPlus comid", NA))
good <-
grepl(paste0(tolower(type), collapse = "|"), tolower(c(all$source)))
bad <-
grepl(paste0(tolower(all$source), collapse = "|"), tolower(c(all$source)))
list(good = all[good, ], bad = type[!bad])
}
#' @title R Client for the Network Linked Data Index
......@@ -194,7 +201,7 @@ valid_ask = function(all, type){
#' navigated paths. Valid starting options can be given by one of the following
#' arguments: comid, nwis, huc12, wqp, location, and start.
#' @param comid numeric or character. An NHDPlusV2 COMID
#' @param nwis numeric or character. A USGS NWIS siteID
#' @param nwis numeric or character. A USGS NWIS surface water siteID
#' @param wqp numeric or character. A water quality point ID
#' @param huc12 numeric or character. A WBD HUC12 unit ID
#' @param location numeric vector. Coordinate pair in WGS84
......@@ -208,7 +215,9 @@ valid_ask = function(all, type){
#' @param find character vector. Define what resources to find along the
#' navigation path(s) (see get_nldi_sources()$source). Can also include 'basin'
#' or 'flowline', which will return the upstream basin of the starting feature
#' or flowlines along the navigation respectively. The default is "flowlines". If you provide any other resource, AND want flowlines, then flowlines must be explicitly requested.
#' or flowlines along the navigation respectively. The default is "flowlines".
#' If you provide any other resource, AND want flowlines, then flowlines must
#' be explicitly requested.
#' @param distance_km numeric. Define how far to look along the navigation path in
#' kilometers (default = 100)
#' @param no_sf if available, should `sf` be used for parsing,
......@@ -268,13 +277,12 @@ findNLDI <- function(comid = NULL,
find = c("flowlines"),
distance_km = 100,
no_sf = FALSE) {
# Should sf be used? Both no_sf and pkg.env must agree
use_sf = all(pkg.env$local_sf, !no_sf)
# Should the basin be identified?
getBasin = ("basin" %in% find)
# From the collection of possible origins, pick 1 and remove NULLS
starter <- tc(c(
list(
......@@ -286,113 +294,131 @@ findNLDI <- function(comid = NULL,
),
origin
))
# a single starting location must be given ...
if (is.null(starter) | length(starter) > 1) {
stop("Define a single starting point. Use `find` to identify other resources.")
}
# Ensure nav types are valid
bad_nav <- !nav %in% c("UM", "UT", "DD", "DM")
if (any(bad_nav)) {
stop(nav[bad_nav], " not a valid navigation. Chose from: UM, UT, DD, DM")
}
# name of starter
start_type = names(starter)
# If location, ensure lng is first argument (hack for USA features)
if (start_type == 'location') {
if(any(grepl("sfc$|sf$", class(location))) & use_sf ) {
if(sf::st_geometry_type(location) != "POINT"){
if (any(grepl("sfc$|sf$", class(location))) & use_sf) {
if (sf::st_geometry_type(location) != "POINT") {
stop("Only POINT objects can be passed to location")
}
location = sf::st_coordinates(location)
} else {
if (location[1] > 0) { stop("Provide location in the form c(lng,lat)") }
if (location[1] > 0) {
stop("Provide location in the form c(lng,lat)")
}
}
# Must convert location to COMID for tracing and discovery ...
# Must convert location to COMID for tracing and discovery ...
tmp_url <- paste0(
pkg.env$nldi_base,
"comid/position?coords=POINT%28",
location[1] , "%20", location[2] , "%29"
location[1] ,
"%20",
location[2] ,
"%29"
)
tmp_return <- get_nldi(tmp_url, "feature", use_sf = use_sf)
# Override starter with location based COMID
starter <- list("comid" = tmp_return$identifier)
}
# Reset (if needed)
start_type <- names(starter)
if(is.null(pkg.env$current_nldi)) {
if (is.null(pkg.env$current_nldi)) {
pkg.env$current_nldi <- get_nldi_sources()
}
# Defining the origin URL.
# Align request with formal name from sources
# If NWIS, add "USGS-" prefix
# Align request with formal name from sources
# If NWIS, add "USGS-" prefix
start_url = paste0(
valid_ask(pkg.env$current_nldi, type = start_type)$good$features, "/",
ifelse(start_type == "nwis", paste0("USGS-", starter), starter), "/"
valid_ask(all = pkg.env$current_nldi, type = start_type)$good$features,
"/",
ifelse(start_type == "nwis", paste0("USGS-", starter), starter),
"/"
)
# Makes sure that all requested features to `find` are valid and name-aligned
if (!is.null(find)) {
find = valid_ask(pkg.env$current_nldi, type = find)$good$source
}
# Set empty lists to store origin, navigation, features, and basin requests ...
navigate <- features <- list()
# Build navigation URLs
for (i in seq_along(nav)) {
navigate[[nav[i]]] = paste0(start_url, "navigation/", nav[i])
navigate[[nav[i]]] = paste0(start_url, "navigation/", nav[i])
}
# Build find URLs
if (length(find) > 0) {
features = lapply(navigate, paste0, paste0("/", find), paste0("?distance=", distance_km))
features = lapply(navigate,
paste0,
paste0("/", find),
paste0("?distance=", distance_km))
}
names <- unlist(lapply(nav, paste0, paste0("_", find)))
search <- data.frame(
url = unlist(c(start_url,
if (getBasin) { paste0(start_url, "basin") },
if (getBasin) {
paste0(start_url, "basin")
},
features)),
type = c("feature",
if(getBasin) { 'basin' },
ifelse(rep(find, length(nav)) == "flowlines", "nav", "feature")),
if (getBasin) {
'basin'
},
ifelse(
rep(find, length(nav)) == "flowlines", "nav", "feature"
)),
name = c("origin",
if (getBasin) { 'basin' },
if (getBasin) {
'basin'
},
names[!names %in% c("UM_", "UT_", "DD_", "DM_")])
)
# Send NLDI queries ...
shp <- lapply(1:nrow(search), function(x) {
get_nldi(search$url[x], type = search$type[x], use_sf = use_sf)
get_nldi(url = search$url[x],
type = search$type[x],
use_sf = use_sf)
})
# Set the names of the parsed URL list
names(shp) <- search$name
# Clean up NWIS ids, trim NULLs, and return ...
shp = tc(lapply(shp, clean_nwis_ids))
if(length(shp) == 1){
# dont return list for one length elements
if (length(shp) == 1) {
shp = shp[[1]]
}
return(shp)
}
......@@ -20,7 +20,7 @@ findNLDI(
\arguments{
\item{comid}{numeric or character. An NHDPlusV2 COMID}
\item{nwis}{numeric or character. A USGS NWIS siteID}
\item{nwis}{numeric or character. A USGS NWIS surface water siteID}
\item{wqp}{numeric or character. A water quality point ID}
......@@ -40,7 +40,9 @@ one or more of the abbreviations ("UM", "UT", DM", "DD").}
\item{find}{character vector. Define what resources to find along the
navigation path(s) (see get_nldi_sources()$source). Can also include 'basin'
or 'flowline', which will return the upstream basin of the starting feature
or flowlines along the navigation respectively. The default is "flowlines". If you provide any other resource, AND want flowlines, then flowlines must be explicitly requested.}
or flowlines along the navigation respectively. The default is "flowlines".
If you provide any other resource, AND want flowlines, then flowlines must
be explicitly requested.}
\item{distance_km}{numeric. Define how far to look along the navigation path in
kilometers (default = 100)}
......
......@@ -19,7 +19,7 @@ test_that("NLDI starting sources...", {
# COMID
expect_equal(findNLDI(comid = 101)$sourceName, "NHDPlus comid")
# NWIS
expect_equal(findNLDI(nwis = '11120000')$sourceName, "NWIS Sites")
expect_equal(findNLDI(nwis = '11120000')$sourceName, "NWIS Surface Water Sites")
# WQP
expect_equal(findNLDI(wqp = 'USGS-04024315')$sourceName, "Water Quality Portal")
# LOCATION
......
......@@ -66,7 +66,7 @@ Each of these is discussed below using the following packages:
```{r}
library(dplyr) # Data frame manipulation
library(ggplot2) # Plotting
library(gridExtra) # Arranging plots
library(patchwork) # Arranging plots
library(dataRetrieval) # The star of the show!
```
......@@ -74,7 +74,7 @@ library(dataRetrieval) # The star of the show!
# What's available?
First, we need to know what features are indexed in the NLDI. The most current offerings can be found using `get_nldi_sources`, and new features are regularly added. At the time of writing (`r Sys.Date()`), `r nrow(get_nldi_sources())` datasets have been indexed to the NHDPlus and cataloged in the NLDI.
First, we need to know what features are indexed in the NLDI. The most current offerings can be found using `get_nldi_sources`, and new features are regularly added. At the time of writing (`r Sys.Date()`), `r nrow(get_nldi_sources())` data sets have been indexed to the NHDPlus and cataloged in the NLDI.
```{r, eval = FALSE}
get_nldi_sources()
......@@ -92,13 +92,13 @@ Features can be requested in two primary ways: Using the the native data set ide
### By Identifier
As an illustrative example, NHDPlus features can be requested by their COMID from the NHDPlusV2 dataset.
As an illustrative example, NHDPlus features can be requested by their COMID from the NHDPlusV2 data set.
```{r message=FALSE}
findNLDI(comid = 101)
```
The returned simple features object contains the native dataset identifier ("identifier"), sourceName of the native dataset, and the indexed NHD COMID (in this case a duplicate since an NHD feature was requested). In the example above, we see the geometry column is of type `LINESTRING`. To keep `dataRetrieval` lightweight, the [`sf`] (https://r-spatial.github.io/sf/articles/sf1.html) package is not a dependency. Instead, if `sf` is not installed - or `no_sf = TRUE` - only the _sourceName_, _comid_, and _identifier_ will be returned.
The returned simple features object contains the native data set identifier ("identifier"), sourceName of the native data set, and the indexed NHD COMID (in this case a duplicate since an NHD feature was requested). In the example above, we see the geometry column is of type `LINESTRING`. To keep `dataRetrieval` lightweight, the [`sf`](https://r-spatial.github.io/sf/articles/sf1.html) package is not a dependency. Instead, if `sf` is not installed - or `no_sf = TRUE` - only the _sourceName_, _comid_, and _identifier_ will be returned.
```{r}
findNLDI(comid = 101, no_sf = TRUE)
......@@ -106,7 +106,7 @@ findNLDI(comid = 101, no_sf = TRUE)
***
To provide another example, we can request the NLDI representation of USGS NWIS gage [11120000](https://waterdata.usgs.gov/monitoring-location/11120000) in both a `sf` and "non-sf" way. Features indexed to the NHDPlus are returned as `POINT` objects. If `sf` is enabled, the _sourceName_, _identifier_, _X_, _Y_ and _geometry_ (`sfc`) are returned. If `sf` is not available, the _geometry_ is dropped but the _X_ and _Y_ values are retained.
To provide another example, we can request the NLDI representation of USGS NWIS gauge [11120000](https://waterdata.usgs.gov/monitoring-location/11120000) in both a `sf` and "non-sf" way. Features indexed to the NHDPlus are returned as `POINT` objects. If `sf` is enabled, the _sourceName_, _identifier_, _X_, _Y_ and _geometry_ (`sfc`) are returned. If `sf` is not available, the _geometry_ is dropped but the _X_ and _Y_ values are retained.
```{r}
# local sf installation
......@@ -118,7 +118,7 @@ findNLDI(nwis = "11120000", no_sf = TRUE)
****
Any NLDI feature found with `get_nldi_source` can be requested by passing a `type`/`ID` pair as a list to the `origin` argument. This will allow the networking capabilities offered in `dataRetrieval` to grow naturally with the NLDI itself. For example, we can use the origin argument to request features that dont offer a specific parameter.
Any NLDI feature found with `get_nldi_source` can be requested by passing a `type`/`ID` pair as a list to the `origin` argument. This will allow the networking capabilities offered in `dataRetrieval` to grow naturally with the NLDI itself. For example, we can use the origin argument to request features that don't offer a specific parameter.
```{r}
# Water Data Exchange 2.0 Site CA_45206
......@@ -140,7 +140,7 @@ findNLDI(location = ucsb)
# Navigation
From any feature (`comid`, `huc12`, `nwis`, `wqp`,`origin`) or `location`, four modes of navigation are available and include:
From any feature (`comid`, `huc12`, `nwis`, `wqp`, `origin`) or `location`, four modes of navigation are available and include:
(1) **UT**: Upper Tributary
(2) **UM**: Upper Mainstream
......@@ -178,44 +178,55 @@ ggplot() +
One or more modes of navigation can be supplied to the `nav` argument. For example we can ask to navigate along the upper mainstem (UM) from COMID 101.
```{r}
summary(findNLDI(comid = 101, nav = "UM"), max.level = 1)
summarize.nldi = function(input){
data.frame(name = names(input),
class = sapply(input, class)[1],
row.names = NULL) %>%
mutate(feature_count = ifelse(class == "sf", sapply(input, nrow),
sapply(input, length)))
}
findNLDI(comid = 101, nav = "UM") %>%
summarize.nldi()
```
Or along the upper mainstem (UM) _and_ upper tributary (UT) of COMID 101.
```{r}
summary(findNLDI(comid = 101, nav = c("UM", "UT")))
findNLDI(comid = 101, nav = c("UM", "UT")) %>%
summarize.nldi()
```
In both cases the returned named list includes the origin and the flowlines along the requested navigation. If `sf` is not enabled, the returned object for a flowpath navigation is a vector of COMIDs.
```{r}
summary(findNLDI(comid = 101, nav = c("UM", "DM"), no_sf = TRUE))
findNLDI(comid = 101, nav = c("UM", "DM"), no_sf = TRUE) %>%
summarize.nldi()
```
# Searching along the Navigation
Like the gas station example, any of the features listed in `get_nldi_sources` can be searched for along the network, for example, we can find all NWIS gages, on the upper tributary, of COMID 101.
Like the gas station example, any of the features listed in `get_nldi_sources` can be searched for along the network, for example, we can find all NWIS gauges, on the upper tributary, of COMID 101.
```{r}
summary(findNLDI(comid = 101,
nav = "UT",
find = "nwis"))
findNLDI(comid = 101, nav = "UT", find = "nwis") %>%
summarize.nldi()
```
Of course, more than one resource can be requested, for example, lets replicate the previous search, this time adding Water Quality Points to the returned list:
```{r}
summary(findNLDI(comid = 101, nav = "UT", find = c("nwis", "wqp")))
findNLDI(comid = 101, nav = "UT", find = c("nwis", "wqp")) %>%
summarize.nldi()
```
Note that flowlines are no longer the default return for navigation once a new feature is requested. To retain flowlines, the must be explicitly requested.
```{r}
summary(findNLDI(comid = 101,
nav = "UT",
find = c("nwis", "flowlines")))
findNLDI(comid = 101, nav = "UT", find = c("nwis", "flowlines")) %>%
summarize.nldi()
```
### Upstream Basin Boundary
......@@ -224,12 +235,14 @@ The Upstream Basin Boundary is a unique object that can be found for any feature
```{r}
# with sf
summary(findNLDI(comid = 101, find = "basin"))
findNLDI(comid = 101, find = "basin") %>%
summarize.nldi()
```
```{r}
# No sf
summary(findNLDI(comid = 101, find = "basin", no_sf = TRUE))
findNLDI(comid = 101, find = "basin", no_sf = TRUE) %>%
summarize.nldi()
```
### Distance Constraints
......@@ -238,20 +251,19 @@ In some cases, particularly for DM and DD navigation, the network can extend for
```{r}
# Default 100 km
dist100 = findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp"))
paste(nrow(dist100$DM_nwissite), "NWIS sites w/in 100 km")
paste(nrow(dist100$DM_WQP), "WQP sites w/in 100 km")
findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp")) %>%
summarize.nldi()
# Extended 200 km search
dist200 = findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp"), distance_km = 200)
paste(nrow(dist200$DM_nwissite), "NWIS sites w/in 200 km")
paste(nrow(dist200$DM_WQP), "WQP sites w/in 200 km")
findNLDI(comid = 101, nav = "DM", find = c("nwis", "wqp"), distance_km = 200) %>%
summarize.nldi()
```
# Basic `dataRetrieval` integration
Last, as this functionality is being added to the `dataRetrieval` package, lets see a basic example of how the NLDI tools provide a discovery mechanism for working with the `dataRetrieval` tools. Here we will take a location that is near [Fountain Creek in Colorado Springs, Colorado](https://www.google.com/maps/place/Colorado+Springs,+CO/@38.7864572,-104.7829507,17z).
In this example we will use that location as the origin, navigate upstream along the mainstem, search for NWIS gages, and use the identified siteIDs to query streamflow records from January 1^st^, 2020 to the current day.
In this example we will use that location as the origin, navigate upstream along the mainstem, search for NWIS gauges, and use the identified siteIDs to query streamflow records from January 1^st^, 2020 to the current day.
```{r, fig.cap = "Integrating NLDI and NWIS dataRetrieval tools"}
# Upstream nwis, flowlines, and basin
......@@ -259,34 +271,46 @@ fountainCreek <- findNLDI(location = c(-104.780837, 38.786796),