Newer
Older
---
title: "GFv2 Get Data"
output: html_document
---
This notebook pulls data from a number of sources and populates the GFv2 data directory. Any new data requirements should be added as code chunks here.
Each code chunk should create a path to the file you want to use in a process step, check if that path exists, and put the data there if it does not. All paths are stored in a list that is saved to the `cache` directory. If changes are made to the output of this notebook, they should be checked in.
**If resources need to be downloaded from ScienceBase, this R Markdown document should be run from RStudio so that username and password authentication will work.**
```{r}
# Packages used by the chunks in this notebook.
library(sbtools)
library(nhdplusTools)
library(tidyr)
library(dplyr)

# Make sure the working directories exist: `data` receives the downloads and
# `bin` receives the 7-zip executable when it has to be fetched.
if (!dir.exists("data")) {
  dir.create("data")
}
if (!dir.exists("bin")) {
  dir.create("bin")
}

data_dir <- "data"

# `out_list` accumulates every path registered by the chunks below; it is
# written to `out_file` by the last chunk of the notebook.
out_list <- list("data_dir" = data_dir)
out_file <- file.path("cache", "data_paths.json")

# Command used to invoke 7-zip. Replaced with a local copy when the
# nhdplusTools check below fails.
sevenz <- "7z"

check_7z <- try(nhdplusTools:::check7z(), silent = TRUE)

if (inherits(check_7z, "try-error")) {
  message("trying to download 7z -- it's not on your path")

  # Download command-line 7-zip
  if (!file.exists("bin/7za.exe")) {
    download.file("https://www.7-zip.org/a/7za920.zip", destfile = "bin/7za920.zip")
    unzip("bin/7za920.zip", exdir = "bin")
  }

  sevenz <- "bin/7za.exe"
}
```
```{r HUC12 outlets}
# HUC12 outlets - derived by Dave Blodgett's code (in progress); currently
# hosted at GFv2 POI Source data page
hu12_points_path <- file.path(data_dir, "hu_outlets.gpkg")

if (!file.exists(hu12_points_path)) {
  # Log in only when there is no active ScienceBase session. The download
  # relies on the current session, as the other chunks do, rather than on a
  # local `sb` object that was only defined when a login had just happened.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                              names = "hu_outlets.gpkg",
                              destinations = hu12_points_path)
}

out_list <- c(out_list, list(hu12_points_path = hu12_points_path))
```
```{r GagesIII}
# GagesIII - data release in progress by Chase and others, currently hosted at
# GFv2 POI Source data page
gagesiii_points_path <- file.path(data_dir, "GAGESIII_gages")

if (!file.exists(gagesiii_points_path)) {
  dir.create(gagesiii_points_path, recursive = TRUE, showWarnings = FALSE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # Zipped gage data: download into the gage directory and unpack in place.
  g3z <- "GAGESIII_gages.zip"
  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = g3z,
                              destinations = file.path(gagesiii_points_path, g3z))
  unzip(file.path(gagesiii_points_path, g3z), exdir = gagesiii_points_path)
  rm(g3z)

  # Companion dbf table; stored directly under `data_dir`.
  g3gf <- "gages3GFinfo.dbf"
  sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = g3gf,
                              destinations = file.path(data_dir, g3gf))
}

out_list <- c(out_list, list(gagesiii_points_path = gagesiii_points_path))
```
```{r Thermoelectric Facilities}
# Thermoelectric plants - data compiled by Melissa Harris and Amy Galanter
# (multiple sources); currently hosted at GFv2 POI Source data page
#-----------------------------------------------------
# Harris, Melissa A. and Diehl, Timothy H., 2017. A Comparison of Three
#   Federal Datasets for Thermoelectric Water Withdrawals in the United States
#   for 2010. Journal of the American Water Resources Association (JAWRA)
#   53(5): 1062-1080. https://doi.org/10.1111/1752-1688.12551
TE_points_path <- file.path(data_dir, "TE_points")

if (!file.exists(TE_points_path)) {
  dir.create(TE_points_path, showWarnings = FALSE, recursive = TRUE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  TEz <- "2015_TE_Model_Estimates_lat.long_COMIDs.7z"
  te_archive <- file.path(TE_points_path, TEz)

  sbtools::item_file_download(
    "5dbc53d4e4b06957974eddae",
    names = TEz,
    destinations = te_archive
  )

  # Extract the archive into the same directory with 7-zip.
  system(paste0(sevenz, " e -o", TE_points_path, " ", te_archive))

  rm(TEz, te_archive)
}

out_list <- c(out_list, list(TE_points_path = TE_points_path))
```
```{r Interbasin transfers and diversions - USGS}
# USGS IT and diversion points on NHDPlus v2 network
#------------------------------------------------------
# Schwarz, G.E., 2019, E2NHDPlusV2_us: Database of Ancillary Hydrologic
#   Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines:
#   U.S. Geological Survey data release, https://doi.org/10.5066/P986KZEM.
USGS_IT_path <- file.path(data_dir, "USGS_IT")

if (!file.exists(USGS_IT_path)) {
  dir.create(USGS_IT_path, showWarnings = FALSE, recursive = TRUE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # Download the zipped supplemental files and unpack them next to the zip.
  ITzip <- "supplemental_files.zip"
  sbtools::item_file_download(
    "5d16509ee4b0941bde5d8ffe",
    names = ITzip,
    destinations = file.path(USGS_IT_path, ITzip)
  )
  unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path)
}

out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path))
```
```{r Interbasin transfers - KSU}
# KSU IT Facilities - data compiled by Kerim Dickson (Kansas State University)
#-----------------------------------------------------------------------------
# Dickson, Kerim E. and Dzombak, David A., 2017. Inventory of Interbasin
#   Transfers in the United States. Journal of the American Water Resources
#   Association (JAWRA) 53(5): 1121-1132.
#   https://doi.org/10.1111/1752-1688.12561
KSU_IT_path <- file.path(data_dir, "KSU_IT")

if (!file.exists(KSU_IT_path)) {
  dir.create(KSU_IT_path, showWarnings = FALSE, recursive = TRUE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # The spreadsheet is stored as downloaded; nothing to unpack.
  IT <- "IBT_Database.xlsx"
  sbtools::item_file_download(
    "5dbc53d4e4b06957974eddae",
    names = IT,
    destinations = file.path(KSU_IT_path, IT)
  )
}

out_list <- c(out_list, list(KSU_IT_path = KSU_IT_path))
```
```{r National Inventory of Dams}
# National Inventory of Dams data coupled with NHDPlus COMIDs
#-----------------------------------------------------------------------------
# Wieczorek, M.E., Jackson, S.E., and Schwarz, G.E., 2018, Select Attributes
#   for NHDPlus Version 2.1 Reach Catchments and Modified Network Routed
#   Upstream Watersheds for the Conterminous United States (ver. 2.0,
#   November 2019): U.S. Geological Survey data release,
#   https://doi.org/10.5066/F7765D7V.
#
# https://nid.sec.usace.army.mil/ords/f?p=105%3A1%3A%3A%3A%3A%3A%3A
NID_points_path <- file.path(data_dir, "NID_points")

if (!file.exists(NID_points_path)) {
  dir.create(NID_points_path, showWarnings = FALSE, recursive = TRUE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # Two files from the same ScienceBase item: the attribute table ...
  NIDtxt <- "NID_attributes_20170612.txt"
  sbtools::item_file_download(
    "5dbc53d4e4b06957974eddae",
    names = NIDtxt,
    destinations = file.path(NID_points_path, NIDtxt)
  )

  # ... and the RDS file.
  NIDsnap <- "NAWQA_NID_snap.rds"
  sbtools::item_file_download(
    "5dbc53d4e4b06957974eddae",
    names = NIDsnap,
    destinations = file.path(NID_points_path, NIDsnap)
  )
}

out_list <- c(out_list, list(NID_points_path = NID_points_path))
```
```{r VPU outlets}
# VPU Outlets for subsetting - Derived by Team Spatial for subsetting VPUs
# (hydrologic regions)
VPU_outlets <- file.path(data_dir, "RegOutlets.json")

if (!file.exists(VPU_outlets)) {
  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  VPU_out <- "RegOutlets.json"
  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = VPU_out,
                              destinations = VPU_outlets)
}

# Register the path whether or not a download was needed.
out_list <- c(out_list, list(VPU_outlets = VPU_outlets))
```

Blodgett, David L.
committed
```{r NHDPlusV2}
# NHDPlus Seamless National Data - pulled from NHDPlus national data server;
#   post-processed to RDS files by NHDPlusTools
# GageLoc - Gages snapped to NHDPlusv2 flowlines (QAQC not verified)
# NHDPlus HUC12 crosswalk
#------------------------
# Moore, R.B., Johnston, C.M., and Hayes, L., 2019, Crosswalk Table Between
#   NHDPlus V2.1 and its Accompanying WBD Snapshot of 12-Digit Hydrologic
#   Units: U.S. Geological Survey data release,
#   https://doi.org/10.5066/P9CFXHGT.
nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
nhdplus_gdb <- file.path(data_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")

islands_dir <- file.path(data_dir, "islands")
islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")

if (!file.exists(nhdplus_dir)) {
  message("downloading NHDPlus...")
  dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # GageLoc archive: download, then extract into `nhdplus_dir`.
  gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z"
  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz,
                              destinations = file.path(nhdplus_dir, gLz))
  system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, gLz)))

  # HUC12 crosswalk archive: same pattern. The 7-zip command needs the
  # " e -o" extract/output switches, exactly as for GageLoc above.
  xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z"
  sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk,
                              destinations = file.path(nhdplus_dir, xWalk))
  system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, xWalk)))

  # Lower-48 seamless geodatabase. If the nhdplusTools helper errors, fall
  # back to extracting the archive under `data_dir` directly with 7-zip
  # (quiet, keeping full paths).
  x <- tryCatch(
    download_nhdplusv2(data_dir),
    error = function(e) {
      system(paste0(sevenz, " x ",
                    file.path(data_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
                    " -o", data_dir),
             ignore.stdout = TRUE)
    }
  )

  # Point nhdplusTools at the lower-48 geodatabase and stage it.
  nhdplus_path(nhdplus_gdb)
  suppressWarnings(staged_nhdplus <- stage_national_data())

  # Islands (HI, PR, VI, PI) seamless geodatabase. The fallback extracts into
  # `islands_dir`, because `islands_gdb` is located under that directory.
  x <- tryCatch(
    download_nhdplusv2(islands_dir, "https://s3.amazonaws.com/edap-nhdplus/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
    error = function(e) {
      system(paste0(sevenz, " x ",
                    file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
                    " -o", islands_dir),
             ignore.stdout = TRUE)
    }
  )

  nhdplus_path(islands_gdb)
  suppressWarnings(staged_nhdplus_islands <- stage_national_data())

  # Cache the HUC12 layer of the lower-48 geodatabase as RDS, repaired and
  # projected to EPSG:5070. `nhdplus_gdb` is read directly; `data_paths` is
  # not defined in this notebook.
  HUC12 <- sf::read_sf(nhdplus_gdb, layer = "HUC12") %>%
    sf::st_make_valid() %>%
    sf::st_transform(crs = 5070)
  saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds"))

  # Companion dbf table; stored directly under `data_dir`.
  gagelocgf <- "GageLocGFinfo.dbf"
  sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf,
                              destinations = file.path(data_dir, gagelocgf))
}

out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, nhdplus_gdb = nhdplus_gdb, islands_dir = islands_dir, islands_gdb = islands_gdb))
```
```{r NHDPlusV2 Waterbodies}
# Waterbodies - derived after downloading and post-processing NHDPlus Seamless
# National Geodatabase
waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")

if (!file.exists(waterbodies_path)) {
  message("formatting NHDPlus watebodies...")
  nhdplus_path(nhdplus_gdb)

  # Read the feature class, convert to simple features and project to
  # EPSG:5070. The result is stored under the same name that is saved below
  # (previously it was assigned to `fc` while `wbSF` was written), and the
  # geodatabase path comes from `nhdplus_gdb`, since `data_paths` is not
  # defined in this notebook.
  wbSF <- sf::read_sf(nhdplus_gdb, "NHDWaterbody") %>%
    sf::st_as_sf() %>%
    sf::st_transform(crs = 5070)

  saveRDS(wbSF, waterbodies_path)
}

out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
```
```{r NHDPlusV2 FDR_FAC}
# NHDPlus FDR/FAC grids available by raster processing unit
fdr_fac_dir <- file.path(data_dir, "fdrfac")
dir.create(fdr_fac_dir, recursive = TRUE, showWarnings = FALSE)

# Will need to make this work for all the FdrFac data -- maybe just shell out to wget.

# Every directory under `fdr_fac_dir` that contains at least one file.
dirs <- unique(dirname(list.files(fdr_fac_dir, recursive = TRUE, full.names = TRUE)))

# Keep the directories whose last path component is exactly "fdr" or "fac".
fdr <- dirs[grepl(".*/fdr$", dirs)]
fac <- dirs[grepl(".*/fac$", dirs)]

out <- list(fdr = list(), fac = list())

# Three characters taken from just before the trailing "/fdr"; used as the
# raster processing unit label for both lists.
# NOTE(review): `fac` is named with labels derived from `fdr`, so this assumes
# both vectors list the same units in the same order -- confirm.
rpu <- substr(fdr, (nchar(fdr) - 6), (nchar(fdr) - 4))

out$fdr <- as.list(setNames(fdr, paste0("rpu_", rpu)))
out$fac <- as.list(setNames(fac, paste0("rpu_", rpu)))

# Register the grids like every other chunk does; without this `out` is
# overwritten by a later chunk and never reaches `out_list`.
out_list <- c(out_list, out)
```
```{r WBD}
# Current version of National WBD

# NOTE(review): `wbd_dir` was used below without ever being defined in this
# notebook. The directory name here is an assumption -- confirm it against the
# process steps that consume `latest_wbd` / `latest_wbd_rds`.
wbd_dir <- file.path(data_dir, "wbd")

if (!dir.exists(wbd_dir)) {
  dir.create(wbd_dir, recursive = TRUE)
  wbd <- download_wbd(wbd_dir, "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip")
}

wbd_rds <- file.path(wbd_dir, "WBD.rds")

# Path of the geodatabase found in `wbd_dir`. `list.files()` takes a regular
# expression, so match the ".gdb" extension explicitly instead of a glob.
out <- list(latest_wbd = file.path(wbd_dir, list.files(wbd_dir, pattern = "\\.gdb$")))

if (!file.exists(wbd_rds)) {
  # `out$latest_wbd` already includes `wbd_dir`; read it as is rather than
  # prefixing the directory a second time.
  wbdfc <- sf::read_sf(out$latest_wbd, "WBDHU12") %>%
    sf::st_as_sf() %>%
    sf::st_transform(crs = 5070)

  saveRDS(wbdfc, wbd_rds)
}

out_rds <- list(latest_wbd_rds = wbd_rds)

out_list <- c(out_list, out, out_rds)
```
```{r MERIT HydroDEM}
# MERIT HydroDEM - used for AK Geospatial Fabric, and potentially Mexico
# portion of R13
#-----------------------------------------------------------------------------
# Yamazaki, D., Ikeshima, D., Sosa, J., Bates, P. D., Allen, G. H., &
#   Pavelsky, T. M. (2019). MERIT Hydro: a high-resolution global hydrography
#   map based on latest topography dataset. Water Resources Research, 55,
#   5053-5073. https://doi.org/10.1029/2019WR024873
merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro")

if (!dir.exists(merit_dir)) {
  dir.create(merit_dir, recursive = TRUE, showWarnings = FALSE)

  # Log in only when there is no active ScienceBase session.
  if (is.null(sbtools::current_session())) {
    authenticate_sb()
  }

  # The zip is downloaded to, and unpacked in, `data_dir`.
  m <- "merged_AK_MERIT_Hydro.zip"
  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = m,
                              destinations = file.path(data_dir, m))
  unzip(file.path(data_dir, m), exdir = data_dir)
  rm(m)
}

# Register the shapefile paths on every run. Previously this only happened
# when the directory already existed, so the run that performed the download
# left these entries out of `out_list`.
out <- list(merit_catchments = file.path(merit_dir,
                                         "cat_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"),
            merit_rivers = file.path(merit_dir,
                                     "riv_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"))

out_list <- c(out_list, out)
```
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
```{r e2nhd}
# E2NHDPlusV2 network table: the CSV is delivered inside a zip on ScienceBase.
zip_file <- "e2nhdplusv2_us_csv.zip"

out_zip <- file.path(data_dir, zip_file)

out <- list(e2nhd_network = file.path(data_dir, "e2nhdplusv2_us.csv"))

if (!file.exists(out$e2nhd_network)) {
  sbtools::item_file_download("5d16509ee4b0941bde5d8ffe",
                              names = zip_file,
                              destinations = out_zip)

  # Unpack the downloaded archive (`out_zip`), not the list of output paths.
  zip::unzip(out_zip, exdir = data_dir)
}

out_list <- c(out_list, out)
```
```{r nwm_topology}
# National Water Model topology table, downloaded from ScienceBase into
# `data_dir`.
sb_f <- "NWM_v2_1_CONUS_Topology.csv"

out <- list(nwm_network = file.path(data_dir, sb_f))

if (!file.exists(out$nwm_network)) {
  # Pass the destination as a character path rather than the whole named list.
  sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                              names = sb_f,
                              destinations = out$nwm_network)
}

out_list <- c(out_list, out)
```
```{r nhdplus_attributes}
# NHDPlus attribute table, downloaded from ScienceBase into the `cache`
# directory.
sb_f <- "enhd_nhdplusatts.csv"

out <- list(new_nhdp_atts = file.path("cache", sb_f))

if (!file.exists(out$new_nhdp_atts)) {
  # Pass the destination as a character path rather than the whole named list.
  sbtools::item_file_download("5dcd5f96e4b069579760aedb",
                              names = sb_f,
                              destinations = out$new_nhdp_atts)
}

out_list <- c(out_list, out)
```
```{r}
# Write every registered path to `out_file` as pretty-printed JSON, with
# length-one values stored as scalars. The call is namespaced because
# jsonlite is not attached by this notebook.
jsonlite::write_json(out_list, path = out_file, pretty = TRUE, auto_unbox = TRUE)

rm(out_list)