---
title: "GFv2 Get Data"
output: html_document
---
This notebook pulls data from a number of sources and populates the GFv2 data directory. Any new data requirements should be added as code chunks here.

Each code chunk should build the path to the file it provides for a process step, check whether that path exists, and put the data there if it does not. All paths are collected in a list that is written to `cache/data_paths.json`. If changes are made to the output of this notebook, they should be checked in.

**If resources need to be downloaded from ScienceBase, run this R Markdown document from RStudio so that username and password authentication will work.**
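A minimal sketch of the check-then-fetch pattern a new chunk should follow is shown below. The file name and URL are placeholders, not real GFv2 inputs, and `data_dir` and `out_list` come from the setup chunk that follows; the block is illustrative only and is not an executed chunk.

```r
# Hypothetical example: "my_new_data.gpkg" and the URL are placeholders
my_new_data_path <- file.path(data_dir, "my_new_data.gpkg")

# Only fetch the data if it is not already in the data directory
if(!file.exists(my_new_data_path)) {
  download.file("https://example.com/my_new_data.gpkg",
                destfile = my_new_data_path, mode = "wb")
}

# Register the path so it lands in cache/data_paths.json
out_list <- c(out_list, list(my_new_data_path = my_new_data_path))
```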
```{r}

library(sbtools)
library(nhdplusTools)
library(tidyr)
library(dplyr)
library(sf)
library(jsonlite)

if(!dir.exists("data")) {dir.create("data")}
if(!dir.exists("bin")) {dir.create("bin")}
if(!dir.exists("cache")) {dir.create("cache")}

data_dir <- "data"
out_list <- list("data_dir" = data_dir)
out_file <- file.path("cache", "data_paths.json")
sevenz <- "7z"
check_7z <- try(nhdplusTools:::check7z(), silent = TRUE)

if(is(check_7z, "try-error")) {
  message("trying to download 7z -- it's not on your path")

  # Download the command-line 7-Zip executable
  if(!file.exists("bin/7za.exe")) {
    download.file("https://www.7-zip.org/a/7za920.zip", destfile = "bin/7za920.zip")
    unzip("bin/7za920.zip", exdir = "bin")
  }

  sevenz <- "bin/7za.exe"
}
```
```{r HUC12 outlets}
# HUC12 outlets - derived by Dave Blodgett's code (in progress); currently hosted at the GFv2 POI Source data page
hu12_points_path <- file.path(data_dir, "hu_outlets.gpkg")

if(!file.exists(hu12_points_path)) {
  # authenticate_sb() returns the session it establishes
  sb <- sbtools::current_session()
  if(is.null(sb))
    sb <- authenticate_sb()

  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = "hu_outlets.gpkg",
                              destinations = hu12_points_path, session = sb)
}
out_list <- c(out_list, list(hu12_points_path = hu12_points_path))
```
```{r GagesIII}
# GagesIII - data release in progress by Chase and others, currently hosted at GFv2 POI Source data page

gagesiii_points_path <- file.path(data_dir, "GAGESIII_gages")
if(!file.exists(gagesiii_points_path)) {
dir.create(gagesiii_points_path, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
g3z <- "GAGESIII_gages.zip"
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = g3z,
destinations = file.path(gagesiii_points_path, g3z))
unzip(file.path(gagesiii_points_path, g3z), exdir = gagesiii_points_path)
rm(g3z)
g3gf <- "gages3GFinfo.dbf"
sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = g3gf,
destinations = file.path(data_dir, g3gf))

}
out_list <- c(out_list, list(gagesiii_points_path = gagesiii_points_path))
```
```{r Thermoelectric Facilities}
# Thermoelectric plants - data compiled by Melissa Harris and Amy Galanter (multiple sources),
# Currently hosted at GFv2 POI Source data page
#-----------------------------------------------------
# Harris, Melissa A. and Diehl, Timothy H., 2017. A Comparison of Three Federal Datasets for Thermoelectric Water
# Withdrawals in the United States for 2010. Journal of the American Water Resources Association
# (JAWRA) 53(5): 1062–1080. https://doi.org/10.1111/1752-1688.12551
TE_points_path <- file.path(data_dir, "TE_points")
if(!file.exists(TE_points_path)) {
dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
TEz <- "2015_TE_Model_Estimates_lat.long_COMIDs.7z"
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = TEz,
destinations = file.path(TE_points_path, TEz))
system(paste0(sevenz, " e -o", TE_points_path, " ", file.path(TE_points_path, TEz)))
rm(TEz)
}
out_list <- c(out_list, list(TE_points_path = TE_points_path))
```
```{r Interbasin transfers and diversions - USGS}
# USGS IT and diversion points on NHDPlus v2 network -
#------------------------------------------------------
# Schwarz, G.E., 2019, E2NHDPlusV2_us: Database of Ancillary Hydrologic Attributes and Modified Routing for
# NHDPlus Version 2.1 Flowlines: U.S. Geological Survey data release, https://doi.org/10.5066/P986KZEM.
USGS_IT_path <- file.path(data_dir, "USGS_IT")
if(!file.exists(USGS_IT_path)) {
dir.create(USGS_IT_path, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
ITzip <- "supplemental_files.zip"
sbtools::item_file_download("5d16509ee4b0941bde5d8ffe", names = ITzip,
destinations = file.path(USGS_IT_path, ITzip))
unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path)
}
out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path))
```
```{r National Inventory of Dams}
# National Inventory of Dams data coupled with NHDPlus COMIDs
#-----------------------------------------------------------------------------
# Wieczorek, M.E., Jackson, S.E., and Schwarz, G.E., 2018, Select Attributes for NHDPlus Version 2.1 Reach Catchments
# and Modified Network Routed Upstream Watersheds for the Conterminous United States (ver. 2.0, November 2019):
# U.S. Geological Survey data release, https://doi.org/10.5066/F7765D7V.
#
# https://nid.sec.usace.army.mil/ords/f?p=105%3A1%3A%3A%3A%3A%3A%3A
NID_points_path <- file.path(data_dir, "NID_points")
if(!file.exists(NID_points_path)) {
dir.create(NID_points_path, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
NIDtxt <- "NID_attributes_20170612.txt"
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDtxt,
destinations = file.path(NID_points_path, NIDtxt))
NIDsnap <- "NAWQA_NID_snap.rds"
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDsnap,
destinations = file.path(NID_points_path, NIDsnap))
}
out_list <- c(out_list, list(NID_points_path = NID_points_path))
```
```{r NHDPlusV2}
# NHDPlus Seamless National Data - pulled from NHDPlus national data server; post-processed to RDS files by NHDPlusTools
# GageLoc - Gages snapped to NHDPlusv2 flowlines (QAQC not verified)
# NHDPlus HUC12 crosswalk
#------------------------
# Moore, R.B., Johnston, C.M., and Hayes, L., 2019, Crosswalk Table Between NHDPlus V2.1 and
# its Accompanying WBD Snapshot of 12-Digit Hydrologic Units: U.S. Geological Survey data release,
# https://doi.org/10.5066/P9CFXHGT.
nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
nhdplus_gdb <- file.path(data_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")

islands_dir <- file.path(data_dir, "islands")
islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")
if(!file.exists(nhdplus_dir)) {
message("downloading NHDPlus...")
dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(islands_dir, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z"
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz,
destinations = file.path(nhdplus_dir, gLz))
system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, gLz)))
xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z"
sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk,
destinations = file.path(nhdplus_dir, xWalk))
system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, xWalk)))
  x <- tryCatch(
    download_nhdplusv2(data_dir),
    # If the download helper fails, extract the archive with 7z directly
    # (quiet the extraction, overwrite existing files)
    error = function(e) {
      system(paste0(sevenz, " x ", file.path(data_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
                    " -o", data_dir), ignore.stdout = TRUE)
    })

  nhdplus_path(nhdplus_gdb)

  suppressWarnings(staged_nhdplus <- stage_national_data())

  x <- tryCatch(
    download_nhdplusv2(islands_dir,
                       "https://s3.amazonaws.com/edap-nhdplus/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
    # If the download helper fails, extract the archive with 7z directly
    # (quiet the extraction, overwrite existing files)
    error = function(e) {
      system(paste0(sevenz, " x ", file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
                    " -o", islands_dir), ignore.stdout = TRUE)
    })

  nhdplus_path(islands_gdb)

  suppressWarnings(staged_nhdplus_islands <- stage_national_data())
HUC12 <- read_sf(nhdplus_gdb, layer = "HUC12") %>%
st_make_valid() %>%
st_transform(., crs = 5070)
saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds"))
gagelocgf <- "GageLocGFinfo.dbf"
sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf,
destinations = file.path(data_dir, gagelocgf))
}

out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, nhdplus_gdb = nhdplus_gdb,
                             islands_dir = islands_dir, islands_gdb = islands_gdb))
```
```{r NHDPlusV2 Waterbodies}
# Waterbodies - derived after downloading and post-processing the NHDPlus Seamless National Geodatabase
waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")

if(!file.exists(waterbodies_path)) {
  message("formatting NHDPlus waterbodies...")
  # Read the feature class (the national seamless geodatabase stores waterbodies in "NHDWaterbody")
  wb <- read_sf(nhdplus_gdb, layer = "NHDWaterbody")
  # Convert to simple feature and save out
  saveRDS(st_as_sf(wb), waterbodies_path)
}

out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
```
```{r NHDPlusV2 Full cats}
fullcat_path <- file.path(nhdplus_dir, "nhdcat_full.rds")
islandcat_path <- file.path(islands_dir, "nhdcat_full.rds")
# Create full cat dataset
if(!file.exists(fullcat_path)){
cat_tab <- cat_rpu(staged_nhdplus$catchment, nhdplus_gdb)
saveRDS(cat_tab, fullcat_path)
island_cats <- file.path(islands_dir, "NHDPlusNationalData/nhdplus_catchment.rds")
island_tab <- cat_rpu(island_cats, islands_gdb)
saveRDS(island_tab, islandcat_path)
}
out_list <- c(out_list, list(fullcats_table = fullcat_path))
out_list <- c(out_list, list(islandcats_table = islandcat_path))
```
```{r NHDPlusV2 FDR_FAC}
# NHDPlus FDR/FAC grids available by raster processing unit
# The "fdrfac" subdirectory name is an assumption; adjust if the grids live elsewhere
fdr_fac_dir <- file.path(data_dir, "fdrfac")

if(!dir.exists(fdr_fac_dir)) {
  dir.create(fdr_fac_dir, recursive = TRUE, showWarnings = FALSE)
  download_fdr_fac(fdr_fac_dir)
}

dirs <- unique(dirname(list.files(fdr_fac_dir, recursive = TRUE, full.names = TRUE)))

fdr <- dirs[grepl(".*/fdr$", dirs)]
fac <- dirs[grepl(".*/fac$", dirs)]

out <- list(fdr = list(), fac = list())

# Raster processing unit IDs are the three characters preceding "/fdr" or "/fac"
rpu <- substr(fdr, (nchar(fdr) - 6), (nchar(fdr) - 4))

out$fdr <- as.list(setNames(fdr, paste0("rpu_", rpu)))
out$fac <- as.list(setNames(fac, paste0("rpu_", rpu)))

out_list <- c(out_list, out)
```
```{r WBD}
# Current version of the National WBD
# The "wbd" subdirectory name is an assumption; adjust if it is defined elsewhere
wbd_dir <- file.path(data_dir, "wbd")
out <- list(latest_wbd = file.path(wbd_dir, "WBD.rds"))
if(!dir.exists(wbd_dir)) {
  dir.create(wbd_dir, recursive = TRUE)
  wbd <- download_wbd(wbd_dir, "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip")
  out_gdb <- file.path(wbd_dir, "WBD_National_GDB.gdb")
  # Read the HU12 layer, project to CONUS Albers, and save out
  latest_wbd <- read_sf(out_gdb, "WBDHU12") %>%
    st_as_sf() %>%
    st_transform(crs = 5070)
  saveRDS(latest_wbd, out$latest_wbd)
}
out_list <- c(out_list, out)
```
```{r MERIT HydroDEM}
# MERIT HydroDEM - used for AK Geospatial Fabric, and potentially Mexico portion of R13
#-----------------------------------------------------------------------------
# Yamazaki, D., Ikeshima, D., Sosa, J., Bates, P. D., Allen, G. H., & Pavelsky, T. M. (2019).
# MERIT Hydro: a high-resolution global hydrography map based on latest topography dataset.
# Water Resources Research, 55, 5053–5073. https://doi.org/10.1029/2019WR024873
merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro")
if(!dir.exists(merit_dir)) {
dir.create(merit_dir, recursive = TRUE, showWarnings = FALSE)
if(is.null(sbtools::current_session()))
authenticate_sb()
m <- "merged_AK_MERIT_Hydro.zip"
  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = m,
                              destinations = file.path(merit_dir, m))

  unzip(file.path(merit_dir, m), exdir = merit_dir)
} else {
out <- list(merit_catchments = file.path(merit_dir,
"cat_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"),
merit_rivers = file.path(merit_dir,
"riv_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"))
out_list <- c(out_list, out)
}
```
```{r AK GF Source data}
# Geopackage of Alaska hydrographic datasets
# Archive name below is assumed to parallel the HI source ("hi.7z") later in this chunk
AK_GF_source <- "ak.7z"
AK_dir <- file.path(data_dir, "AK")
if(!dir.exists(AK_dir)) {
dir.create(AK_dir, recursive = TRUE)
if(is.null(sbtools::current_session()))
authenticate_sb()
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = AK_GF_source,
destinations = file.path(AK_dir, AK_GF_source))
system(paste0(sevenz, " e -o", AK_dir, " ", file.path(AK_dir, AK_GF_source)))
} else {
  out_ak <- list(ak_source = file.path(AK_dir, "ak.gpkg"))
  out_list <- c(out_list, out_ak)
}

# Geopackage of HI source data
HI_GF_source <- "hi.7z"
out_hi <- list(hi_source = file.path(islands_dir, "hi.gpkg"))

if(!file.exists(file.path(islands_dir, "hi.gpkg"))) {
  if(is.null(sbtools::current_session()))
    authenticate_sb()

  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = HI_GF_source,
                              destinations = file.path(islands_dir, HI_GF_source))

  system(paste0(sevenz, " e -o", islands_dir, " ", file.path(islands_dir, HI_GF_source)))
}

out_list <- c(out_list, out_hi)
```
```{r e2nhd}
zip_file <- "e2nhdplusv2_us_csv.zip"
out_csv <- "e2nhdplusv2_us.csv"
out_zip <- file.path(data_dir, zip_file)
out <- list(e2nhd_network = file.path(data_dir, out_csv))
if(!file.exists(out$e2nhd_network)) {
if(is.null(sbtools::current_session()))
authenticate_sb()
sbtools::item_file_download("5d16509ee4b0941bde5d8ffe",
names = zip_file,
zip::unzip(file.path(data_dir, zip_file), exdir = data_dir)
}
out_list <- c(out_list, out)
```
```{r nwm_topology}
nwm_targz_url <- "https://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/NWM_parameters/NWM_parameter_files_v2.1.tar.gz"
nwm_parm_url <- "https://www.nohrsc.noaa.gov/pub/staff/keicher/NWM_live/web/data_tools/NWM_v2.1_channel_hydrofabric.tar.gz"
targz <- file.path(data_dir, basename(nwm_targz_url))
out <- list(nwm_network = file.path(data_dir, "NWM_parameters_v2.1", "RouteLink_CONUS.nc"))
if(!file.exists(out$nwm_network)) {
download.file(nwm_targz_url, destfile = targz)
utils::untar(targz, exdir = data_dir)
}
out_list <- c(out_list, out)
parmtgz <- file.path(data_dir, basename(nwm_parm_url))
out <- list(nwm_parm = file.path(data_dir,
"NWM_v2.1_channel_hydrofabric_10262020",
"nwm_v2_1_hydrofabric.gdb"))
if(!file.exists(out$nwm_parm)) {
download.file(nwm_parm_url, destfile = parmtgz)
utils::untar(parmtgz, exdir = data_dir)
}
out_list <- c(out_list, out)
```
```{r nhdplus_attributes}
out <- list(new_nhdp_atts = file.path("cache", (sb_f <- "enhd_nhdplusatts.csv")))

if(!file.exists(out$new_nhdp_atts)) {
  if(is.null(sbtools::current_session()))
    authenticate_sb()

  sbtools::item_file_download("60c92503d34e86b9389df1c9",
                              names = sb_f,
                              destinations = out$new_nhdp_atts)
}
out_list <- c(out_list, out)
```
```{r Gages_GFv1.1}
GFv11_dir <- file.path(data_dir, "GFv11")
out <- list(GFv11_gages_lyr = file.path(data_dir, "GFv11/GFv11_gages.rds"))
# Download the GFv1.1 geodatabase
if(!dir.exists(GFv11_dir)) {
dir.create(GFv11_dir, recursive = TRUE)
  sb <- sbtools::current_session()
  if(is.null(sb))
    sb <- sbtools::authenticate_sb()

  sbtools::item_file_download("5e29d1a0e4b0a79317cf7f63", names = "GFv1.1.gdb.zip",
                              destinations = file.path(GFv11_dir, "GFv1.1.gdb.zip"), session = sb)
unzip(file.path(GFv11_dir, "GFv1.1.gdb.zip"), exdir = GFv11_dir)
# Extract gages
GFv11_gages <- read_sf(file.path(GFv11_dir, "GFv1.1.gdb"), "POIs_v1_1") %>%
filter(Type_Gage != 0)
saveRDS(GFv11_gages, out$GFv11_gages_lyr)
file.remove(file.path(GFv11_dir, "GFv1.1.gdb.zip"))
}
out_list <- c(out_list, out)
```
```{r Gages_II}
# Download the GagesII point shapefile
if(!dir.exists(gagesiii_points_path))
  dir.create(gagesiii_points_path, recursive = TRUE)

g2_out <- list(gagesii_lyr = file.path(gagesiii_points_path, "gagesII_9322_point_shapefile.shp"))

gagesII_url <- "https://water.usgs.gov/GIS/dsdl/gagesII_9322_point_shapefile.zip"
zip <- file.path(gagesiii_points_path, basename(gagesII_url))

if(!file.exists(g2_out$gagesii_lyr)) {
  download.file(gagesII_url, destfile = zip)
  unzip(zip, exdir = gagesiii_points_path)
}

out_list <- c(out_list, g2_out)
```
```{r updated_flowlines}
out <- list(new_nhdp_rds = file.path(out_list$nhdplus_dir, (sb_f <- "nhdplus_flowline_update.rds")))
if(!file.exists(out$new_nhdp_rds)) {
if(is.null(sbtools::current_session()))
authenticate_sb()
sbtools::item_file_download("5dcd5f96e4b069579760aedb",
names = sb_f,
destinations = out$new_nhdp_rds)
}
out_list <- c(out_list, out)
```
```{r}
write_json(out_list, path = out_file, pretty = TRUE, auto_unbox = TRUE)

rm(out_list)
```