Commit b7068300 authored by Blodgett, David L.

general purpose file download for #154

parent 9ecd586c
1 merge request: !183 Refactor - progress
@@ -110,11 +110,11 @@ dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)
get_sb_file("5dbc53d4e4b06957974eddae",
"2015_TE_Model_Estimates_lat.long_COMIDs.7z",
TE_points_path, unzip = TRUE)
TE_points_path)
get_sb_file("63adc826d34e92aad3ca5af4",
"galanter_and_others_2023.zip",
TE_points_path, unzip = TRUE)
TE_points_path)
out_list <- c(out_list, list(TE_points_path = TE_points_path))
@@ -132,22 +132,12 @@ water use withdrawals.
# Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines: U.S.
# Geological Survey data release, https://doi.org/10.5066/P986KZEM.
USGS_IT_path <- file.path(data_dir, "USGS_IT")
if(!file.exists(USGS_IT_path)) {
dir.create(USGS_IT_path, recursive = TRUE, showWarnings = FALSE)
ITzip <- "supplemental_files.zip"
check_auth()
sbtools::item_file_download("5d16509ee4b0941bde5d8ffe", names = ITzip,
destinations = file.path(USGS_IT_path, ITzip))
unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path)
rm(ITzip)
}
out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path))
out_list <- c(
out_list,
list(USGS_IT_path =
get_sb_file("5d16509ee4b0941bde5d8ffe",
"supplemental_files.zip",
file.path(data_dir, "USGS_IT"))))
```
Two datasets relate hydro location information from the National Inventory of
@@ -169,29 +159,16 @@ natural flow (Wieczorek and others, 2021).
NID_points_path <- file.path(data_dir, "NID_points")
if(!file.exists(NID_points_path)) {
dir.create(NID_points_path, recursive = TRUE, showWarnings = FALSE)
NIDtxt <- "NID_attributes_20170612.txt"
check_auth()
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDtxt,
destinations = file.path(NID_points_path,
NIDtxt))
NIDzip <- "Final_NID_2018.zip"
check_auth()
sbtools::item_file_download("5fb7e483d34eb413d5e14873", names = NIDzip,
destinations = file.path(NID_points_path,
NIDzip))
get_sb_file("5dbc53d4e4b06957974eddae",
"NID_attributes_20170612.txt",
NID_points_path)
unzip(file.path(NID_points_path, NIDzip), files = NULL,
exdir = NID_points_path)
rm(NIDtxt, NIDzip)
}
get_sb_file("5fb7e483d34eb413d5e14873",
"Final_NID_2018.zip",
NID_points_path)
out_list <- c(out_list, list(NID_points_path = NID_points_path))
if(mapview) mapview(read_sf(out_list$NID_points_path))
```
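As a quick sanity check (illustrative only, not part of this change set), the contents of the NID directory populated by the `get_sb_file()` calls above can be listed:

```r
# List the files that get_sb_file() placed in the NID points directory.
list.files(out_list$NID_points_path)
```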
@@ -213,83 +190,43 @@ efforts. These include:
# Units: U.S. Geological Survey data release,
# https://doi.org/10.5066/P9CFXHGT.
epa_data_root <- "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/"
nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
nhdplus_gdb <- file.path(data_dir,
"NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
nhdplus_gdb <- file.path(nhdplus_dir, "NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
islands_dir <- file.path(data_dir, "islands")
islands_gdb <- file.path(islands_dir,
"NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")
islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")
rpu <- file.path(nhdplus_dir, "NHDPlusGlobalData", "BoundaryUnit.shp")
if(!file.exists(nhdplus_dir)) {
message("downloading NHDPlus...")
dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE)
dir.create(islands_dir, recursive = TRUE, showWarnings = FALSE)
get_sb_file("5dbc53d4e4b06957974eddae", "NHDPlusV21_NationalData_GageLoc_05.7z", nhdplus_dir)
gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z"
check_auth()
sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz,
destinations = file.path(nhdplus_dir, gLz))
system(paste0(sevenz, " e -o", nhdplus_dir, " ",
file.path(nhdplus_dir, gLz)))
xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z"
sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk,
destinations = file.path(nhdplus_dir, xWalk))
check_auth()
system(paste0(sevenz, " e -o", nhdplus_dir, " ",
file.path(nhdplus_dir, xWalk)))
x <- tryCatch(
download_nhdplusv2(data_dir, url = "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
# Quiet the download, overwrite existing files
error = function(e)
{system(paste0(sevenz, " x ", file.path(data_dir,
"NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z")
, " -o", data_dir), ignore.stdout = T)}
)
x <- tryCatch(
download_nhdplusv2(islands_dir,
url = paste0("https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/",
"Data/NationalData/NHDPlusV21_NationalData_Seamless",
"_Geodatabase_HI_PR_VI_PI_03.7z")),
# Quiet the download, overwrite existing files
error = function(e)
{system(paste0(sevenz, " x ", file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z")
, " -o", islands_dir), ignore.stdout = T)}
)
rm(gLz, xWalk)
HUC12 <- read_sf(nhdplus_gdb, layer = "HUC12") %>%
st_make_valid() %>%
st_transform(., crs = proj_crs)
get_sb_file("5c86a747e4b09388244b3da1", "CrosswalkTable_NHDplus_HU12_CSV.7z", nhdplus_dir)
saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds"))
gagelocgf <- "GageLocGFinfo.dbf"
check_auth()
sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf,
destinations = file.path(data_dir, gagelocgf))
u <- "http://www.horizon-systems.com/NHDPlusData/NHDPlusV21/Data/GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z"
out_f <- file.path(nhdplus_dir, basename(u))
download.file(u, destfile = out_f, mode = "wb")
system(paste0(sevenz, " x ", out_f, " -o", nhdplus_dir),
ignore.stdout = TRUE, intern = TRUE)
# will download the 7z and unzip into the folder structure in nhdplus_gdb path
download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
out_path = data_dir, check_path = nhdplus_gdb)
download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
out_path = islands_dir, check_path = islands_gdb)
# cache the huc12 layer in rds format
hu12_rds <- file.path(nhdplus_dir, "HUC12.rds")
if(!file.exists(hu12_rds)) {
read_sf(nhdplus_gdb, layer = "HUC12") |>
st_make_valid() |>
st_transform(crs = proj_crs) |>
# TODO: convert this to gpkg
saveRDS(file = hu12_rds)
}
get_sb_file("5dcd5f96e4b069579760aedb", "GageLocGFinfo.dbf", data_dir)
download_file(paste0(epa_data_root, "GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z"),
out_path = nhdplus_dir, check_path = rpu)
out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir,
nhdplus_gdb = nhdplus_gdb,
islands_dir = islands_dir,
@@ -314,64 +251,22 @@ ref_cat <- file.path(ref_fab_path, "reference_catchments.gpkg")
ref_fl <- file.path(ref_fab_path, "reference_flowline.gpkg")
nwm_fl <- file.path(ref_fab_path, "nwm_network.gpkg")
if(!dir.exists(ref_fab_path)) {
dir.create(ref_fab_path, recursive = TRUE, showWarnings = FALSE)
}
if(!file.exists(ref_cat)){
possible_vpu <- c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09",
"03S", "03W", "03N", "10U", "11", "12", "13", "14", "16",
"17", "18")
for (vpu in possible_vpu){
vpu_gpkg <- paste0(vpu, "_reference_features.gpkg")
check_auth()
sbtools::item_file_download("6317a72cd34e36012efa4e8a", names = vpu_gpkg,
destinations = file.path(ref_fab_path, vpu_gpkg))
}
check_auth()
sbtools::item_file_download("61295190d34e40dd9c06bcd7",
names = "reference_catchments.gpkg",
destinations = ref_cat)
check_auth()
sbtools::item_file_download("61295190d34e40dd9c06bcd7",
names = "reference_flowline.gpkg",
destinations = ref_fl)
check_auth()
sbtools::item_file_download("61295190d34e40dd9c06bcd7",
names = "nwm_network.gpkg",
destinations = nwm_fl)
for (vpu in c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09",
"03S", "03W", "03N", "10U", "11", "12", "13", "14", "16",
"17", "18")) {
get_sb_file("6317a72cd34e36012efa4e8a",
paste0(vpu, "_reference_features.gpkg"),
ref_fab_path)
}
get_sb_file("61295190d34e40dd9c06bcd7",
c("reference_catchments.gpkg", "reference_flowline.gpkg", "nwm_network.gpkg"),
out_destination = ref_fab_path)
out_list <- c(out_list, list(ref_fab_path = ref_fab_path, ref_cat = ref_cat,
ref_fl = ref_fl, nwm_fl = nwm_fl))
```
NHDPlus Value-added attributes are used to characterize the network and get
information such as velocity and mean streamflow.
```{r NHDPlusV2 VAA}
# Waterbodies - derived after downloading and post-processing
# NHDPlus Seamless National Geodatabase
# Compacted here into a GDB
VAA_fst_path <- file.path(nhdplus_dir, "nhdplusVAA.fst")
VAA_rds_path <- file.path(nhdplus_dir, "nhd_vaa.rds")
#TODO pull attributes needed for threshold POIs
if(!file.exists(VAA_rds_path)) {
message("downloading NHDPlusVAA...")
download_vaa(VAA_fst_path)
# Extract other attributes
nhdpv2_vaa <- read_sf(nhdplus_gdb, "NHDFlowline_Network") %>%
st_drop_geometry()
saveRDS(nhdpv2_vaa, file.path(nhdplus_dir, "nhd_vaa.rds"))
}
out_list <- c(out_list, list(VAA_fst = VAA_fst_path, VAA_rds = VAA_rds_path))
out_list <- c(out_list, list(ref_fab_path = ref_fab_path,
ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl))
```
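For orientation, and not part of this change set, the cached `nhdplusVAA.fst` written above can be queried column-wise with `fst::read_fst()` rather than loading the full table; the column names used below are assumptions for illustration.

```r
# Minimal sketch: read two assumed columns from the cached VAA table.
vaa <- fst::read_fst(VAA_fst_path, columns = c("comid", "totdasqkm"))
head(vaa)
```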
NHDPlus Waterbody and Area Polygons converted to an RDS file for easier
@@ -385,21 +280,17 @@ waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")
if(!file.exists(waterbodies_path)) {
message("formatting NHDPlus watebodies...")
# Read the feature class
wb_sf <- read_sf(nhdplus_gdb, "NHDWaterbody") %>%
st_transform(proj_crs) %>%
mutate(layer = "NHDWaterbody")
wbarea_sf <- read_sf(nhdplus_gdb, "NHDArea") %>%
st_transform(proj_crs) %>%
mutate(layer = "NHDArea")
wb_fc <- data.table::rbindlist(list(wb_sf, wbarea_sf), fill = TRUE) %>%
st_as_sf()
data.table::rbindlist(list(
read_sf(out_list$nhdplus_gdb, "NHDWaterbody") |>
st_transform(proj_crs) |>
mutate(layer = "NHDWaterbody"),
read_sf(out_list$nhdplus_gdb, "NHDArea") |>
st_transform(proj_crs) |>
mutate(layer = "NHDArea")), fill = TRUE) |>
st_as_sf() |>
saveRDS(waterbodies_path)
# Convert to simple feature and save out
saveRDS(wb_fc, waterbodies_path)
}
out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
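# Illustrative check, not part of this change set: read the combined
# waterbody/area features back and tabulate the "layer" column added above.
wb_check <- readRDS(waterbodies_path)
table(wb_check$layer)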
@@ -12,6 +12,8 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) {
item_files <- sbtools::item_list_files(item)$fname
}
if(!dir.exists(out_destination)) dir.create(out_destination, showWarnings = FALSE, recursive = TRUE)
out_files <- file.path(out_destination, item_files)
missing <- out_files[!file.exists(out_files)]
@@ -24,14 +26,40 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) {
if(unzip) {
un7zip_fs <- missing[grepl("7z$", missing)]
un7zip_fs <- missing[grepl("7z$|zip$", missing)]
for(f in un7zip_fs) {
system(paste0(sevenz, " e -o", out_destination, " ", f))
}
}
out_destination
invisible(out_destination)
}
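# Example call pattern for this helper, mirroring its use in the Rmd above:
# download a ScienceBase file into a destination directory and, because
# unzip defaults to TRUE, extract the 7z archive in place.
get_sb_file("5dbc53d4e4b06957974eddae",
            "2015_TE_Model_Estimates_lat.long_COMIDs.7z",
            TE_points_path)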
#' Download a file
#' @param url character url where the file can be retrieved
#' @param out_path character path to the directory where the downloaded file is saved
#' @param check_path character path that should exist once the download (and any
#'        extraction) is complete; if it already exists, the download is skipped
#' @param unzip logical whether to try to unzip the file
download_file <- function(url, out_path, check_path = NULL, unzip = TRUE) {
  if(!is.null(check_path) && file.exists(check_path)) return(invisible(check_path))
  dir.create(out_path, showWarnings = FALSE, recursive = TRUE)
  out <- file.path(out_path, basename(url))
  if(!file.exists(out)) {
    httr::RETRY("GET", url, httr::write_disk(out))
    if(unzip && grepl("7z$|zip$", out))
      system(paste0(sevenz, " x ", out, " -o", out_path))
  }
  if(is.null(check_path) || file.exists(check_path)) return(invisible(check_path))
  stop("did not create ", check_path)
}
\ No newline at end of file
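# Example mirroring the Rmd usage above: the national seamless geodatabase is
# only downloaded and extracted if the path named by check_path does not
# already exist.
download_file(paste0(epa_data_root,
                     "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
              out_path = data_dir, check_path = nhdplus_gdb)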