diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd index 9c8b496866baf5e911b85c2dbf3a936dfb9e1247..0e5779524ea69edae35573d10786450417e3a563 100644 --- a/workspace/00_get_data.Rmd +++ b/workspace/00_get_data.Rmd @@ -110,11 +110,11 @@ dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE) get_sb_file("5dbc53d4e4b06957974eddae", "2015_TE_Model_Estimates_lat.long_COMIDs.7z", - TE_points_path, unzip = TRUE) + TE_points_path) get_sb_file("63adc826d34e92aad3ca5af4", "galanter_and_others_2023.zip", - TE_points_path, unzip = TRUE) + TE_points_path) out_list <- c(out_list, list(TE_points_path = TE_points_path)) @@ -132,22 +132,12 @@ water use withdrawals. # Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines: U.S. # Geological Survey data release, https://doi.org/10.5066/P986KZEM. -USGS_IT_path <- file.path(data_dir, "USGS_IT") - -if(!file.exists(USGS_IT_path)) { - dir.create(USGS_IT_path, recursive = TRUE, showWarnings = FALSE) - - ITzip <- "supplemental_files.zip" - check_auth() - sbtools::item_file_download("5d16509ee4b0941bde5d8ffe", names = ITzip, - destinations = file.path(USGS_IT_path, ITzip)) - - unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path) - - rm(ITzip) -} - -out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path)) +out_list <- c( + out_list, + list(USGS_IT_path = + get_sb_file("5d16509ee4b0941bde5d8ffe", + "supplemental_files.zip", + file.path(data_dir, "USGS_IT")))) ``` Two datasets relate hydro location information from the National Inventory of @@ -169,29 +159,16 @@ natural flow (Wieczorek and others, 2021). NID_points_path <- file.path(data_dir, "NID_points") -if(!file.exists(NID_points_path)) { - dir.create(NID_points_path, recursive = TRUE, showWarnings = FALSE) - - NIDtxt <- "NID_attributes_20170612.txt" - check_auth() - sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDtxt, - destinations = file.path(NID_points_path, - NIDtxt)) - - NIDzip <- "Final_NID_2018.zip" - check_auth() - sbtools::item_file_download("5fb7e483d34eb413d5e14873", names = NIDzip, - destinations = file.path(NID_points_path, - NIDzip)) +get_sb_file("5dbc53d4e4b06957974eddae", + "NID_attributes_20170612.txt", + NID_points_path) - unzip(file.path(NID_points_path, NIDzip), files = NULL, - exdir = NID_points_path) - - - rm(NIDtxt, NIDzip) -} +get_sb_file("5fb7e483d34eb413d5e14873", + "Final_NID_2018.zip", + NID_points_path) out_list <- c(out_list, list(NID_points_path = NID_points_path)) + if(mapview)(mapview(read_sf(out_list$NID_points_path))) ``` @@ -213,83 +190,43 @@ efforts. These include: # Units: U.S. Geological Survey data release, # https://doi.org/10.5066/P9CFXHGT. +epa_data_root <- "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/" + nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData") -nhdplus_gdb <- file.path(data_dir, - "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb") +nhdplus_gdb <- file.path(nhdplus_dir, "NHDPlusV21_National_Seamless_Flattened_Lower48.gdb") islands_dir <- file.path(data_dir, "islands") -islands_gdb <- file.path(islands_dir, - "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/") +islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/") rpu <- file.path(nhdplus_dir, "NHDPlusGlobalData", "BoundaryUnit.shp") -if(!file.exists(nhdplus_dir)) { - message("downloading NHDPlus...") - - dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE) - dir.create(islands_dir, recursive = TRUE, showWarnings = FALSE) +get_sb_file("5dbc53d4e4b06957974eddae", "NHDPlusV21_NationalData_GageLoc_05.7z", nhdplus_dir) - gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z" - check_auth() - sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz, - destinations = file.path(nhdplus_dir, gLz)) - - system(paste0(sevenz, " e -o", nhdplus_dir, " ", - file.path(nhdplus_dir, gLz))) - - xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z" - - sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk, - destinations = file.path(nhdplus_dir, xWalk)) - check_auth() - system(paste0(sevenz, " e -o", nhdplus_dir, " ", - file.path(nhdplus_dir, xWalk))) - - x <- tryCatch( - download_nhdplusv2(data_dir, url = "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"), - # Quiet the download, overwrite existing files - error = function(e) - {system(paste0(sevenz, " x ", file.path(data_dir, - "NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z") - , " -o", data_dir), ignore.stdout = T)} - ) - - x <- tryCatch( - download_nhdplusv2(islands_dir, - url = paste0("https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/", - "Data/NationalData/NHDPlusV21_NationalData_Seamless", - "_Geodatabase_HI_PR_VI_PI_03.7z")), - - # Quiet the download, overwrite existing files - error = function(e) - {system(paste0(sevenz, " x ", file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z") - , " -o", islands_dir), ignore.stdout = T)} - ) - - rm(gLz, xWalk) - - HUC12 <- read_sf(nhdplus_gdb, layer = "HUC12") %>% - st_make_valid() %>% - st_transform(., crs = proj_crs) +get_sb_file("5c86a747e4b09388244b3da1", "CrosswalkTable_NHDplus_HU12_CSV.7z", nhdplus_dir) - saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds")) - - gagelocgf <- "GageLocGFinfo.dbf" - check_auth() - sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf, - destinations = file.path(data_dir, gagelocgf)) - - u <- "http://www.horizon-systems.com/NHDPlusData/NHDPlusV21/Data/GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z" - - out_f <- file.path(nhdplus_dir, basename(u)) - - download.file(u, destfile = out_f, mode = "wb") - - system(paste0(sevenz, " x ", out_f, " -o", nhdplus_dir), - ignore.stdout = TRUE, intern = TRUE) - +# will download the 7z and unzip into the folder structure in nhdplus_gdb path +download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"), + out_path = data_dir, check_path = nhdplus_gdb) + +download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"), + out_path = islands_dir, check_path = islands_gdb) + +# cache the huc12 layer in rds format +hu12_rds <- file.path(nhdplus_dir, "HUC12.rds") + +if(!file.exists(hu12_rds)) { + read_sf(nhdplus_gdb, layer = "HUC12") |> + st_make_valid() |> + st_transform(crs = proj_crs) |> + # TODO: convert this to gpkg + saveRDS(file = hu12_rds) } +get_sb_file("5dcd5f96e4b069579760aedb", "GageLocGFinfo.dbf", data_dir) + +download_file(paste0(epa_data_root, "GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z"), + out_path = nhdplus_dir, check_path = rpu) + out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, nhdplus_gdb = nhdplus_gdb, islands_dir = islands_dir, @@ -314,64 +251,22 @@ ref_cat <- file.path(ref_fab_path, "reference_catchments.gpkg") ref_fl <- file.path(ref_fab_path, "reference_flowline.gpkg") nwm_fl <- file.path(ref_fab_path, "nwm_network.gpkg") -if(!dir.exists(ref_fab_path)) { - dir.create(ref_fab_path, recursive = TRUE, showWarnings = FALSE) -} - -if(!file.exists(ref_cat)){ - possible_vpu <- c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", - "03S", "03W", "03N", "10U", "11", "12", "13", "14", "16", - "17", "18") - - for (vpu in possible_vpu){ - vpu_gpkg <- paste0(vpu, "_reference_features.gpkg") - check_auth() - sbtools::item_file_download("6317a72cd34e36012efa4e8a", names = vpu_gpkg, - destinations = file.path(ref_fab_path, vpu_gpkg)) - } - check_auth() - sbtools::item_file_download("61295190d34e40dd9c06bcd7", - names = "reference_catchments.gpkg", - destinations = ref_cat) - check_auth() - sbtools::item_file_download("61295190d34e40dd9c06bcd7", - names = "reference_flowline.gpkg", - destinations = ref_fl) - check_auth() - sbtools::item_file_download("61295190d34e40dd9c06bcd7", - names = "nwm_network.gpkg", - destinations = nwm_fl) +for (vpu in c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", + "03S", "03W", "03N", "10U", "11", "12", "13", "14", "16", + "17", "18")) { + + get_sb_file("6317a72cd34e36012efa4e8a", + paste0(vpu, "_reference_features.gpkg"), + ref_fab_path) } +get_sb_file("61295190d34e40dd9c06bcd7", + c("reference_catchments.gpkg", "reference_flowline.gpkg", "nwm_network.gpkg"), + out_destination = ref_fab_path) -out_list <- c(out_list, list(ref_fab_path = ref_fab_path, ref_cat = ref_cat, - ref_fl = ref_fl, nwm_fl = nwm_fl)) -``` - -NHDPlus Value-added attributes used to characterize the network and get -information such as velocity and mean streamflow. - -```{r NHDPlusV2 VAA} -# Waterbodies - derived after downloading and post-processing -# NHDPlus Seamless National Geodatabase -# Compacted here into a GDB -VAA_fst_path <- file.path(nhdplus_dir, "nhdplusVAA.fst") -VAA_rds_path <- file.path(nhdplus_dir, "nhd_vaa.rds") -#TODO pull attributes needed for threshold POIs - -if(!file.exists(VAA_rds_path)) { - message("downloading NHDPlusVAA...") - download_vaa(VAA_fst_path) - - # Extract other attributes - nhdpv2_vaa <- read_sf(nhdplus_gdb, "NHDFlowline_Network") %>% - st_drop_geometry() - - saveRDS(nhdpv2_vaa, file.path(nhdplus_dir, "nhd_vaa.rds")) -} - -out_list <- c(out_list, list(VAA_fst = VAA_fst_path, VAA_rds = VAA_rds_path)) +out_list <- c(out_list, list(ref_fab_path = ref_fab_path, + ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl)) ``` NHDPlus Waterbody and Area Polygons converted to an RDS file for easier @@ -385,21 +280,17 @@ waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds") if(!file.exists(waterbodies_path)) { message("formatting NHDPlus watebodies...") - - # Read the feature class - wb_sf <- read_sf(nhdplus_gdb, "NHDWaterbody") %>% - st_transform(proj_crs) %>% - mutate(layer = "NHDWaterbody") - - wbarea_sf <- read_sf(nhdplus_gdb, "NHDArea") %>% - st_transform(proj_crs) %>% - mutate(layer = "NHDArea") - wb_fc <- data.table::rbindlist(list(wb_sf, wbarea_sf), fill = TRUE) %>% - st_as_sf() + data.table::rbindlist(list( + read_sf(out_list$nhdplus_gdb, "NHDWaterbody") |> + st_transform(proj_crs) |> + mutate(layer = "NHDWaterbody"), + read_sf(out_list$nhdplus_gdb, "NHDArea") |> + st_transform(proj_crs) |> + mutate(layer = "NHDArea")), fill = TRUE) |> + st_as_sf() |> + saveRDS(waterbodies_path) - # Convert to simple feature and save out - saveRDS(wb_fc, waterbodies_path) } out_list <- c(out_list, list(waterbodies_path = waterbodies_path)) diff --git a/workspace/R/1_get_data.R b/workspace/R/1_get_data.R index 19dd825c09acd180855122093277737325701211..53e00d97a4ab2ddfb75186537546499fd1e28da5 100644 --- a/workspace/R/1_get_data.R +++ b/workspace/R/1_get_data.R @@ -12,6 +12,8 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) { item_files <- sbtools::item_list_files(item)$fname } + if(!dir.exists(out_destination)) dir.create(out_destination, showWarnings = FALSE, recursive = TRUE) + out_files <- file.path(out_destination, item_files) missing <- out_files[!file.exists(out_files)] @@ -24,14 +26,40 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) { if(unzip) { - un7zip_fs <- missing[grepl("7z$", missing)] - + un7zip_fs <- missing[grepl("7z$|zip$", missing)] + for(f in un7zip_fs) { system(paste0(sevenz, " e -o", out_destination, " ", f)) } } - out_destination + invisible(out_destination) } + +#' download a file +#' @param url character url where the file can be retrieved +#' @param out_path character path the directory to save the downloaded file +#' @param check_path character path to check that should exist when done +#' @param unzip logical whether to try to unzip the file +download_file <- function(url, out_path, check_path = NULL, unzip = TRUE) { + + if(file.exists(check_path)) return(invisible(check_path)) + + dir.create(out_path, showWarnings = FALSE, recursive = TRUE) + + out <- file.path(out_path, basename(url)) + + if(!file.exists(out)) { + httr::RETRY("GET", url, httr::write_disk(out)) + + if(unzip & grepl("7z$|zip$", out)) + system(paste0(sevenz, " x ", out, " -o", out_path)) + + } + + if(file.exists(check_path)) return(invisible(check_path)) + + stop("did not create ", check_file) +} \ No newline at end of file