diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd index 3e53781db99391b4ee3ecd2654bb942fbba55134..13ead041b571b399da2ef378cd346cfd38a8295f 100644 --- a/workspace/00_get_data.Rmd +++ b/workspace/00_get_data.Rmd @@ -46,13 +46,17 @@ if(is(check_7z, "try-error")) { if(!sbtools::is_logged_in()) initialize_sciencebase_session(username = Sys.getenv("sb_user")) +region <- NULL +reg <- (length(domain) == 1 && grepl("^[0-1]", domain)) +if(reg) region <- substr(domain, 1, 2) + # Enable mapview rendering if desired mapview <- FALSE ``` HUC12 (Hydrologic Unit Code, Level 12) outlets derived from the Watershed Boundary Dataset indexed to the reference fabricform the baseline and extent of national modeling fabrics. -```{r HUC12 outlets} +```{r HUC12 outlets, eval="huc12" %in% POI_types} # Blodgett, D.L., 2022, Mainstem Rivers of the Conterminous United States: # U.S. Geological Survey data release, https://doi.org/10.5066/P9BTKP3T. @@ -78,7 +82,7 @@ datasets developed for the Streamgage Watershed InforMation (SWIM) project. This dataset, which is referred to as “SWIM streamgage locations,†was created in support of the second dataset of basin characteristics and disturbance indexes. -```{r SWIM} +```{r SWIM, eval="gages" %in% POI_types} # Hayes, L., Chase, K.J., Wieczorek, M.E., and Jackson, S.E., 2021, # USGS streamgages in the conterminous United States indexed to NHDPlus v2.1 # flowlines to support Streamgage Watershed InforMation (SWIM), 2021: U.S. @@ -104,7 +108,7 @@ the amount of water that is withdrawn and consumed by thermoelectric power plants (Diehl and others, 2013; Diehl and Harris, 2014; Harris and Diehl, 2019 Galanter and othes, 2023). -```{r Thermoelectric Facilities} +```{r Thermoelectric Facilities, eval="thermo_electric" %in% POI_types} # Harris, Melissa A. and Diehl, Timothy H., 2017. A Comparison of Three # Federal Datasets for Thermoelectric Water Withdrawals in the United States # for 2010. 
Journal of the American Water Resources Association (JAWRA) @@ -141,7 +145,7 @@ hydrologic analyses. The resulting enhanced network is named E2NHDPlusV2_us. This includes the network locations associated with some diversions and water use withdrawals. -```{r e2nhd supplemental data - USGS} +```{r e2nhd supplemental data - USGS, eval="addition_removal" %in% POI_types} # Schwarz, G.E., 2019, E2NHDPlusV2_us: Database of Ancillary Hydrologic # Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines: U.S. # Geological Survey data release, https://doi.org/10.5066/P986KZEM. @@ -159,7 +163,7 @@ Dams to the NHDPlus network. One effort is related to the SPARROW work (Wieczorek and others, 2018), the other related to work quantifying impacts on natural flow (Wieczorek and others, 2021). -```{r National Inventory of Dams} +```{r National Inventory of Dams, eval="dams" %in% POI_types} # Wieczorek, M.E., Jackson, S.E., and Schwarz, G.E., 2018, Select Attributes # for NHDPlus Version 2.1 Reach Catchments and Modified Network Routed @@ -218,12 +222,16 @@ get_sb_file("5dbc53d4e4b06957974eddae", "NHDPlusV21_NationalData_GageLoc_05.7z", get_sb_file("5c86a747e4b09388244b3da1", "CrosswalkTable_NHDplus_HU12_CSV.7z", nhdplus_dir) +if("CONUS" %in% domain | reg) { # will download the 7z and unzip into the folder structure in nhdplus_gdb path download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"), out_path = data_dir, check_path = nhdplus_gdb) +} +if("HI" %in% domain) { download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"), out_path = islands_dir, check_path = islands_gdb) +} # cache the huc12 layer in rds format hu12_rds <- file.path(nhdplus_dir, "HUC12.rds") @@ -267,19 +275,25 @@ ref_cat <- file.path(ref_fab_path, "reference_catchments.gpkg") ref_fl <- file.path(ref_fab_path, "reference_flowline.gpkg") nwm_fl <- file.path(ref_fab_path, "nwm_network.gpkg") -for 
(vpu in c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", +all_vpu <- c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", "03S", "03W", "03N", "10U", "11", "12", "13", "14", "16", - "17", "18")) { - + "17", "18") + +if(!any(c("CONUS", "HI", "AK") %in% domain)) { + all_vpu <- all_vpu[grepl(substr(domain, 1, 2), all_vpu)] +} + +for (vpu in all_vpu) { get_sb_file("6317a72cd34e36012efa4e8a", paste0(vpu, "_reference_features.gpkg"), ref_fab_path) } +if("CONUS" %in% domain | reg) { get_sb_file("61295190d34e40dd9c06bcd7", c("reference_catchments.gpkg", "reference_flowline.gpkg", "nwm_network.gpkg"), out_destination = ref_fab_path) - +} out_list <- c(out_list, list(ref_fab_path = ref_fab_path, ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl)) @@ -294,11 +308,11 @@ when aggregating at points of interest. fullcat_path <- file.path(nhdplus_dir, "nhdcat_full.rds") islandcat_path <- file.path(islands_dir, "nhdcat_full.rds") -if(!file.exists(fullcat_path)) +if(!file.exists(fullcat_path) & ("CONUS" %in% domain | reg)) saveRDS(cat_rpu(out_list$ref_cat, nhdplus_gdb), fullcat_path) -if(!file.exists(islandcat_path)) +if(!file.exists(islandcat_path) & "HI" %in% domain) saveRDS(cat_rpu(out_list$islands_gdb, islands_gdb), islandcat_path) @@ -308,21 +322,18 @@ out_list <- c(out_list, list(fullcats_table = fullcat_path, islandcats_table = i Download NHDPlusV2 FDR and FAC grids for refactoring and catcment splitting. -```{r NHDPlusV2 FDR_FAC} +```{r NHDPlusV2 FDR_FAC, eval="CONUS" %in% domain | reg} # NHDPlus FDR/FAC grids available by raster processing unit -# TODO: set this up for a per-region download for #134 -out_list<- c(out_list, make_fdr_fac_list(file.path(data_dir, "fdrfac"))) +out_list<- c(out_list, make_fdr_fac_list(file.path(data_dir, "fdrfac"), region)) ``` Download NHDPlusV2 elevation grids for headwater extensions and splitting catchments into left and right banks. 
-```{r NHDPlusV2 elev} +```{r NHDPlusV2 elev, eval="CONUS" %in% domain | reg} # NHDPlus elev grids available by raster processing unit -# TODO: set this up for a per-region download for #134 -out_list<- c(out_list, make_nhdplus_elev_list(file.path(data_dir, "nhdplusv2_elev"))) - +out_list<- c(out_list, make_nhdplus_elev_list(file.path(data_dir, "nhdplusv2_elev"), region)) ``` Merrit Topographic and Hydrographic data for deriving GIS Features of the @@ -339,6 +350,8 @@ National Hydrologic Modeling, Alaska Domain merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro") +if("AK" %in% domain) { + get_sb_file("5dbc53d4e4b06957974eddae", "merged_AK_MERIT_Hydro.zip", merit_dir) # TODO: update to use "6644f85ed34e1955f5a42dc4" when released (roughly Dec 10,) @@ -348,6 +361,8 @@ get_sb_file("64ff628ed34ed30c2057b430", c("ak_merit_dem.zip", "ak_merit_fdr.zip", "ak_merit_fac.zip"), merit_dir) +} + out_list <- c( out_list, list(merit_catchments = file.path(merit_dir, @@ -376,7 +391,9 @@ Alaska Domain AK_GF_source <- "ak.7z" AK_dir <- file.path(data_dir, "AK") +if("AK" %in% domain) { get_sb_file("5dbc53d4e4b06957974eddae", AK_GF_source, AK_dir) +} out_list <- c(out_list, list(ak_source = file.path(AK_dir, "ak.gpkg"))) @@ -391,7 +408,9 @@ Hawaii Domain # Hydrologic Modeling, Hawaii Domain: U.S. 
Geological Survey data release, # https://doi.org/10.5066/P9HMKOP8 +if("HI" %in% domain) { get_sb_file("5dbc53d4e4b06957974eddae", "hi.7z", islands_dir) +} out_list <- c(out_list, list(hi_source = file.path(islands_dir, "hi.gpkg"))) @@ -412,6 +431,7 @@ out <- list(GFv11_gages_lyr = file.path(data_dir, "GFv11/GFv11_gages.rds"), GFv11_gdb = file.path(GFv11_dir, "GFv1.1.gdb"), GFv11_tgf = file.path(GFv11_dir, "TGF.gdb")) +if("CONUS" %in% domain | reg) { get_sb_file("5e29d1a0e4b0a79317cf7f63", "GFv1.1.gdb.zip", GFv11_dir, check_path = out$GFv11_gdb) get_sb_file("5d967365e4b0c4f70d113923", "TGF.gdb.zip", GFv11_dir, check_path = out$GFv11_tgf) @@ -423,7 +443,7 @@ if(!file.exists(out$GFv11_gages_lyr)) { saveRDS(out$GFv11_gages_lyr) } - +} out_list <- c(out_list, out) if(mapview)(mapview(readRDS(out_list$GFv11_gages_lyr))) @@ -437,12 +457,14 @@ GAGESII dataset SWIM_points_path <- file.path(data_dir, "SWIM_gage_loc") +if("gages" %in% POI_types) { get_sb_file("631405bbd34e36012efa304a", "gagesII_9322_point_shapefile.zip", SWIM_points_path) +} out_list <- c(out_list, list( gagesii_lyr = file.path(SWIM_points_path, "gagesII_9322_point_shapefile"))) -if(mapview)(mapview(read_sf(out_list$gagesii_lyr))) +if(mapview) try(mapview(read_sf(out_list$gagesii_lyr))) ``` HILARRI dataset of Network-indexed Hydropower structures, reservoirs, and @@ -457,15 +479,17 @@ locations hilarri_dir <- file.path(data_dir, "HILARRI") hilarri_out <- list(hilarri_sites = file.path(hilarri_dir, "HILARRI_v2.csv")) +if("reservoirs" %in% POI_types) { download_file("https://hydrosource.ornl.gov/sites/default/files/2023-03/HILARRI_v2.zip", out_path = hilarri_dir, check_path = hilarri_out$hilari_sites) +} out_list <- c(out_list, hilarri_out) if(mapview) { - mapview(st_as_sf(read.csv(out_list$hilarri_sites), + try(mapview(st_as_sf(read.csv(out_list$hilarri_sites), coords = c("longitude", "latitude"), - crs = 4326)) + crs = 4326))) } ``` @@ -495,8 +519,9 @@ istarf_url <- 
"https://zenodo.org/record/4602277/files/ISTARF-CONUS.csv?download # Download GRanD zip from above GRanD_zip <- file.path(res_path, "GRanD_Version_1_3.zip") - +if("reservoirs" %in% POI_types) { download_file(res_att_url, res_path, file_name = "ResOpsUS.zip") +} out_list <- c(out_list, list(res_attributes = file.path(res_path, "ResOpsUS", "attributes", @@ -504,12 +529,15 @@ out_list <- c(out_list, istarf_csv <- file.path(res_path, "ISTARF-CONUS.csv") +if("reservoirs" %in% POI_types) { download_file(istarf_url, res_path, istarf_csv, file_name = "ISTARF-CONUS.csv") +} out_list <- c(out_list, list(istarf = istarf_csv)) grand_dir <- file.path(res_path, "GRanD_Version_1_3") +if("reservoirs" %in% POI_types) { # Extract GRanD data if(!dir.exists(grand_dir)) { if(!file.exists(GRanD_zip)) @@ -518,16 +546,18 @@ if(!dir.exists(grand_dir)) { unzip(GRanD_zip, exdir = res_path) } +} out_list <- c(out_list, list(GRanD = grand_dir)) resops_to_nid_path <- file.path(res_path, "cw_ResOpsUS_NID.csv") +istarf_xwalk_path <- file.path(res_path, "istarf_xwalk_final_48_gfv11.csv") +if("reservoirs" %in% POI_types) { get_sb_file("5dbc53d4e4b06957974eddae", "cw_ResOpsUS_NID.csv", dirname(resops_to_nid_path)) -istarf_xwalk_path <- file.path(res_path, "istarf_xwalk_final_48_gfv11.csv") - get_sb_file("5dbc53d4e4b06957974eddae", "istarf_xwalk_final_48_gfv11.csv", dirname(istarf_xwalk_path)) +} out_list <- c(out_list, list(resops_NID_CW = resops_to_nid_path, istarf_xwalk = istarf_xwalk_path)) @@ -541,7 +571,9 @@ All Hydro-linked Network Data Index (NLDI) datasets nldi_dir <- file.path(data_dir, "nldi") +if("gages" %in% POI_types) { get_sb_file("60c7b895d34e86b9389b2a6c", "all", nldi_dir) +} out_list <- c( out_list, diff --git a/workspace/R/00_get_data_functions.R b/workspace/R/00_get_data_functions.R index 81c4318fa0b0be1fba6ac01c721aeb9b7fdb1352..2bf98b0ad3d04b26d6a35a493a67d2d84259d572 100644 --- a/workspace/R/00_get_data_functions.R +++ b/workspace/R/00_get_data_functions.R @@ -78,8 +78,9 @@ 
download_file <- function(url, out_path, check_path = NULL, unzip = TRUE, file_n #' make flow direction and flow accumulation file list #' @param fdr_fac_dir directory where flow direction and flow accumulation are +#' @param hu2 two digit huc to pass to download_elev #' @return list containing all flow direction and flow accumulation files -make_fdr_fac_list <- function(fdr_fac_dir) { +make_fdr_fac_list <- function(fdr_fac_dir, hu2) { if(!dir.exists(fdr_fac_dir)) download_elev("FDRFAC", fdr_fac_dir) @@ -98,10 +99,10 @@ make_fdr_fac_list <- function(fdr_fac_dir) { out } -make_nhdplus_elev_list <- function(elev_dir) { +make_nhdplus_elev_list <- function(elev_dir, hu2) { if(!dir.exists(elev_dir)) - download_elev("DEM", elev_dir) + download_elev("DEM", elev_dir, hu2) dirs <- unique(dirname(list.files(elev_dir, recursive = TRUE, full.names = TRUE))) diff --git a/workspace/R/user_vars.R b/workspace/R/user_vars.R index 703236b6e0be51bb6f11ecaa0ac03f1667732e69..04c9f3dc64327bb69f1ff3ee2aea710f5c6f4d5e 100644 --- a/workspace/R/user_vars.R +++ b/workspace/R/user_vars.R @@ -1,9 +1,11 @@ if(Sys.getenv("sb_user") == "CHANGEME") stop("must set sb_user") # Domain to control what region data will be assembled for. -# NULL for all, can be a two or four digit HUC to limit some -# data downloads to a smaller domain. -domain = NULL +# Can be a single two or four digit HUC to limit some +# data downloads to a smaller domain. +# Can be "AK", "HI", or "CONUS" to run one of them +# HUC subsetting is only supported in the superCONUS domain +domain = c("AK", "HI", "CONUS") # what POI types to include POI_types <- c( # comment / uncomment to modify