diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd
index 3e53781db99391b4ee3ecd2654bb942fbba55134..d75412ef3799226920a7f3ab204245c1719f951f 100644
--- a/workspace/00_get_data.Rmd
+++ b/workspace/00_get_data.Rmd
@@ -21,6 +21,10 @@ source("R/00_get_data_functions.R")
 library(sbtools)
 library(jsonlite)
+library(dplyr)
+library(nhdplusTools)
+library(sf)
+library(hyRefactor)
 
 if(!dir.exists("data")) {dir.create("data")}
 if(!dir.exists("bin")) {dir.create("bin")}
 
@@ -46,6 +50,10 @@ if(is(check_7z, "try-error")) {
 if(!sbtools::is_logged_in())
   initialize_sciencebase_session(username = Sys.getenv("sb_user"))
 
+region <- NULL
+reg <- (length(domain) == 1 && grepl("^[0-1]", domain)) # TRUE when domain is a single HUC
+if(reg) region <- substr(domain, 1, 2)
+
 # Enable mapview rendering if desired
 mapview <- FALSE
 ```
@@ -59,15 +67,17 @@ national modeling fabrics.
 
 wbd_points_file <- "102020wbd_outlets.gpkg"
 
+if("huc12" %in% POI_types) {
 get_sb_file(item = "63cb38b2d34e06fef14f40ad",
             item_files = wbd_points_file,
             out_destination = data_dir)
+}
 
 out_list <- c(
   out_list,
   list(hu12_points_path = file.path(data_dir, wbd_points_file)))
 
-if(mapview)(mapview(read_sf(out_list$hu12_points_path)))
+if(mapview) try(mapview(read_sf(out_list$hu12_points_path)))
 ```
@@ -83,7 +93,7 @@ support of the second dataset of basin characteristics and disturbance indexes.
 # USGS streamgages in the conterminous United States indexed to NHDPlus v2.1
 # flowlines to support Streamgage Watershed InforMation (SWIM), 2021: U.S.
 # Geological Survey data release, https://doi.org/10.5066/P9J5CK2Y.
-
+if("gages" %in% POI_types) {
 out_list <- c(
   out_list,
   list(SWIM_points_path =
@@ -96,7 +106,8 @@ out_list <- c(
   out_list,
   list(SWIM_dbf = file.path(data_dir, "SWIMGFinfo.dbf")))
 
-if(mapview)(mapview(read_sf(out_list$SWIM_points_path)))
+if(mapview) try(mapview(read_sf(out_list$SWIM_points_path)))
+}
 ```
 
 Sites associated with work by the U.S. Geological Survey (USGS) to estimate
@@ -117,7 +128,7 @@ Galanter and others, 2023).
 # 2008-2020 period by power plant, month, and year for the conterminous
 # United States: U.S. Geological Survey data release,
 # https://doi.org/10.5066/P9ZE2FVM.
-
+if("thermo_electric" %in% POI_types) {
 TE_points_path <- file.path(data_dir, "TE_points")
 
 dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)
@@ -132,7 +143,8 @@ get_sb_file("63adc826d34e92aad3ca5af4",
 
 out_list <- c(out_list, list(TE_points_path = TE_points_path))
 
-if(mapview)(mapview(read_sf(out_list$TE_points_path)))
+if(mapview) try(mapview(read_sf(out_list$TE_points_path)))
+}
 ```
 
 Network locations made to improve the routing capabilities
@@ -145,13 +157,14 @@ water use withdrawals.
 # Schwarz, G.E., 2019, E2NHDPlusV2_us: Database of Ancillary Hydrologic
 # Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines: U.S.
 # Geological Survey data release, https://doi.org/10.5066/P986KZEM.
-
+if("addition_removal" %in% POI_types) {
 out_list <- c(
   out_list,
   list(USGS_IT_path =
          get_sb_file("5d16509ee4b0941bde5d8ffe",
                      "supplemental_files.zip",
                      file.path(data_dir, "USGS_IT"))))
+}
 ```
 
 Two datasets relate hydro location information from the National Inventory of
@@ -171,6 +184,7 @@ natural flow (Wieczorek and others, 2021).
 # impact/disturbance metrics for the conterminous United States, 1800 to 2018:
 # U.S. Geological Survey data release, https://doi.org/10.5066/P92S9ZX6.
+if("dams" %in% POI_types) { NID_points_path <- file.path(data_dir, "NID_points") get_sb_file("5dbc53d4e4b06957974eddae", @@ -183,7 +197,8 @@ get_sb_file("5fb7e483d34eb413d5e14873", out_list <- c(out_list, list(NID_points_path = NID_points_path)) -if(mapview)(mapview(read_sf(out_list$NID_points_path))) +if(mapview) try(mapview(read_sf(out_list$NID_points_path))) +} ``` This next section retrieves NHDPlus datasets related to national modeling @@ -218,22 +233,15 @@ get_sb_file("5dbc53d4e4b06957974eddae", "NHDPlusV21_NationalData_GageLoc_05.7z", get_sb_file("5c86a747e4b09388244b3da1", "CrosswalkTable_NHDplus_HU12_CSV.7z", nhdplus_dir) +if("CONUS" %in% domain | reg) { # will download the 7z and unzip into the folder structure in nhdplus_gdb path download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"), out_path = data_dir, check_path = nhdplus_gdb) +} +if("HI" %in% domain) { download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"), out_path = islands_dir, check_path = islands_gdb) - -# cache the huc12 layer in rds format -hu12_rds <- file.path(nhdplus_dir, "HUC12.rds") - -if(!file.exists(hu12_rds)) { - read_sf(nhdplus_gdb, layer = "HUC12") |> - st_make_valid() |> - st_transform(crs = proj_crs) |> - # TODO: convert this to gpkg - saveRDS(file = hu12_rds) } get_sb_file("5dcd5f96e4b069579760aedb", "GageLocGFinfo.dbf", data_dir) @@ -267,19 +275,25 @@ ref_cat <- file.path(ref_fab_path, "reference_catchments.gpkg") ref_fl <- file.path(ref_fab_path, "reference_flowline.gpkg") nwm_fl <- file.path(ref_fab_path, "nwm_network.gpkg") -for (vpu in c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", +all_vpu <- c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", "03S", "03W", "03N", "10U", "11", "12", "13", "14", "16", - "17", "18")) { - + "17", "18") + +if(!any(c("CONUS", "HI", "AK") %in% domain)) { + all_vpu <- all_vpu[grepl(substr(domain, 1, 2), all_vpu)] +} + +for (vpu in all_vpu) { get_sb_file("6317a72cd34e36012efa4e8a", paste0(vpu, "_reference_features.gpkg"), ref_fab_path) } +if("CONUS" %in% domain | reg) { get_sb_file("61295190d34e40dd9c06bcd7", c("reference_catchments.gpkg", "reference_flowline.gpkg", "nwm_network.gpkg"), out_destination = ref_fab_path) - +} out_list <- c(out_list, list(ref_fab_path = ref_fab_path, ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl)) @@ -294,11 +308,11 @@ when aggregating at points of interest. fullcat_path <- file.path(nhdplus_dir, "nhdcat_full.rds") islandcat_path <- file.path(islands_dir, "nhdcat_full.rds") -if(!file.exists(fullcat_path)) +if(!file.exists(fullcat_path) & ("CONUS" %in% domain | reg)) saveRDS(cat_rpu(out_list$ref_cat, nhdplus_gdb), fullcat_path) -if(!file.exists(islandcat_path)) +if(!file.exists(islandcat_path) & "HI" %in% domain) saveRDS(cat_rpu(out_list$islands_gdb, islands_gdb), islandcat_path) @@ -309,9 +323,10 @@ out_list <- c(out_list, list(fullcats_table = fullcat_path, islandcats_table = i Download NHDPlusV2 FDR and FAC grids for refactoring and catcment splitting. 
 ```{r NHDPlusV2 FDR_FAC}
+if("CONUS" %in% domain | reg) {
 # NHDPlus FDR/FAC grids available by raster processing unit
-# TODO: set this up for a per-region download for #134
-out_list<- c(out_list, make_fdr_fac_list(file.path(data_dir, "fdrfac")))
+out_list <- c(out_list, make_fdr_fac_list(file.path(data_dir, "fdrfac"), region))
+}
 ```
@@ -319,10 +334,10 @@ Download NHDPlusV2 elevation grids for headwater extensions and splitting
 catchments into left and right banks.
 
 ```{r NHDPlusV2 elev}
+if("CONUS" %in% domain | reg) {
 # NHDPlus elev grids available by raster processing unit
-# TODO: set this up for a per-region download for #134
-out_list<- c(out_list, make_nhdplus_elev_list(file.path(data_dir, "nhdplusv2_elev")))
-
+out_list <- c(out_list, make_nhdplus_elev_list(file.path(data_dir, "nhdplusv2_elev"), region))
+}
 ```
 
 MERIT Topographic and Hydrographic data for deriving GIS Features of the
@@ -339,6 +354,8 @@ National Hydrologic Modeling, Alaska Domain
 
 merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro")
 
+if("AK" %in% domain) {
+
 get_sb_file("5dbc53d4e4b06957974eddae", "merged_AK_MERIT_Hydro.zip", merit_dir)
 
 # TODO: update to use "6644f85ed34e1955f5a42dc4" when released (roughly Dec 10)
 get_sb_file("64ff628ed34ed30c2057b430",
@@ -348,6 +365,8 @@ get_sb_file("64ff628ed34ed30c2057b430",
             c("ak_merit_dem.zip", "ak_merit_fdr.zip", "ak_merit_fac.zip"),
             merit_dir)
 
+}
+
 out_list <- c(
   out_list,
   list(merit_catchments = file.path(merit_dir,
@@ -376,7 +395,9 @@ Alaska Domain
 AK_GF_source <- "ak.7z"
 AK_dir <- file.path(data_dir, "AK")
 
+if("AK" %in% domain) {
 get_sb_file("5dbc53d4e4b06957974eddae", AK_GF_source, AK_dir)
+}
 
 out_list <- c(out_list, list(ak_source = file.path(AK_dir, "ak.gpkg")))
@@ -391,7 +412,9 @@ Hawaii Domain
 # Hydrologic Modeling, Hawaii Domain: U.S. Geological Survey data release,
 # https://doi.org/10.5066/P9HMKOP8
 
+if("HI" %in% domain) {
 get_sb_file("5dbc53d4e4b06957974eddae", "hi.7z", islands_dir)
+}
 
 out_list <- c(out_list, list(hi_source = file.path(islands_dir, "hi.gpkg")))
@@ -412,6 +435,7 @@ out <- list(GFv11_gages_lyr = file.path(data_dir, "GFv11/GFv11_gages.rds"),
             GFv11_gdb = file.path(GFv11_dir, "GFv1.1.gdb"),
             GFv11_tgf = file.path(GFv11_dir, "TGF.gdb"))
 
+if("CONUS" %in% domain | reg) {
 get_sb_file("5e29d1a0e4b0a79317cf7f63", "GFv1.1.gdb.zip", GFv11_dir, check_path = out$GFv11_gdb)
 
 get_sb_file("5d967365e4b0c4f70d113923", "TGF.gdb.zip", GFv11_dir, check_path = out$GFv11_tgf)
@@ -423,7 +447,7 @@ if(!file.exists(out$GFv11_gages_lyr)) {
 
     saveRDS(out$GFv11_gages_lyr)
 }
-
+}
 out_list <- c(out_list, out)
 
 if(mapview)(mapview(readRDS(out_list$GFv11_gages_lyr)))
@@ -437,12 +461,14 @@ GAGESII dataset
 
 SWIM_points_path <- file.path(data_dir, "SWIM_gage_loc")
 
+if("gages" %in% POI_types) {
 get_sb_file("631405bbd34e36012efa304a", "gagesII_9322_point_shapefile.zip", SWIM_points_path)
+}
 
 out_list <- c(out_list, list(
   gagesii_lyr = file.path(SWIM_points_path, "gagesII_9322_point_shapefile")))
 
-if(mapview)(mapview(read_sf(out_list$gagesii_lyr)))
+if(mapview) try(mapview(read_sf(out_list$gagesii_lyr)))
 ```
 
 HILARRI dataset of Network-indexed Hydropower structures, reservoirs, and
@@ -457,15 +483,20 @@ locations
 
 hilarri_dir <- file.path(data_dir, "HILARRI")
 hilarri_out <- list(hilarri_sites = file.path(hilarri_dir, "HILARRI_v2.csv"))
 
-download_file("https://hydrosource.ornl.gov/sites/default/files/2023-03/HILARRI_v2.zip",
-              out_path = hilarri_dir, check_path = hilarri_out$hilari_sites)
+if("reservoirs" %in% POI_types) {
+  # now behind user tracking
download_file("https://hydrosource.ornl.gov/sites/default/files/2023-03/HILARRI_v2.zip", +# out_path = hilarri_dir, check_path = hilarri_out$hilari_sites) + + get_sb_file("5dbc53d4e4b06957974eddae", "HILARRI_v2.zip", hilarri_dir, check_path = hilarri_out$hilarri_sites) +} out_list <- c(out_list, hilarri_out) if(mapview) { - mapview(st_as_sf(read.csv(out_list$hilarri_sites), + try(mapview(st_as_sf(read.csv(out_list$hilarri_sites), coords = c("longitude", "latitude"), - crs = 4326)) + crs = 4326))) } ``` @@ -495,8 +526,9 @@ istarf_url <- "https://zenodo.org/record/4602277/files/ISTARF-CONUS.csv?download # Download GRanD zip from above GRanD_zip <- file.path(res_path, "GRanD_Version_1_3.zip") - +if("reservoirs" %in% POI_types) { download_file(res_att_url, res_path, file_name = "ResOpsUS.zip") +} out_list <- c(out_list, list(res_attributes = file.path(res_path, "ResOpsUS", "attributes", @@ -504,12 +536,15 @@ out_list <- c(out_list, istarf_csv <- file.path(res_path, "ISTARF-CONUS.csv") +if("reservoirs" %in% POI_types) { download_file(istarf_url, res_path, istarf_csv, file_name = "ISTARF-CONUS.csv") +} out_list <- c(out_list, list(istarf = istarf_csv)) grand_dir <- file.path(res_path, "GRanD_Version_1_3") +if("reservoirs" %in% POI_types) { # Extract GRanD data if(!dir.exists(grand_dir)) { if(!file.exists(GRanD_zip)) @@ -518,16 +553,18 @@ if(!dir.exists(grand_dir)) { unzip(GRanD_zip, exdir = res_path) } +} out_list <- c(out_list, list(GRanD = grand_dir)) resops_to_nid_path <- file.path(res_path, "cw_ResOpsUS_NID.csv") +istarf_xwalk_path <- file.path(res_path, "istarf_xwalk_final_48_gfv11.csv") +if("reservoirs" %in% POI_types) { get_sb_file("5dbc53d4e4b06957974eddae", "cw_ResOpsUS_NID.csv", dirname(resops_to_nid_path)) -istarf_xwalk_path <- file.path(res_path, "istarf_xwalk_final_48_gfv11.csv") - get_sb_file("5dbc53d4e4b06957974eddae", "istarf_xwalk_final_48_gfv11.csv", dirname(istarf_xwalk_path)) +} out_list <- c(out_list, list(resops_NID_CW = resops_to_nid_path, istarf_xwalk = istarf_xwalk_path)) @@ -541,7 +578,9 @@ All Hydro-linked Network Data Index (NLDI) datasets nldi_dir <- file.path(data_dir, "nldi") +if("gages" %in% POI_types) { get_sb_file("60c7b895d34e86b9389b2a6c", "all", nldi_dir) +} out_list <- c( out_list, diff --git a/workspace/R/00_get_data_functions.R b/workspace/R/00_get_data_functions.R index 81c4318fa0b0be1fba6ac01c721aeb9b7fdb1352..113beed4e683be19b906ac867b6e21a870766b97 100644 --- a/workspace/R/00_get_data_functions.R +++ b/workspace/R/00_get_data_functions.R @@ -6,7 +6,7 @@ #' @param check_path character path to check, if it exists, it will be returned invisibly #' @return path that data was saved to get_sb_file <- function(item, item_files, out_destination, unzip = TRUE, check_path = NULL) { - + if(!is.null(check_path) && file.exists(check_path)) return(invisible(check_path)) check_auth() @@ -26,13 +26,24 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE, check_p names = basename(missing), destinations = missing) } + + if(!is.null(check_path) && !file.exists(check_path) && unzip) { + missing <- out_files[grepl("7z$|zip$", out_files)] + warning("check file missing but zip file exists, trying to unzip again") + } if(unzip) { un7zip_fs <- missing[grepl("7z$|zip$", missing)] for(f in un7zip_fs) { - system(paste0(sevenz, " e -o", out_destination, " ", f)) + + out_destination_use <- out_destination + if(!is.null(check_path) && grepl(".gdb.zip", f)) { + out_destination_use <- check_path + } + + system(paste0(sevenz, " e -o", out_destination_use, " ", f)) 
     }
   }
@@ -78,10 +89,11 @@ download_file <- function(url, out_path, check_path = NULL, unzip = TRUE, file_n
 
 #' make flow direction and flow accumulation file list
 #' @param fdr_fac_dir directory where flow direction and flow accumulation are
+#' @param hu2 two-digit HUC to pass to download_elev
 #' @return list containing all flow direction and flow accumulation files
-make_fdr_fac_list <- function(fdr_fac_dir) {
+make_fdr_fac_list <- function(fdr_fac_dir, hu2) {
   if(!dir.exists(fdr_fac_dir))
-    download_elev("FDRFAC", fdr_fac_dir)
+    download_elev("FDRFAC", fdr_fac_dir, hu2)
 
   dirs <- unique(dirname(list.files(fdr_fac_dir, recursive = TRUE, full.names = TRUE)))
@@ -98,10 +110,10 @@ make_fdr_fac_list <- function(fdr_fac_dir) {
   out
 }
 
-make_nhdplus_elev_list <- function(elev_dir) {
-
+make_nhdplus_elev_list <- function(elev_dir, hu2) {
+
   if(!dir.exists(elev_dir))
-    download_elev("DEM", elev_dir)
+    download_elev("DEM", elev_dir, hu2)
 
   dirs <- unique(dirname(list.files(elev_dir, recursive = TRUE, full.names = TRUE)))
diff --git a/workspace/R/01_prep_functions.R b/workspace/R/01_prep_functions.R
index a5288ec064c7c20980f92840d4086b97f41e2e46..32223b6f856157b8da4a8cf7afbb590937f7ca3c 100644
--- a/workspace/R/01_prep_functions.R
+++ b/workspace/R/01_prep_functions.R
@@ -81,6 +81,8 @@ prepare_SWIM <- function(SWIM_dbf_file, SWIM_points_file, ref_gages,
     write.csv(potCanalsIII, "temp/GagestoCheck_GAGESIII.csv")
   }
 
+  sqkm_sqmi <- .386 # square-kilometer to square-mile conversion factor
+
   # Subset further by streamgages below the minimum drainage area criteria
   gagesIII_db_fin <- gagesIII_db %>%
     filter(!site_no %in% potCanalsIII$site_no, drain_area > (min_da_km_gages * sqkm_sqmi))
@@ -179,6 +181,8 @@ prepare_gage_location <- function(NHDPlusV2_gageloc_file, NHDPlusV2_gdb,
     potCanals_gageloc <- read.csv("temp/GagestoCheck_GageLoc.csv")
   }
 
+  sqkm_sqmi <- .386 # square-kilometer to square-mile conversion factor
+
   gageloc_db_fin <- gageloc_db %>%
     filter(!site_no %in% potCanals_gageloc$site_no, drain_area > (min_da_km_gages * sqkm_sqmi))
diff --git a/workspace/R/05_non_dendritic_functions.R b/workspace/R/05_non_dendritic_functions.R
index fce667c96ab448bf6cf14dc18f9845d7eff9efa9..e759e33f4c116cf9d1b4aab2c861d04cd10c8654 100644
--- a/workspace/R/05_non_dendritic_functions.R
+++ b/workspace/R/05_non_dendritic_functions.R
@@ -565,7 +565,10 @@ dissolve_holes <- function(divides_poi){
 #' @param xwalk_nhd_wbd xwalk_layer from huc12 crosswalk step
 #' @param vpu_nhd vpu_nhd from vpu_attributes step
 #' @param debug logical whether to output proto HRUs
-assign_huc12_fun <- function(divides_lu, xwalk_nhd_wbd, vpu_nhd, debug = FALSE) {
+#' @param CAC_thresh coefficient of areal correspondence: minimum measure of common
+#'   footprint between two features, taking into account the area of each
+#'   feature in proportion to the overlap area
+assign_huc12_fun <- function(divides_lu, xwalk_nhd_wbd, vpu_nhd, debug = FALSE, CAC_thresh = 0.66) {
 
   print(paste0("Currently there are ", sum(is.na(divides_lu$POI_ID)), " cats without a POI_ID"))
diff --git a/workspace/R/user_vars.R b/workspace/R/user_vars.R
index 980c4ffd0bbca918465f872daae640227d1c2abc..3bc64a23fd1dc35e8f4ce7f53c95ee031950ab2b 100644
--- a/workspace/R/user_vars.R
+++ b/workspace/R/user_vars.R
@@ -1,20 +1,25 @@
 if(Sys.getenv("sb_user") == "CHANGEME") stop("must set sb_user")
 
-# Path mods
-path_mod <- FALSE
-if (path_mod){
-  # 7-zip
-  old_path <- Sys.getenv("PATH")
-  Sys.setenv(PATH = paste(old_path, "C:\\Program Files\\7-zip", sep = ";"))
-
-  # Mapshaper
-  old_path <- Sys.getenv("PATH")
-  Sys.setenv(PATH = paste(old_path, paste0(Sys.getenv("USERPROFILE"), "AppData\\Roaming\\npm"), sep = ";"))
";")) - - # Whitebox Tools - library(whitebox) - wbt_init(exe_path = paste0(Sys.getenv("USERPROFILE"), "AppData\\Roaming\\R\\data\\R\\whitebox\\WBT\\whitebox_tools.exe")) -} +# Domain to control what region data will be assembled for. +# Can be a single two or four digit HUC to limit some +# data downloads to a smaller domain. +# can "AK", "HI", or "CONUS" to run one of them +# HUC subsetting is only supported in the superCONUS domain +# domain = c("AK", "HI", "CONUS") +domain <- "17" + +# what POI types to include +POI_types <- c( # comment / uncomment to modify + "huc12", # a relatively uniform distribution of outlets used by IWAAs + "gages", # can be configured with prep_targets + "thermo_electric", # thermo electric power plants + "reservoirs", # reservoirs where operations data are known + "dams", # National Inventory of Dams locations + "waterbodies", # inlets and outlets of waterbodies + "addition_removal", # "AR-events" from E2NHD + "elevation_breaks", # configured in this file + "time_of_travel", # configured in this file + "") # leave this line alone # CRS for projected and geographic reference systems geo_crs <- 4326 @@ -28,31 +33,52 @@ POR_Start <- as.Date("1979-10-01") # inclusion into POI selection (non-inclusive of start date) min_obs <- 90 -# POI thresholds +############################################ +#### Variables used during POI creation #### +############################################ + # Minimum drainage area for gages (square kilometers) min_da_km_gages <- min_da_km <- 5 -sqkm_sqmi <- .386 + +# Minimum drainage area for inclusion of network outlets at the coast. min_da_km_terminal <- 10 -# Variables used during 02_Navigate -wb_area_thresh <- 1 # Waterbody size for inclusion as POI + +# Waterbody size for inclusion as POI +wb_area_thresh <- 1 + # minimum/max drainage area for headwaters(square kilometers) min_da_km_hw <- 40 max_da_km_hw <- 60 + # Max difference in elevation within a single segment/POI (centimeters) elev_diff <- 50000 + # Max number of travel time hours between adjacent POIS travt_diff <- 24 + # Maximum total drainage area to consider a travel time break max_elev_TT_DA <- 10 + # maximum proportional drainage area difference to collapse adjacent POIs poi_dar_move <- .05 poi_distance_move <- 2.5 +# If the measure is within this percent of the end of a reach, don't +# add a "event" POI +reach_meas_thresh <- 10 + +######################################## +#### Settings for refactor workflow #### +######################################## -# Settings for refactor workflow +# If catchments are longer than this, split them split_meters <- 10000 + +# If catchments are short than this, combine them combine_meters <- 1000 -reach_meas_thresh <- 10 + +# Target smallest catchment size for aggregation. +# (this is a fall back only used if POIs are added to enforce network validity) aggregate_da_thresh_sqkm <- 5 # parallel factor for catchment reconciliation. 
@@ -60,5 +86,22 @@ para_reconcile <- 2
 para_split_flines <- 2
 keep_cat_points <- 1
 
-# Variables used during 07-2_NonDend
-CAC_thresh <- 0.66 # Coefficient of areal correspondence threshold
+# Coefficient of areal correspondence (CAC) threshold for agreement
+# with HUC12 boundaries in internally drained area checks
+CAC_thresh <- 0.66
+
+# Path mods
+path_mod <- FALSE
+if (path_mod){
+  # 7-zip
+  old_path <- Sys.getenv("PATH")
+  Sys.setenv(PATH = paste(old_path, "C:\\Program Files\\7-zip", sep = ";"))
+
+  # Mapshaper
+  old_path <- Sys.getenv("PATH")
+  Sys.setenv(PATH = paste(old_path, paste0(Sys.getenv("USERPROFILE"), "\\AppData\\Roaming\\npm"), sep = ";"))
+
+  # Whitebox Tools
+  library(whitebox)
+  wbt_init(exe_path = paste0(Sys.getenv("USERPROFILE"), "\\AppData\\Roaming\\R\\data\\R\\whitebox\\WBT\\whitebox_tools.exe"))
+}
\ No newline at end of file
diff --git a/workspace/_targets/01_prep_targets.R b/workspace/_targets/01_prep_targets.R
index 5c93112e679498ae93cfb4d8986027bb83f1132b..7d1eab4c4dcf1195f507c1f3b88e15288e3b94c5 100644
--- a/workspace/_targets/01_prep_targets.R
+++ b/workspace/_targets/01_prep_targets.R
@@ -69,8 +69,12 @@ list(
   tar_target(rpu_vpu, read.csv(system.file("extdata/rpu_vpu.csv",
                                            package = "hyfabric"),
                                colClasses = "character")),
-  tar_target(vpu_codes, unique(rpu_vpu$vpuid)),
-  tar_target(all_rpu_codes, unique(rpu_vpu$rpuid)),
+  tar_target(vpu_codes, if(length(domain) == 1 && grepl("^[0-1]", domain))
+    unique(rpu_vpu$vpuid[grepl(substr(domain, 1, 2), rpu_vpu$vpuid)]) else
+    unique(rpu_vpu$vpuid)),
+  tar_target(rpu_codes, if(length(domain) == 1 && grepl("^[0-1]", domain))
+    unique(rpu_vpu$rpuid[grepl(substr(domain, 1, 2), rpu_vpu$rpuid)]) else
+    unique(rpu_vpu$rpuid)),
   tar_target(full_cat_file, data_paths$fullcats_table, format = "file"),
   tar_target(full_cat_table, readRDS(full_cat_file)),
diff --git a/workspace/_targets/03_refactor_targets.R b/workspace/_targets/03_refactor_targets.R
index 6c459e4f900b601693a2d8cb6f4ff5c67bb23288..8c334f54614a81c74f1fde5061d9d652e588206b 100644
--- a/workspace/_targets/03_refactor_targets.R
+++ b/workspace/_targets/03_refactor_targets.R
@@ -16,7 +16,7 @@ prep_path <- ".targets_stores/01_prep/objects/"
 list(tar_target(data_paths_file, "cache/data_paths.json", format = "file"),
      tar_target(data_paths, jsonlite::read_json(data_paths_file)),
-     tar_target(rpu_codes_file, file.path(prep_path, "all_rpu_codes"), format = "file"),
+     tar_target(rpu_codes_file, file.path(prep_path, "rpu_codes"), format = "file"),
      tar_target(rpu_codes, readRDS(rpu_codes_file)),
      tar_target(rpu_vpu_file, file.path(prep_path, "rpu_vpu"), format = "file"),
      tar_target(rpu_vpu, readRDS(rpu_vpu_file)),
diff --git a/workspace/_targets/04_aggregate_targets.R b/workspace/_targets/04_aggregate_targets.R
index 6374f23689cea8003a014d24c46acacc3ea8146f..f20b4399ad0ac7972384f2b47a0db59ba3019d1d 100644
--- a/workspace/_targets/04_aggregate_targets.R
+++ b/workspace/_targets/04_aggregate_targets.R
@@ -15,7 +15,7 @@ source("R/04_aggregate_functions.R")
 
 prep_path <- ".targets_stores/01_prep/objects/"
 
-list(tar_target(rpu_codes_file, file.path(prep_path, "all_rpu_codes"), format = "file"),
+list(tar_target(rpu_codes_file, file.path(prep_path, "rpu_codes"), format = "file"),
      tar_target(rpu_codes, readRDS(rpu_codes_file)),
      tar_target(rpu_gpkg, file.path("cache", paste0(rpu_codes, "_refactor.gpkg")),
                 pattern = map(rpu_codes), deployment = "worker"),
diff --git a/workspace/_targets_runner.R b/workspace/_targets_runner.R
index f0786c2497258098741820c7f255c1c76f3758a1..594af6a03d708c86bb029045ba5065efebe364d2 100644
--- a/workspace/_targets_runner.R
+++ b/workspace/_targets_runner.R
@@ -12,12 +12,18 @@ file.remove("temp/GagestoCheck_GageLoc.csv")
 Sys.setenv(TAR_PROJECT = "01_prep")
 
 # step 1: gage selection
+# skip this step if gages aren't of interest
 tar_make(gage_selection_map_report)
 
 # step 2: vpu prep
-# iterates over vpus
+# the vpu_codes target controls how many VPUs worth of data are prepared
+# set the "domain" variable to an HU02 in "R/user_vars.R" to restrict
+tar_make(vpu_codes)
 tar_load(vpu_codes)
+tar_make(rpu_codes)
+
+########## Debug help ################
 if(FALSE) { # this won't run if you just bang through this file
   # To debug a given vpu, first run all to get the dynamic branches established:
   tar_make(vpu_base)
@@ -37,7 +43,9 @@ if(FALSE) { # this won't run if you just bang through this file
 
   tar_make(callr_function = NULL)
 }
+########## End debug help ############
 
+# if running many processing units, parallelization is possible
 workers <- 12
 
 # run branches for a given target in parallel if you have enough memory
@@ -48,8 +56,20 @@ tar_make(rpu_vpu_out)
 # make sure to run all too!
 tar_make()
 
+# see what you've done with:
+tar_visnetwork(targets_only = TRUE)
+# and
+View(tar_meta())
+
 Sys.setenv(TAR_PROJECT = "02_POI_creation")
 
+tar_make(vpu_gpkg)
+
+# see what we are going to run through:
+# the POI creation workflow loops over these
+tar_read(vpu_gpkg)
+
+# if running many, each can be run independently.
 tar_make_future(huc12_poi, workers = workers)
 tar_make_future(gage_pois, workers = workers)
 tar_make_future(te_pois, workers = workers)
@@ -72,20 +92,50 @@ tar_make_future(poi_lookup, workers = workers)
 tar_make_future(draft_segments, workers = workers)
 tar_make_future(write_poi_, workers = workers)
 
+# Otherwise, just run all with
+tar_make()
+
 Sys.setenv(TAR_PROJECT = "03_refactor")
 
+# iterate over "raster processing units"
+tar_make(rpu_gpkg)
+tar_make(vpu_gpkg) # need to track the vpu for each rpu
+
+tar_read(rpu_gpkg)
+tar_read(vpu_gpkg)
+
 workers = 6
 
+# now make all the rpus that we need
 tar_make_future(workers = workers)
 
 Sys.setenv(TAR_PROJECT = "04_aggregate")
 
+# iterate over "raster processing units"
+tar_make(rpu_gpkg)
+tar_make(agg_gpkg) # need to track the vpu for each rpu
+
+tar_read(rpu_gpkg)
+tar_read(agg_gpkg) # output to "aggregate" rpu gpkgs
+
 workers = 6
 
 tar_make_future(workers = workers)
 
 Sys.setenv(TAR_PROJECT = "05_non_dendritic")
 
+# now we combine raster units back to vector processing units
+tar_make(vpu_gpkg)
+tar_read(vpu_gpkg)
+
+# outputs will be written to a refactor and final "GF" gpkg
+tar_make(rfc_gpkg)
+tar_read(rfc_gpkg)
+tar_make(gf_gpkg)
+tar_read(gf_gpkg)
+
+tar_make()
+
 workers = 6
 
 tar_make_future(workers = workers)
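
Reviewer note on the new `domain` switch: the single-HUC filtering above is implemented twice, once in `00_get_data.Rmd` (the `reg`/`region` variables) and once in `_targets/01_prep_targets.R` (the `vpu_codes`/`rpu_codes` targets). Below is a minimal sketch of the intended behavior, runnable where the `hyfabric` package is installed; `filter_units()` is a hypothetical helper written only for illustration, since the patch inlines this logic rather than defining a function.

```r
# the VPU/RPU lookup shipped with hyfabric, read the same way as in
# _targets/01_prep_targets.R
rpu_vpu <- read.csv(system.file("extdata/rpu_vpu.csv", package = "hyfabric"),
                    colClasses = "character")

# hypothetical helper for illustration: a single two- or four-digit HUC
# domain starts with "0" or "1"; named domains ("CONUS", "HI", "AK")
# pass all codes through unfiltered
filter_units <- function(domain, codes) {
  if (length(domain) == 1 && grepl("^[0-1]", domain)) {
    codes[grepl(substr(domain, 1, 2), codes)]
  } else {
    codes
  }
}

filter_units("17", unique(rpu_vpu$vpuid))    # expect just "17"
filter_units("10", unique(rpu_vpu$vpuid))    # expect "10U" and "10L"
filter_units("CONUS", unique(rpu_vpu$rpuid)) # expect all RPUs, unfiltered
```

Matching on `substr(domain, 1, 2)` rather than the full `domain` string keeps a four-digit HUC such as "1708" from producing an empty code set, which is why the `vpu_codes`/`rpu_codes` targets were aligned here with the convention `00_get_data.Rmd` already uses.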