From 036f378346c9b84886fb10f8b0793311325aa2f3 Mon Sep 17 00:00:00 2001
From: David Blodgett <dblodgett@usgs.gov>
Date: Fri, 29 Nov 2024 10:27:44 -0600
Subject: [PATCH] combined waterbodies to 01_prep and waterbodies for poi
 creation

---
 workspace/00_get_data.Rmd                    | 37 +++-----------------
 workspace/R/01_prep_functions.R              | 21 +++++++++++
 workspace/R/02_POI_creation_functions.R      | 32 +++++++++++++++++
 workspace/_targets/01_prep_targets.R         |  1 +
 workspace/_targets/02_POI_creation_targets.R |  8 ++++-
 workspace/cache/data_paths.json              |  1 -
 workspace/workflow_runner.R                  |  5 +--
 7 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd
index 4dfa8e3..b578763 100644
--- a/workspace/00_get_data.Rmd
+++ b/workspace/00_get_data.Rmd
@@ -17,7 +17,10 @@ are made to the output of this notebook, they should be checked in.
 source("R/utils.R")
 source("R/config.R")
 source("R/user_vars.R")
-source("R/1_get_data.R")
+source("R/00_get_data_functions.R")
+
+library(sbtools)
+library(jsonlite)
 
 if(!dir.exists("data")) {dir.create("data")}
 if(!dir.exists("bin")) {dir.create("bin")}
@@ -281,38 +284,6 @@ out_list <- c(out_list, list(ref_fab_path = ref_fab_path,
                              ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl))
 ```
 
-NHDPlus Waterbody and Area Polygons converted to an RDS file for easier 
-loading within R.
-
-```{r NHDPlusV2 Waterbodies}
-#  Waterbodies - derived after downloading and post-processing 
-#  NHDPlus Seamless National Geodatabase
-#  Compacted here into a GDB
-waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")
-
-if(!file.exists(waterbodies_path)) {
-  message("formatting NHDPlus watebodies...")
-  
-  data.table::rbindlist(
-    list(
-    
-    read_sf(out_list$nhdplus_gdb, "NHDWaterbody") |>
-      st_transform(proj_crs) |>
-      mutate(layer = "NHDWaterbody"), 
-    
-    read_sf(out_list$nhdplus_gdb, "NHDArea") |>
-      st_transform(proj_crs) |>
-      mutate(layer = "NHDArea")
-    
-    ), fill = TRUE) |>
-    st_as_sf() |>
-    saveRDS(waterbodies_path)
-  
-}
-
-out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
-```
-
 Formatting a full list of network and non-network catchments for the NHDPlus
 domains.  This more easily tracks catchments were are off and on the network
 when aggregating at points of interest.
diff --git a/workspace/R/01_prep_functions.R b/workspace/R/01_prep_functions.R
index 06c4091..a5288ec 100644
--- a/workspace/R/01_prep_functions.R
+++ b/workspace/R/01_prep_functions.R
@@ -555,3 +555,24 @@ prep_rpu_vpu_out <- function(rpu_vpu_out_list, fline) {
   out_rpu_vpu
   
 }
+
+#' make combined waterbodies
+#' @param nhdplus_gdb NHDPlusV2 Geodatabase path
+#' @param proj_crs projected crs to use
+#' @return sf data.frame combining the NHDWaterbody and NHDArea layers
+#' 
+make_combined_waterbodies <- function(nhdplus_gdb, proj_crs) {
+  data.table::rbindlist(
+    list(
+      
+      read_sf(nhdplus_gdb, "NHDWaterbody") |>
+        st_transform(proj_crs) |>
+        mutate(layer = "NHDWaterbody"), 
+      
+      read_sf(nhdplus_gdb, "NHDArea") |>
+        st_transform(proj_crs) |>
+        mutate(layer = "NHDArea")
+      
+    ), fill = TRUE) |>
+    st_as_sf()
+}
diff --git a/workspace/R/02_POI_creation_functions.R b/workspace/R/02_POI_creation_functions.R
index 853779c..d8d6bc1 100644
--- a/workspace/R/02_POI_creation_functions.R
+++ b/workspace/R/02_POI_creation_functions.R
@@ -183,4 +183,36 @@ create_thermo_electric_pois <- function(te_points, te_attributes, tmp_POIs, HUC1
   }
   
   list(tmp_POIs = tmp_POIs, unassigned_plants = unassigned_plants)
+}
+
+#' make waterbodies layer
+#' @param combined_waterbodies nhdplus area and waterbody layers combined together
+#' @param 
+make_waterbodies_layer <- function(combined_waterbodies, flowline, geo_crs = 4326) {
+  
+  # Waterbodies sourced from NHD waterbody layer for entire VPU
+  WBs_VPU_all <- filter(combined_waterbodies, 
+                        COMID %in% flowline$WBAREACOMI) %>%
+    filter(FTYPE != "SwampMarsh") %>%
+    mutate(FTYPE = as.character(FTYPE),
+           source = "NHDv2WB",
+           wb_id = ifelse(GNIS_ID == " ", COMID, GNIS_ID)) %>%
+    st_transform(geo_crs) %>%
+    st_make_valid()
+  
+  old_sf_use_s2 <- sf_use_s2(FALSE)
+  
+  ref_WB <- WBs_VPU_all %>%
+    group_by(wb_id, GNIS_NAME) %>%
+    sfheaders::sf_remove_holes() %>%
+    summarize(do_union = TRUE, member_comid = paste(COMID, collapse = ",")) %>%
+    st_make_valid() %>%
+    ungroup() %>%
+    mutate(area_sqkm = as.numeric(st_area(.)) / 1000000) %>%
+    mutate(source = "NHDv2WB") %>%
+    st_cast("MULTIPOLYGON")
+  
+  sf_use_s2(old_sf_use_s2)
+  
+  list(wbs_draft = WBs_VPU_all, WBs_layer_orig = ref_WB)
 }
\ No newline at end of file
diff --git a/workspace/_targets/01_prep_targets.R b/workspace/_targets/01_prep_targets.R
index db939ca..5c93112 100644
--- a/workspace/_targets/01_prep_targets.R
+++ b/workspace/_targets/01_prep_targets.R
@@ -47,6 +47,7 @@ list(
                                                     usgs_ref_gages,
                                                     min_da_km_gages)),
   ###
+  tar_target(combined_waterbodies, make_combined_waterbodies(NHDPlusV2_gdb, proj_crs)),
   tar_target(reference_flowlines, read_sf(data_paths$ref_fl)),
   tar_target(gfv11_gages_rds, data_paths$GFv11_gages_lyr, format = "file"),
   tar_target(gfv11_gages_dbf, file.path(data_paths$data_dir, "GFV11.dbf"), format = "file"),
diff --git a/workspace/_targets/02_POI_creation_targets.R b/workspace/_targets/02_POI_creation_targets.R
index cc54ffc..a4bd511 100644
--- a/workspace/_targets/02_POI_creation_targets.R
+++ b/workspace/_targets/02_POI_creation_targets.R
@@ -44,4 +44,10 @@ list(tar_target(data_paths_file, "cache/data_paths.json", format = "file"),
      tar_target(te_pois, create_thermo_electric_pois(te_points, te_attributes, 
                                                      gage_pois$tmp_POIs, huc12_poi$HUC12_COMIDs,
                                                      flowline, "TE", vpu_codes),
-                pattern = map(gage_pois, huc12_poi, flowline, vpu_codes)))
\ No newline at end of file
+                pattern = map(gage_pois, huc12_poi, flowline, vpu_codes)),
+     
+     tar_target(combined_waterbodies_file, file.path(prep_path, "combined_waterbodies"), format = "file"),
+     tar_target(combined_waterbodies, readRDS(combined_waterbodies_file)),
+     tar_target(waterbodies, make_waterbodies_layer(combined_waterbodies, flowline, geo_crs), 
+                pattern = map(flowline), deployment = "worker")
+     )
\ No newline at end of file
diff --git a/workspace/cache/data_paths.json b/workspace/cache/data_paths.json
index 194dec6..7464a1a 100644
--- a/workspace/cache/data_paths.json
+++ b/workspace/cache/data_paths.json
@@ -16,7 +16,6 @@
   "ref_cat": "data/reference_fabric/reference_catchments.gpkg",
   "ref_fl": "data/reference_fabric/reference_flowline.gpkg",
   "nwm_fl": "data/reference_fabric/nwm_network.gpkg",
-  "waterbodies_path": "data/NHDPlusNationalData/nhdplus_waterbodies.rds",
   "fullcats_table": "data/NHDPlusNationalData/nhdcat_full.rds",
   "islandcats_table": "data/islands/nhdcat_full.rds",
   "fdr": {
diff --git a/workspace/workflow_runner.R b/workspace/workflow_runner.R
index 5d96208..2b01ddf 100644
--- a/workspace/workflow_runner.R
+++ b/workspace/workflow_runner.R
@@ -43,9 +43,10 @@ if(FALSE) { # this won't run if you just bang through this file
 tar_make_future(rpu_vpu_out_list, workers = 8)
 tar_make(rpu_vpu_out)
 
-# to run all, just do:
+# make sure to run all too!
 tar_make()
 
 Sys.setenv(TAR_PROJECT = "02_POI_creation")
 
-tar_make_future(gage_pois, workers = 8)
+tar_make_future(waterbodies, workers = 8)
+    
\ No newline at end of file
-- 
GitLab