From 5f2265032f7f643e612222e41263607817df69f4 Mon Sep 17 00:00:00 2001
From: Andy Bock <abock@usgs.gov>
Date: Tue, 12 Jan 2021 12:36:53 -0700
Subject: [PATCH] Function for Cat2HUC12 xwalk

---
 workspace/R/utils.R | 119 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 92 insertions(+), 27 deletions(-)

diff --git a/workspace/R/utils.R b/workspace/R/utils.R
index c25b4c7..17e5bc8 100644
--- a/workspace/R/utils.R
+++ b/workspace/R/utils.R
@@ -1,3 +1,23 @@
+needs_layer <- function(db, layer) {
+  
+  if(file.exists(db)) {
+    layers <- st_layers(db)
+    if(layer %in% layers$name)
+      return(FALSE)
+  }
+  TRUE
+}
+
+layer_exists <- function(db, layer) {
+  if(file.exists(db)) {
+    layers <- st_layers(db)
+    if(layer %in% layers$name)
+      return(TRUE)
+  }
+  FALSE
+}
+
+
 # Subset NHD RDS by hydrologic region
 VPU_Subset <- function(nhdpath, VPU){
   ##########################################
@@ -80,11 +100,11 @@ VPU_Subset <- function(nhdpath, VPU){
     
     nhd <- filter(nhd, COMID %in% final$COMID)
   }
-  return (nhd)
+  return (st_as_sf(nhd))
 }
 
 
-Merge_hydReg <- function(feat, in_gpkg, out_gpkg){
+Merge_VPU <- function(feat, in_gpkg, out_gpkg){
   ##########################################
   # Merges geopackages together to create CONUs geopackage of features
   # 
@@ -107,37 +127,82 @@ Merge_hydReg <- function(feat, in_gpkg, out_gpkg){
   }
 }
 
-siteAtt <- function(hydReg){
+get_HUC12_Xwalk <- function(VPU, nhd_cats = FALSE, wbd = FALSE){
+  ##########################################
+  # Retrieve HUC12 crosswalk for a given unit (CONUS, HI, AK)
+  # 
+  # Arguments:
+  #   VPU : (character) vector processing unit
+  #   in_gpkg : (character, optional) geopackage to find HUC12 xwalk
+  #
+  # Returns:
+  #   writes output geopackage of CONUS for given features
+  ##########################################
+  if (VPU == "20"){
+    # This was previouly calculated for HI in the Hawaii version of NHD_navigate.Rmd
+    xWalk <- read_sf(file.path("cache", paste0("GF_", VPU,".gpkg")), "HUC12_nhd_20")
+    
+  } else {
+    # crosswalk to HU12, filter by VPU
+    nhd_to_HU12 <- read.csv(wbd, colClasses = c("character", "integer", "character")) %>% 
+      filter(grepl(paste0("^", VPU), .data$HUC_12))
+    
+    # Subset full nhdcats to just VPU
+    nhd_cats <- readRDS(staged_nhd$catchment) 
+    reg_cats <- nhd %>% 
+      inner_join(select(wbd, c(FEATUREID, HUC_12)), by = "FEATUREID") %>% 
+      st_transform(crs) %>% 
+      st_set_precision(10) %>% 
+      st_make_valid()
+    
+    HUC12_lyr <- readRDS(HUC12) %>% 
+      filter(grepl(paste0("^", VPU,".*"), .data$HUC_12))
+    
+    # Intersect the HUC12s and catchments
+    xWalk <- sf::st_intersection(HUC12_lyr, nhd_cats) %>%
+      # Convert area to squaker kilometers and drop geom
+      mutate(intArea = as.numeric(st_area(.)) * 0.000001) %>%
+      # Create sqkm attribute for HUC12s
+      mutate(HUC12_SQKM = ACRES * .00404686) #%>%
+      # group by IDs and get proporational area
+      #group_by(FEATUREID, HUC_12) %>%
+      #mutate(cat_in_HUC = sum(intArea)/AreaSqKM,
+      #          HUC_in_cat = sum(intArea)/HUC12_SQKM)
+  }
+  return(xWalk)
+}
+
+siteAtt <- function(VPU){
   ##########################################
   # Creates value-added attributes for structures and features on the network
   # 
   # Arguments:
-  #   hydReg : (character) 2-digit hydrologic region
+  #   VPU : (character) 2-digit hydrologic region
   #
   # Returns:
   #   writes or appends tables for features
   ##########################################
   print ("gage")
-  out_gpkg <- file.path("cache",paste0("GF_", hydReg, ".gpkg"))
+  out_gpkg <- file.path("cache",paste0("GF_", VPU, ".gpkg"))
   
   data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json"))
   
   # Layers
-  allPOIs <- read_sf(out_gpkg, paste0("POIS_", hydReg)) 
-  gagesIII_pois <- read_sf(out_gpkg, paste0("Gages_", hydReg)) # GAGESIII POIs
+  allPOIs <- read_sf(out_gpkg, paste0("POIS_", VPU)) 
+  gagesIII_pois <- read_sf(out_gpkg, paste0("Gages_", VPU)) # GAGESIII POIs
   gageLoc <- read_sf(data_paths$nhdplus_dir, "GageLoc")
-  TE_pois <- read_sf(out_gpkg, paste0("TE_", hydReg)) # Thermoelectric POIs
-  NID_pois <- read_sf(out_gpkg, paste0("NID_", hydReg)) # NID POIs
-  nsegment_raw <- read_sf(out_gpkg, paste0("nsegment_raw_", hydReg)) # Minimally-sufficient network attributed with POI_ID
-  nsegment <- read_sf(out_gpkg, paste0("Nsegment_", hydReg)) # Minimally-sufficient network dissolved by POI_ID
-  WBs_VPU <- read_sf(out_gpkg, paste0("WB_", hydReg)) %>% st_drop_geometry() %>% mutate(COMID = as.integer(COMID))
-  WBout_POIs <- read_sf(out_gpkg, paste0("WBout_", hydReg)) %>% st_drop_geometry()
-  WBin_POIs <- read_sf(out_gpkg, paste0("WBin_", hydReg)) %>% st_drop_geometry()
+  TE_pois <- read_sf(out_gpkg, paste0("TE_", VPU)) # Thermoelectric POIs
+  NID_pois <- read_sf(out_gpkg, paste0("NID_", VPU)) # NID POIs
+  nsegment_raw <- read_sf(out_gpkg, paste0("nsegment_raw_", VPU)) # Minimally-sufficient network attributed with POI_ID
+  nsegment <- read_sf(out_gpkg, paste0("Nsegment_", VPU)) # Minimally-sufficient network dissolved by POI_ID
+  WBs_VPU <- read_sf(out_gpkg, paste0("WB_", VPU)) %>% st_drop_geometry() %>% mutate(COMID = as.integer(COMID))
+  WBout_POIs <- read_sf(out_gpkg, paste0("WBout_", VPU)) %>% st_drop_geometry()
+  WBin_POIs <- read_sf(out_gpkg, paste0("WBin_", VPU)) %>% st_drop_geometry()
   
   #*************************************************************************
   # Attribute gagesIII POIs with adjusted measure and drainage area information
   gagesIII <- read_sf(data_paths$gagesiii_points_path, "GagesIII_gages") %>%
-    filter(grepl(paste0("^", hydReg, ".*"), .data$NHDPlusReg))
+    filter(grepl(paste0("^", VPU, ".*"), .data$NHDPlusReg))
 
   # Load GAGESIII POIs from gpkg at cache dir
   gagesIII_POIs <- select(st_drop_geometry(gagesIII_pois), Type_GagesIII, COMID) %>%
@@ -164,13 +229,13 @@ siteAtt <- function(hydReg){
     inner_join(nsegment_gages, by = "Type_GagesIII") %>%
     select(Type_GagesIII, COMID, Length, Percent, TotDASqKM.y) %>%
     rename(Seg_Length = Length, Seg_Measure = Percent, Adj_NHD_DA = TotDASqKM.y) %>%
-    mutate(VPU = hydReg)
+    mutate(VPU = VPU)
 
   # Compile data into GagesIII folder
   if (file.exists("data/GAGESIII_gages/gagesIII_MDA.rds")){
     # open file and bind to it
     curRDS_gages <- readRDS("data/GAGESIII_gages/gagesIII_MDA.rds") %>% 
-      filter(VPU != hydReg) %>%
+      filter(VPU != VPU) %>%
       bind_rows(gagesIII_stats)
     saveRDS(curRDS_gages, "data/GAGESIII_gages/gagesIII_MDA.rds")
   } else {
@@ -236,12 +301,12 @@ siteAtt <- function(hydReg){
                 select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
     group_by(COMID) %>% mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " ")) %>%
     rename(Downstream_POI = To_POI_ID) %>% 
-    select(-POI_ID) %>% distinct() %>% mutate(VPU = hydReg)
+    select(-POI_ID) %>% distinct() %>% mutate(VPU = VPU)
 
   if (file.exists("data/TE_points/TE_adj.rds")){
     # open file and bind to it
     curRDS_TE <- readRDS("data/TE_points/TE_adj.rds") %>% 
-      filter(VPU != hydReg) %>%
+      filter(VPU != VPU) %>%
       bind_rows(TE_data_final)
     saveRDS(curRDS_TE, "data/TE_points/TE_adj.rds")
   } else {
@@ -314,12 +379,12 @@ siteAtt <- function(hydReg){
     rename(Downstream_POI = To_POI_ID) %>% 
     select(-POI_ID) %>% 
     distinct() %>% 
-    mutate(VPU = hydReg)
+    mutate(VPU = VPU)
 
   if (file.exists("data/NID_points/NID_adj.rds")){
     # open file and bind to it
     curRDS_NID <- readRDS("data/NID_points/NID_adj.rds") %>% 
-      filter(VPU != hydReg) %>%
+      filter(VPU != VPU) %>%
       bind_rows(NID_data_final)
     saveRDS(curRDS_NID, "data/NID_points/NID_adj.rds")
   } else {
@@ -370,7 +435,7 @@ siteAtt <- function(hydReg){
   wbmore_tab <- WBs_VPU %>% select(-c(FDATE, RESOLUTION, Shape_Length, Shape_Area, GNIS_ID, REACHCODE, FTYPE, FCODE, 
                                       PurpCode, PurpDesc, MeanDCode, ONOFFNET, LakeArea)) %>%
     inner_join(wb_pol_tab, by = "COMID")
-  #write.csv(wbmore_tab, paste0("cache/SiteInfo/WBPOIs_r", hydReg, "_tab.csv"))
+  #write.csv(wbmore_tab, paste0("cache/SiteInfo/WBPOIs_r", VPU, "_tab.csv"))
   
   #****************************************************************************
   # Create table of spatial assocation of waterbodies with gages, NID, and TE
@@ -434,7 +499,7 @@ siteAtt <- function(hydReg){
     mutate(COMID=as.integer(COMID)) %>% 
     filter(COMID > 0) %>%
     inner_join(., select(st_drop_geometry(nhd), c(COMID, WBAREACOMI, VPUID)), by = "COMID") %>%
-    filter(grepl(paste0("^", hydReg, ".*"), .data$VPUID)) %>% switchDiv(., nhd)
+    filter(grepl(paste0("^", VPU, ".*"), .data$VPUID)) %>% switchDiv(., nhd)
   
   # gen list of WB NID, put on a table, there may be more than one per WB
   wb_TE <- TE_fac %>% 
@@ -442,7 +507,7 @@ siteAtt <- function(hydReg){
     select(EIA_PLANT_, COMID, WBAREACOMI) 
   
   wb_finTable <- wbmore_tab %>% 
-    left_join(wb_NID, by = c("COMID" = "WBAREACOMI")) %>% mutate(VPU = hydReg)
+    left_join(wb_NID, by = c("COMID" = "WBAREACOMI")) %>% mutate(VPU = VPU)
   
   # create WB table: WB_r"xx_info
   wb_POI_tab <- WBs_VPU %>% rename(WBAREACOMI=COMID) %>%
@@ -457,18 +522,18 @@ siteAtt <- function(hydReg){
                 select(WBAREACOMI, Upstream_gages)) %>%
     left_join(wb_NID) %>% 
     select(WBAREACOMI, Outlet, NIDID) %>%
-    mutate(VPU = hydReg)
+    mutate(VPU = VPU)
   
   if (file.exists("data/NHDPlusNationalData/wbod_stats.rds")){
     # open waterbody file and bind to it
     curRDS_wbod <- readRDS("data/NHDPlusNationalData/wbod_stats.rds") %>% 
-      filter(VPU != hydReg) %>%
+      filter(VPU != VPU) %>%
       bind_rows(wb_finTable)
     saveRDS(curRDS_wbod, "data/NHDPlusNationalData/wbod_stats.rds")
     
     # open POI file and bind to it
     curRDS_POIwbod <- readRDS("data/NHDPlusNationalData/wbod_POIs.rds") %>% 
-      filter(VPU != hydReg) %>%
+      filter(VPU != VPU) %>%
       bind_rows(wb_POI_tab)
     
   } else {
-- 
GitLab