From fc450a7b4fbe6d4d3f7ac0e7a79add86da1d905c Mon Sep 17 00:00:00 2001
From: Andy Bock <abock@usgs.gov>
Date: Wed, 28 Jul 2021 15:33:54 -0600
Subject: [PATCH] gage ranking, vaa DL and join

---
 workspace/02_NHD_navigate.Rmd |  42 +--
 workspace/R/NHD_navigate.R    | 381 ++++++++++-------------
 workspace/R/config.R          |  76 +++--
 workspace/R/utils.R           | 550 ++--------------------------------
 4 files changed, 252 insertions(+), 797 deletions(-)

diff --git a/workspace/02_NHD_navigate.Rmd b/workspace/02_NHD_navigate.Rmd
index 1f56a4f..30c82e6 100644
--- a/workspace/02_NHD_navigate.Rmd
+++ b/workspace/02_NHD_navigate.Rmd
@@ -29,9 +29,10 @@ staged_nhd <- stage_national_data(nhdplus_data = data_paths$nhdplus_gdb,
 nhd_rds <- fix_headwaters(staged_nhd$flowline, gsub("flowline.rds", "flowline_update.rds", staged_nhd$flowline),
                new_atts = data_paths$new_nhdp_atts, nhdpath = data_paths$nhdplus_gdb)
 
+
 # Need DnMinorHyd for switching POIs downstream of watebodies/dams
 nhdplusTools_data_dir(dir = data_paths$nhdplus_dir)
-VAA <- get_vaa(atts = c("dnminorhyd"), path = get_vaa_path(), download = F)
+VAA <- get_vaa(atts = c("dnminorhyd"), path = get_vaa_path(), download = TRUE)
 ```
 
 ```{r huc12 POIs}
@@ -42,7 +43,7 @@ if(needs_layer(nav_gpkg, nav_poi_layer)) {
     
     # Subset NHD by VPU
     nhd <- VPU_Subset(nhd_rds, VPU) %>%
-      left_join(VAA, by = "COMID")
+      left_join(VAA, by = c("COMID" = "comid"))
     
     if("arbolate_sum" %in% names(nhd)) nhd <- rename(nhd, ArbolateSu = arbolate_sum)
     
@@ -55,7 +56,7 @@ if(needs_layer(nav_gpkg, nav_poi_layer)) {
     nhd <- nhd %>% 
       mutate(dend = ifelse(!COMID %in% non_dend, 1, 0),
              poi = ifelse(!COMID %in% non_dend & TotDASqKM >= min_da_km, 1, 0)) 
-      
+    
     write_sf(nhd, nav_gpkg, nhd_flowline)
   } else {
      nhd <- read_sf(nav_gpkg, nhd_flowline)
@@ -65,9 +66,7 @@ if(needs_layer(nav_gpkg, nav_poi_layer)) {
   # Join HUC12 outlets with NHD
   HUC12_COMIDs <- read_sf(data_paths$hu12_points_path, "hu_points") %>% 
     filter(grepl(paste0("^", substr(VPU, start = 1, stop = 2)), .data$HUC12)) %>%
-    #filter(grepl("^02|^04", .data$HUC12)) %>% 
     select(COMID, HUC12) %>%
-    #switchDiv(., nhd) %>%
     # Remove this when HUC12 outlets finished
     group_by(COMID) %>% 
     slice(1)
@@ -78,7 +77,6 @@ if(needs_layer(nav_gpkg, nav_poi_layer)) {
   # Write out geopackage layer representing POIs for given theme
   write_sf(huc12_POIs, nav_gpkg, nav_poi_layer)
   tmp_POIs <- huc12_POIs
-  
 } else {
   # Load HUC12 POIs as the tmpPOIs if they already exist
   tmp_POIs <- read_sf(nav_gpkg, nav_poi_layer) 
@@ -90,24 +88,25 @@ if(needs_layer(nav_gpkg, nav_poi_layer)) {
 if(all(is.na(tmp_POIs$Type_Gages))) { 
 
   # Previously identified streamgages within Gage_Selection.Rmd
-  streamgages <- read_sf("cache/Gages_info.gpkg", "Gages") %>%
-    #filter(source != "GFv11" ) %>%
-    select(COMID, Gage_no) %>%
+  streamgages_VPU <- read_sf("cache/Gages_info.gpkg", "Gages") %>%
     filter(COMID %in% nhd$COMID) %>%
     st_drop_geometry() %>%
-    # these two lines below will not be needed when Marilyn finishes Gage work
-    # Should we keep all streamgages larger than 10 km or stick with min_da_km threshold
-    switchDiv(., nhd) %>%
+    switchDiv(., nhd) 
+  
+  streamgages <- streamgages_VPU %>% 
     group_by(COMID) %>%
-    top_n(n = -1)
-
+    # If multiple gages per COMID, pick one with highest rank as rep POI_ID
+    filter(rank_id == max(rank_id), !is.na(drain_area)) %>%
+    ungroup() %>%
+    select(COMID, Gage_no)
+    
   # Derive GAGE POIs; use NHD as we've alredy filtered by NWIS DA in the Gage selection step
-  tmp_POIs <- POI_creation(streamgages, filter(nhd, dend == 1), "Gages") %>%
+  tmp_POIs <- POI_creation(streamgages, nhd, "Gages") %>%
     addType(., tmp_POIs, "Gages")
   
   # As a fail-safe, write out list of gages not assigned a POI
-  if(nrow(filter(streamgages, !Gage_no %in% tmp_POIs$Type_Gages)) > 0) {
-    write_sf(filter(read_sf("cache/Gages_info.gpkg", "Gages"), !Gage_no %in% tmp_POIs$Type_Gages),
+  if(nrow(filter(streamgages_VPU, !Gage_no %in% tmp_POIs$Type_Gages)) > 0) {
+    write_sf(filter(streamgages_VPU, !Gage_no %in% tmp_POIs$Type_Gages),
              nav_gpkg, "unassigned_gages")
   }
   
@@ -129,7 +128,6 @@ if(all(is.na(tmp_POIs$Type_TE))) {
     mutate(COMID = as.integer(COMID)) %>%
     inner_join(., select(st_drop_geometry(nhd), COMID, VPUID), by = "COMID") %>%
     filter(grepl(paste0("^", substr(VPU, 1, 2), ".*"), .data$VPUID), COMID > 0) %>%
-    #filter(grepl("^02.*|^04.*", data$VPUID), COMID > 0) %>% 
     switchDiv(., nhd) %>%
     group_by(COMID) %>%
     summarize(EIA_PLANT = paste0(unique(EIA_PLANT_), collapse = " "), count = n()) 
@@ -228,12 +226,14 @@ if(all(is.na(tmp_POIs$Type_WBOut))) {
   # Segments that are in waterbodies
   nhd <- mutate(nhd, WB = ifelse(WBAREACOMI > 0 & WBAREACOMI %in% WBs_VPU$COMID, 1, 0))
 
+  # Create waterbody outlet POIs
   wbout_COMIDs <- filter(nhd, dend == 1 & WB == 1) %>%
     group_by(WBAREACOMI) %>%
     slice(which.min(Hydroseq)) %>%
     switchDiv(., nhd) %>%
     select(COMID, WBAREACOMI) 
 
+  # Create waterbody inlet POIs
   wbin_COMIDs <- filter(nhd, WB == 0, 
                         DnHydroseq %in% filter(nhd, WB == 1)$Hydroseq,
                         TotDASqKM >= min_da_km) %>%
@@ -337,7 +337,7 @@ print (paste0(nrow(sub[sub$AreaSqKM == 0,]), " POIs on flowlines with no local d
 
 ```{r POI Collapse}
 #  Load data
-if(needs_layer(nav_gpkg, pois_merge)) {
+if(needs_layer(nav_gpkg, final_poi_layer)) {
 
   #1 Move POIs downstream by category
   out_gages <- POI_move_down(nav_gpkg, final_POIs, nsegments, filter(nhd_Final, !is.na(POI_ID)), "Type_Gages", .05)
@@ -348,11 +348,11 @@ if(needs_layer(nav_gpkg, pois_merge)) {
                 select(COMID, POI_ID), by = "COMID")
 
   # Write out geopackage layer representing POIs for given theme
-  write_sf(out_HUC12$allPOIs, nav_gpkg, pois_merge)
+  write_sf(out_HUC12$allPOIs, nav_gpkg, final_poi_layer)
   write_sf(nhd_Final, nav_gpkg, nhd_flowline)
   write_sf(out_HUC12$segs, nav_gpkg, nsegments_layer)
 } else {
-  final_POIs <- read_sf(nav_gpkg, pois_merge)
+  final_POIs2 <- read_sf(nav_gpkg, final_poi_layer)
   nhd_Final <- read_sf(nav_gpkg, nhd_flowline)
   nsegments <- read_sf(nav_gpkg, nsegments_layer)
 }
diff --git a/workspace/R/NHD_navigate.R b/workspace/R/NHD_navigate.R
index 1d0c751..832e597 100644
--- a/workspace/R/NHD_navigate.R
+++ b/workspace/R/NHD_navigate.R
@@ -1,18 +1,12 @@
-
+#' Network navigation for upstream/downstream from a COMID of interest
+#' @param inCOM  (list) list of input COMIDs 
+#' @param nhdDF (sf data.frame) (data frame) valid data frame of NHD flowlines
+#' @param withTrib (logical) flag for if the upstream navigation should include tributaries
+#              or stick to mainstem level path
+#
+#' @return (list) list of COMIDs upstream of point
 NetworkNav <- function(inCom, nhdDF, withTrib){
-  ##########################################
-  # Network navigation for upstream/downstream from a COMID of interest
-  # 
-  # Arguments:
-  #   inCOM : (list) list of input COMIDs that are 'dangles'
-  #   nhdDF : (data frame) valid data frame of NHD flowlines
-  #   withTrib : (logical) flag for if the upstream navigation should include tributaries
-  #              or stick to mainstem level path
-  #
-  # Returns:
-  #   mydata : (list) list of COMIDs to connect dangle to network
-  ##########################################
-  #print (inCom)
+  
   if (missing(withTrib)){
     seg <- nhdplusTools::get_UM(nhdDF, inCom, include = TRUE)
   } else {
@@ -22,19 +16,15 @@ NetworkNav <- function(inCom, nhdDF, withTrib){
 }
 
 
+#' Identifies and connects dangles in network generated by Network Nav function
+#' @param inCOM  (list) list of input COMIDs 
+#' @param nhdDF (sf data.frame) (data frame) valid data frame of NHD flowlines
+#' @param withTrib (logical) flag for if the upstream navigation should include tributaries
+#              or stick to mainstem level path
+#
+#' @return (list) list of COMIDs connecting dangle to existing network
 NetworkConnection <- function(incom, nhd){
-  ##########################################
-  # Connects dangles in the network that are not
-  #          terminal outlets
-  # 
-  # Arguments:
-  #   inCOM : (list) list of input COMIDs that are 'dangles'
-  #   nhdDF : (data frame) valid data frame of NHD flowlines
-  #
-  # Returns:
-  #   mydata : (list) list of COMIDs to connect dangle to network
-  ##########################################
-  # data frame of connections that need to be made
+
   upnet_DF <- filter(nhd, COMID %in% incom) %>%
     filter(!DnHydroseq %in% Hydroseq)
   
@@ -66,31 +56,26 @@ NetworkConnection <- function(incom, nhd){
 }
 
 
+#' Switches valid POIs from minor to major path divergences
+#' @param inSegs  (list) list of input COMIDs representing POIs
+#' @param nhdDF (sf data.frame) (data frame) valid data frame of NHD flowlines
+#' 
+#' @return (sf data.frame) Corresponding major path COMID for POI
 switchDiv <- function(insegs, nhd){
-  ##########################################
-  # Swith divergence path for POIs from minor (2) to major (1)
-  # reduces the number of short, spurious segments and POIS as locations such as waterbody outlets
-  # 
-  # Arguments:
-  #   inSegs : (list) list of input COMIDs that are 'dangles'
-  #   nhd : (data frame) valid data frame of NHD flowlines
-  #
-  # Returns:
-  #   mydata : (list) list of COMIDs for major diversions
-  ##########################################
+
   div_segs <- filter(nhd, COMID %in% insegs$COMID)
   if (2 %in% div_segs$Divergence){
     print ("Switching divergence to other fork")
     
     # Look Upstream
-    upstream <- st_drop_geometry(nhd) %>% 
+    upstream <- st_drop_geometry(nhdDF) %>% 
       inner_join(st_drop_geometry(div_segs) %>% 
                    filter(Divergence == 2) %>% 
                    select(COMID, Hydroseq), 
                  by = c("DnMinorHyd" = "Hydroseq"))
     
     # From Upstream Segment switch POI to the downstream major/main path
-    insegs_maj <- st_drop_geometry(nhd) %>% 
+    insegs_maj <- st_drop_geometry(nhdDF) %>% 
       inner_join(upstream %>% select(COMID.y, DnHydroseq), 
                  by = c("Hydroseq" = "DnHydroseq")) %>% 
       select(COMID, COMID.y)
@@ -105,25 +90,21 @@ switchDiv <- function(insegs, nhd){
   return(insegs)
 }
 
-POI_creation<-function(srcData, nhd, IDfield){
-  ##########################################
-  # Creates POIs for a given data theme
-  # 
-  # Arguments:
-  #   srcData : (data frame) DF with two columns: 
-  #             1 - COMID
-  #             2 - Unique ID value for POI (Streamgage ID, etc.)
-  #   nhd : (data frame) valid data frame of NHD flowlines
-  #   IDfield : (character) Name of 'Type' field to be modified in POI 
-  #
-  # Returns:
-  #   mydata : (simple features) POIs for the specific data theme
-  ##########################################
-  
+
+#' Creates POIs for a given data theme
+#' @param srcData  (data.frame) (data frame) DF with two columns: 
+#             1 - COMID
+#             2 - Unique ID value for POI (Streamgage ID, etc.)
+#' @param nhdDF (sf data.frame) valid data frame of NHD flowlines
+#' @param IDfield character) Name of 'Type' field to be modified in POI 
+#' 
+#' @return (sf data.frame) OIs for the specific data theme
+POI_creation<-function(srcData, nhdDF, IDfield){
+
   # Give generic ID to Identity field
   colnames(srcData) <- c("COMID", "ID")
   
-  sub_segs <- filter(nhd, COMID %in% srcData$COMID) 
+  sub_segs <- filter(nhdDF, COMID %in% srcData$COMID) 
 
   POIs <- sub_segs %>% 
     get_node(., position = "end") %>% 
@@ -142,20 +123,17 @@ POI_creation<-function(srcData, nhd, IDfield){
   return(POIs)
 }
 
+#' Adds the type attribute for co-located POIs of multiple themes
+#' @param new_POIs (sf data.frame) new POIs to be tested against existing
+#' @param POIs  (sf data.frame) Existing POIs
+#' @param IDfield (character) Name of 'Type' field to be modified in existing POI 
+#' @param bind (logical) whether to bind non co-located POIs to data frame or just
+#'             attribute existing POIs 
+#' 
+#' @return (sf data.frame) xisting POIs with Type field modified for
+#            duplicate POIs for given data theme
 addType<-function(new_POIs, POIs, IDfield, bind){
-  ##########################################
-  # Checks if existing POIs are co-located with new POI set
-  #        Adds the type attribute for co-located POIs of multiple themes
-  # 
-  # Arguments:
-  #   POIs : (data frame) Existing POIs
-  #   newPOIs: (data frame) newly-derived POIs of a given data theme
-  #   IDfield : (character) Name of 'Type' field to be modified in POI 
-  #
-  # Returns:
-  #   mydata : (data frame) Existing POIs with Type field modified for
-  #            duplicate POIs for given data theme
-  ##########################################
+
   new_POIs <- st_compatibalize(new_POIs, POIs) 
   
   if(paste0("Type_", IDfield) %in% colnames(POIs)){
@@ -191,21 +169,15 @@ addType<-function(new_POIs, POIs, IDfield, bind){
   return(POIs_fin)
 }
 
-segment_increment <- function(nhd, POIs){
-  ##########################################
-  # Creates raw and dissolved segment layers with necessaary
-  #         upstream/downstream routing attribution
-  # 
-  # Arguments:
-  #   nhd : (data frame) minimally-sufficient flowlines that connect POIs
-  #          and meet requirements of NHDPlusTools
-  #   POIs : (data frame) Raw POI data frame
-  #   Field : (character) ID field of unique Segment identifier
-  #  
-  # Returns:
-  #   segList : (data frame) raw segments
-  #             
-  ##########################################
+#' Creates raw and dissolved segment layers with necessaary
+#         upstream/downstream routing attribution
+#'  @param nhdDF (sf data.frame) valid data frame of NHD flowlines
+#'  @param POIs  (sf data.frame) Existing POIs
+#' 
+#' @return (sf data.frame) data.frame of segments connecting POIs attributed
+#'         with POI_ID for each upstream flowpath
+segment_increment <- function(nhdDF, POIs){
+
   ptm<-proc.time()
   
   seg_POIs <- arrange(POIs, desc(LevelPathI), desc(Hydroseq)) %>%
@@ -218,7 +190,7 @@ segment_increment <- function(nhd, POIs){
     ungroup()
   
   # Add an empty field for POI_Id population
-  nhd <- mutate(nhd, POI_ID = 0)
+  nhdDF <- mutate(nhdDF, POI_ID = 0)
   
   POI_ID_assign <- function(i, seg_POIs, nhd){
     ##########################################
@@ -256,8 +228,8 @@ segment_increment <- function(nhd, POIs){
   library(parallel)
   library(dplyr)
   clust <- makeCluster(4)
-  POI_list <- parLapply(clust, c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhd))
-  #POI_list <- lapply(c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhd))
+  POI_list <- parLapply(clust, c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhdDF))
+  #POI_list <- lapply(c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhdDF))
   stopCluster(clust)
   
   inc_segs <- data.table::rbindlist(POI_list)
@@ -266,27 +238,18 @@ segment_increment <- function(nhd, POIs){
   return(inc_segs)
 }
 
-
-segment_creation <- function(nhd, routing_fix){
-  ##########################################
-  # Creates raw and dissolved segment layers with necessaary
-  #         upstream/downstream routing attribution
-  # 
-  # Arguments:
-  #   inSegs : (data frame) segments with incremental IDs (POI_ID populated)
-  #   nhd : (data frame) full nhd data frame
-  #   routing_fix : (logical) routing fixes if available
-  #  
-  # Returns:
-  #   segList : (data frame) raw segments
-  #             
-  ##########################################
-  
-  if(!"StartFlag" %in% names(nhd)) {
-    nhd$StartFlag <- ifelse(nhd$COMID %in% nhd$tocomid, 0, 1)
+#' Creates finalized segments and routing
+#'  @param nhdDF (sf data.frame) valid data frame of NHD flowlines
+#'  @param routing_fix  (sf data.frame) any additional routing fixes
+#' 
+#' @return (sf data.frame) data.frame of segments
+segment_creation <- function(nhdDF, routing_fix){
+  
+  if(!"StartFlag" %in% names(nhdDF)) {
+    nhdDF$StartFlag <- ifelse(nhdDF$COMID %in% nhdDF$tocomid, 0, 1)
   }
   
-  in_segs <- filter(nhd, !is.na(POI_ID))
+  in_segs <- filter(nhdDF, !is.na(POI_ID))
   
   # If there are routing fixes to account for if a POI with a DA of 0 is moved upsream or downstream
   if (missing(routing_fix) == FALSE){
@@ -294,7 +257,7 @@ segment_creation <- function(nhd, routing_fix){
       rename(COMID = oldPOI, new_COMID = COMID)
     
     # Above we generated the network using the initial set of POIs; here we crosswalk over the old COMIDs to the new
-    nhd_fix <- nhd %>%
+    nhd_fix <- nhdDF %>%
       left_join(routing_fix %>%
                    select(COMID, new_COMID), by = c("POI_ID" = "COMID")) %>%
       mutate(POI_ID = ifelse(is.na(new_COMID), POI_ID, new_COMID)) %>%
@@ -316,7 +279,7 @@ segment_creation <- function(nhd, routing_fix){
   
   # produce a short data frame for populating TO_POI for downstream segment
   to_from <- filter(st_drop_geometry(in_segs)) %>%
-    left_join(filter(st_drop_geometry(nhd), !is.na(POI_ID)) %>% 
+    left_join(filter(st_drop_geometry(nhdDF), !is.na(POI_ID)) %>% 
                 select(COMID, Hydroseq, POI_ID), by = c("DnHydroseq" = "Hydroseq")) %>%
     select(COMID.x, Hydroseq, DnHydroseq, POI_ID.y) %>%
     rename(To_POI_ID = POI_ID.y) 
@@ -329,18 +292,14 @@ segment_creation <- function(nhd, routing_fix){
   return(list(diss_segs = nsegments_fin, raw_segs = in_segs))
 }
 
+#' Moves POI Upstream or downstream if it falls on COMID
+#       of flowline with no corresponding catchment
+#'  @param POIs_wgeom (sf data.frame) POIs
+#'  @param nhdDF  (sf data.frame) valid data frame of NHD flowlines
+#' 
+#' @return (sf data.frame) data.frame of POIs with new COMID associated
 DS_poiFix <- function(POIs_wgeom, nhd){
-  ##########################################
-  # Moves POI Upstream or downstream if it falls on COMID
-  #       of flowline with no corresponding catchment
-  #
-  # Arguments:
-  #   POIs : (data frame) POI data set
-  #   nhd : (data frame) valid data frame of NHD flowlines
-  #
-  # Returns:
-  #   repPOIs_unique : (data frame) DF of POIs with new COMID associated
-  ##########################################
+
   POIs <- st_drop_geometry(POIs_wgeom) %>%
     arrange(COMID)
 
@@ -388,31 +347,26 @@ DS_poiFix <- function(POIs_wgeom, nhd){
 }
 
 
-movePOI_NA_DA <- function(poi_fix, nhd){
-  ##########################################
-  # Move POIs that fall on flowlines with no catchment upstream/downstream
-  #  to adjacent flowline with most similar total drainage area. Called from 
-  #  DS_poi_fix function above
-  # 
-  # Arguments:
-  #   poi_fix : (data frame) POI data set of COMIDs to be changed
-  #   nhd : (data frame) valid data frame of NHD flowlines
-  #
-  # Returns:
-  #   newPOI : (data frame) DF of POIs with new COMID associated
-  ##########################################
+#' Move POIs that fall on flowlines with no catchment upstream/downstream
+#     to adjacent flowline with most similar total drainage area. Called from 
+#     DS_poi_fix function above
+#'  @param poi_fix (data.frame) POI data set of COMIDs to be changed
+#'  @param nhdDF  (sf data.frame) valid data frame of NHD flowlines
+#' 
+#' @return (data frame) DF of POIs with new COMID associated
+movePOI_NA_DA <- function(poi_fix, nhdDF){
   
   # Closest POI/US/DS
-  up_segs <- nhdplusTools::get_UM(nhd, poi_fix, sort=T)
-  seg2fix <- filter(nhd, COMID == poi_fix)
+  up_segs <- nhdplusTools::get_UM(nhdDF, poi_fix, sort=T)
+  seg2fix <- filter(nhdDF, COMID == poi_fix)
   
   # Sorted results and filter out all flowlines w/o catchments
-  upstuff <- filter(nhd, COMID %in% unlist(up_segs)) %>% 
+  upstuff <- filter(nhdDF, COMID %in% unlist(up_segs)) %>% 
     arrange(factor(COMID, levels = up_segs)) %>%
     filter(AreaSqKM > 0)
   
-  down_segs <- nhdplusTools::get_DM(nhd, poi_fix, sort=T)
-  downstuff <- filter(nhd, COMID %in% unlist(down_segs)) %>% 
+  down_segs <- nhdplusTools::get_DM(nhdDF, poi_fix, sort=T)
+  downstuff <- filter(nhdDF, COMID %in% unlist(down_segs)) %>% 
     arrange(factor(COMID, levels = down_segs)) %>%
     filter(AreaSqKM > 0)
   
@@ -436,24 +390,18 @@ movePOI_NA_DA <- function(poi_fix, nhd){
   return(new_POI)
 }
 
-#out_gages <- POI_move_down(nav_gpkg, final_POIs, nsegments_fin, filter(nhd_Final, !is.na(POI_ID)), "Type_Gages", .05)
+#' Collaposes/merges POIs together based on drainage area ratio and data theme
+#'  @param out_gpkg (geopackage) output geopackage to write intermediate results to
+#'  @param POIs  (sf data.frame) Original  POIs
+#'  @param Seg  (sf data.frame) dissolved segments
+#'  @param Seg2  (sf data.frame) (data frame) raw segments/flowlines
+#'  @param exp  (string) Type of thematic POI being moved round
+#'  @param DA_diff  (float) threshold of drainage area difference to
+#             consider when merging two POIs
+#'  
+#' @return (list of data.frames) 1 - New set of POIs
+#          2/3 - correpsonding segments (both raw and dissolved)
 POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
-  ##########################################
-  # Moves down/up POIs based on data theme
-  # 
-  # Arguments:
-  #   out_gpkg : (geopackage) Output geopackage
-  #   POIs: (data frame) Original  POIs
-  #   Seg : (data frame) dissolved segments
-  #   Seg2 : (data frame) raw segments
-  #   exp : (string) Type of thematic POI being moved round
-  #   DA_diff : (float) threshold of drainage area difference to
-  #             consider when merging two POIs
-  #
-  # Returns:
-  #   list : 1 - New set of POIs
-  #          2/3 - correpsonding segments (both raw and dissolved)
-  ##########################################
   
   POIs <- POIs %>% mutate_if(is.numeric, function(x) ifelse(is.infinite(x), NA, x))
   
@@ -471,7 +419,7 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
     # select and re-order
     select(POI_ID, HW, To_POI_ID, Type_HUC12, Type_Gages, Type_TE, Type_NID, Type_WBIn, Type_WBOut, Type_Conf, DAR, IncDA)
 
-  # downstream POIs and their drainage area ratios and incremental drainage areas
+  # upstream POIs and their drainage area ratios and incremental drainage areas
   up_POIs <- filter(POIs, is.na(Type_Conf))
   segs_up <- Seg %>% inner_join(select(filter(st_drop_geometry(.), POI_ID %in% up_POIs$COMID), 
                                        c(POI_ID, To_POI_ID, TotalDA, TotalLength)), 
@@ -487,24 +435,27 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
     # select and re-order
     select(POI_ID, HW, From_POI_ID, Type_HUC12, Type_Gages, Type_TE, Type_NID, Type_WBIn, Type_WBOut, Type_Conf, DAR, IncDA)
   
+  # Filter by POI Type, this removes the POI theme 
+  Types <- c("Type_Gages", "Type_HUC12", "Type_TE", "Type_Conf", "Type_NID", "Type_WBIn", "Type_WBOut")
+  Types <- Types[Types != exp]
+  
+  # Gages coupled with confluences, waterbodies, do not move these
+  static_POIs <- POIs %>% 
+    filter(!is.na(Type_Gages) & (!is.na(Type_Conf) | !is.na(Type_WBOut) | !is.na(Type_WBIn)))
+  
   # compiled list of POIs to move up or down
-  seg_choices <- POIs %>%
+  seg_choices <- filter(POIs, !COMID %in% static_POIs) %>%
     left_join(st_drop_geometry(segs_down) %>%
                 select(POI_ID, To_POI_ID, DAR, IncDA), by = c("COMID" = "POI_ID")) %>%
     left_join(st_drop_geometry(segs_up) %>%
                 select(POI_ID, From_POI_ID, DAR, IncDA), by = c("COMID" = "POI_ID")) %>%
     filter(!is.na(To_POI_ID) | !is.na(From_POI_ID)) %>%
     filter(IncDA.x < min_da_km | IncDA.y < min_da_km)
-
-  # Filter by POI Type
-  Types <- c("Type_Gages", "Type_TE", "Type_Conf", "Type_NID", "Type_WBIn", "Type_WBOut")
-  Types <- Types[Types != exp]
-  # Gages coupled with confluences, waterbodies, do not move these
-  static_POIs <- POIs %>% 
-    filter(!is.na(Type_Gages) & (!is.na(Type_Conf) | !is.na(Type_WBOut) | !is.na(Type_WBIn)))
   
+  # We only want to move POIs that are not coupled within another theme
   seg_sub <- seg_choices %>% 
     filter_at(Types, all_vars(is.na(.))) %>%
+    # Don't want to move the static POIS either
     filter(!COMID %in% static_POIs$COMID) %>%
     select (COMID, To_POI_ID, DAR.x, IncDA.x, From_POI_ID, DAR.y, DAR.y, IncDA.y) %>%
     st_sf()
@@ -519,9 +470,8 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
            Move = ifelse(DirDA == "Down", To_POI_ID, From_POI_ID))
   
   # POIs whose moves correspond to each others locations
-  Mult <- select(segsub_dir, COMID, Move) %>% 
-    inner_join(st_drop_geometry(.) %>% 
-                 select(COMID, Move), by = c("Move" = "COMID"), suffix = c(".A", ".B")) 
+  Mult <- select(segsub_dir, -c(To_POI_ID, From_POI_ID)) %>% #select(segsub_dir, COMID, Move) %>% 
+    inner_join(st_drop_geometry(.), by = c("Move" = "COMID"), suffix = c(".A", ".B")) 
   
   # Basicly just chose one of the pair in Mult.
   segsub_dir <- filter(segsub_dir, !COMID %in% Mult$Move)
@@ -530,6 +480,7 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
   move_POI <- filter(POIs, COMID %in% segsub_dir$COMID)
   # POIs that are stationary and their infomration will be merged with upstream  POIs
   stationary_POI <- filter(POIs, !COMID %in% move_POI$COMID)
+  
   # Final Set to be merged with that don't involve either sets above
   #FinalPOI <- POIs %>% filter(!COMID %in% SegSub_Dir$COMID & !COMID %in% SegSub_Dir$Move) %>%
   #  mutate(merged_COMID = NA)
@@ -542,33 +493,43 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
   merged_POIs <- stationary_POI %>%
     inner_join(movedownPOI_withatt,
                by = c("COMID" = "Move"), suffix = c(".x", ".y")) %>%
+    mutate_all(., list(~na_if(.,""))) %>%
+    # Don't overwrite existing gages or HUC12s
+    filter(is.na(!!as.symbol(paste0(exp, ".x")))) %>%
     # Bring over relevant attributes
     mutate(Type_HUC12 = ifelse(is.na(Type_HUC12.y), Type_HUC12.x, Type_HUC12.y)) %>%
-    mutate(Type_Gages_A = ifelse(is.na(Type_Gages.x) & !is.na(Type_Gages.y), Type_Gages.y, Type_Gages.x)) %>%
-    # Gages_B is incase there are two gages being merged together, not writing out for now
-    mutate(Type_Gages_B = ifelse(is.na(Type_Gages.y) & !is.na(Type_Gages.x), Type_Gages.y, NA)) %>%
+    mutate(Type_Gages = ifelse(is.na(Type_Gages.x), Type_Gages.y, Type_Gages.x)) %>%
     mutate(Type_WBIn = ifelse(is.na(Type_WBIn.y), Type_WBIn.x, Type_WBIn.y)) %>% 
     mutate(Type_WBOut = ifelse(is.na(Type_WBOut.y), Type_WBOut.x, Type_WBOut.y)) %>%
     mutate(Type_TE = ifelse(is.na(Type_TE.y), Type_TE.x, Type_TE.y)) %>%
     mutate(Type_NID = ifelse(is.na(Type_NID.y), Type_NID.x, Type_NID.y)) %>%
     mutate(Type_Conf = ifelse(is.na(Type_Conf.y), Type_Conf.x, Type_Conf.y)) %>%
     mutate(oldPOI = COMID.y, to_com = NA) %>% 
+    select(COMID, Type_HUC12, Type_Gages, Type_WBIn, Type_WBOut, 
+           Type_TE, Type_NID, Type_Conf, DirDA, oldPOI = COMID.y) %>%
+    group_by(COMID) %>%
+    summarize_all(~paste(unique(na.omit(.)), collapse = ',')) %>%
     st_sf()
+    
+  print(colnames(POIs))
+  print(colnames(merged_POIs))
   
   # Bring new POI data over to new data
-  fin_POIs <- filter(POIs, !COMID %in% merged_POIs$oldPOI) %>% 
-    left_join(st_drop_geometry(merged_POIs) %>% select(COMID, Type_Gages_A), by = "COMID") %>%
-    mutate(Type_Gages = ifelse(!is.na(Type_Gages_A), Type_Gages_A, Type_Gages)) %>% select(-Type_Gages_A)
+  fin_POIs <- filter(POIs, !COMID %in% c(merged_POIs$oldPOI, merged_POIs$COMID)) %>% 
+    rbind(merged_POIs %>% select(-c(DirDA, oldPOI)))
+    #inner_join(st_drop_geometry(merged_POIs) %>% select(COMID, Type_Gages_A), by = "COMID") %>%
+    #mutate(Type_Gages = ifelse(!is.na(Type_Gages_A), Type_Gages_A, Type_Gages)) %>% select(-Type_Gages_A)
   
   changed_POIs <- POIs %>%
-    inner_join(select(st_drop_geometry(merged_POIs), COMID, oldPOI, to_com)) %>%
-    select(COMID, oldPOI, Type_HUC12, Type_Gages, Type_TE, Type_NID, Type_WBIn, Type_WBOut, Type_Conf, to_com) %>%
+    inner_join(select(st_drop_geometry(merged_POIs), COMID, oldPOI)) %>%
+    select(COMID, oldPOI, Type_HUC12, Type_Gages, Type_TE, Type_NID, Type_WBIn, Type_WBOut, Type_Conf) %>%
+    mutate(to_com = COMID) %>%
     st_compatibalize(., read_sf(out_gpkg, poi_moved))
   st_write(changed_POIs, out_gpkg, poi_moved, append = TRUE) # write_sf not appending?
   
   # Format POI moves to table
   xWalk <- select(st_drop_geometry(segsub_dir), Move, COMID) %>%
-    filter(!is.na(Move)) %>%
+    filter(!is.na(Move), Move %in% merged_POIs$COMID) %>%
     rename(COMID = Move, oldPOI = COMID)
   st_write(xWalk, out_gpkg, "collapse_xWalk", append = TRUE) # write_sf not appending?
 
@@ -580,68 +541,24 @@ POI_move_down<-function(out_gpkg, POIs, Seg, Seg2, exp, DA_diff){
   return (list(allPOIs = fin_POIs, segs = newSegs$diss_segs, FL = newSegs$raw_segs))
 }
 
-writePOIs <- function(POIs, out_gpkg, Type){
-  ##########################################
-  # Write out final POI datasets with information
-  # 
-  # Arguments:
-  #   POIs : (data frame) POI data set 
-  #   out_gkpg : (geopackage) Geopackage where final POI layers written
-  #   Type : (character) Type of POI being written; default is write features for all types
-  #
-  # Returns:
-  #   finPOIs : (data frame) DF of final POIs 
-  ##########################################
-  
-  print ("Writing out final POIs")
-  # If type is missing write out all flowlines
-  if (missing(Type)){
-    print ("Writing out all POI Types")
-    lyrs <- st_layers(out_gpkg)
-    # get subcategory of POIs
-    POI_names <- lyrs$name[!is.na(lyrs$geomtype) & lyrs$geomtype== "Point"]
-    print (POI_names)
-    for (poi in POI_names){
-      print (poi)
-      poi_type <- unlist(strsplit(poi, "_"))[1]
-      sub_POIs <- POIs %>%
-        filter(!is.na(!!as.name(paste0("Type_", poi_type))))
-      
-      write_sf(sub_POIs, out_gpkg, poi)
-    }
-  } else {
-    sub_POIs <- POIs %>%
-      filter(!is.na(!!as.name(paste0("Type_", Type))))
-    write_sf(sub_POIs, out_gpkg, poi)
-  }
-}
-
-
-structPOIsNet <- function(nhd, final_POIs){
-  ##########################################
-  # Produce the structural POIs
-  # 
-  # Arguments:
-  #   ncombined : (data frame) final Segments 
-  #   nhd : (data frame) valid data frame of NHD flowlines
-  #   finalPOIs :  (data frame) final POIs
-  #   out_gkpg : (geopackage) Geopackage where final POI layers written
-  #
-  # Returns:
-  #   writes Structural POIs and segments to geopackage 
-  ##########################################
+#' Identifies and attributes structural POIs
+#'  @param nhdDF (sf data.frame) valid data frame of NHD flowlines
+#'  @param final_POIs  (sf data.frame) final POIs
+#' 
+#' @return (data.frame columns) 1/0 Columns indicating structural POIs and the structural network
+structPOIsNet <- function(nhdDF, final_POIs){
   
   # subset to flowlines used for nsegments
-  inc_segs <- filter(nhd, !is.na(POI_ID))
+  inc_segs <- filter(nhdDF, !is.na(POI_ID))
   
   # Terminal outlets from the initial network
-  termout <- filter(nhd, !Hydroseq %in% DnHydroseq & TerminalFl == 1 & !COMID %in% final_POIs$COMID)
+  termout <- filter(nhdDF, !Hydroseq %in% DnHydroseq & TerminalFl == 1 & !COMID %in% final_POIs$COMID)
   
   # POIs that are also terminal outlets
-  out_POIs <- filter(nhd, COMID %in% final_POIs$COMID & TerminalFl == 1)
+  out_POIs <- filter(nhdDF, COMID %in% final_POIs$COMID & TerminalFl == 1)
   
   # Confluence POIs
-  conf_POIs <- filter(inc_segs, COMID %in% final_POIs$COMID[final_POIs$Type_Conf == 1])
+  conf_POIs <- filter(inc_segs, COMID %in% final_POIs$COMID[!is.na(final_POIs$nhd_FfType_Conf)])
   
   # Waterbody outlet POIs
   wb_POIs <- filter(inc_segs, COMID %in% final_POIs$COMID[!is.na(final_POIs$Type_WBOut) | !is.na(final_POIs$Type_WBIn)])
@@ -650,6 +567,10 @@ structPOIsNet <- function(nhd, final_POIs){
   struc_flines <- termout %>% 
     bind_rows(out_POIs, conf_POIs, wb_POIs) %>% 
     arrange(COMID)
+  
+  struc_flines <- struc_flines[!st_is_empty(struc_flines), , drop = F] %>%
+    st_cast("LINESTRING")
+  
   struc_POIs <- get_node(struc_flines, position = "end") %>% 
     mutate(COMID = struc_flines$COMID, LevelPathI = struc_flines$LevelPathI) 
   
@@ -665,7 +586,7 @@ structPOIsNet <- function(nhd, final_POIs){
   }
   
   # final stuctural POIs, order by COMID to match with strucPOIs
-  final_struct <- filter(nhd, Hydroseq %in% lp_POIs) %>% 
+  final_struct <- filter(nhdDF, Hydroseq %in% lp_POIs) %>% 
     arrange(COMID)
   struct_POIs <- get_node(final_struct, position = "end") %>% 
     mutate(COMID = final_struct$COMID) 
@@ -674,7 +595,7 @@ structPOIsNet <- function(nhd, final_POIs){
   final_net <- unique(unlist(lapply(unique(final_struct$COMID), NetworkNav, st_drop_geometry(nhd))))
   
   # Subset NHDPlusV2 flowlines to navigation results and write to shapefile
-  structnet <- filter(nhd, COMID %in% final_net & grepl(paste0("^", VPU, ".*"), .data$VPUID)) 
+  structnet <- filter(nhdDF, COMID %in% final_net & grepl(paste0("^", VPU, ".*"), .data$VPUID)) 
   
   # Write out minimally-sufficient network and POIs as a list
   return(list(struc_POIs = struc_POIs, structnet = structnet))
diff --git a/workspace/R/config.R b/workspace/R/config.R
index ff39b48..011a667 100644
--- a/workspace/R/config.R
+++ b/workspace/R/config.R
@@ -69,7 +69,6 @@ nsegments_layer <- paste0("nsegment_", VPU) # Minimally-sufficient network disso
 pois_all <- paste0("POIs_", VPU) # All POIs binded together
 poi_xWalk <- paste0("poi_xWalk_", VPU) # POIs that changed COMIDS during the navigate part of the workflow
 final_poi_layer <- paste0("final_POIS_", VPU)
-pois_merge <- paste0("merPOIs_", VPU) # All POIs binded together
 
 # Defined during Refactor
 rpu <- paste0("rpu_", rpu_code)
@@ -87,11 +86,9 @@ refactor_table <- "refactor_lookup"
 agg_fline_layer <- "agg_fline"
 agg_cats_layer <- "agg_cats"
 outlets_layer <- "outlets"
+mapped_outlets_layer <- "mapped_outlets"
 lookup_VPU <- paste0(VPU, "_lookup")
 
-# Defined during Nondend
-ND_gpkg <- file.path("cache", paste0("ND_", VPU,".gpkg"))
-
 # Making an "events" script to address "events" information
 #     i.e. gages, NID, TE Plants, waterbodies
 gage_gpkg <- "cache/Gage_info.gpkg"
@@ -100,43 +97,29 @@ VPU_gage_Table <- file.path("cache", paste0("R", VPU, "_Gages.csv"))
 CONUS_gage_Table <- "data/gages_MDA.rds"
 
 # Defined during NonDend.Rmd
-fin_gpkg <- file.path("cache", paste0(VPU, "_final.gpkg"))
+ND_gpkg <- file.path("cache", paste0("ND_", VPU,".gpkg"))
 lookup_miss <- paste0("lookup_missing_", VPU)
+divides_xwalk <- paste0("divides_nhd_", VPU)
 missing_cats <- paste0("miss_cats_", VPU)
-protoHRUs <- paste0("poi_cats_", VPU)
+missing_terms <- paste0("miss_terms_", VPU)
+HRU_layer <- paste0("poi_cats_", VPU)
 cat_boundary <- paste0("boundaries_", VPU)
 
-gf_gpkg <- nav_gpkg
-poi_layer <- paste0("POIs_", VPU)
-
-
 # parallel factor for catchment reconciliation.
-para_reconcile <- 2
-para_split_flines <- 2
+para_reconcile <- 4
+para_split_flines <- 4
 
 data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json"))
-# nhdplus_path(data_paths$nhdplus_gdb)
-# staged_nhd <- stage_national_data(nhdplus_data = data_paths$nhdplus_gdb, 
+#nhdplus_path(data_paths$nhdplus_gdb)
+#staged_nhd <- stage_national_data(nhdplus_data = data_paths$nhdplus_gdb, 
 #                                   output_path = data_paths$nhdplus_dir)
-# Output Layers
-nhd_outlet <- "nhd_outlet"
-nhd_catchment <- "nhd_catchment"
-full_catchment <- "full_catchment"
-refactored_layer <- "collapsed"
-reconciled_layer <- "reconciled"
-gf_outlets_layer <- "gf_outlets"
-divide_layer <- "divides"
-refactor_table <- "refactor_lookup"
-agg_fline_layer <- "agg_fline"
-agg_cats_layer <- "agg_cats"
-outlets_layer <- "outlets"
-lookup_VPU <- paste0(VPU, "_lookup")
+
 
 # Manually verified
 crs_out <- st_crs('PROJCS["NAD83 / Conus Albers",GEOGCS["NAD83",DATUM["North American Datum 1983",SPHEROID["GRS 1980",6378137.0,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[1.0,1.0,-1.0,0.0,0.0,0.0,0.0],AUTHORITY["EPSG","6269"]],PRIMEM["Greenwich",0.0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.017453292519943295],AXIS["Geodetic longitude",EAST],AXIS["Geodetic latitude",NORTH],AUTHORITY["EPSG","4269"]],PROJECTION["Albers Equal Area",AUTHORITY["EPSG","9822"]],PARAMETER["central_meridian",-96.0],PARAMETER["latitude_of_origin",23.0],PARAMETER["standard_parallel_1",29.5],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["standard_parallel_2",45.5],UNIT["m",1.0],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","5070"]]')
 
 # Gages checked after 01_Gage_Selection.Rmd
-gages_checked <- c("02492360", "05537000", "05590000", "06244500", "06285400", 
+gages_add <- c("02492360", "05537000", "05590000", "06244500", "06285400", 
 "06632400", "06659500", "06752000", "08302500", "09089500", "09235600", 
 "09279150", "09307500", "09343300", "09345200", "09537200", "10145400", 
 "10173450", "10194200", "10270830", "10293500", "11055501", "11055801", 
@@ -156,4 +139,39 @@ gages_checked <- c("02492360", "05537000", "05590000", "06244500", "06285400",
 "11389780", "11396395", "11474780", "12051900", "12052200", "12099100", 
 "12209490", "12320700", "12447387", "12514095", "13012490", "13084400", 
 "13203510", "13292280", "13293350", "13294600", "13294640", "13303500", 
-"13316530", "13329770", "13342250", "50126150")
\ No newline at end of file
+"13316530", "13329770", "13342250", "50126150")
+
+comid_changes <- data.frame(gages = c("06076560", "09077200", "09520700", "09527900", "11023450"),
+                                    new_comid = c(12420373, 1327427, 20379283, 945030101, 20331218))
+
+
+gages_remove <- c("01484668", "255656081285000", "262007080321500", "02291715", 
+"02287400", "255754081314000", "02291717", "261017080565100", "255601081265300", 
+"02259627", "255738081300100", "02237733", "02274505", "02291670", "261004081082000", 
+"260823080524100", "02293263", "255843081333000", "02298928", "02294747", "02310286", 
+"02244690", "02321900", "02244651", "02246522", "02244601", "02240988", "02326280", 
+"02240982", "022409424", "02326993", "04087101", "04087113", "04085492", "04253294", 
+"04253296", "03352988", "03075650", "07010035", "07010034", "07010094", "07010030", 
+"054279449", "431655089393801", "05403042", "05412056", "073676607", "07380238", 
+"07369654", "05075720", "07032244", "07030295", "05075930", "05075694", "05056636", 
+"06720280", "06720255", "06720285", "05019000", "08075605", "08330940", "08329918", 
+"08329838", "08331105", "08061551", "08329939", "08480594", "08331130", "08330200", 
+"08055700", "08226600", "08329930", "08331140", "09319001", "090955285", "09095526", 
+"09152650", "09095528", "09106104", "09077300", "09095529", "09152600", "09429070", 
+"09522680", "09424150", "09512165", "09534550", "09479502", "09522660", "09423553", 
+"09531850", "09483010", "09533000", "09531900", "09512407", "09419745", "10310402", 
+"10293048", "10311250", "10243630", "10311260", "10302145", "10172791", "10312277", 
+"10336039", "10243640", "10312270", "10172220", "10141400", "10172650", "411403112200801", 
+"14066500", "14349500", "10127109", "13091733", "13157325", "13154510", "13152940", 
+"13155074", "13090999", "13093500", "13082060", "1315377299", "13091000", "13134556", 
+"13134520", "13157293", "131610556", "13095000", "13090998", "13069532", "13170350", 
+"13203510", "13172840", "12509612", "12473502", "12473880", "12509614", "12473560", 
+"12435840", "12471724", "12466101", "12473900", "12472950", "12472300", "12472380", 
+"12200730", "12212430", "11262890", "11060901", "10260776", "10260780", "11262895", 
+"10259920", "11108135", "11062411", "11097260", "10264555", "11062200", "10260778", 
+"11063682", "11073200", "11070263", "11208818", "11062401", "11236080", "11109396", 
+"10264675", "11208800", "11208565", "380626121383803", "11465690", "380629121385203", 
+"11452800", "11181330", "11465700", "380629121385208", "380626121383808", "11181335", 
+"11439950", "11452900", "11439300", "11452901", "11404901", "02084557", "02267000", 
+"04216200", "05018500", "08073000", "08061550", "08017400", "09512100", "09512200", 
+"09406300", "10143500", "10172200", "10142000", "11162800")
\ No newline at end of file
diff --git a/workspace/R/utils.R b/workspace/R/utils.R
index 3ab0f72..b7e0e2c 100644
--- a/workspace/R/utils.R
+++ b/workspace/R/utils.R
@@ -169,26 +169,23 @@ VPU_Subset <- function(nhd_rds, VPU){
   return(st_as_sf(nhd))
 }
 
+#' Merges geopackages together to create CONUs geopackage of features
+#' @param feat (character) Type of feature to merge (POIs, segments) character
+#' @param in_gpkg  (character) geopackage to merge (navigate, collapse, non-dend, etc.)
+#' @param out_gpkg (character) Name of output geopackage
+#' @return output geopackage of CONUS for given features
+#'
 Merge_VPU <- function(feat, in_gpkg, out_gpkg){
-  ##########################################
-  # Merges geopackages together to create CONUs geopackage of features
-  # 
-  # Arguments:
-  #   feat : (character) Type of feature to merge (POIs, segments)
-  #   in_gpkg : (character) geopackage to merge (navigate, collapse, non-dend, etc.)
-  #   out_gpkg : (character) Name of output geopackage
-  #
-  # Returns:
-  #   writes output geopackage of CONUS for given features
-  ##########################################
-  # merges features together into a CONUS geopackage
+
   if(needs_layer(out_gpkg, feat)) {
     all_sf <- paste0(feat, "_CONUS")
-    VPUs <- c(paste0("0", c(1:9)), as.character(11:18), "10U", "10L")
+    VPUs <- c(paste0("0", c(1:9)), as.character(10:18))
+
     featCON <- do.call("rbind", lapply(VPUs, function(x) {
       tryCatch({
-        layer <- ifelse(feat == "nhd_flowline", feat, paste0(feat, "_", x))
-        read_sf(file.path("cache", paste0("GF_", x, in_gpkg, ".gpkg")), 
+        layer <- ifelse(feat %in% c("nhd_flowline", "unassigned_gages", "unassigned_TE"), 
+                                    feat, paste0(feat, "_", x))
+        read_sf(file.path("cache", paste0(in_gpkg, x, ".gpkg")), 
                 layer)}, 
         error = function(e) stop(paste("didn't find", x,
                                        "\n Original error was", e)))
@@ -197,35 +194,30 @@ Merge_VPU <- function(feat, in_gpkg, out_gpkg){
   }
 }
 
+#' Merges geopackages together to create CONUs geopackage of features
+#' @param VPU VPU from NHDPlusV2
+#' @param full_cats  (sf data.frame) all catchments (sinks/flowline) within given VPU
+#' @param divides (sf data.frame) divides layer for a given VPU
+#' @param wbd  (data.frame) HUC12 X-walk table, see get_data for full citation
+#' @param wbd_SF (sf data.frame) HUC12 layer from NHDPlusv2.1 national geodatabase
+#' @return (sf data.frame) intersection of nhd catchments and divides layer with HUC12
+#'
 get_HUC12_Xwalk <- function(VPU, full_cats = FALSE, divides = FALSE, wbd = FALSE, wbd_SF = FALSE){
-  ##########################################
-  # Retrieve HUC12 crosswalk for a given unit (CONUS, HI, AK)
-  # 
-  # Arguments:
-  #   VPU : (character) vector processing unit
-  #   cats : (dataframe) VPU catchments
-  #   wbd : (file) HUC12 xwalk file
-  #
-  # Returns:
-  #   writes output geopackage of CONUS for given features
-  ##########################################
-  
+
   # crosswalk to HU12, filter by VPU
   nhd_to_HU12 <- read.csv(wbd, colClasses = c("character", "integer", "character")) %>% 
     filter(grepl(paste0("^", VPU), .data$HUC_12)) %>%
     mutate(HUC_8 = substr(HUC_12, 1, 8))
   
+  # Read in the NHDPlus National GDB HUC12 layer
   HUC12_lyr <- readRDS(file.path(wbd_SF)) %>% 
     filter(grepl(paste0("^", VPU,".*"), .data$HUC_12)) %>% #, 
     st_set_precision(10) %>%
     st_transform(st_crs(full_cats)) %>%
     st_make_valid() %>%
     select(HUC_12, ACRES, HU_10_TYPE, HU_12_DS, HU_12_MOD, HU_12_TYPE)
-  
-  region <- mutate(HUC12_lyr, diss = 1) %>%
-    group_by(diss) %>%
-    summarize(do_union = T)
-  
+
+  # Bring over HUC12 xwalk information
   reg_cats <- full_cats %>% 
     left_join(select(nhd_to_HU12, c(FEATUREID, HUC_12)), by = "FEATUREID") %>% 
     st_transform(crs) %>% 
@@ -233,389 +225,20 @@ get_HUC12_Xwalk <- function(VPU, full_cats = FALSE, divides = FALSE, wbd = FALSE
     st_make_valid()
   
   # Intersect the HUC12s and catchments
-  xWalk <- sf::st_intersection(reg_cats, HUC12_lyr) %>%
-    # Convert area to squaker kilometers and drop geom
+  cats_HUC12 <- sf::st_intersection(reg_cats, HUC12_lyr) %>%
+    # Convert area to squaker kilometers 
     mutate(intArea = as.numeric(st_area(.)) * 0.000001,
            AreaHUC12 = ACRES * .00404686)
   
   divides_HUC12 <- sf::st_intersection(divides, HUC12_lyr) %>%
-    # Convert area to squaker kilometers and drop geom
+    # Convert area to squaker kilometers 
     mutate(intArea = as.numeric(st_area(.)) * 0.000001,
            AreaHUC12 = ACRES * .00404686)
   
   
-  return(list(xWalk = xWalk, divides_HUC12 = divides_HUC12))
+  return(list(cats_HUC12 = cats_HUC12, divides_HUC12 = divides_HUC12))
 }
 
-siteAtt <- function(VPU){
-  ##########################################
-  # Creates value-added attributes for structures and features on the network
-  # 
-  # Arguments:
-  #   VPU : (character) 2-digit hydrologic region
-  #
-  # Returns:
-  #   writes or appends tables for features
-  ##########################################
-  print ("gage")
-  out_gpkg <- file.path("cache",paste0("GF_", VPU, ".gpkg"))
-  
-  data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json"))
-  
-  # Layers
-  allPOIs <- read_sf(out_gpkg, paste0("POIS_", VPU)) 
-  gagesIII_pois <- read_sf(out_gpkg, paste0("Gages_", VPU)) # GAGESIII POIs
-  gageLoc <- read_sf(data_paths$nhdplus_dir, "GageLoc")
-  TE_pois <- read_sf(out_gpkg, paste0("TE_", VPU)) # Thermoelectric POIs
-  NID_pois <- read_sf(out_gpkg, paste0("NID_", VPU)) # NID POIs
-  nsegment_raw <- read_sf(out_gpkg, paste0("nsegment_raw_", VPU)) # Minimally-sufficient network attributed with POI_ID
-  nsegment <- read_sf(out_gpkg, paste0("Nsegment_", VPU)) # Minimally-sufficient network dissolved by POI_ID
-  WBs_VPU <- read_sf(out_gpkg, paste0("WB_", VPU)) %>% st_drop_geometry() %>% mutate(COMID = as.integer(COMID))
-  WBout_POIs <- read_sf(out_gpkg, paste0("WBout_", VPU)) %>% st_drop_geometry()
-  WBin_POIs <- read_sf(out_gpkg, paste0("WBin_", VPU)) %>% st_drop_geometry()
-  
-  #*************************************************************************
-  # Attribute gagesIII POIs with adjusted measure and drainage area information
-  gagesIII <- read_sf(data_paths$gagesiii_points_path, "GagesIII_gages") %>%
-    filter(grepl(paste0("^", VPU, ".*"), .data$NHDPlusReg))
-
-  # Load GAGESIII POIs from gpkg at cache dir
-  gagesIII_POIs <- select(st_drop_geometry(gagesIII_pois), Type_GagesIII, COMID) %>%
-    inner_join(gagesIII, by = "COMID") %>% 
-    select(Type_GagesIII, COMID, COMID_meas)
-
-  # Derive segment measure attributes
-  nsegment_gages <- st_drop_geometry(nsegment_raw) %>% 
-    left_join(gagesIII_POIs, by = "COMID") %>%
-    select(Hydroseq, Type_GagesIII, COMID_meas, LENGTHKM, POI_ID, TotDASqKM) %>%
-    mutate(COMID_meas = 1 - replace_na(COMID_meas/100,0), Seg_meas = COMID_meas * LENGTHKM) %>% group_by(POI_ID) %>%
-    summarize(HS = min(Hydroseq), Length = sum(LENGTHKM), Percent = sum(Seg_meas)/sum(LENGTHKM),
-              TotDASqKM = max(TotDASqKM), Type_GagesIII =  first(na.omit(Type_GagesIII))) %>% filter(!is.na(Type_GagesIII))
-
-  # Calcualte estimated proporational drainage area
-  gagesIII_POI_DA <- gagesIII_POIs %>% 
-    left_join(nsegment_raw, by = "COMID") %>%
-    select(Type_GagesIII, COMID, COMID_meas, AreaSqKM, TotDASqKM) %>%
-    mutate(Inc_DA = (1-(COMID_meas / 100)) * AreaSqKM) %>%
-    mutate(Total_DA = TotDASqKM - (AreaSqKM - Inc_DA))
-
-  # PUt everything together
-  gagesIII_stats <- gagesIII_POI_DA %>% 
-    inner_join(nsegment_gages, by = "Type_GagesIII") %>%
-    select(Type_GagesIII, COMID, Length, Percent, TotDASqKM.y) %>%
-    rename(Seg_Length = Length, Seg_Measure = Percent, Adj_NHD_DA = TotDASqKM.y) %>%
-    mutate(VPU = VPU)
-
-  # Compile data into GagesIII folder
-  if (file.exists("data/GAGESIII_gages/gagesIII_MDA.rds")){
-    # open file and bind to it
-    curRDS_gages <- readRDS("data/GAGESIII_gages/gagesIII_MDA.rds") %>% 
-      filter(VPU != VPU) %>%
-      bind_rows(gagesIII_stats)
-    saveRDS(curRDS_gages, "data/GAGESIII_gages/gagesIII_MDA.rds")
-  } else {
-    saveRDS(gagesIII_stats, "data/GAGESIII_gages/gagesIII_MDA.rds")
-  }
-  #************************************************************************
-
-  #************************************************************************
-  # Read in TE shapefile
-  print ("TE")
-  TE_fac <- st_read(file.path(data_paths$TE_points_path, "/2015_TE_Model_Estimates_lat.long_COMIDs.shp")) %>%
-    filter(COMID %in% TE_pois$COMID)
-
-  # Cast TE as points and project to USGS albers
-  TE_fac_alb <- TE_fac %>% 
-    st_cast('POINT') %>% 
-    st_transform(., 6703)
-
-  # Subset segs based on TE and project to USGS albs
-  TE_seg_pnts <- filter(nsegment, POI_ID %in% TE_fac_alb$COMID) %>% 
-    st_transform(., 6703) %>% 
-    group_by(POI_ID) %>%
-    st_line_merge(.) %>% 
-    st_cast("POINT")
-
-  #for (POI_ID in TE_fac_alb$COMID){
-  TE_fac_alb$TE_measure <- unlist(lapply(1:nrow(TE_fac_alb), function(x){
-    COM <- TE_fac_alb$COMID[x]
-    fac <- TE_fac_alb %>% filter(COMID == COM)
-    seg <- TE_seg_pnts %>% filter(POI_ID == COM)
-
-    # Get index of nearest vertex on segment and rebuild two line IDs, then get length of each
-    nearVert <- st_nearest_feature(fac, seg)
-    # Pull out the line upstream and downstream of snapping point
-    Upline <- seg[1:nearVert,] %>% 
-      summarise(geom = st_combine(geom)) %>% 
-      st_cast("LINESTRING") %>% 
-      st_length(.)
-    
-    Downline <- seg[nearVert:nrow(seg),] %>% 
-      summarise(geom = st_combine(geom)) %>% 
-      st_cast("LINESTRING") %>% st_length(.)
-
-    # Return the measure
-    return (Upline / (Upline + Downline))
-  }))
-
-  # Derive proportional total (probably need proporational incremental ratio too)
-  TE_data <- st_drop_geometry(TE_fac_alb) %>%
-    left_join(st_drop_geometry(nsegment_raw) %>% 
-                select(COMID, AreaSqKM, TotDASqKM), by = "COMID") %>%
-    mutate(Inc_DA = TE_measure * AreaSqKM, Total_DA = TotDASqKM - (AreaSqKM - Inc_DA)) %>%
-    select(EIA_PLANT_, LATITUDE, LONGITUDE, COMID, TE_measure, Inc_DA, Total_DA)
-
-  TE_data_upstream <- TE_data %>%
-    left_join(st_drop_geometry(nsegment) %>% 
-                select(POI_ID, To_POI_ID), by = c("COMID" = "To_POI_ID")) %>%
-    group_by(COMID) %>% 
-    mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " "))
-
-  TE_data_final <- TE_data_upstream %>%
-    left_join(st_drop_geometry(nsegment) %>% 
-                select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
-    group_by(COMID) %>% mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " ")) %>%
-    rename(Downstream_POI = To_POI_ID) %>% 
-    select(-POI_ID) %>% distinct() %>% mutate(VPU = VPU)
-
-  if (file.exists("data/TE_points/TE_adj.rds")){
-    # open file and bind to it
-    curRDS_TE <- readRDS("data/TE_points/TE_adj.rds") %>% 
-      filter(VPU != VPU) %>%
-      bind_rows(TE_data_final)
-    saveRDS(curRDS_TE, "data/TE_points/TE_adj.rds")
-  } else {
-    saveRDS(TE_data_final, "data/TE_points/TE_adj.rds")
-  }
-  #************************************************************************
-  
-  #************************************************************************
-  # Read in NID shapefile
-  print ("NID")
-  NID_fac <- read.csv(file.path(data_paths$NID_points_path,"/NID_attributes_20170612.txt"), stringsAsFactors = F) %>%
-    filter(FlowLcomid %in% NID_pois$COMID)
-
-  NID_snap_alb <- readRDS(file.path(data_paths$NID_points_path,"/NAWQA_NID_snap.rds")) %>%
-    filter(Source_FeatureID %in% NID_fac$Source_FeatureID) %>% 
-    st_transform(., 6703) %>%
-    inner_join(NID_fac, by = "Source_FeatureID")
-
-  # Subset segs based on TE and project to USGS albs
-  NID_seg_pnts <- filter(nsegment, POI_ID %in% 
-                           unique(NID_fac$FlowLcomid)) %>% 
-    st_transform(., 6703) %>% 
-    group_by(POI_ID) %>%
-    st_line_merge(.) %>% 
-    st_cast("POINT")
-
-  #options(warn = -1)
-  NID_snap_alb$NID_measure <- unlist(lapply(1:nrow(NID_snap_alb), function(x){
-    fac <- NID_snap_alb[x,]
-    COM <- fac$FlowLcomid
-    seg <- NID_seg_pnts %>% filter(POI_ID == COM)
-
-    # Get index of nearest vertex on segment and rebuild two line IDs, then get length of each
-    nearVert <- st_nearest_feature(fac, seg)
-    # Pull out the line upstream and downstream of snapping point
-    Upline <- seg[1:nearVert,] %>% 
-      summarise(geom = st_combine(geom)) %>% 
-      st_cast("LINESTRING") %>% 
-      st_length(.)
-    
-    Downline <- seg[nearVert:nrow(seg),] %>% 
-      summarise(geom = st_combine(geom)) %>% 
-      st_cast("LINESTRING") %>% 
-      st_length(.)
-
-    # Return the measure
-    return (Upline / (Upline + Downline))
-  }))
-  #options(warn = oldw)
-
-  # Derive proportional total (probably need proporational incremental ratio too)
-  NID_data <- st_drop_geometry(NID_snap_alb) %>%
-    left_join(st_drop_geometry(nsegment_raw) %>% 
-                select(COMID, AreaSqKM, TotDASqKM), by = c("FlowLcomid" = "COMID")) %>%
-    mutate(Inc_DA = NID_measure * AreaSqKM, Total_DA = TotDASqKM - (AreaSqKM - Inc_DA)) %>%
-    select(Source_FeatureID, VPU.x, Outlet, FlowLcomid, WBAreaSQKM, NIDSASQKM, NLCDSASQKM, MAX_SA, MultiMain, Catchcomid, FalconeDate,
-           YEAR_COMPLETED, MAX_DISCHARGE, NID_STORAGE, NID_measure, Inc_DA, Total_DA)
-
-  NID_data_upstream <- NID_data %>%
-    left_join(st_drop_geometry(nsegment) %>% 
-                select(POI_ID, To_POI_ID), by = c("FlowLcomid" = "To_POI_ID")) %>%
-    group_by(FlowLcomid) %>% 
-    mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " "))
-
-  NID_data_final <- NID_data_upstream %>%
-    left_join(st_drop_geometry(nsegment) %>% 
-                select(POI_ID, To_POI_ID), by = c("FlowLcomid" = "POI_ID")) %>%
-    group_by(FlowLcomid) %>% 
-    mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " ")) %>%
-    rename(Downstream_POI = To_POI_ID) %>% 
-    select(-POI_ID) %>% 
-    distinct() %>% 
-    mutate(VPU = VPU)
-
-  if (file.exists("data/NID_points/NID_adj.rds")){
-    # open file and bind to it
-    curRDS_NID <- readRDS("data/NID_points/NID_adj.rds") %>% 
-      filter(VPU != VPU) %>%
-      bind_rows(NID_data_final)
-    saveRDS(curRDS_NID, "data/NID_points/NID_adj.rds")
-  } else {
-    saveRDS(NID_data_final, "data/NID_points/NID_adj.rds")
-  }
-  
-  #****************************************************************************
-  # Get information on all inflow/outflows in nsegment network
-  # index the  WBAREACOMI to In WB POIs
-  print("waterbodies")
-  
-  WBinPOIs_tab <- select(WBin_POIs, COMID) %>%
-    left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>% 
-    select(COMID, DnHydroseq)%>%
-    left_join(st_drop_geometry(nsegment_raw), by = c("DnHydroseq" = "Hydroseq")) %>%
-    mutate(COMID = COMID.x) %>% 
-    select(COMID, WBAREACOMI) 
-  
-  # Noticed there are some "NHDArea" features coded into the WBARECOMI field (prob mistakes)
-  # Should we QAQC in previous step and remove these?
-  wb_pol_intab <- WBs_VPU %>% 
-    inner_join(WBinPOIs_tab, by = c("COMID" = "WBAREACOMI")) %>%
-    select(COMID, COMID.y) %>% 
-    rename(seg_COMID = COMID.y) %>%
-    group_by(COMID) %>% 
-    summarize(num_poi_inlets = n(), Upstream_POIs = paste0(unique(seg_COMID), collapse = " "))
-  
-  # index the WBAREACOMI to out WB POIs
-  # 3 WBoutPOIs have no WBareaCOMID
-  WBoutPOIs_tab <- select(WBout_POIs, COMID) %>% 
-    left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>%
-    select(COMID, WBAREACOMI) 
-  
-  wb_pol_outtab <- WBs_VPU %>% 
-    inner_join(WBoutPOIs_tab, by = c("COMID" = "WBAREACOMI")) %>%
-    select(COMID, COMID.y)  %>% 
-    rename(seg_COMID = COMID.y) %>% 
-    group_by(COMID) %>% 
-    summarize(num_poi_outlets = n(), outlet = paste0(unique(seg_COMID), collapse = " "))
-  
-  # WBs that have multiple outlets  
-  multiOutWBs <- filter(wb_pol_outtab, num_poi_outlets > 1)
-  
-  wb_pol_tab <- wb_pol_outtab %>% 
-    left_join(wb_pol_intab, by = "COMID")
-  
-  # write to csv file
-  wbmore_tab <- WBs_VPU %>% select(-c(FDATE, RESOLUTION, Shape_Length, Shape_Area, GNIS_ID, REACHCODE, FTYPE, FCODE, 
-                                      PurpCode, PurpDesc, MeanDCode, ONOFFNET, LakeArea)) %>%
-    inner_join(wb_pol_tab, by = "COMID")
-  #write.csv(wbmore_tab, paste0("cache/SiteInfo/WBPOIs_r", VPU, "_tab.csv"))
-  
-  #****************************************************************************
-  # Create table of spatial assocation of waterbodies with gages, NID, and TE
-  wbpoi_with_gage <- filter(allPOIs (!is.na(Type_WBIn) | !is.na(Type_WBOut) & !is.na(Type_GagesIII)))
-  wbpoi_with_NID <- filter(allPOIs, (!is.na(Type_WBIn) | !is.na(Type_WBOut) & !is.na(Type_NID)))
-  wbpoi_with_TE <- filter(allPOIs, (!is.na(Type_WBIn) | !is.na(Type_WBOut)) & !is.na(Type_TE))
-  
-  # determine gages by waterbody (using segs WBAREACOMI); only wb (not nhdrea)
-  wb_gage <- gagesIII_pois %>% 
-    left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>%
-    select(COMID, WBAREACOMI, Type_GagesIII) %>% 
-    filter(WBAREACOMI > 0, WBAREACOMI %in% WBs_VPU$COMID) 
-  
-  # Waterbody inlets that are gages
-  wb_ingage_tab <- filter(st_drop_geometry(gagesIII_pois), Type_WBIn == 1) %>%
-    left_join(WBinPOIs_tab, by = "COMID") %>% 
-    select(COMID, Type_GagesIII, WBAREACOMI) %>%
-    rename(WBin_Gage = Type_GagesIII)
-  
-  # find if gage at upstream segment of a WBin POI
-  wb_upgage_tab <- select(st_drop_geometry(gagesIII_pois), COMID, Type_GagesIII) %>%
-    left_join(st_drop_geometry(nsegment) %>% 
-                select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
-    inner_join(WBinPOIs_tab, by = c("To_POI_ID" = "COMID")) %>% 
-    group_by(WBAREACOMI) %>% 
-    summarize(num_us_gages = n(), Upstream_gages = paste0(unique(Type_GagesIII), collapse = ", "))
-  
-  # WB outlets 
-  wb_outgage_tab <- filter(gagesIII_pois, Type_WBOut == 1) %>% 
-    inner_join(WBoutPOIs_tab, by = "COMID") %>% 
-    select(COMID, WBAREACOMI, Type_GagesIII) %>%
-    rename(WBout_Gage = Type_GagesIII)
-  
-  # find if gage on downstream segment of a WBout POI
-  wb_dngage_tab <- select(WBout_POIs, COMID) %>% 
-    inner_join(WBoutPOIs_tab, by = "COMID") %>%
-    left_join(nsegment %>% select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
-    inner_join(gagesIII_pois, by = c("To_POI_ID" = "COMID")) %>% 
-    rename(Downstream_gage = Type_GagesIII)
-  
-  # Read in NID sites for the VPU (and get unique COMIDS)there may be more than one per COMID
-  NID_fac <- read.csv(paste0(data_paths$NID_points_path,"/NID_attributes_20170612.txt"), stringsAsFactors = F) %>%
-    select(-c(ReachCode, Measure, Source_FeatureID, VPU, SNAPTYPE, comment, MultiNIDs, NHDnet_Err, ONNHDPLUS,
-              MultiMain, Catchcomid)) %>% 
-    filter(FlowLcomid %in% NID_POIs$COMID) %>% 
-    left_join(nsegment_raw %>% select(COMID, WBAREACOMI), by = c("FlowLcomid" = "COMID"))
-  
-  # gen list of WB NID, put on a table, there may be more than one per WB
-  wb_NID <- NID_fac %>% inner_join(WBs_VPU, by = c("WBAREACOMI" = "COMID")) %>% 
-    group_by(WBAREACOMI) %>% 
-    summarize(Outlet = max(Outlet), WBAreaSQKM = max(WBAreaSQKM, na.rm = T), NIDSASQKM = max(NIDSASQKM, na.rm = T),
-              NLCDSASQKM = max(NLCDSASQKM, na.rm = T), MAX_SA = max(MAX_SA, na.rm = T), FalconeDate = max(FalconeDate, na.rm = T),
-              NLCDASADIST = max(NLCDSADIST), NIDID = paste0(unique(NIDID), collapse = " "), 
-              YEAR_COMPLETED = max(YEAR_COMPLETED, na.rm = T), MULT_YEARS = paste0(unique(YEAR_COMPLETED), collapse = " "),
-              MAX_DISCHARGE = max(MAX_DISCHARGE), NID_STORAGE = max(NID_STORAGE, na.rm = T)) %>% 
-    mutate_all(funs(stringr::str_replace_all(., "-Inf", "NA"))) %>% 
-    mutate(WBAREACOMI = as.integer(WBAREACOMI))
-  
-  # Do we really need to read the facilities in again
-  TE_fac <- read_sf(data_paths$TE_points_path, "2015_TE_Model_Estimates_lat.long_COMIDs") %>% 
-    mutate(COMID=as.integer(COMID)) %>% 
-    filter(COMID > 0) %>%
-    inner_join(., select(st_drop_geometry(nhd), c(COMID, WBAREACOMI, VPUID)), by = "COMID") %>%
-    filter(grepl(paste0("^", VPU, ".*"), .data$VPUID)) %>% switchDiv(., nhd)
-  
-  # gen list of WB NID, put on a table, there may be more than one per WB
-  wb_TE <- TE_fac %>% 
-    inner_join(WBs_VPU, by = c("WBAREACOMI" = "COMID")) %>% 
-    select(EIA_PLANT_, COMID, WBAREACOMI) 
-  
-  wb_finTable <- wbmore_tab %>% 
-    left_join(wb_NID, by = c("COMID" = "WBAREACOMI")) %>% mutate(VPU = VPU)
-  
-  # create WB table: WB_r"xx_info
-  wb_POI_tab <- WBs_VPU %>% rename(WBAREACOMI=COMID) %>%
-    select(WBAREACOMI) %>%
-    left_join(wb_outgage_tab %>% 
-                select(WBAREACOMI, WBout_Gage)) %>%
-    left_join(wb_dngage_tab %>% 
-                select(WBAREACOMI, Downstream_gage)) %>%
-    left_join(wb_ingage_tab %>% 
-                select(WBAREACOMI, WBin_Gage)) %>% #fix to be able to handle multiple gages
-    left_join(wb_upgage_tab %>% 
-                select(WBAREACOMI, Upstream_gages)) %>%
-    left_join(wb_NID) %>% 
-    select(WBAREACOMI, Outlet, NIDID) %>%
-    mutate(VPU = VPU)
-  
-  if (file.exists("data/NHDPlusNationalData/wbod_stats.rds")){
-    # open waterbody file and bind to it
-    curRDS_wbod <- readRDS("data/NHDPlusNationalData/wbod_stats.rds") %>% 
-      filter(VPU != VPU) %>%
-      bind_rows(wb_finTable)
-    saveRDS(curRDS_wbod, "data/NHDPlusNationalData/wbod_stats.rds")
-    
-    # open POI file and bind to it
-    curRDS_POIwbod <- readRDS("data/NHDPlusNationalData/wbod_POIs.rds") %>% 
-      filter(VPU != VPU) %>%
-      bind_rows(wb_POI_tab)
-    
-  } else {
-    saveRDS(wb_finTable, "data/NHDPlusNationalData/wbod_stats.rds")
-    saveRDS(wb_POI_tab, "data/NHDPlusNationalData/wbod_POIs.rds")
-  }
-}
 
 # make sf1 compatible with sf2
 st_compatibalize <- function(sf1, sf2) {
@@ -628,112 +251,6 @@ st_compatibalize <- function(sf1, sf2) {
   sf1
 }
 
-prep_HU12 <- function(sf, VPU){
-  ##########################################
-  # Adds value-added attributes to the HU12 layer to compatibilize it with HUC12 crosswalk
-  # 
-  # Arguments:
-  #   sf : (simple feature) HU12_layer
-  #   VPU : (character) 2-digit hydrologic region/VPU
-  #
-  # Returns:
-  #   Adds addtional columns to HU_12 layer, returns modified HUC12 layer
-  ##########################################
-  
-  # Adds additional attributes to the HUC12 layer coming out of nHDPlus database
-  prepped_HU12 <- select(sf, HUC_10, HUC_12, AreaHUC12, HU_12_TYPE, HU_10_TYPE, HU_12_DS) %>% 
-    st_transform(5070) %>%
-    mutate(HUC_10 = as.character(HUC_10), HUC_12 = as.character(HUC_12), 
-           HU_10_DS = substr(HU_12_DS, 1, 10)) %>% filter(grepl(paste0("^", VPU,".*"), .data$HUC_12)) %>%
-    st_cast("POLYGON") %>% 
-    st_make_valid()
-  
-  # Get estimate of accumulated drainage area for HUC12
-  HUC12_lyr_HW <- filter(prepped_HU12, !HUC_12 %in% HU_12_DS) %>% 
-    mutate(AccumArea = AreaHUC12)
-  HUC12_DS_list <- filter(prepped_HU12, !HUC_12 %in% HUC12_lyr_HW$HUC_12) %>% 
-    mutate(AccumArea = NA)
-  
-  # For HUC whose downstream HUC is missing, iterate until you find it or reach a closed basin
-  # Move to funciton in future
-  arealist <- list()
-  for (HUC in unique(HUC12_DS_list$HUC_12)){
-    areaSum <- 0
-    sub <- filter(HUC12_DS_list, HUC_12 == as.character(HUC))
-    areaSum <- areaSum + sub$AreaHUC12
-    if(sub$HUC_12 %in% prepped_HU12$HU_12_DS){
-      sub <- filter(prepped_HU12, HU_12_DS == sub$HUC_12)
-      areaSum <- areaSum + sum(sub$AreaHUC12)
-    }
-    arealist <- append(arealist, areaSum)
-  }
-  
-  HUC12_DS_list$AccumArea <- unlist(arealist) 
-  
-  HUC12_lyr <- HUC12_DS_list %>% rbind(HUC12_lyr_HW)
-  
-  return(HUC12_lyr)
-}
-
-prep_xWalk <- function(inTable, VPU, HUC12_lyr){
-  ##########################################
-  # Adds value-added attributes to the HUC12 crosswalk to compatibilize it with HU12 layer
-  # 
-  # Arguments:
-  #   inTable : (DF) HU12 crosswalk table
-  #   VPU : (character) 2-digit hydrologic region/VPU
-  #   HUC12_lyr : (simple feature) HUC12 lyr, output from 'prep_HU12' function
-  #
-  # Returns:
-  #   Adds addtional columns to HUC12 crosswalk, retruns modified HUC12 layer and crosswalk
-  ##########################################
-  
-  # subset xWalk table
-  nhd_to_HU12_in <- inTable %>% 
-    left_join(select(HUC12_lyr, HUC_12, HU_12_DS), by = "HUC_12")
-  
-  # Identifies downstream HUC12s for those HUC12 whose DS HUC12 is 
-  #            missing from the xWalk
-  missingHUC12_GIS <- select(HUC12_lyr, HUC_12, HU_12_DS) %>% 
-    st_drop_geometry() %>% 
-    filter(!HUC_12 %in% nhd_to_HU12$HUC_12)
-  
-  if (nrow(missingHUC12_GIS) > 0){
-    # This will prevent closed basins from being included
-    missingDSHUC12_xWalk <- HUC12_lyr %>% 
-      filter(HU_12_DS %in% c(missingHUC12_GIS$HUC_12))
-  
-    # For HUC whose downstream HUC is missing, iterate until you find it or reach a closed basin
-    # Move to funciton in future, this will help assign downstream HRU
-    finalDS_list <- list()
-    for (HUC in missingDSHUC12_xWalk$HUC_12){
-      sub <- filter(missingDSHUC12_xWalk, HUC_12 == as.character(HUC))
-      while(sub$HU_12_DS %in% missingHUC12_GIS$HUC_12){
-        sub <- HUC12_lyr %>% 
-          filter(HUC_12 == sub$HU_12_DS)
-      }
-      finalDS_list <- append(finalDS_list, as.character(sub$HU_12_DS))
-    }
-  
-    # Adds corrected HU_12_DS to crosswalk table for missing HUCs
-    missingDSHUC12_xWalk$xWalk_DS <- unlist(finalDS_list) 
-    missingDSHUC12_Sum <- st_drop_geometry(missingDSHUC12_xWalk) %>% 
-      group_by(HUC_12, xWalk_DS) %>% summarize()
-    
-    nhd_to_HU12_out <- nhd_to_HU12_in %>% 
-      left_join(select(missingDSHUC12_Sum, c(HUC_12, xWalk_DS)), by = "HUC_12") %>%
-      mutate(HU_12_DS == ifelse(HUC_12 %in% missingDSHUC12_xWalk$HUC_12, xWalk_DS, HUC_12)) %>% 
-      select(-c(xWalk_DS))
-    
-    HUC12_lyr <- HUC12_lyr %>% 
-      left_join(unique(select(st_drop_geometry(missingDSHUC12_xWalk), c(HUC_12, xWalk_DS)), .keep_all = TRUE), by = "HUC_12")
-  } else {
-    nhd_to_HU12_out <- nhd_to_HU12_in
-  }
-  
-  return(list(HUC12 = HUC12_lyr, xWalk = nhd_to_HU12_out))
-}
-
 #' Fix flow direction
 #' @description If flowlines aren't digitized in the expected direction, 
 #' this will reorder the nodes so they are.
@@ -774,13 +291,12 @@ try({
 }
 
 
+#' Capture all catchments (flowline, sink, etc.) in a given RPU
+#' @param fcats (sf data.frame) NHDPlusv2.1 National geodatabase catchment layer
+#' @param the_RPU (character) RPU to retrive full catchment set.
+#' @return (sf data.frame) with FEATUREID, RPUID, FTYPE, TERMINAFL 
 cat_rpu <- function(fcats, the_RPU){
-  # parameter:
-  # the_RPU = character NHDPlusV2 RASTER PROCESSING UNIT ID (RPUID)
-  # returns: a data frame with FEATUREID,RPUID, also  the FTYPE and  TERMINAFL for the RPUID
-  # if RPU valid RPUID continue process, other wise nothing returned
-  
-  
+ 
   VPU <- substr(the_RPU, 1, 2)
   if(VPU == "20"){
     fl <- readRDS(file.path(data_paths$islands_dir,  "NHDPlusNationalData", "nhdplus_flowline.rds")) %>%
-- 
GitLab