From 54c6a7df4ed95dfbb9f2dc490b3acc2dac1b74df Mon Sep 17 00:00:00 2001
From: Andy Bock <abock@usgs.gov>
Date: Tue, 12 Jan 2021 12:36:34 -0700
Subject: [PATCH] reduce var names, functions for non-dend

---
 workspace/R/NHD_navigate.R | 264 +++++++++++++++++++++++++++++--------
 1 file changed, 209 insertions(+), 55 deletions(-)

diff --git a/workspace/R/NHD_navigate.R b/workspace/R/NHD_navigate.R
index 1b37b9f..a3b71b5 100644
--- a/workspace/R/NHD_navigate.R
+++ b/workspace/R/NHD_navigate.R
@@ -27,29 +27,11 @@ VPU_gage_Table <- file.path("cache", paste0("R", VPU, "_Gages.csv"))
 CONUS_gage_Table <- "data/gages_MDA.rds"
 
 # Defined during NonDend.Rmd
-nd_gpkg <- file.path("cache", paste0("ND_",VPU,".gpkg"))
-coast_FL <- paste0("coastal_FL_", VPU)
-nhd_aggLine <- paste0("nhd_aggline_", VPU)
-agg_fline <- paste0("agg_fline_", VPU)
-reg_cats_lyr <- paste0("nhd_cats", VPU)
-agg_cats_lyr <- paste0("agg_cats", VPU)
-outlets_lyr <- paste0("outlets", VPU)
-lookup_table <- file.path("cache/", paste0(VPU, "_lookup.csv"))
-cat0_lyr <- paste0("cat0_", VPU)
-hru0_lyr <- paste0("hru0_", VPU)
-nd_table_out <- file.path("cache/", paste0(VPU, "_ND_lookup.csv"))
-nhdCoast_net <- paste0("nhdCoast_network_", VPU)
-catCoast_net <- paste0("catCoast_", VPU)
-cat0_lyr <- "catchment_coast" # modify this
-hru0_lyr <- "hru_coast" # modify this
-sink_lyr_out <- paste0("sink_", VPU)
-missing_cat_lyr <- paste0("missingCats_", VPU)
-sink_cats_out <- paste0("Terminal_Cats_", VPU)
-HUC12_lyr_out <- paste0("HUC12_", VPU)
-cat_final_out <- paste0("Final_Cats_", VPU)
-sink_net_out <- paste0("sink_Net_", VPU)
-rem_cat_out <- paste0("Rem_Cats_", VPU)
-hru_NCA_out <- paste0("HRU_NCA_", VPU)
+fin_gpkg <- file.path("cache", paste0(VPU, "_final.gpkg"))
+lookup_miss <- paste0("lookup_missing_", VPU)
+missing_cats <- paste0("miss_cats_", VPU)
+protoHRUs <- paste0("poi_cats_", VPU)
+cat_borders <- paste0("perimeters_", VPU)
 
 
 NetworkNav <- function(inCom, nhdDF, withTrib){
@@ -226,7 +208,6 @@ addType<-function(new_POIs, POIs, IDfield, bind){
   return(POIs_fin)
 }
 
-
 segment_increment <- function(nhd, POIs){
   ##########################################
   # Creates raw and dissolved segment layers with necessaary
@@ -242,47 +223,67 @@ segment_increment <- function(nhd, POIs){
   #   segList : (data frame) raw segments
   #             
   ##########################################
-  #nhd_orig <- filter(nhd, dend == 1)
-  #nhd <- nhd_orig
-  seg_POIs <- arrange(POIs, desc(LevelPathI), desc(Hydroseq))
+  ptm<-proc.time()
+  
+  seg_POIs <- arrange(POIs, desc(LevelPathI), desc(Hydroseq)) %>%
+    select(COMID, Hydroseq, LevelPathI) %>%
+    group_by(LevelPathI) %>%
+    # These next two levels arrange POIs hydrologically along the 
+    #       level path in order to meet the requirements of the code below
+    mutate(id = row_number(), 
+           num = if(n() > 1) id[1L] + row_number()-1 else id) %>%
+    ungroup()
+  
+  # Add an empty field for POI_Id population
   nhd <- mutate(nhd, POI_ID = 0)
   
-  # Populate NHD with POI_ID
-  # Begin with upstream most Levelpath/Hydrosequenc
-  for (LP in unique(seg_POIs$LevelPathI)){
-    #print(LP)
-    # For each level path
-    seg_POIs_LP <- filter(seg_POIs, LevelPathI == LP)
-    # Go through sorted list of US -> DS comids
-    for (POI in seg_POIs_LP$COMID){
-      #print(POI)
-      
-      # Re-iterate only on flowlines that have no POI_ID to increase speed
-      nhd <- filter(nhd, POI_ID == 0)
+  POI_ID_assign <- function(i, seg_POIs, nhd){
+    ##########################################
+    # Populates POI_ID per segment
+    # 
+    # Arguments:
+    #   i : (integer) iterator
+    #   seg_POIs : (data frame) POI data frame
+    #   nhd : (dataframe) flowlines data frame (no geometry)
+    #  
+    # Returns:
+    #   nhd_sub : (data frame) raw segments with POI_ID populated
+    #             
+    ##########################################
+    library(dplyr)
     
-      # Get upstream segments
-      Seg <- nhdplusTools::get_UM(nhd, POI, include = T)
-      
+    # If POI is most upstream POI on levelpath
+    if (seg_POIs$num[i] == 1){
       # Assign POI_ID
-      nhd <- nhd %>%  
-        mutate(POI_ID = ifelse(COMID %in% Seg & POI_ID == 0, POI, POI_ID))
-      
-      # Derive incremental segment composed of flowlines
-      inc_seg <- filter(nhd, POI_ID == POI)
-      
-      #print (dim(incSeg))
-      if(!exists("inc_segs")){
-        inc_segs <- inc_seg
-      }else{
-        inc_segs <- rbind(inc_segs, inc_seg)
-      }
+      nhd_sub <- filter(nhd, Hydroseq >= seg_POIs$Hydroseq[i] & 
+                          LevelPathI == seg_POIs$LevelPathI[i]) %>% 
+        mutate(POI_ID =  seg_POIs$COMID[i])
+      # or as you travel downstream on set of POIs below level path
+    } else {
+      # Assign POI_ID
+      nhd_sub <- filter(nhd, LevelPathI == seg_POIs$LevelPathI[i] & 
+                          Hydroseq >= seg_POIs$Hydroseq[i] & Hydroseq < seg_POIs$Hydroseq[i-1]) %>% 
+        mutate(POI_ID = seg_POIs$COMID[i])
     }
+    # return assigned flowlines
+    return(select(nhd_sub, LevelPathI, Hydroseq, COMID, POI_ID) %>%
+             filter(POI_ID != 0))
   }
-  print(proc.time()-ptm)
   
+  library(parallel)
+  library(dplyr)
+  clust <- makeCluster(4)
+  POI_list <- parLapply(clust, c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhd))
+  #POI_list <- lapply(c(1:nrow(seg_POIs)), POI_ID_assign, seg_POIs, st_drop_geometry(nhd))
+  stopCluster(clust)
+  
+  inc_segs <- data.table::rbindlist(POI_list)
+  
+  print(proc.time()-ptm)
   return(inc_segs)
 }
 
+
 segment_creation <- function(nhd, routing_fix){
   ##########################################
   # Creates raw and dissolved segment layers with necessaary
@@ -689,3 +690,156 @@ structPOIsNet <- function(nhd, final_POIs){
   # Write out minimally-sufficient network and POIs as a list
   return(list(struc_POIs = struc_POIs, structnet = structnet))
 }
+
+outlets_close <- function(nhd, cats){
+  ##########################################
+  # Find little outlets and determine closest valid catchment with HRU ID
+  # 
+  # Arguments:
+  #   nhd : (data frame) valid data frame of NHD flowlines
+  #   cats : (data frame) valid data frame of NHD catchments
+  #
+  # Returns:
+  #   missing_outlets : (data frame) DF of outlets and closest proto-HRU
+  ##########################################
+  
+  # Networks that terminate and are smaller than the threshold, previously defined 
+  little_outlets <- filter(nhd, TerminalFl == 1,
+                           TotDASqKM <= min_da_km &
+                             COMID %in% filter(cats, is.na(POI_ID))$FEATUREID) %>%
+    arrange(COMID)
+  
+  # convert flowlines to end points
+  xy <- get_node(little_outlets) %>%
+    st_transform(26904)
+  
+  # All cats except those with same COMId as little_outlets
+  nonout_cats <- filter(cats, !FEATUREID %in% little_outlets$COMID) %>%
+    mutate(ID = row_number())
+  
+  # cast to multlinestirng sf feature for proximity analysis
+  cats_lines <- filter(cats, FEATUREID %in% little_outlets$COMID) %>%
+    group_by(FEATUREID) %>%
+    summarize(do_union = FALSE) %>%
+    st_cast("MULTILINESTRING") %>%
+    arrange(FEATUREID)
+  
+  # find and measure distance to closest HRUs
+  missing_outlets <- bind_cols(st_drop_geometry(little_outlets) %>% select(COMID), xy) %>% 
+    st_sf() %>%
+    # Distance from little outlet to catchment boundary
+    mutate(dist2edge = round(as.numeric(st_distance(., cats_lines, by_element = TRUE)), 2)) %>%
+    # Nearest catchment that is not its own
+    mutate(nearest_cat = st_nearest_feature(., nonout_cats)) %>%
+    # Distance to nearest HRU
+    mutate(dist2_HRU = round(as.numeric(st_distance(., nonout_cats[nearest_cat,], by_element = TRUE)), 2)) %>% 
+    #st_drop_geometry() %>%
+    inner_join(select(st_drop_geometry(cats), FEATUREID, HUC_12), by = c("COMID" = "FEATUREID")) %>%
+    inner_join(select(st_drop_geometry(nonout_cats), FEATUREID, HUC_12, POI_ID, ID), by = c("nearest_cat" = "ID")) %>%
+    rename(HUC_12_outlet = HUC_12.x, FEATUREID_nrHRU = FEATUREID, HUC_12_nrHRU = HUC_12.y) %>%
+    filter(!dist2_HRU > dist2edge)
+  
+  return(missing_outlets)
+}
+
+perims <- function(cats){
+  ##########################################
+  # Determines length of shared perimeters with adjacent hRUS
+  # 
+  # Arguments:
+  #   cats : (data frame) valid data frame of NHD catchments
+  #
+  # Returns:
+  #   perimeters : (data frame) DF of shared perimeter lengths for cats
+  ##########################################
+  
+  # Perimeters and catchments that touch
+  Touching_List <- st_touches(cats)
+  perimeters <- lwgeom::st_perimeter(cats)
+  
+  # Get perimeter lengths and IDs of neightbors of catchments
+  neighbors <- all.length.list <- lapply(1:length(Touching_List), function(from) {
+    lines <- st_intersection(cats[from,], cats[Touching_List[[from]],])
+    lines <- st_cast(lines) # In case of multiple geometries
+    l_lines <- st_length(lines)
+    res <- data.frame(origin = from,
+                      perimeter = as.vector(perimeters[from]),
+                      touching = Touching_List[[from]],
+                      t.length = as.vector(l_lines),
+                      t.pc = as.vector(100*l_lines/perimeters[from]))
+    res})
+  
+  perimeters <- do.call("rbind", neighbors)
+  
+  return(perimeters)
+}
+
+
+assignable_cats <- function(perimeters, cats, missing_outlets){
+  ##########################################
+  # Assigns cats with no POI_ID a POI_ID based on proxmity or 
+  #         other spatial relationships where determined
+  # 
+  # Arguments:
+  #   perimeters : (data frame) shared perimeters data frame 
+  #   cats : (data frame) valid data frame of NHD catchments
+  #   missing outlets : (data frame) outlets of catchments w/o POI_ID
+  #
+  # Returns:
+  #   cats : (data frame) DF of cats with populated POI_ID
+  ##########################################
+  
+  # limit the result to cats that have not been assigned POI_ID
+  all.length.df <- perimeters %>%
+    inner_join(select(st_drop_geometry(cats), FEATUREID, POI_ID, HUC_12, ID), 
+               by = c("origin" = "ID")) %>%
+    left_join(select(st_drop_geometry(cats), FEATUREID, POI_ID, HUC_12, ID), by = c("touching" = "ID")) %>%
+    rename(FEATUREID = FEATUREID.x, shared_length = t.length, HUC_12_noPOI = HUC_12.x, 
+           FEATUREID_bdHRU = FEATUREID.y, HUC_12_bdHRU = HUC_12.y, POI_ID_bdHRU = POI_ID.y) %>%
+    select(origin, FEATUREID, shared_length, perimeter, HUC_12_noPOI, FEATUREID_bdHRU, HUC_12_bdHRU, POI_ID_bdHRU, POI_ID.x)
+  
+  # cats without POI surrounded by POI cats
+  surrounded <- filter(all.length.df, is.na(POI_ID.x)) %>%
+    # origin is the CAT from which the neighboring relationships are determined
+    group_by(FEATUREID) %>%
+    mutate(n_cats = n()) %>%
+    filter(perimeter == sum(shared_length), !is.na(POI_ID_bdHRU), n_distinct(POI_ID_bdHRU) == 1) %>%
+    select(FEATUREID, POI_ID_bdHRU) %>%
+    distinct()
+  
+  # populate cats surrounded by HRUs with HRUs POI_ID
+  cats <- cats %>%
+    left_join(surrounded, by = "FEATUREID") %>%
+    mutate(POI_ID = ifelse(!is.na(POI_ID_bdHRU), POI_ID_bdHRU,  POI_ID))
+  
+  # catchments taht are partially surrounded
+  part_surrounded <- filter(all.length.df, !FEATUREID %in% surrounded$FEATUREID) %>%
+    # origin is the CAT from which the neighboring relationships are determined
+    group_by(FEATUREID, POI_ID_bdHRU) %>%
+    summarize(perim_POI = sum(shared_length)/perimeter) %>%
+    distinct() %>%
+    # Join the catchment to missing outlets if the same COMID for another
+    #      source of information
+    inner_join(missing_outlets, by = c("FEATUREID" = "COMID")) %>% 
+    # if the distance to the HRU with the POI_ID populated is greater than the
+    #   distance from the outlet to the edge, disregard
+    filter(!dist2_HRU > dist2edge, POI_ID_bdHRU == POI_ID) %>%
+    st_as_sf()
+  
+  # for each surrounded catchment
+  for (COMID in part_surrounded$FEATUREID){
+    # get the upstream flowline/cat contributiong if it exists
+    up <- get_UT(nhd, COMID)
+    
+    nhdsub <- filter(nhd, COMID %in% up) %>%
+      select(COMID) %>%
+      mutate(new_POI_ID = filter(part_surrounded, FEATUREID %in% up)$POI_ID)
+    
+    cats <- cats %>%
+      left_join(st_drop_geometry(nhdsub), by = c("FEATUREID" = "COMID")) %>%
+      mutate(POI_ID = ifelse(FEATUREID %in% up, nhdsub$new_POI_ID, POI_ID)) %>%
+      select(-new_POI_ID)
+  }
+  
+  return(cats) 
+}
-- 
GitLab