Skip to content
Snippets Groups Projects
utils.R 24.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • VPU_Subset <- function(nhdpath, VPU){
      ##########################################
      # Subsets VPU for terminal outlets 
      # 
      # Arguments:
      #   nhdpath : (directory) folder path of NHDPlus data
      #   VPU : (character) VPU to subset
      #
      # Returns:
      #   nhd : (data frame/sf) subset VPU
      ##########################################
      
    
      regoutlets <- jsonlite::read_json(file.path("cache", "RegOutlets.json"))
    
      print (paste0("subsetting for VPU ", VPU))
    
      if (!VPU %in% names(regoutlets)){
    
        
        # For regions that terminate to the NHDPlus domain
        # 1,2,3,4,8,9,12,15,17,18
        # Read in NHDPlusV2 flowline simple features and filter by vector processing unit (VPU)
    
        nhd <- readRDS(nhdpath) %>% 
          filter(grepl(paste0("^", VPU, ".*"), .data$VPUID)) %>% 
          filter(FTYPE != "Coastline")
    
        keep <- prepare_nhdplus(nhd, 0, 0, 0, FALSE)
    
    Bock, Andy's avatar
    Bock, Andy committed
      } else if (VPU %in% c("05", "06", "07", "10U", "14")){
    
        nhd <- filter(nhd, COMID %in% unlist(get_UT(nhd, outlet)))
    
        keep <- prepare_nhdplus(nhd, 0, 0, 0, FALSE)
    
        nhd <- filter(nhd, COMID %in% keep$COMID, VPUID == VPU)
    
        
      } else if (VPU == "10L"){
    
        # Subset by flowlines in Region 10
    
          filter(grepl(paste0("^",substr(VPU,1,2),".*"), .data$VPUID)) # take out this VPUID
        
        # R10U upstream
    
        nhd_US_10U <- filter(nhd, COMID %in% unlist(get_UT(nhd, regoutlets["10U"])))
    
        nhd_US_10L <- filter(nhd, COMID %in% unlist(get_UT(nhd, regoutlets["10L"])))
    
        nhd_10L <- filter(nhd_US_10L, !COMID %in% nhd_US_10U$COMID)
    
        keep <- prepare_nhdplus(nhd_10L, 0, 0, 0, FALSE)
    
        outlets <- unlist(regoutlets[VPU])
    
        
        #outlets <- c(14320629, 941140164, 15334434)
        
        for (out in outlets){
          #print (out)
          nhd_US <- get_UT(nhd, out)
    
          keep <- prepare_nhdplus(filter(nhd, COMID %in% unlist(nhd_US)), 0, 0, 0, FALSE)
    
          #print (dim(keep))
          ifelse( exists("final") , final <- rbind(final, keep), final <- keep)
        }
        
    
        nhd <- filter(nhd, COMID %in% final$COMID)
    
    Merge_hydReg <- function(feat, in_gpkg, out_gpkg){
      ##########################################
      # Merges geopackages together to create CONUs geopackage of features
      # 
      # Arguments:
      #   feat : (character) Type of feature to merge (POIs, segments)
      #   in_gpkg : (character) geopackage to merge (navigate, collapse, non-dend, etc.)
      #   out_gpkg : (character) Name of output geopackage
      #
      # Returns:
      #   writes output geopackage of CONUS for given features
      ##########################################
      # merges features together into a CONUS geopackage
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
      if(needs_layer(out_gpkg, feat)) {
    
        all_sf <- paste0(feat, "_CONUS")
        VPUs <- c(paste0("0", c(1:9)), as.character(11:18), "10U", "10L")
        featCON <- do.call("rbind", lapply(VPUs, function(x) 
    
          read_sf(file.path("cache", paste0("GF_", x, in_gpkg, ".gpkg")), paste0(feat, "_", x))))
    
        
        write_sf(featCON, out_gpkg, feat)
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
      }
    
    siteAtt <- function(hydReg){
    
      ##########################################
      # Creates value-added attributes for structures and features on the network
      # 
      # Arguments:
      #   hydReg : (character) 2-digit hydrologic region
      #
      # Returns:
      #   writes or appends tables for features
      ##########################################
    
      print ("gage")
      out_gpkg <- file.path("cache",paste0("GF_", hydReg, ".gpkg"))
    
      
      data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json"))
      
      # Layers
    
      allPOIs <- read_sf(out_gpkg, paste0("POIS_", hydReg)) 
    
      gagesIII_pois <- read_sf(out_gpkg, paste0("Gages_", hydReg)) # GAGESIII POIs
      gageLoc <- read_sf(data_paths$nhdplus_dir, "GageLoc")
    
      TE_pois <- read_sf(out_gpkg, paste0("TE_", hydReg)) # Thermoelectric POIs
      NID_pois <- read_sf(out_gpkg, paste0("NID_", hydReg)) # NID POIs
      nsegment_raw <- read_sf(out_gpkg, paste0("nsegment_raw_", hydReg)) # Minimally-sufficient network attributed with POI_ID
      nsegment <- read_sf(out_gpkg, paste0("Nsegment_", hydReg)) # Minimally-sufficient network dissolved by POI_ID
      WBs_VPU <- read_sf(out_gpkg, paste0("WB_", hydReg)) %>% st_drop_geometry() %>% mutate(COMID = as.integer(COMID))
      WBout_POIs <- read_sf(out_gpkg, paste0("WBout_", hydReg)) %>% st_drop_geometry()
      WBin_POIs <- read_sf(out_gpkg, paste0("WBin_", hydReg)) %>% st_drop_geometry()
    
      
      #*************************************************************************
      # Attribute gagesIII POIs with adjusted measure and drainage area information
    
      gagesIII <- read_sf(data_paths$gagesiii_points_path, "GagesIII_gages") %>%
        filter(grepl(paste0("^", hydReg, ".*"), .data$NHDPlusReg))
    
    
      # Load GAGESIII POIs from gpkg at cache dir
    
      gagesIII_POIs <- select(st_drop_geometry(gagesIII_pois), Type_GagesIII, COMID) %>%
        inner_join(gagesIII, by = "COMID") %>% 
        select(Type_GagesIII, COMID, COMID_meas)
    
      # Derive segment measure attributes
    
      nsegment_gages <- st_drop_geometry(nsegment_raw) %>% 
        left_join(gagesIII_POIs, by = "COMID") %>%
    
        select(Hydroseq, Type_GagesIII, COMID_meas, LENGTHKM, POI_ID, TotDASqKM) %>%
        mutate(COMID_meas = 1 - replace_na(COMID_meas/100,0), Seg_meas = COMID_meas * LENGTHKM) %>% group_by(POI_ID) %>%
    
        summarize(HS = min(Hydroseq), Length = sum(LENGTHKM), Percent = sum(Seg_meas)/sum(LENGTHKM),
    
                  TotDASqKM = max(TotDASqKM), Type_GagesIII =  first(na.omit(Type_GagesIII))) %>% filter(!is.na(Type_GagesIII))
    
      # Calcualte estimated proporational drainage area
    
      gagesIII_POI_DA <- gagesIII_POIs %>% 
        left_join(nsegment_raw, by = "COMID") %>%
    
        select(Type_GagesIII, COMID, COMID_meas, AreaSqKM, TotDASqKM) %>%
        mutate(Inc_DA = (1-(COMID_meas / 100)) * AreaSqKM) %>%
    
        mutate(Total_DA = TotDASqKM - (AreaSqKM - Inc_DA))
    
    
      # PUt everything together
    
      gagesIII_stats <- gagesIII_POI_DA %>% 
        inner_join(nsegment_gages, by = "Type_GagesIII") %>%
    
        select(Type_GagesIII, COMID, Length, Percent, TotDASqKM.y) %>%
        rename(Seg_Length = Length, Seg_Measure = Percent, Adj_NHD_DA = TotDASqKM.y) %>%
        mutate(VPU = hydReg)
    
      # Compile data into GagesIII folder
      if (file.exists("data/GAGESIII_gages/gagesIII_MDA.rds")){
        # open file and bind to it
    
        curRDS_gages <- readRDS("data/GAGESIII_gages/gagesIII_MDA.rds") %>% 
          filter(VPU != hydReg) %>%
    
          bind_rows(gagesIII_stats)
    
        saveRDS(curRDS_gages, "data/GAGESIII_gages/gagesIII_MDA.rds")
      } else {
        saveRDS(gagesIII_stats, "data/GAGESIII_gages/gagesIII_MDA.rds")
      }
      #************************************************************************
    
      #************************************************************************
      # Read in TE shapefile
    
      print ("TE")
    
      TE_fac <- st_read(file.path(data_paths$TE_points_path, "/2015_TE_Model_Estimates_lat.long_COMIDs.shp")) %>%
        filter(COMID %in% TE_pois$COMID)
    
    
      # Cast TE as points and project to USGS albers
    
      TE_fac_alb <- TE_fac %>% 
        st_cast('POINT') %>% 
        st_transform(., 6703)
    
    
      # Subset segs based on TE and project to USGS albs
    
      TE_seg_pnts <- filter(nsegment, POI_ID %in% TE_fac_alb$COMID) %>% 
        st_transform(., 6703) %>% 
        group_by(POI_ID) %>%
        st_line_merge(.) %>% 
        st_cast("POINT")
    
    
      #for (POI_ID in TE_fac_alb$COMID){
      TE_fac_alb$TE_measure <- unlist(lapply(1:nrow(TE_fac_alb), function(x){
        COM <- TE_fac_alb$COMID[x]
        fac <- TE_fac_alb %>% filter(COMID == COM)
        seg <- TE_seg_pnts %>% filter(POI_ID == COM)
    
        # Get index of nearest vertex on segment and rebuild two line IDs, then get length of each
        nearVert <- st_nearest_feature(fac, seg)
        # Pull out the line upstream and downstream of snapping point
    
        Upline <- seg[1:nearVert,] %>% 
          summarise(geom = st_combine(geom)) %>% 
          st_cast("LINESTRING") %>% 
          st_length(.)
        
        Downline <- seg[nearVert:nrow(seg),] %>% 
          summarise(geom = st_combine(geom)) %>% 
          st_cast("LINESTRING") %>% st_length(.)
    
    
        # Return the measure
        return (Upline / (Upline + Downline))
      }))
    
      # Derive proportional total (probably need proporational incremental ratio too)
      TE_data <- st_drop_geometry(TE_fac_alb) %>%
    
        left_join(st_drop_geometry(nsegment_raw) %>% 
                    select(COMID, AreaSqKM, TotDASqKM), by = "COMID") %>%
    
        mutate(Inc_DA = TE_measure * AreaSqKM, Total_DA = TotDASqKM - (AreaSqKM - Inc_DA)) %>%
        select(EIA_PLANT_, LATITUDE, LONGITUDE, COMID, TE_measure, Inc_DA, Total_DA)
    
      TE_data_upstream <- TE_data %>%
    
        left_join(st_drop_geometry(nsegment) %>% 
                    select(POI_ID, To_POI_ID), by = c("COMID" = "To_POI_ID")) %>%
        group_by(COMID) %>% 
        mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " "))
    
    
      TE_data_final <- TE_data_upstream %>%
    
        left_join(st_drop_geometry(nsegment) %>% 
                    select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
    
        group_by(COMID) %>% mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " ")) %>%
    
        rename(Downstream_POI = To_POI_ID) %>% 
        select(-POI_ID) %>% distinct() %>% mutate(VPU = hydReg)
    
    
      if (file.exists("data/TE_points/TE_adj.rds")){
        # open file and bind to it
    
        curRDS_TE <- readRDS("data/TE_points/TE_adj.rds") %>% 
          filter(VPU != hydReg) %>%
    
          bind_rows(TE_data_final)
    
        saveRDS(curRDS_TE, "data/TE_points/TE_adj.rds")
      } else {
        saveRDS(TE_data_final, "data/TE_points/TE_adj.rds")
      }
      #************************************************************************
      
      #************************************************************************
      # Read in NID shapefile
    
      print ("NID")
    
      NID_fac <- read.csv(file.path(data_paths$NID_points_path,"/NID_attributes_20170612.txt"), stringsAsFactors = F) %>%
        filter(FlowLcomid %in% NID_pois$COMID)
    
      NID_snap_alb <- readRDS(file.path(data_paths$NID_points_path,"/NAWQA_NID_snap.rds")) %>%
    
        filter(Source_FeatureID %in% NID_fac$Source_FeatureID) %>% 
        st_transform(., 6703) %>%
    
        inner_join(NID_fac, by = "Source_FeatureID")
    
      # Subset segs based on TE and project to USGS albs
    
      NID_seg_pnts <- filter(nsegment, POI_ID %in% 
                               unique(NID_fac$FlowLcomid)) %>% 
        st_transform(., 6703) %>% 
        group_by(POI_ID) %>%
        st_line_merge(.) %>% 
        st_cast("POINT")
    
      #options(warn = -1)
      NID_snap_alb$NID_measure <- unlist(lapply(1:nrow(NID_snap_alb), function(x){
        fac <- NID_snap_alb[x,]
        COM <- fac$FlowLcomid
        seg <- NID_seg_pnts %>% filter(POI_ID == COM)
    
        # Get index of nearest vertex on segment and rebuild two line IDs, then get length of each
        nearVert <- st_nearest_feature(fac, seg)
        # Pull out the line upstream and downstream of snapping point
    
        Upline <- seg[1:nearVert,] %>% 
          summarise(geom = st_combine(geom)) %>% 
          st_cast("LINESTRING") %>% 
          st_length(.)
        
        Downline <- seg[nearVert:nrow(seg),] %>% 
          summarise(geom = st_combine(geom)) %>% 
          st_cast("LINESTRING") %>% 
          st_length(.)
    
        # Return the measure
        return (Upline / (Upline + Downline))
      }))
      #options(warn = oldw)
    
      # Derive proportional total (probably need proporational incremental ratio too)
    
      NID_data <- st_drop_geometry(NID_snap_alb) %>%
    
        left_join(st_drop_geometry(nsegment_raw) %>% 
                    select(COMID, AreaSqKM, TotDASqKM), by = c("FlowLcomid" = "COMID")) %>%
    
        mutate(Inc_DA = NID_measure * AreaSqKM, Total_DA = TotDASqKM - (AreaSqKM - Inc_DA)) %>%
    
        select(Source_FeatureID, VPU.x, Outlet, FlowLcomid, WBAreaSQKM, NIDSASQKM, NLCDSASQKM, MAX_SA, MultiMain, Catchcomid, FalconeDate,
    
               YEAR_COMPLETED, MAX_DISCHARGE, NID_STORAGE, NID_measure, Inc_DA, Total_DA)
    
    
      NID_data_upstream <- NID_data %>%
    
        left_join(st_drop_geometry(nsegment) %>% 
                    select(POI_ID, To_POI_ID), by = c("FlowLcomid" = "To_POI_ID")) %>%
        group_by(FlowLcomid) %>% 
        mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " "))
    
    
      NID_data_final <- NID_data_upstream %>%
    
        left_join(st_drop_geometry(nsegment) %>% 
                    select(POI_ID, To_POI_ID), by = c("FlowLcomid" = "POI_ID")) %>%
        group_by(FlowLcomid) %>% 
        mutate(Upstream_POIs = paste0(unique(POI_ID), collapse = " ")) %>%
        rename(Downstream_POI = To_POI_ID) %>% 
        select(-POI_ID) %>% 
        distinct() %>% 
        mutate(VPU = hydReg)
    
      if (file.exists("data/NID_points/NID_adj.rds")){
        # open file and bind to it
    
        curRDS_NID <- readRDS("data/NID_points/NID_adj.rds") %>% 
          filter(VPU != hydReg) %>%
    
          bind_rows(NID_data_final)
    
        saveRDS(curRDS_NID, "data/NID_points/NID_adj.rds")
      } else {
        saveRDS(NID_data_final, "data/NID_points/NID_adj.rds")
      }
    
      
      #****************************************************************************
      # Get information on all inflow/outflows in nsegment network
      # index the  WBAREACOMI to In WB POIs
      print("waterbodies")
      
    
      WBinPOIs_tab <- select(WBin_POIs, COMID) %>%
        left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>% 
        select(COMID, DnHydroseq)%>%
    
        left_join(st_drop_geometry(nsegment_raw), by = c("DnHydroseq" = "Hydroseq")) %>%
    
        mutate(COMID = COMID.x) %>% 
        select(COMID, WBAREACOMI) 
    
      
      # Noticed there are some "NHDArea" features coded into the WBARECOMI field (prob mistakes)
      # Should we QAQC in previous step and remove these?
    
      wb_pol_intab <- WBs_VPU %>% 
        inner_join(WBinPOIs_tab, by = c("COMID" = "WBAREACOMI")) %>%
        select(COMID, COMID.y) %>% 
        rename(seg_COMID = COMID.y) %>%
        group_by(COMID) %>% 
        summarize(num_poi_inlets = n(), Upstream_POIs = paste0(unique(seg_COMID), collapse = " "))
    
      
      # index the WBAREACOMI to out WB POIs
      # 3 WBoutPOIs have no WBareaCOMID
    
      WBoutPOIs_tab <- select(WBout_POIs, COMID) %>% 
        left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>%
    
        select(COMID, WBAREACOMI) 
      
    
      wb_pol_outtab <- WBs_VPU %>% 
        inner_join(WBoutPOIs_tab, by = c("COMID" = "WBAREACOMI")) %>%
        select(COMID, COMID.y)  %>% 
        rename(seg_COMID = COMID.y) %>% 
        group_by(COMID) %>% 
        summarize(num_poi_outlets = n(), outlet = paste0(unique(seg_COMID), collapse = " "))
    
      
      # WBs that have multiple outlets  
    
      multiOutWBs <- filter(wb_pol_outtab, num_poi_outlets > 1)
    
      wb_pol_tab <- wb_pol_outtab %>% 
        left_join(wb_pol_intab, by = "COMID")
    
      
      # write to csv file
      wbmore_tab <- WBs_VPU %>% select(-c(FDATE, RESOLUTION, Shape_Length, Shape_Area, GNIS_ID, REACHCODE, FTYPE, FCODE, 
                                          PurpCode, PurpDesc, MeanDCode, ONOFFNET, LakeArea)) %>%
        inner_join(wb_pol_tab, by = "COMID")
      #write.csv(wbmore_tab, paste0("cache/SiteInfo/WBPOIs_r", hydReg, "_tab.csv"))
      
      #****************************************************************************
      # Create table of spatial assocation of waterbodies with gages, NID, and TE
    
      wbpoi_with_gage <- filter(allPOIs (!is.na(Type_WBIn) | !is.na(Type_WBOut) & !is.na(Type_GagesIII)))
      wbpoi_with_NID <- filter(allPOIs, (!is.na(Type_WBIn) | !is.na(Type_WBOut) & !is.na(Type_NID)))
      wbpoi_with_TE <- filter(allPOIs, (!is.na(Type_WBIn) | !is.na(Type_WBOut)) & !is.na(Type_TE))
    
      
      # determine gages by waterbody (using segs WBAREACOMI); only wb (not nhdrea)
    
      wb_gage <- gagesIII_pois %>% 
        left_join(st_drop_geometry(nsegment_raw), by = "COMID") %>%
        select(COMID, WBAREACOMI, Type_GagesIII) %>% 
        filter(WBAREACOMI > 0, WBAREACOMI %in% WBs_VPU$COMID) 
    
      
      # Waterbody inlets that are gages
    
      wb_ingage_tab <- filter(st_drop_geometry(gagesIII_pois), Type_WBIn == 1) %>%
        left_join(WBinPOIs_tab, by = "COMID") %>% 
        select(COMID, Type_GagesIII, WBAREACOMI) %>%
    
        rename(WBin_Gage = Type_GagesIII)
      
      # find if gage at upstream segment of a WBin POI
    
      wb_upgage_tab <- select(st_drop_geometry(gagesIII_pois), COMID, Type_GagesIII) %>%
        left_join(st_drop_geometry(nsegment) %>% 
                    select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
        inner_join(WBinPOIs_tab, by = c("To_POI_ID" = "COMID")) %>% 
        group_by(WBAREACOMI) %>% 
    
        summarize(num_us_gages = n(), Upstream_gages = paste0(unique(Type_GagesIII), collapse = ", "))
      
      # WB outlets 
    
      wb_outgage_tab <- filter(gagesIII_pois, Type_WBOut == 1) %>% 
        inner_join(WBoutPOIs_tab, by = "COMID") %>% 
        select(COMID, WBAREACOMI, Type_GagesIII) %>%
    
        rename(WBout_Gage = Type_GagesIII)
      
      # find if gage on downstream segment of a WBout POI
    
      wb_dngage_tab <- select(WBout_POIs, COMID) %>% 
        inner_join(WBoutPOIs_tab, by = "COMID") %>%
    
        left_join(nsegment %>% select(POI_ID, To_POI_ID), by = c("COMID" = "POI_ID")) %>%
    
        inner_join(gagesIII_pois, by = c("To_POI_ID" = "COMID")) %>% 
        rename(Downstream_gage = Type_GagesIII)
    
      
      # Read in NID sites for the VPU (and get unique COMIDS)there may be more than one per COMID
      NID_fac <- read.csv(paste0(data_paths$NID_points_path,"/NID_attributes_20170612.txt"), stringsAsFactors = F) %>%
        select(-c(ReachCode, Measure, Source_FeatureID, VPU, SNAPTYPE, comment, MultiNIDs, NHDnet_Err, ONNHDPLUS,
    
                  MultiMain, Catchcomid)) %>% 
        filter(FlowLcomid %in% NID_POIs$COMID) %>% 
    
        left_join(nsegment_raw %>% select(COMID, WBAREACOMI), by = c("FlowLcomid" = "COMID"))
      
      # gen list of WB NID, put on a table, there may be more than one per WB
    
      wb_NID <- NID_fac %>% inner_join(WBs_VPU, by = c("WBAREACOMI" = "COMID")) %>% 
        group_by(WBAREACOMI) %>% 
    
        summarize(Outlet = max(Outlet), WBAreaSQKM = max(WBAreaSQKM, na.rm = T), NIDSASQKM = max(NIDSASQKM, na.rm = T),
                  NLCDSASQKM = max(NLCDSASQKM, na.rm = T), MAX_SA = max(MAX_SA, na.rm = T), FalconeDate = max(FalconeDate, na.rm = T),
                  NLCDASADIST = max(NLCDSADIST), NIDID = paste0(unique(NIDID), collapse = " "), 
                  YEAR_COMPLETED = max(YEAR_COMPLETED, na.rm = T), MULT_YEARS = paste0(unique(YEAR_COMPLETED), collapse = " "),
                  MAX_DISCHARGE = max(MAX_DISCHARGE), NID_STORAGE = max(NID_STORAGE, na.rm = T)) %>% 
    
        mutate_all(funs(stringr::str_replace_all(., "-Inf", "NA"))) %>% 
        mutate(WBAREACOMI = as.integer(WBAREACOMI))
    
      
      # Do we really need to read the facilities in again
      TE_fac <- read_sf(data_paths$TE_points_path, "2015_TE_Model_Estimates_lat.long_COMIDs") %>% 
    
        mutate(COMID=as.integer(COMID)) %>% 
        filter(COMID > 0) %>%
    
        inner_join(., select(st_drop_geometry(nhd), c(COMID, WBAREACOMI, VPUID)), by = "COMID") %>%
        filter(grepl(paste0("^", hydReg, ".*"), .data$VPUID)) %>% switchDiv(., nhd)
      
      # gen list of WB NID, put on a table, there may be more than one per WB
    
      wb_TE <- TE_fac %>% 
        inner_join(WBs_VPU, by = c("WBAREACOMI" = "COMID")) %>% 
    
        select(EIA_PLANT_, COMID, WBAREACOMI) 
      
    
      wb_finTable <- wbmore_tab %>% 
        left_join(wb_NID, by = c("COMID" = "WBAREACOMI")) %>% mutate(VPU = hydReg)
    
      
      # create WB table: WB_r"xx_info
      wb_POI_tab <- WBs_VPU %>% rename(WBAREACOMI=COMID) %>%
        select(WBAREACOMI) %>%
    
        left_join(wb_outgage_tab %>% 
                    select(WBAREACOMI, WBout_Gage)) %>%
        left_join(wb_dngage_tab %>% 
                    select(WBAREACOMI, Downstream_gage)) %>%
        left_join(wb_ingage_tab %>% 
                    select(WBAREACOMI, WBin_Gage)) %>% #fix to be able to handle multiple gages
        left_join(wb_upgage_tab %>% 
                    select(WBAREACOMI, Upstream_gages)) %>%
        left_join(wb_NID) %>% 
        select(WBAREACOMI, Outlet, NIDID) %>%
    
        mutate(VPU = hydReg)
      
      if (file.exists("data/NHDPlusNationalData/wbod_stats.rds")){
        # open waterbody file and bind to it
    
        curRDS_wbod <- readRDS("data/NHDPlusNationalData/wbod_stats.rds") %>% 
          filter(VPU != hydReg) %>%
    
          bind_rows(wb_finTable)
        saveRDS(curRDS_wbod, "data/NHDPlusNationalData/wbod_stats.rds")
        
        # open POI file and bind to it
    
        curRDS_POIwbod <- readRDS("data/NHDPlusNationalData/wbod_POIs.rds") %>% 
          filter(VPU != hydReg) %>%
    
          bind_rows(wb_POI_tab)
        
      } else {
        saveRDS(wb_finTable, "data/NHDPlusNationalData/wbod_stats.rds")
        saveRDS(wb_POI_tab, "data/NHDPlusNationalData/wbod_POIs.rds")
      }
    
    }
    
    # make sf1 compatible with sf2
    st_compatibalize <- function(sf1, sf2) {
      sf1 <- st_transform(sf1, st_crs(sf2))
      
      g <- attr(sf1, "sf_column")
      gp <- attr(sf2, "sf_column")
      names(sf1)[names(sf1) == g] <- gp
      attr(sf1, "sf_column") <- gp
      sf1
    }
    
    prep_HU12 <- function(sf, VPU){
      ##########################################
      # Adds value-added attributes to the HU12 layer to compatibilize it with HUC12 crosswalk
      # 
      # Arguments:
      #   sf : (simple feature) HU12_layer
      #   VPU : (character) 2-digit hydrologic region/VPU
      #
      # Returns:
      #   Adds addtional columns to HU_12 layer, returns modified HUC12 layer
      ##########################################
      
      # Adds additional attributes to the HUC12 layer coming out of nHDPlus database
      prepped_HU12 <- select(sf, HUC_10, HUC_12, AreaHUC12, HU_12_TYPE, HU_10_TYPE, HU_12_DS) %>% 
        st_transform(5070) %>%
        mutate(HUC_10 = as.character(HUC_10), HUC_12 = as.character(HUC_12), 
               HU_10_DS = substr(HU_12_DS, 1, 10)) %>% filter(grepl(paste0("^", VPU,".*"), .data$HUC_12)) %>%
        st_cast("POLYGON") %>% 
        st_make_valid()
      
      # Get estimate of accumulated drainage area for HUC12
      HUC12_lyr_HW <- filter(prepped_HU12, !HUC_12 %in% HU_12_DS) %>% 
        mutate(AccumArea = AreaHUC12)
      HUC12_DS_list <- filter(prepped_HU12, !HUC_12 %in% HUC12_lyr_HW$HUC_12) %>% 
        mutate(AccumArea = NA)
      
      # For HUC whose downstream HUC is missing, iterate until you find it or reach a closed basin
      # Move to funciton in future
      arealist <- list()
      for (HUC in unique(HUC12_DS_list$HUC_12)){
        areaSum <- 0
        sub <- filter(HUC12_DS_list, HUC_12 == as.character(HUC))
        areaSum <- areaSum + sub$AreaHUC12
        if(sub$HUC_12 %in% prepped_HU12$HU_12_DS){
          sub <- filter(prepped_HU12, HU_12_DS == sub$HUC_12)
          areaSum <- areaSum + sum(sub$AreaHUC12)
        }
        arealist <- append(arealist, areaSum)
      }
      
      HUC12_DS_list$AccumArea <- unlist(arealist) 
      
      HUC12_lyr <- HUC12_DS_list %>% rbind(HUC12_lyr_HW)
      
      return(HUC12_lyr)
    }
    
    prep_xWalk <- function(inTable, VPU, HUC12_lyr){
      ##########################################
      # Adds value-added attributes to the HUC12 crosswalk to compatibilize it with HU12 layer
      # 
      # Arguments:
      #   inTable : (DF) HU12 crosswalk table
      #   VPU : (character) 2-digit hydrologic region/VPU
      #   HUC12_lyr : (simple feature) HUC12 lyr, output from 'prep_HU12' function
      #
      # Returns:
      #   Adds addtional columns to HUC12 crosswalk, retruns modified HUC12 layer and crosswalk
      ##########################################
      
      # subset xWalk table
      nhd_to_HU12_in <- inTable %>% 
        left_join(select(HUC12_lyr, HUC_12, HU_12_DS), by = "HUC_12")
      
      # Identifies downstream HUC12s for those HUC12 whose DS HUC12 is 
      #            missing from the xWalk
      missingHUC12_GIS <- select(HUC12_lyr, HUC_12, HU_12_DS) %>% 
        st_drop_geometry() %>% 
        filter(!HUC_12 %in% nhd_to_HU12$HUC_12)
      
      if (nrow(missingHUC12_GIS) > 0){
        # This will prevent closed basins from being included
        missingDSHUC12_xWalk <- HUC12_lyr %>% 
          filter(HU_12_DS %in% c(missingHUC12_GIS$HUC_12))
      
        # For HUC whose downstream HUC is missing, iterate until you find it or reach a closed basin
        # Move to funciton in future, this will help assign downstream HRU
        finalDS_list <- list()
        for (HUC in missingDSHUC12_xWalk$HUC_12){
          sub <- filter(missingDSHUC12_xWalk, HUC_12 == as.character(HUC))
          while(sub$HU_12_DS %in% missingHUC12_GIS$HUC_12){
            sub <- HUC12_lyr %>% 
              filter(HUC_12 == sub$HU_12_DS)
          }
          finalDS_list <- append(finalDS_list, as.character(sub$HU_12_DS))
        }
      
        # Adds corrected HU_12_DS to crosswalk table for missing HUCs
        missingDSHUC12_xWalk$xWalk_DS <- unlist(finalDS_list) 
        missingDSHUC12_Sum <- st_drop_geometry(missingDSHUC12_xWalk) %>% 
          group_by(HUC_12, xWalk_DS) %>% summarize()
        
        nhd_to_HU12_out <- nhd_to_HU12_in %>% 
          left_join(select(missingDSHUC12_Sum, c(HUC_12, xWalk_DS)), by = "HUC_12") %>%
          mutate(HU_12_DS == ifelse(HUC_12 %in% missingDSHUC12_xWalk$HUC_12, xWalk_DS, HUC_12)) %>% 
          select(-c(xWalk_DS))
        
        HUC12_lyr <- HUC12_lyr %>% 
          left_join(unique(select(st_drop_geometry(missingDSHUC12_xWalk), c(HUC_12, xWalk_DS)), .keep_all = TRUE), by = "HUC_12")
      } else {
        nhd_to_HU12_out <- nhd_to_HU12_in
      }
      
      return(list(HUC12 = HUC12_lyr, xWalk = nhd_to_HU12_out))
    }