From 76b02bd326ee035b8770c3d19848aac14a2f728b Mon Sep 17 00:00:00 2001
From: Bock <abock@usgs.gov>
Date: Wed, 7 Feb 2024 13:35:57 -0700
Subject: [PATCH] updates

---
 workspace/01_Gage_Selection.Rmd | 152 +++++++++++++++++++++-----------
 1 file changed, 101 insertions(+), 51 deletions(-)

diff --git a/workspace/01_Gage_Selection.Rmd b/workspace/01_Gage_Selection.Rmd
index e8638f2..8230103 100644
--- a/workspace/01_Gage_Selection.Rmd
+++ b/workspace/01_Gage_Selection.Rmd
@@ -22,7 +22,6 @@ library(sf)
 library(hyRefactor)
 library(tidyr)
 library(foreign)
-library(rgdal)
 library(hyfabric)
 library(mapview)
 
@@ -31,45 +30,45 @@ source("R/utils.R")
 source("R/NHD_navigate.R")
 # Gages which are manually inspected from "Gages_to_check" file and added back in
 source("R/config.R")
+source("R/user_vars.R")
 source("R/gage_mod.R")
 
 # Load user-defined data paths
 data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json"))
 # Load NHDPlus Data if precprocessed to RDS format
 nhdplus_path(data_paths$nhdplus_gdb)
-staged_nhd <- stage_national_data(nhdplus_data = data_paths$nhdplus_gdb, 
-                                  output_path  = data_paths$nhdplus_dir)
-
 # Load NHD
-nhd <- readRDS(staged_nhd$flowline)
+nhd <- read_sf(data_paths$ref_fl)
 
 # clear dropped gages file
 if(file.exists("cache/dropped_gages.csv")){
   file.remove("cache/dropped_gages.csv")
 }
 
-# Update location index information from other sources where necessary from NLDI reference gage set
-ref_gages <- jsonlite::fromJSON("https://github.com/internetofwater/ref_gages/releases/download/v0.5/usgs_nldi_gages.geojson", 
-                                flatten = TRUE)$features %>%
-  select(-geometry.type, -geometry.coordinates, ref_site_no = properties.id, 
-         ref_RC = properties.nhdpv2_REACHCODE, ref_RM = properties.nhdpv2_REACH_measure, 
-         ref_COM = properties.nhdpv2_COMID) %>%
-  filter(!is.na(ref_COM) & ref_COM > 0)
+# gages II 
+gages_ii <- read_sf(data_paths$gagesii_lyr)
+ref_gages <- read_sf("data/nldi/usgs_nldi_gages.geojson")
+gages_ii_ref <- filter(ref_gages, id %in% 
+                         filter(gages_ii, CLASS == "Ref")$STAID)
 ```
 
 ```{r GAGESIII-SWIM}
 # Read gagesIII dbf that is a POR summary produced from 00_gage_info.Rmd
-gagesIII_db_full <- read.dbf(file = file.path(data_paths$data_dir, "gages3GFinfo.dbf"), as.is = TRUE) %>%
+gagesIII_db_full <- read.dbf(file = file.path(data_paths$data_dir, "SWIMGFinfo.dbf"), as.is = TRUE) %>%
   rename(total_obs = count_nu, total_poss_obs = TPdays1)
 
 # Read in shapefile
 gagesIII_sf_full <- read_sf(data_paths$SWIM_points_path, "SWIM_gage_loc") %>%
   st_zm(drop = TRUE) %>%
-  select(site_no = Gage_no, comid = COMID, reachcode = REACHCODE, reach_meas = REACH_meas) %>%
-  left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>%
-  # Update index information with NLDI gages
-  mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>%
-  select(-c(ref_COM, ref_RC, ref_RM))
+  # Use site_no as the authoritative station ID name
+  rename(site_no = Gage_no) %>%
+  # Drop fields in NLDI gages
+  select(-c(COMID, REACHCODE, REACH_meas)) %>%
+    # Join to NLDI reference gages
+  left_join(select(st_drop_geometry(ref_gages), id, comid = nhdpv2_COMID, 
+                   reachcode = nhdpv2_REACHCODE, 
+                   reach_meas = nhdpv2_REACH_measure), 
+            by = c("site_no" = "id")) 
   
 # Read Gages III shapefile
 gagesIII_sf <- gagesIII_sf_full %>% 
@@ -83,7 +82,8 @@ gagesIII_sf <- gagesIII_sf_full %>%
   distinct()
 
 # Start documenting gages that are dropped out; these gages have no mean daily Q
-gage_document(filter(select(st_drop_geometry(gagesIII_sf_full), site_no), !site_no %in% gagesIII_sf$site_no),
+gage_document(filter(select(st_drop_geometry(gagesIII_sf_full), site_no), 
+                     !site_no %in% gagesIII_sf$site_no),
               "GAGES-III", "No Q or COMID")
 
 # First subset
@@ -99,21 +99,28 @@ gagesIII_db <- gagesIII_db_full %>%
   distinct() 
 
 # Gages that don't fit the POR requirement, or are non-ST site types
-gage_document(filter(select(st_drop_geometry(gagesIII_sf), site_no), !site_no %in% gagesIII_db$site_no),
+gage_document(filter(select(st_drop_geometry(gagesIII_sf), site_no), 
+                     !site_no %in% gagesIII_db$site_no),
               "GAGES-III", "Non-ST; POR")
 
 # scan for potential canals/ditches among the remaining ST site types
 potCanalsIII <- gagesIII_db %>%
   # Flag station_nm with the follwing words
-  filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), collapse = "|"), station_nm)) %>%
+  filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), 
+                     collapse = "|"), station_nm)) %>%
   # Remove the station_nm with the following
-  filter(!grepl(paste(c("RIVER AT", "RIVER BELOW", "RIVER BL", "R AB", "R BL", "R ABV"), collapse = "|"), 
+  filter(!grepl(paste(c("RIVER AT", "RIVER BELOW", "RIVER BL", "R AB", "R BL", "R ABV"), 
+                      collapse = "|"), 
                 station_nm, ignore.case = TRUE)) %>%
   # These have been manually checked and are located in config.R
   filter(!site_no %in% gages_add) 
 
 # Write out for manual checking
-write.csv(potCanalsIII, "temp/GagestoCheck_GAGESIII.csv")
+if(!file.exists("temp/GagestoCheck_GAGESIII.csv")){
+  write.csv(potCanalsIII, "temp/GagestoCheck_GAGESIII.csv")
+} else {
+  potCanalsIII <- read.csv("temp/GagestoCheck_GAGESIII.csv")
+}
 
 # Subset further by streamgages below the minimum drainage area critera
 gagesIII_db_fin <- gagesIII_db %>% 
@@ -122,7 +129,8 @@ gagesIII_db_fin <- gagesIII_db %>%
 # Write out gages that dropped to document.
 gage_document(select(potCanalsIII, site_no), "GAGES-III", "ST; Pot. Canal")
 gage_document(gagesIII_db %>%
-                filter(!site_no %in% potCanalsIII$site_no, drain_area < min_da_km_gages) %>% 
+                filter(!site_no %in% potCanalsIII$site_no, 
+                       drain_area < min_da_km_gages) %>% 
                 select(site_no),
               "GAGES-III", "below DA")
 
@@ -146,8 +154,8 @@ mapview(gagesIII_sf_full, layer.name = "all GAGES-III")  +
   mapview(filter(gagesIII_sf, site_no %in% potCanalsIII$site_no), 
           layer.name = "Canals, man. checked", col.regions = "blue") + 
   mapview(filter(gagesIII_sf %>% 
-                   inner_join(select(gagesIII_db, site_no, drain_area), by = "site_no"),
-                 drain_area < min_da_km_gages),
+                   inner_join(select(gagesIII_db, site_no, drain_area), 
+                              by = "site_no"), drain_area < min_da_km_gages),
           layer.name = "below min DA", col.regions = "green") +
   mapview(gagesIII_final, layer.name = "Final GAGES-III", col.regions = "orange")
 
@@ -162,13 +170,14 @@ gageloc_db_full <- read.dbf(file = file.path(data_paths$data_dir, "GageLocGFinfo
 gageloc_sf_full <- read_sf(data_paths$nhdplus_gdb, "Gage") %>%
   # Drop Z geometries
   st_zm(drop = TRUE) %>%
-  select(site_no = SOURCE_FEA, comid = FLComID, reachcode = REACHCODE, reach_meas = Measure) %>%
+  rename(site_no = SOURCE_FEA) %>%
+  select(-c(FLComID, REACHCODE, Measure)) %>%
   # Disregard locations already removed in GAGESIII
   filter(!site_no %in% gagesIII_sf_full$site_no) %>%
   # Update index information with NLDI gages
-  left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>%
-  mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>%
-  select(-c(ref_COM, ref_RC, ref_RM))
+  left_join(select(st_drop_geometry(ref_gages), site_no = id, 
+                   comid = nhdpv2_COMID, reachcode = nhdpv2_COMID, 
+                   reach_meas = nhdpv2_REACH_measure ), by = "site_no")
 
 # Subset to locations with streamflow
 gageloc_sf <- gageloc_sf_full %>% 
@@ -182,29 +191,37 @@ gageloc_sf <- gageloc_sf_full %>%
   distinct()
 
 # Start documenting gages that are dropped out; these gages have no mean daily Q
-gage_document(filter(select(st_drop_geometry(gageloc_sf_full), site_no), !site_no %in% gageloc_sf$site_no),
+gage_document(filter(select(st_drop_geometry(gageloc_sf_full), site_no), 
+                     !site_no %in% gageloc_sf$site_no), 
               "GageLoc", "No Q, COMID")
 
 # do the same exercise with GageLoc, but with more site type attributes to filter out
 gageloc_db <- gageloc_db_full %>%
   filter(end_date > POR_Start | total_obs > min_obs, 
          !site_tp_cd %in% 
-           c("ST-CA", "ST-DCH", "FA-STS", "SB_TSM", "FA-DV", "FA-STS", "FA-CS", "FA-WWD", "FA-OF", "GW"), 
-         stat_cd == "00003") %>%
+           c("ST-CA", "ST-DCH", "FA-STS", "SB_TSM", "FA-DV", "FA-STS", "FA-CS",
+             "FA-WWD", "FA-OF", "GW"), stat_cd == "00003") %>%
   # Some sites are listed twice with same information but different agencies, 
   select(-c(agency_cd, ts_id))  %>% 
   distinct(site_no, drain_area, dec_lat_va, dec_long_v, .keep_all = T)
 
 # Gages that don't fit the POR requirement, or are non-ST site types
-gage_document(filter(select(gageloc_db_full, site_no), !site_no %in% gageloc_db$site_no),
+gage_document(filter(select(gageloc_db_full, site_no), 
+                     !site_no %in% gageloc_db$site_no), 
               "GageLoc", "Non-ST; POR")
 
 # scan for potential canals/ditches 
 potCanals_gageloc <- gageloc_db %>% 
-  filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), collapse = "|"), station_nm)) %>%
+  filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), 
+                     collapse = "|"), station_nm)) %>%
   filter(!site_no %in% gages_add)
 
-write.csv(potCanals_gageloc, "temp/GagestoCheck_GageLoc.csv")
+# Write out for manual checking
+if(!file.exists("temp/GagestoCheck_GageLoc.csv")){
+  write.csv(potCanals_gageloc, "temp/GagestoCheck_GageLoc.csv")
+} else {
+  potCanals_gageloc <- read.csv("temp/GagestoCheck_GageLoc.csv")
+}
 
 gageloc_db_fin <- gageloc_db %>% 
   filter(!site_no %in% potCanals_gageloc$site_no, drain_area > min_da_km_gages) 
@@ -226,7 +243,8 @@ gageloc_final <-  gageloc_sf %>%
   group_by(site_no) %>%
   filter(total_obs == max(total_obs)) %>%
   ungroup() %>%
-  st_sf()
+  st_sf() %>%
+  st_compatibalize(gagesIII_final)
 
 mapview(gageloc_sf_full, layer.name = "all GageLoc")  +  
   mapview(filter(gageloc_sf_full, !site_no %in% gageloc_sf$site_no), 
@@ -257,23 +275,43 @@ gfv11_db_full <- read.dbf(file = file.path(data_paths$data_dir, "GFV11.dbf"), as
 # Remove transboundary
 gfv11_sf <- gfv11_sf_full %>%
   # Remove streamgages assocaited with transboundary
-  filter(nchar(NHD_Unit) < 4, !Type_Gage %in% c(gagesIII_final$site_no, gageloc_final$site_no)) %>%
+  filter(nchar(NHD_Unit) < 4, 
+         !Type_Gage %in% c(gagesIII_final$site_no, gageloc_final$site_no)) %>%
   select(site_no = Type_Gage, NHDPlusID) %>%
   # join to get comid, reachcode, measure data
   left_join(st_drop_geometry(gageloc_sf_full), by = "site_no") %>%
   # Know from previous work 3 locations with no COM/RC/M
-  left_join(select(st_drop_geometry(nhd), COMID, REACHCODE, FromMeas), by = c("NHDPlusID" = "COMID")) %>%
+  left_join(select(st_drop_geometry(nhd), COMID, REACHCODE, FromMeas), 
+            by = c("NHDPlusID" = "COMID")) %>%
   # bring over RC/M for 3 gages missing them
   mutate(comid = ifelse(is.na(comid), NHDPlusID, comid),
          reachcode = ifelse(is.na(reachcode), REACHCODE, reachcode),
          reach_meas = ifelse(is.na(reach_meas), FromMeas, reach_meas)) %>%
   select(-c(NHDPlusID, REACHCODE, FromMeas)) %>%
   # Update index information with NLDI gages
-  left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>%
-  mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>%
-  select(-c(ref_COM, ref_RC, ref_RM)) %>%
+  left_join(select(st_drop_geometry(ref_gages), 
+                   id, nhdpv2_COMID, nhdpv2_REACHCODE, nhdpv2_REACH_measure), 
+            by = c("site_no" = "id")) %>%
+  mutate(comid = ifelse(!is.na(nhdpv2_COMID), nhdpv2_COMID, comid),
+         reachcode = ifelse(!is.na(nhdpv2_REACHCODE), 
+                            nhdpv2_REACHCODE, reachcode),
+         reach_meas = ifelse(!is.na(nhdpv2_REACH_measure), 
+                                    nhdpv2_REACH_measure, reach_meas)) %>%
+  select(site_no, comid, reachcode, reach_meas) %>%
+  filter(!is.na(comid)) %>%
   st_sf()
 
+# get NWIS, non-GF lat lon for GFv11 gages
+gfv11_nwis <- dataRetrieval::readNWISsite(gfv11_sf$site_no) %>%
+  filter(agency_cd == "USGS") %>%
+  select(site_no, dec_long_va, dec_lat_va) %>%
+  st_as_sf(coords = c("dec_long_va", "dec_lat_va"), crs = 4269)
+  
+# Bring NWIS geometries over to GFv11 data
+gfv11_sf_final <- st_drop_geometry(gfv11_sf) %>%
+  inner_join(gfv11_nwis, by = "site_no") %>%
+  st_as_sf()
+  
 # Build list of STAID - Drainage area for use in the GFv11 section
 # combine DA data from dbf outputs from 00_get_data
 DA_data <- rbind(select(gfv11_db_full, site_no, drain_area), 
@@ -281,16 +319,17 @@ DA_data <- rbind(select(gfv11_db_full, site_no, drain_area),
                  select(gagesIII_db_full, site_no, drain_area)) %>%
   dplyr::filter(!is.na(drain_area)) %>%
   dplyr::rename(drain_area_full = drain_area) %>%
-  dplyr::filter(site_no %in% gfv11_sf$site_no) %>%
+  dplyr::filter(site_no %in% gfv11_sf_final$site_no) %>%
   distinct()
 
 # final Gfv11 file, rebuild geometry
-gfv11_final <- gfv11_sf %>%
+gfv11_final <- gfv11_sf_final %>%
   left_join(gfv11_db_full, by = "site_no") %>% 
   mutate(source = "GFv11") %>%
   left_join(DA_data, by = "site_no") %>%
   dplyr::mutate(drain_area = if_else(is.na(drain_area), drain_area_full, drain_area)) %>%
-  select(-drain_area_full)
+  select(-drain_area_full) %>%
+  st_compatibalize(gagesIII_final)
 
 # Gages that don't fit the POR requirement, or are non-ST site types
 gage_document(filter(st_drop_geometry(gfv11_sf) %>% select(site_no), !site_no %in% gfv11_db_full$site_no),
@@ -300,11 +339,22 @@ mapview(gfv11_final, layer.name = "gfv11")
 ```
 
 ```{r Wrapup}
-# Bind together  
-gage_final <- do.call("rbind", 
-                      list(rename_geometry(gagesIII_final, "geom"), 
-                           rename_geometry(gageloc_final, "geom"),
-                           rename_geometry(gfv11_final, "geom")))
+# Bind them together
+gage_final <- data.table::rbindlist(list(gagesIII_final, gageloc_final, 
+                                         gfv11_final)) %>%
+  st_as_sf()
+
+# which gagesii_ref gages are missing from final set
+missing_gagesii_ref <- filter(gages_ii_ref, !id %in% gage_final$site_no) %>%
+  rename(site_no = id, comid = nhdpv2_COMID, reachcode = nhdpv2_REACHCODE, 
+         reach_meas = nhdpv2_REACH_measure) #%>%
+  inner_join(gageloc_db_full, by = "site_no") %>%
+  select(-c(agency_cd, site_tp_cd, coord_acy_, huc_cd, parm_cd, stat_cd, ts_id, missing_da)) %>%
+  st_compatibalize(gage_final)
+
+# Append those with data to gage_final
+gage_final <- data.table::rbindlist(list(gage_final, missing_gagesii_ref), fill = TRUE) %>%
+  st_as_sf()
 
 # Rank multiple gages that occur on same COMID
 gage_final_ranked <- gage_final %>% 
@@ -338,7 +388,7 @@ gage_final_ranked <- gage_final %>%
                         ifelse(end_date == max(end_date), max(gage_score) + 1, gage_score), gage_score)) %>%
   ungroup() %>%
   select(-n) %>%
-  st_transform(crs)
+  st_transform(geo_crs)
 
 # conver to emtpy strings
 write_sf(gage_final_ranked, gage_info_gpkg, "Gages")
-- 
GitLab