From 76b02bd326ee035b8770c3d19848aac14a2f728b Mon Sep 17 00:00:00 2001 From: Bock <abock@usgs.gov> Date: Wed, 7 Feb 2024 13:35:57 -0700 Subject: [PATCH] updates --- workspace/01_Gage_Selection.Rmd | 152 +++++++++++++++++++++----------- 1 file changed, 101 insertions(+), 51 deletions(-) diff --git a/workspace/01_Gage_Selection.Rmd b/workspace/01_Gage_Selection.Rmd index e8638f2..8230103 100644 --- a/workspace/01_Gage_Selection.Rmd +++ b/workspace/01_Gage_Selection.Rmd @@ -22,7 +22,6 @@ library(sf) library(hyRefactor) library(tidyr) library(foreign) -library(rgdal) library(hyfabric) library(mapview) @@ -31,45 +30,45 @@ source("R/utils.R") source("R/NHD_navigate.R") # Gages which are manually inspected from "Gages_to_check" file and added back in source("R/config.R") +source("R/user_vars.R") source("R/gage_mod.R") # Load user-defined data paths data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json")) # Load NHDPlus Data if precprocessed to RDS format nhdplus_path(data_paths$nhdplus_gdb) -staged_nhd <- stage_national_data(nhdplus_data = data_paths$nhdplus_gdb, - output_path = data_paths$nhdplus_dir) - # Load NHD -nhd <- readRDS(staged_nhd$flowline) +nhd <- read_sf(data_paths$ref_fl) # clear dropped gages file if(file.exists("cache/dropped_gages.csv")){ file.remove("cache/dropped_gages.csv") } -# Update location index information from other sources where necessary from NLDI reference gage set -ref_gages <- jsonlite::fromJSON("https://github.com/internetofwater/ref_gages/releases/download/v0.5/usgs_nldi_gages.geojson", - flatten = TRUE)$features %>% - select(-geometry.type, -geometry.coordinates, ref_site_no = properties.id, - ref_RC = properties.nhdpv2_REACHCODE, ref_RM = properties.nhdpv2_REACH_measure, - ref_COM = properties.nhdpv2_COMID) %>% - filter(!is.na(ref_COM) & ref_COM > 0) +# gages II +gages_ii <- read_sf(data_paths$gagesii_lyr) +ref_gages <- read_sf("data/nldi/usgs_nldi_gages.geojson") +gages_ii_ref <- filter(ref_gages, id %in% + filter(gages_ii, CLASS == "Ref")$STAID) ``` ```{r GAGESIII-SWIM} # Read gagesIII dbf that is a POR summary produced from 00_gage_info.Rmd -gagesIII_db_full <- read.dbf(file = file.path(data_paths$data_dir, "gages3GFinfo.dbf"), as.is = TRUE) %>% +gagesIII_db_full <- read.dbf(file = file.path(data_paths$data_dir, "SWIMGFinfo.dbf"), as.is = TRUE) %>% rename(total_obs = count_nu, total_poss_obs = TPdays1) # Read in shapefile gagesIII_sf_full <- read_sf(data_paths$SWIM_points_path, "SWIM_gage_loc") %>% st_zm(drop = TRUE) %>% - select(site_no = Gage_no, comid = COMID, reachcode = REACHCODE, reach_meas = REACH_meas) %>% - left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>% - # Update index information with NLDI gages - mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>% - select(-c(ref_COM, ref_RC, ref_RM)) + # Use site_no as the authoritative station ID name + rename(site_no = Gage_no) %>% + # Drop fields in NLDI gages + select(-c(COMID, REACHCODE, REACH_meas)) %>% + # Join to NLDI reference gages + left_join(select(st_drop_geometry(ref_gages), id, comid = nhdpv2_COMID, + reachcode = nhdpv2_REACHCODE, + reach_meas = nhdpv2_REACH_measure), + by = c("site_no" = "id")) # Read Gages III shapefile gagesIII_sf <- gagesIII_sf_full %>% @@ -83,7 +82,8 @@ gagesIII_sf <- gagesIII_sf_full %>% distinct() # Start documenting gages that are dropped out; these gages have no mean daily Q -gage_document(filter(select(st_drop_geometry(gagesIII_sf_full), site_no), !site_no %in% gagesIII_sf$site_no), +gage_document(filter(select(st_drop_geometry(gagesIII_sf_full), site_no), + !site_no %in% gagesIII_sf$site_no), "GAGES-III", "No Q or COMID") # First subset @@ -99,21 +99,28 @@ gagesIII_db <- gagesIII_db_full %>% distinct() # Gages that don't fit the POR requirement, or are non-ST site types -gage_document(filter(select(st_drop_geometry(gagesIII_sf), site_no), !site_no %in% gagesIII_db$site_no), +gage_document(filter(select(st_drop_geometry(gagesIII_sf), site_no), + !site_no %in% gagesIII_db$site_no), "GAGES-III", "Non-ST; POR") # scan for potential canals/ditches among the remaining ST site types potCanalsIII <- gagesIII_db %>% # Flag station_nm with the follwing words - filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), collapse = "|"), station_nm)) %>% + filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), + collapse = "|"), station_nm)) %>% # Remove the station_nm with the following - filter(!grepl(paste(c("RIVER AT", "RIVER BELOW", "RIVER BL", "R AB", "R BL", "R ABV"), collapse = "|"), + filter(!grepl(paste(c("RIVER AT", "RIVER BELOW", "RIVER BL", "R AB", "R BL", "R ABV"), + collapse = "|"), station_nm, ignore.case = TRUE)) %>% # These have been manually checked and are located in config.R filter(!site_no %in% gages_add) # Write out for manual checking -write.csv(potCanalsIII, "temp/GagestoCheck_GAGESIII.csv") +if(!file.exists("temp/GagestoCheck_GAGESIII.csv")){ + write.csv(potCanalsIII, "temp/GagestoCheck_GAGESIII.csv") +} else { + potCanalsIII <- read.csv("temp/GagestoCheck_GAGESIII.csv") +} # Subset further by streamgages below the minimum drainage area critera gagesIII_db_fin <- gagesIII_db %>% @@ -122,7 +129,8 @@ gagesIII_db_fin <- gagesIII_db %>% # Write out gages that dropped to document. gage_document(select(potCanalsIII, site_no), "GAGES-III", "ST; Pot. Canal") gage_document(gagesIII_db %>% - filter(!site_no %in% potCanalsIII$site_no, drain_area < min_da_km_gages) %>% + filter(!site_no %in% potCanalsIII$site_no, + drain_area < min_da_km_gages) %>% select(site_no), "GAGES-III", "below DA") @@ -146,8 +154,8 @@ mapview(gagesIII_sf_full, layer.name = "all GAGES-III") + mapview(filter(gagesIII_sf, site_no %in% potCanalsIII$site_no), layer.name = "Canals, man. checked", col.regions = "blue") + mapview(filter(gagesIII_sf %>% - inner_join(select(gagesIII_db, site_no, drain_area), by = "site_no"), - drain_area < min_da_km_gages), + inner_join(select(gagesIII_db, site_no, drain_area), + by = "site_no"), drain_area < min_da_km_gages), layer.name = "below min DA", col.regions = "green") + mapview(gagesIII_final, layer.name = "Final GAGES-III", col.regions = "orange") @@ -162,13 +170,14 @@ gageloc_db_full <- read.dbf(file = file.path(data_paths$data_dir, "GageLocGFinfo gageloc_sf_full <- read_sf(data_paths$nhdplus_gdb, "Gage") %>% # Drop Z geometries st_zm(drop = TRUE) %>% - select(site_no = SOURCE_FEA, comid = FLComID, reachcode = REACHCODE, reach_meas = Measure) %>% + rename(site_no = SOURCE_FEA) %>% + select(-c(FLComID, REACHCODE, Measure)) %>% # Disregard locations already removed in GAGESIII filter(!site_no %in% gagesIII_sf_full$site_no) %>% # Update index information with NLDI gages - left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>% - mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>% - select(-c(ref_COM, ref_RC, ref_RM)) + left_join(select(st_drop_geometry(ref_gages), site_no = id, + comid = nhdpv2_COMID, reachcode = nhdpv2_COMID, + reach_meas = nhdpv2_REACH_measure ), by = "site_no") # Subset to locations with streamflow gageloc_sf <- gageloc_sf_full %>% @@ -182,29 +191,37 @@ gageloc_sf <- gageloc_sf_full %>% distinct() # Start documenting gages that are dropped out; these gages have no mean daily Q -gage_document(filter(select(st_drop_geometry(gageloc_sf_full), site_no), !site_no %in% gageloc_sf$site_no), +gage_document(filter(select(st_drop_geometry(gageloc_sf_full), site_no), + !site_no %in% gageloc_sf$site_no), "GageLoc", "No Q, COMID") # do the same exercise with GageLoc, but with more site type attributes to filter out gageloc_db <- gageloc_db_full %>% filter(end_date > POR_Start | total_obs > min_obs, !site_tp_cd %in% - c("ST-CA", "ST-DCH", "FA-STS", "SB_TSM", "FA-DV", "FA-STS", "FA-CS", "FA-WWD", "FA-OF", "GW"), - stat_cd == "00003") %>% + c("ST-CA", "ST-DCH", "FA-STS", "SB_TSM", "FA-DV", "FA-STS", "FA-CS", + "FA-WWD", "FA-OF", "GW"), stat_cd == "00003") %>% # Some sites are listed twice with same information but different agencies, select(-c(agency_cd, ts_id)) %>% distinct(site_no, drain_area, dec_lat_va, dec_long_v, .keep_all = T) # Gages that don't fit the POR requirement, or are non-ST site types -gage_document(filter(select(gageloc_db_full, site_no), !site_no %in% gageloc_db$site_no), +gage_document(filter(select(gageloc_db_full, site_no), + !site_no %in% gageloc_db$site_no), "GageLoc", "Non-ST; POR") # scan for potential canals/ditches potCanals_gageloc <- gageloc_db %>% - filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), collapse = "|"), station_nm)) %>% + filter(grepl(paste(c("DITCH", "ditch", "CANAL", "canal", "CN", "DIV", "div"), + collapse = "|"), station_nm)) %>% filter(!site_no %in% gages_add) -write.csv(potCanals_gageloc, "temp/GagestoCheck_GageLoc.csv") +# Write out for manual checking +if(!file.exists("temp/GagestoCheck_GageLoc.csv")){ + write.csv(potCanals_gageloc, "temp/GagestoCheck_GageLoc.csv") +} else { + potCanals_gageloc <- read.csv("temp/GagestoCheck_GageLoc.csv") +} gageloc_db_fin <- gageloc_db %>% filter(!site_no %in% potCanals_gageloc$site_no, drain_area > min_da_km_gages) @@ -226,7 +243,8 @@ gageloc_final <- gageloc_sf %>% group_by(site_no) %>% filter(total_obs == max(total_obs)) %>% ungroup() %>% - st_sf() + st_sf() %>% + st_compatibalize(gagesIII_final) mapview(gageloc_sf_full, layer.name = "all GageLoc") + mapview(filter(gageloc_sf_full, !site_no %in% gageloc_sf$site_no), @@ -257,23 +275,43 @@ gfv11_db_full <- read.dbf(file = file.path(data_paths$data_dir, "GFV11.dbf"), as # Remove transboundary gfv11_sf <- gfv11_sf_full %>% # Remove streamgages assocaited with transboundary - filter(nchar(NHD_Unit) < 4, !Type_Gage %in% c(gagesIII_final$site_no, gageloc_final$site_no)) %>% + filter(nchar(NHD_Unit) < 4, + !Type_Gage %in% c(gagesIII_final$site_no, gageloc_final$site_no)) %>% select(site_no = Type_Gage, NHDPlusID) %>% # join to get comid, reachcode, measure data left_join(st_drop_geometry(gageloc_sf_full), by = "site_no") %>% # Know from previous work 3 locations with no COM/RC/M - left_join(select(st_drop_geometry(nhd), COMID, REACHCODE, FromMeas), by = c("NHDPlusID" = "COMID")) %>% + left_join(select(st_drop_geometry(nhd), COMID, REACHCODE, FromMeas), + by = c("NHDPlusID" = "COMID")) %>% # bring over RC/M for 3 gages missing them mutate(comid = ifelse(is.na(comid), NHDPlusID, comid), reachcode = ifelse(is.na(reachcode), REACHCODE, reachcode), reach_meas = ifelse(is.na(reach_meas), FromMeas, reach_meas)) %>% select(-c(NHDPlusID, REACHCODE, FromMeas)) %>% # Update index information with NLDI gages - left_join(select(ref_gages, ref_site_no, ref_COM, ref_RC, ref_RM), by = c("site_no" = "ref_site_no")) %>% - mutate(comid = ref_COM, reachcode = ref_RC, reach_meas = ref_RM) %>% - select(-c(ref_COM, ref_RC, ref_RM)) %>% + left_join(select(st_drop_geometry(ref_gages), + id, nhdpv2_COMID, nhdpv2_REACHCODE, nhdpv2_REACH_measure), + by = c("site_no" = "id")) %>% + mutate(comid = ifelse(!is.na(nhdpv2_COMID), nhdpv2_COMID, comid), + reachcode = ifelse(!is.na(nhdpv2_REACHCODE), + nhdpv2_REACHCODE, reachcode), + reach_meas = ifelse(!is.na(nhdpv2_REACH_measure), + nhdpv2_REACH_measure, reach_meas)) %>% + select(site_no, comid, reachcode, reach_meas) %>% + filter(!is.na(comid)) %>% st_sf() +# get NWIS, non-GF lat lon for GFv11 gages +gfv11_nwis <- dataRetrieval::readNWISsite(gfv11_sf$site_no) %>% + filter(agency_cd == "USGS") %>% + select(site_no, dec_long_va, dec_lat_va) %>% + st_as_sf(coords = c("dec_long_va", "dec_lat_va"), crs = 4269) + +# Bring NWIS geometries over to GFv11 data +gfv11_sf_final <- st_drop_geometry(gfv11_sf) %>% + inner_join(gfv11_nwis, by = "site_no") %>% + st_as_sf() + # Build list of STAID - Drainage area for use in the GFv11 section # combine DA data from dbf outputs from 00_get_data DA_data <- rbind(select(gfv11_db_full, site_no, drain_area), @@ -281,16 +319,17 @@ DA_data <- rbind(select(gfv11_db_full, site_no, drain_area), select(gagesIII_db_full, site_no, drain_area)) %>% dplyr::filter(!is.na(drain_area)) %>% dplyr::rename(drain_area_full = drain_area) %>% - dplyr::filter(site_no %in% gfv11_sf$site_no) %>% + dplyr::filter(site_no %in% gfv11_sf_final$site_no) %>% distinct() # final Gfv11 file, rebuild geometry -gfv11_final <- gfv11_sf %>% +gfv11_final <- gfv11_sf_final %>% left_join(gfv11_db_full, by = "site_no") %>% mutate(source = "GFv11") %>% left_join(DA_data, by = "site_no") %>% dplyr::mutate(drain_area = if_else(is.na(drain_area), drain_area_full, drain_area)) %>% - select(-drain_area_full) + select(-drain_area_full) %>% + st_compatibalize(gagesIII_final) # Gages that don't fit the POR requirement, or are non-ST site types gage_document(filter(st_drop_geometry(gfv11_sf) %>% select(site_no), !site_no %in% gfv11_db_full$site_no), @@ -300,11 +339,22 @@ mapview(gfv11_final, layer.name = "gfv11") ``` ```{r Wrapup} -# Bind together -gage_final <- do.call("rbind", - list(rename_geometry(gagesIII_final, "geom"), - rename_geometry(gageloc_final, "geom"), - rename_geometry(gfv11_final, "geom"))) +# Bind them together +gage_final <- data.table::rbindlist(list(gagesIII_final, gageloc_final, + gfv11_final)) %>% + st_as_sf() + +# which gagesii_ref gages are missing from final set +missing_gagesii_ref <- filter(gages_ii_ref, !id %in% gage_final$site_no) %>% + rename(site_no = id, comid = nhdpv2_COMID, reachcode = nhdpv2_REACHCODE, + reach_meas = nhdpv2_REACH_measure) #%>% + inner_join(gageloc_db_full, by = "site_no") %>% + select(-c(agency_cd, site_tp_cd, coord_acy_, huc_cd, parm_cd, stat_cd, ts_id, missing_da)) %>% + st_compatibalize(gage_final) + +# Append those with data to gage_final +gage_final <- data.table::rbindlist(list(gage_final, missing_gagesii_ref), fill = TRUE) %>% + st_as_sf() # Rank multiple gages that occur on same COMID gage_final_ranked <- gage_final %>% @@ -338,7 +388,7 @@ gage_final_ranked <- gage_final %>% ifelse(end_date == max(end_date), max(gage_score) + 1, gage_score), gage_score)) %>% ungroup() %>% select(-n) %>% - st_transform(crs) + st_transform(geo_crs) # conver to emtpy strings write_sf(gage_final_ranked, gage_info_gpkg, "Gages") -- GitLab