diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd index 5e2152403b7d6fa810bcaee1f4948b0dd5bd7ca1..ca24adecb3707212088a9cd0c6e447ef12564252 100644 --- a/workspace/00_get_data.Rmd +++ b/workspace/00_get_data.Rmd @@ -656,6 +656,23 @@ if(!file.exists(out$ref_flowline)) { out_list <- c(out_list, out) ``` +```{r reference_catchments} + +out <- list(ref_catchment = file.path(out_list$nhdplus_dir, (sb_c <- "reference_catchments.gpkg"))) + +if(!file.exists(out$ref_catchment)) { + if(is.null(sbtools::current_session())) + authenticate_sb() + + sbtools::item_file_download("61295190d34e40dd9c06bcd7", + names = sb_c, + destinations = out$ref_catchment) +} + +out_list <- c(out_list, out) +``` + + ```{r res} # Download Reservoir and dams data: ResOps, ISTARF and GRanDd # diff --git a/workspace/02_HI_navigate.Rmd b/workspace/02_HI_navigate.Rmd index 1af53d2a52d523a5a8b1b3fcf9de0b988c0d0fac..8e17e7066e36e45e87db049cab71ce2127b6b730 100644 --- a/workspace/02_HI_navigate.Rmd +++ b/workspace/02_HI_navigate.Rmd @@ -23,7 +23,7 @@ library(hyRefactor) library(tidyr) library(hyfabric) -vpu <- "20" +vpu <- VPU <- "20" # Load data paths data_paths <- jsonlite::read_json(file.path("cache", "data_paths.json")) @@ -34,6 +34,8 @@ source("R/NHD_navigate.R") source("R/hyRefactor_funs.R") source("R/config.R") source("R/ExCONUS_config.R") + +atts <- readRDS(file.path(data_paths$islands_dir, "nhdplus_flowline_attributes.rds")) ``` Define NHD dataset to use for refactoring and discretization. RPUs 20f and 20g had NHD attribution that behaved differently @@ -101,7 +103,7 @@ if(needs_layer(nav_gpkg, nhd_flowline)) { mutate(dend = ifelse(COMID %in% dend_COM$COMID, 1, 0)) # Bring in catchments - nhd_cats <- readRDS(staged_nhdplus$catchment) %>% + nhd_cats <- readRDS(staged_nhdplus_islands$catchment) %>% st_transform(crs) %>% st_make_valid() @@ -423,6 +425,67 @@ if(all(is.na(nav_POIs$Type_NID))) { mapview(filter(nav_POIs, !is.na(Type_NID))) ``` + +This chunk breaks up potential aggregated segments where the elevation range of the area contributing to the segment exceeds the elevation threshold defined in the configuration (`elev_diff`) +```{r Elevation Break POIs} +# derive incremental segments from POIs +inc_segs <- segment_increment(filter(nhd_prep, minNet == 1), filter(st_drop_geometry(nhd), + COMID %in% nav_POIs$COMID, COMID %in% filter(nhd_prep, minNet == 1)$COMID)) %>% + # bring over VAA data + inner_join(select(atts, COMID, LENGTHKM, MAXELEVSMO, MINELEVSMO, AreaSqKM, TotDASqKM), by = "COMID") + + # Build dataframe for creation of points based on breaks +elev_pois_split <- inc_segs %>% + group_by(POI_ID) %>% + # Get elevation info + mutate(MAXELEVSMO = na_if(MAXELEVSMO, -9998), MINELEVSMO = na_if(MINELEVSMO, -9998), + elev_diff_seg = max(MAXELEVSMO) - min(MINELEVSMO)) %>% + # Filter out to those incremental segments that need splitting above the defined elevation threshold + filter((max(MAXELEVSMO) - min(MINELEVSMO)) > (elev_diff * 100)) %>% + # Do not aggregate on fake flowlines + filter(AreaSqKM > 0, TotDASqKM > max_elev_TT_DA) %>% + # Order from upstream to downstream + arrange(desc(Hydroseq)) %>% + # Get attributes used for splitting + # csum_length = cumulative length US to DS along segment, cumsum_elev = cumulative US to DS elev diff along segment + # inc_elev = elevation range of each segment + # iter = estimated number of times we'd need to split existing agg flowlines, or number of rows in set + mutate(csum_length = cumsum(LENGTHKM), + inc_elev = cumsum(MAXELEVSMO - MINELEVSMO), + #inc_elev_diff = c(inc_elev[1], (inc_elev - lag(inc_elev))[-1]),
+ iter = min(round(max(inc_elev) / (elev_diff * 100)), n()), + elev_POI_ID = NA) %>% + # Remove segments we can't break + filter(iter > 0, n() > 1) %>% + # Track original iteration number + mutate(orig_iter = iter) %>% + ungroup() + +if(nrow(elev_pois_split) > 0) { + + # For each iteration + elev_POIs <- do.call("rbind", lapply(unique(elev_pois_split$POI_ID), split_elev, + elev_pois_split, elev_diff*100, max_elev_TT_DA)) %>% + filter(!elev_POI_ID %in% tmp_POIs$COMID, COMID == elev_POI_ID) %>% + mutate(Type_Elev = 1) %>% + select(COMID, Type_Elev) %>% + unique() + + if(nrow(elev_POIs) > 0){ + tmp_POIs <- POI_creation(select(elev_POIs, COMID, Type_Elev), nhd, "Elev") %>% + addType(., tmp_POIs, "Elev") + } + + write_sf(tmp_POIs, nav_gpkg, nav_poi_layer) +} else { + tmp_POIs <- read_sf(nav_gpkg, nav_poi_layer) + nhd <- read_sf(nav_gpkg, nhd_flowline) +} + +if(!all(is.na(tmp_POIs$Type_Elev))) + mapview(filter(tmp_POIs, Type_Elev == 1), layer.name = "Elevation break POIs", col.regions = "blue") +``` + Clean up and map final POIs ```{r} diff --git a/workspace/05_hyRefactor_HI.Rmd b/workspace/05_hyRefactor_HI.Rmd index fd6c7ac2e39985488f190b1388ddb53afa0294a5..87d5a95bdd93c4279831c58068584093a0a0e84d 100644 --- a/workspace/05_hyRefactor_HI.Rmd +++ b/workspace/05_hyRefactor_HI.Rmd @@ -86,7 +86,7 @@ POIs <- read_sf(nav_gpkg, final_poi_layer) %>% if (rpu_code == "20f"){ new_outlet_POI_atts <- filter(st_drop_geometry(nhd), COMID == 800001119) %>% select(COMID, Type_Conf_new = LevelPathI) - + POIs <- POIs %>% left_join(new_outlet_POI_atts, by = "COMID") %>% mutate(Type_Conf = ifelse(!is.na(Type_Conf_new), Type_Conf_new, Type_Conf)) %>% @@ -126,12 +126,13 @@ avoid <- dplyr::filter(nhd, (sqrt(AreaSqKM) / LENGTHKM) > 3 & AreaSqKM > 1) # Verified that it works on all RPUS events <- read_sf(nav_gpkg, gage_events_layer) %>% #filter(Split == 1) %>% - inner_join(select(st_drop_geometry(filter(nhd, refactor == 1)), AreaSqKM, LENGTHKM, COMID, FromMeas, ToMeas), by = "COMID") %>% + inner_join(select(st_drop_geometry(nhd), AreaSqKM, LENGTHKM, COMID, FromMeas, ToMeas), by = "COMID") %>% #filter(AreaSqKM > 1 & LENGTHKM > (combine_meters / 250)) %>% - filter(site_no == "16265600" | REACH_meas - FromMeas > 5 & AreaSqKM > 0.5 & ToMeas - REACH_meas > 5) %>% + filter(site_no %in% c("16265600", "16759600", "16700600", "16682000", "16681000") | + REACH_meas - FromMeas > 5 & AreaSqKM > 0.5 & ToMeas - REACH_meas > 5) %>% select(site_no, COMID, REACHCODE, REACH_meas) %>% # Events cannot be in terminal POIs, code seems to ignore the command not to split/combine those flowlines - filter(!COMID %in% filter(outlets, type == "terminal")$COMID) %>% + #filter(!COMID %in% filter(outlets, type == "terminal")$COMID) %>% filter(!COMID %in% avoid$COMID) # write out events @@ -221,7 +222,7 @@ if(needs_layer(out_refac_gpkg, divide_layer)) { fline_rec = reconciled, fdr = fdr, fac = fac, - para = para_reconcile, + para = 6, cache = cache_split, keep = NULL, vector_crs = crs) diff --git a/workspace/07-2_NonDend.Rmd b/workspace/07-2_NonDend.Rmd index a07d6337edd1f17f561b086b6e4b210eb39b6ca0..59548818df57dbc6377b56548522bbcc7b54d454 100644 --- a/workspace/07-2_NonDend.Rmd +++ b/workspace/07-2_NonDend.Rmd @@ -39,6 +39,8 @@ if (vpu == "20"){ full_nhd <- readRDS(file.path(data_paths$nhdplus_dir, "nhdplus_flowline.rds")) } +elev <- data_paths$elev_cm[grepl(paste0("Ned", substr(vpu, 1, 2)), data_paths$elev_cm, ignore.case = TRUE)] + # HUC extraction for specific NHD+ vpus if(vpu == "02"){ grep_exp <-"^02|^04"
grep_exp <- "^03|^08" } else { grep_exp <- paste0("^", substr(vpu, start = 1, stop = 2)) + elev <- append(elev, list(rpu_03g = data_paths$elev_cm$rpu_03g)) } cat_rpu_table <- readRDS(data_paths$fullcats_table) @@ -58,8 +61,6 @@ full_nhd <- full_nhd %>% vpu_WBD <- readRDS(file.path(data_paths$nhdplus_dir, "HUC12.rds")) %>% filter(grepl(paste0("^", grep_exp, ".*"), .data$HUC_12)) -elev <- data_paths$elev_cm[grepl(paste0("Ned", vpu), data_paths$elev_cm, ignore.case = TRUE)] - nhd <- st_transform(read_sf(gf_gpkg, nhd_flowline), crs) cats <- st_transform(read_sf(gf_gpkg, nhd_catchment), crs) divides <- st_transform(read_sf(gf_gpkg, divide_layer), crs) @@ -356,7 +357,7 @@ if(needs_layer(ND_gpkg, "missing_cats")){ st_make_valid() # Prob HRU - filter(all_hrus, POI_ID == 140402000209) - all_hrus <- divides_lu %>% + all_hrus <- filter(divides_lu, !is.na(POI_ID)) %>% group_by(POI_ID) %>% summarize(do_union = T) %>% sfheaders::sf_remove_holes(.) %>% diff --git a/workspace/R/ExCONUS_config.R b/workspace/R/ExCONUS_config.R index 37ea5fc72f7334406584634890ab8b4a9a39ba21..9888dbf9c8383530ad4782b9e48b3bf2d36c54c7 100644 --- a/workspace/R/ExCONUS_config.R +++ b/workspace/R/ExCONUS_config.R @@ -25,6 +25,8 @@ if(vpu == "20"){ min_da_km <- 5 split_meters <- 2500 combine_meters <- 500 + elev_diff <- 300 + max_elev_DA <- 1.5 } else { # Cusomizations for AK crs <- 3338 diff --git a/workspace/R/non_dend.R b/workspace/R/non_dend.R index fc3a2565de1e0f402314ce1c48c58a23dc13c089..2d2943ea2e9a1cd62192ec01e8fff2736bceb81a 100644 --- a/workspace/R/non_dend.R +++ b/workspace/R/non_dend.R @@ -82,7 +82,7 @@ assign_HUC12 <- function(divides_poi, HUC12_xwalk, nhd, CAC_num){ ungroup() %>% group_by(FEATUREID, HUC_12_int) %>% summarize(n_NHD = n(), - NHD_area = mean(AreaSqKM), + NHD_area = mean(AREASQKM), HUC_12_area = sum(AreaHUC12), intarea = sum(intArea), CAC = intarea / ((NHD_area - intarea) + (HUC_12_area - intarea) + intarea)) %>% @@ -118,7 +118,7 @@ assign_HUC12 <- function(divides_poi, HUC12_xwalk, nhd, CAC_num){ ungroup() %>% group_by(HUC_12_int) %>% summarize(n_HUC12 = n(), - NHD_area = sum(AreaSqKM), + NHD_area = sum(AREASQKM), HUC_12_area = mean(AreaHUC12), intarea = sum(intArea), CAC = intarea / ((NHD_area - intarea) + (HUC_12_area - intarea) + intarea)) %>% @@ -233,7 +233,7 @@ assign_HUC10 <- function(divides, HUC12_xwalk, nhd, CAC_num){ ungroup() %>% group_by(HUC_10) %>% mutate(n_NHD = n(), - NHD_area = mean(AreaSqKM), + NHD_area = mean(AREASQKM), HUC_10_area = mean(AreaHUC10), intarea = sum(intArea), CAC = intarea / ((NHD_area - intarea) + (HUC_10_area - intarea) + intarea)) %>% @@ -849,35 +849,46 @@ miss_term_assign <- function(term_outlets, divides_poi, nhd, elev){ st_intersection(divides_lu_poi) %>% filter(!member_COMID == incomid) - # convert cat buf to format for extract input - buf <- st_as_sf(cats_buff) - - #************************************************************** - # Then do a zonal statitics type operation of the DEM within each - # buffered polygon to get the min cell value, this is a hard coded path to where the dems reside - - elev <- as.character(elev) - elev <- elev[grepl(unique(missing_cats$rpu), elev)] - - dem <- terra::rast(elev) - ex <- terra::extract(dem, terra::vect(buf), min) - buf_df <- as.data.frame(ex)[,2, drop = FALSE] - - # Attribute with values - buf_df$outlet_COMID <- incomid - buf_df$neighbor_COMID <- as.character(cats_buff$member_COMID) - buf_df$POI_ID = as.character(cats_buff$POI_ID) - - colnames(buf_df) <- c("Elev","outlet_COMID", "neighbor_COMID", "POI_ID") - buf_df2 <- 
buf_df[which.min(buf_df$Elev), ] - - return(buf_df2) + if(nrow(cats_buff) == 0){ + miss_cat <- dplyr::select(st_drop_geometry(missing_cats), + outlet_COMID = member_COMID) %>% + dplyr::mutate(Elev = NA, neighbor_COMID = NA, POI_ID = NA) %>% + dplyr::select(Elev, outlet_COMID, neighbor_COMID, POI_ID) + return(miss_cat) + } else { + # convert cat buf to format for extract input + buf <- st_as_sf(cats_buff) + + #************************************************************** + # Then do a zonal statistics type operation of the DEM within each + # buffered polygon to get the min cell value; the DEM paths come from the elev argument + + elev <- as.character(elev) + elev <- elev[grepl(unique(missing_cats$rpu), elev)] + + dem <- terra::rast(elev) + ex <- terra::extract(dem, terra::vect(buf), min) + buf_df <- as.data.frame(ex)[,2, drop = FALSE] + + # Attribute with values + buf_df$outlet_COMID <- incomid + buf_df$neighbor_COMID <- as.character(cats_buff$member_COMID) + buf_df$POI_ID = as.character(cats_buff$POI_ID) + + colnames(buf_df) <- c("Elev","outlet_COMID", "neighbor_COMID", "POI_ID") + buf_df2 <- buf_df[which.min(buf_df$Elev), ] + + return(buf_df2) + } } - out_df <- do.call(rbind, - pbapply::pblapply(unique(term_outlets$outlet_COMID), + out_df <- do.call(rbind, + pbapply::pblapply(unique(term_outlets$outlet_COMID), assign_func, divides_poi, nhd, elev, cl = NULL)) - + + # out_df <- lapply(unique(term_outlets$outlet_COMID), + # assign_func, divides_poi, nhd, elev) + out_df <- out_df %>% left_join(dplyr::select(st_drop_geometry(divides_poi), HUC12_neighbor = HUC_12_int, member_COMID), by = c("neighbor_COMID" = "member_COMID")) %>% diff --git a/workspace/reference_catchments/build_catchments.Rmd b/workspace/reference_catchments/build_catchments.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..a2ccf0068f8c7f8cceaae86b2f0bfc06e0b8b37f --- /dev/null +++ b/workspace/reference_catchments/build_catchments.Rmd @@ -0,0 +1,356 @@ +--- +title: "Build Reference Catchments" +output: html_document +--- + +Project: GFv2.0 +Script purpose: Build a topologically correct, simple, valid version of NHDPlus2.1 catchments +Date: 05/03/2022 +Author: [Mike Johnson](mailto:jjohnson@lynker.com) + +```{r setup} +# Where to build intermediate data... +base_dir = './' + +# Delete all intermediate data at end?
+purge = TRUE + +source("catchments_build_config.R") +``` + +# Step 1: NHDPlus Catchment Data + +```{r} +df = get_bucket_df(epa_bucket, max = Inf) + +v = grep("_NHDPlusCatchment_", df$Key, value =TRUE) + +#### + +all = data.frame( + key = v, + VPU = sapply(strsplit(v, "_"), `[`, 3), + region = sapply(strsplit(v, "_"), `[`, 2), + link = paste0('https://', epa_bucket, '.s3.amazonaws.com/', v)) |> +mutate(outfile = paste0(epa_download, "/NHDPlus", VPU, ".7z"), + shp = paste0("NHDPlus", region, "/NHDPlus", VPU, '/NHDPlusCatchment/Catchment.shp')) + +df = filter(all, !all$shp %in% list.files(epa_download, recursive = TRUE, pattern = ".shp$")) + +#### + +for(i in 1:nrow(df)){ + save_object(object = df$key[i], bucket = epa_bucket,file = df$outfile[i]) + archive_extract(df$outfile[i], dir = epa_download) + unlink(df$outfile[i]) +} + +#### + +all$gpkg = paste0(catchments_dir, "/NHDPlus", all$VPU, ".gpkg") + +df2 = filter(all, !all$gpkg %in% list.files(catchments_dir, full.names = TRUE)) + +if(nrow(df2) > 0){ + calls = paste('ogr2ogr -f GPKG', df2$gpkg, df2$shp) + + for(i in 1:length(calls)){ + system(calls[i]) + message(i) + } +} + + +if(purge){ + unlink(epa_download, recursive = TRUE) +} +``` + +# Step 2: Clean + +```{r} +files = list.files(catchments_dir, + full.names = TRUE, + pattern = ".gpkg$") + +out_geojson = gsub('gpkg', + 'geojson', paste0(cleaned_dir, basename(files))) + + +for(i in 1:length(files)){ + + if(!file.exists(out_geojson[i])){ + catchments = read_sf(files[i]) + names(catchments) = tolower(names(catchments)) + + ll = clean_geometry( + catchments, + sys = TRUE, + 'featureid', + keep = NULL) + + lt = st_make_valid(ll) + lt$areasqkm = drop_units(set_units(st_area(lt), "km2")) + lt = st_transform(lt, 4326) + + write_sf(lt, out_geojson[i]) + + rm(ll); rm(catchments); rm(lt); gc() + } + + log_info("Cleaned VPU: ",i, " of ", length(files)) +} + + + +if(purge){ + unlink(catchments_dir, recursive = TRUE) +} +``` + +# Step 3: Simplify + +```{r} +all = list.files(cleaned_dir, + full.names = TRUE, + pattern = ".geojson$") + +out = paste0(simplified_dir, "/", + gsub(".geojson", "", + basename(all)), "_t", num, ".geojson") + +out2 = gsub('geojson', "gpkg", out) + +for(i in 1:length(out2)){ + if(!file.exists(out2[i])){ + system(paste0('node --max-old-space-size=16000 `which mapshaper` ', all[i], ' -simplify ',num, '% keep-shapes -o ', out[i])) + system(paste('ogr2ogr', out2[i], out[i], "-nln catchments")) + unlink(out[i]) + } + + log_info("Simplified: ", i , " of ", length(out2)) +} + +``` + +# Step 4: Rectify + +```{r} +topos = pu_adjanceny() + +files = list.files(simplified_dir, full.names = TRUE, pattern = ".gpkg$") + + +for(i in 1:nrow(topos)){ + + VPU1 = topos$VPU1[i] + VPU2 = topos$VPU2[i] + + v_path_1 = find_file_path(VPU1, files, new_dir) + v_path_2 = find_file_path(VPU2, files, new_dir) + + log_info('\tRead in touching pairs') + v1 = read_sf(v_path_1) |> + st_transform(5070) |> + st_make_valid() |> + rename_geometry('geometry') + + v2 = read_sf(v_path_2) |> + st_transform(5070) |> + st_make_valid() |> + rename_geometry('geometry') + + log_info('\tIsolate "edge" catchments') + old_1 = st_filter(v1, v2) + old_2 = st_filter(v2, v1) + + log_info('\tErase fragments of OVERLAP') + new_1 = st_difference(old_1, st_union(st_combine(old_2))) + new_2 = st_difference(old_2, st_union(st_combine(old_1))) + + u1 = sf_remove_holes(st_union(new_1)) + u2 = sf_remove_holes(st_union(new_2)) + + log_info('\tBuild Fragments') + + base_cats = bind_rows(new_1, new_2) + base_union = 
sf_remove_holes(st_union(c(u1,u2))) + + frags = st_difference(base_union, st_union(st_combine(base_cats))) |> + st_cast("MULTIPOLYGON") |> + st_cast("POLYGON") |> + st_as_sf() |> + mutate(id = 1:n()) %>% + rename_geometry('geometry') |> + st_buffer(.0001) + + out = tryCatch({ + suppressWarnings({ + st_intersection(frags, base_cats) %>% + st_collection_extract("POLYGON") + }) + }, error = function(e) { NULL }) + + + ints = out %>% + mutate(l = st_area(.)) %>% + group_by(.data$id) %>% + slice_max(.data$l, with_ties = FALSE) %>% + ungroup() %>% + select(.data$featureid, .data$id, .data$l) %>% + st_drop_geometry() + + tj = right_join(frags, + ints, + by = "id") %>% + bind_rows(base_cats) %>% + dplyr::select(-.data$id) %>% + group_by(.data$featureid) %>% + mutate(n = n()) %>% + ungroup() + + in_cat = suppressWarnings({ + hyRefactor::union_polygons_geos(filter(tj, .data$n > 1) , 'featureid') %>% + bind_rows(dplyr::select(filter(tj, .data$n == 1), .data$featureid)) %>% + mutate(tmpID = 1:n()) |> + st_make_valid() + }) + + log_info('\tReassemble VPUS') + + inds = in_cat$featureid[in_cat$featureid %in% v1$featureid] + + to_keep_1 = bind_rows( filter(v1, !featureid %in% inds), + filter(in_cat, featureid %in% inds)) |> + select(names(v1)) %>% + mutate(areasqkm = hyRefactor::add_areasqkm(.)) + + inds2 = in_cat$featureid[in_cat$featureid %in% v2$featureid] + + to_keep_2 = bind_rows( filter(v2, !featureid %in% inds2), + filter(in_cat, featureid %in% inds2)) |> + select(names(v1)) %>% + mutate(areasqkm = hyRefactor::add_areasqkm(.)) + + log_info('\tWrite VPUS') + write_sf(to_keep_1, v_path_1, "catchments", overwrite = TRUE) + write_sf(to_keep_2, v_path_2, "catchments", overwrite = TRUE) + + log_info('Finished: ', i , " of ", nrow(topos)) + +} + +if(purge){ + unlink(cleaned_dir, recursive = TRUE) +} + +``` + +# Step 5: Merge CONUS + +```{r} + +gpkgs <- list.files(simplified_dir, full.names = TRUE, pattern = ".gpkg$") + +vpu <- gsub(".gpkg", "", gsub(paste0(simplified_dir, "/NHDPlus"), "", gpkgs)) +vpu <- sapply(strsplit(vpu, "_"),"[[",1) + +rpuid <- get_vaa("rpuid") + +output <- list() + +for (i in 1:length(gpkgs)) { + + tst <- read_sf(gpkgs[i]) |> + st_cast("POLYGON") %>% + mutate(areasqkm = add_areasqkm(.)) |> + mutate(tmpID = 1:n()) |> + rename_geometry("geometry") + + dups <- tst$featureid[which(duplicated(tst$featureid))] + + if(length(dups) != 0){ + + bad <- filter(tst, featureid %in% dups) |> + group_by(featureid) |> + slice_min(areasqkm) |> + ungroup() + + good <- filter(tst, !tmpID %in% bad$tmpID) + + out <- tryCatch( + { + suppressWarnings({ + st_intersection(bad, select(good, tmpID)) %>% + st_collection_extract("POLYGON") |> + filter(tmpID != tmpID.1) + }) + }, + error = function(e) { + NULL + } + ) + + ints <- out %>% + mutate(l = st_area(.)) %>% + group_by(.data$tmpID) %>% + slice_max(.data$l, with_ties = FALSE) %>% + ungroup() %>% + select(.data$tmpID.1, .data$tmpID, .data$l) %>% + st_drop_geometry() + + tj <- right_join(bad, + ints, + by = "tmpID" + ) %>% + select(badID = tmpID, tmpID = tmpID.1, featureid) + + tmp <- bind_rows(tj, filter(good, tmpID %in% tj$tmpID)) |> + hyRefactor::union_polygons_geos("tmpID") |> + select(tmpID) + + tmp2 <- filter(good, tmpID %in% tmp$tmpID) |> + st_drop_geometry() |> + right_join(tmp) + + tst = bind_rows( + filter(good, !tmpID %in% tmp2$tmpID), + tmp2 + ) +} + + + output[[i]] <- tst |> + mutate(vpuid = vpu[i]) |> + left_join(rpuid, by = c("featureid" = "comid")) |> + rename_geometry("geometry") |> + select(-gridcode, -sourcefc, -tmpID) |> + 
st_transform(facfdr_crs) + + message(vpu[i]) +} + + +all <- bind_rows(output) |> + st_make_valid() + + +unlink("data/reference_catchments.gpkg") +write_sf(all, "data/reference_catchments.gpkg") + +if(purge){ + unlink(simplified_dir, recursive = TRUE) +} + +``` + +# Step 6: Upload to ScienceBase + +```{r} +sbtools::authenticate_sb() +sbtools::item_append_files("61295190d34e40dd9c06bcd7", "data/reference_catchments.gpkg") +``` + + + + diff --git a/workspace/reference_catchments/catchments_build_config.R b/workspace/reference_catchments/catchments_build_config.R new file mode 100644 index 0000000000000000000000000000000000000000..4c455744b1ac88f606a816c9e78299ff950dc150 --- /dev/null +++ b/workspace/reference_catchments/catchments_build_config.R @@ -0,0 +1,66 @@ +# Setup +library(dplyr) +library(rmapshaper) +library(nhdplusTools) +library(hyRefactor) +library(sf) +library(units) +library(logger) +library(aws.s3) +library(archive) + +sf_use_s2(FALSE) + +epa_bucket = 'edap-ow-data-commons' +epa_download = paste0(base_dir, '/01_EPA_downloads/') +catchments_dir = paste0(base_dir, '/02_Catchments/') +cleaned_dir = paste0(base_dir, '/03_cleaned_catchments/') +simplified_dir = paste0(base_dir, '/04_simplified_catchments/') + + +dir.create(epa_download, showWarnings = FALSE) +dir.create(catchments_dir, showWarnings = FALSE) +dir.create(cleaned_dir, showWarnings = FALSE) +dir.create(simplified_dir, showWarnings = FALSE) +dir.create('data/', showWarnings = FALSE) + +facfdr_crs = '+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs' + +num = 20 + + +pu_adjanceny = function(type = "vpu"){ + + if(type == "vpu"){ + pu = nhdplusTools::vpu_boundaries + id = "VPUID" + } else { + pu = nhdplusTools::rpu_boundaries + id = "RPUID" + } + + dt = sf::st_make_valid(pu) + + x = as.data.frame(which(st_intersects(dt, sparse = FALSE), arr.ind = T)) + + vars = lapply(1:nrow(x), function(y){ + A <- as.numeric(x[y, ]) + A[order(A)] + }) + + do.call('rbind', vars)[!duplicated(vars),] |> + data.frame() |> + setNames(c("VPU1", "VPU2")) |> + filter(VPU1 != VPU2) |> + mutate(VPU1 = dt[[id]][VPU1], + VPU2 = dt[[id]][VPU2]) +} + +find_file_path = function(VPU, files, new_dir){ + tmp01 = grep(VPU, files, value = TRUE) + tmp02 = paste0(new_dir, "/", gsub("\\_.*", "", basename(tmp01)), ".gpkg") + + if(!file.exists(tmp02)){ file.copy(tmp01, tmp02) } + + tmp02 }
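
For orientation, a minimal usage sketch of the two helpers defined in catchments_build_config.R, as Step 4 of build_catchments.Rmd exercises them. It assumes the working directory is workspace/reference_catchments/ and that `base_dir` is defined before the config is sourced (as in the setup chunk); `new_dir` is a hypothetical staging directory introduced here for illustration, since the config does not define one.

```r
# Minimal sketch, not part of the workflow above.
base_dir <- "./"                      # must exist before sourcing the config
source("catchments_build_config.R")   # loads libraries, paths, and the helpers

# Pairs of touching vector processing units whose simplified catchments
# need edge rectification; a data.frame with columns VPU1 and VPU2.
topos <- pu_adjanceny("vpu")
head(topos)

# Hypothetical working-copy location (the config never sets new_dir itself).
new_dir <- file.path(base_dir, "05_rectified_catchments")
dir.create(new_dir, showWarnings = FALSE)

# Copy the simplified geopackage for one member of the first pair into
# new_dir (if not already there) and return its path.
files    <- list.files(simplified_dir, full.names = TRUE, pattern = ".gpkg$")
v_path_1 <- find_file_path(topos$VPU1[1], files, new_dir)
```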