diff --git a/workspace/reference_catchments/build_catchments.Rmd b/workspace/reference_catchments/build_catchments.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..a2ccf0068f8c7f8cceaae86b2f0bfc06e0b8b37f
--- /dev/null
+++ b/workspace/reference_catchments/build_catchments.Rmd
@@ -0,0 +1,356 @@
+---
+title: "Build Reference Catchments"
+output: html_document
+---
+
+Project: GFv2.0  
+Script purpose: Build a topologically correct, simple, valid version of NHDPlus2.1 catchments
+Date: 05/03/2022 
+Author: [Mike Johnson](jjohnson@lynker.com)
+
+```{r setup}
+# Root directory under which all intermediate build artifacts are written.
+# catchments_build_config.R reads this when sourced below.
+base_dir   = './'
+
+# When TRUE, each step deletes its input directory after it finishes,
+# so only the final outputs remain on disk.
+purge = TRUE
+
+# Loads libraries, directory paths, constants, and helper functions.
+source("catchments_build_config.R")
+```
+
+# Step 1: NHDPlus Catchment Data
+
+```{r}
+# List every object in the EPA S3 bucket and keep only the
+# NHDPlus catchment archives.
+df = get_bucket_df(epa_bucket, max = Inf)
+
+v = grep("_NHDPlusCatchment_", df$Key, value = TRUE)
+
+####
+
+# One row per VPU archive: its S3 key, download URL, local .7z target,
+# and the shapefile it extracts to.
+# (Key layout assumed: <prefix>_<region>_<VPU>_NHDPlusCatchment_<...> --
+# confirm against the bucket if downloads come back empty.)
+all = data.frame(
+  key = v,
+  VPU = sapply(strsplit(v, "_"), `[`, 3),
+  region = sapply(strsplit(v, "_"), `[`, 2),
+  link = paste0('https://', epa_bucket, '.s3.amazonaws.com/', v)) |>
+mutate(outfile = paste0(epa_download, "/NHDPlus", VPU, ".7z"),
+       shp     = paste0("NHDPlus", region, "/NHDPlus", VPU, '/NHDPlusCatchment/Catchment.shp'))
+
+# Only download archives whose shapefile is not already extracted on disk.
+df = filter(all, !all$shp %in% list.files(epa_download, recursive  = TRUE, pattern = ".shp$"))
+
+####
+
+# seq_len() (not 1:nrow) so the loop is skipped cleanly when everything
+# has already been downloaded (nrow(df) == 0 made 1:nrow(df) == c(1, 0)).
+for(i in seq_len(nrow(df))){
+  save_object(object = df$key[i], bucket = epa_bucket, file = df$outfile[i])
+  archive_extract(df$outfile[i], dir = epa_download)
+  unlink(df$outfile[i])
+}
+
+####
+
+# Convert each extracted shapefile to a per-VPU GeoPackage via ogr2ogr,
+# skipping any that already exist.
+all$gpkg = paste0(catchments_dir, "/NHDPlus", all$VPU, ".gpkg")
+
+df2 = filter(all, !all$gpkg %in% list.files(catchments_dir, full.names = TRUE))
+
+if(nrow(df2) > 0){
+  calls = paste('ogr2ogr -f GPKG', df2$gpkg, df2$shp)
+
+  for(i in seq_along(calls)){
+    system(calls[i])
+    message(i)
+  }
+}
+
+
+if(purge){
+   unlink(epa_download, recursive = TRUE) 
+}
+```
+
+# Step 2: Clean
+
+```{r}
+files       = list.files(catchments_dir, 
+                         full.names = TRUE, 
+                         pattern = ".gpkg$")
+
+out_geojson = gsub('gpkg', 
+                   'geojson', paste0(cleaned_dir, basename(files)))
+
+# seq_along() (not 1:length) so the loop is skipped cleanly when
+# catchments_dir is empty.
+for(i in seq_along(files)){
+
+  if(!file.exists(out_geojson[i])){
+    catchments  = read_sf(files[i])
+    names(catchments) = tolower(names(catchments))
+
+    # Dissolve fragments into one clean geometry per featureid
+    # (clean_geometry comes from the attached hyRefactor package;
+    # sys = TRUE uses the system mapshaper).
+    ll = clean_geometry(
+      catchments,
+      sys = TRUE,
+      'featureid',
+      keep = NULL)
+
+    lt          =  st_make_valid(ll)
+    lt$areasqkm =  drop_units(set_units(st_area(lt), "km2"))
+    lt          =  st_transform(lt, 4326)
+
+    write_sf(lt, out_geojson[i])
+
+    # Free the (large) per-VPU objects before the next iteration.
+    rm(ll); rm(catchments); rm(lt); gc()
+  }
+  
+  log_info("Cleaned VPU: ",i,  " of ", length(files))
+}
+
+
+
+if(purge){
+  unlink(catchments_dir, recursive = TRUE) 
+}
+```
+
+# Step 3: Simplify
+
+```{r}
+all  = list.files(cleaned_dir, 
+                  full.names = TRUE, 
+                  pattern = ".geojson$")
+
+# Output names record the simplification level, e.g. NHDPlus01_t20.geojson.
+out  = paste0(simplified_dir, "/", 
+              gsub(".geojson", "", 
+              basename(all)), "_t", num, ".geojson")
+
+out2 = gsub('geojson', "gpkg", out)
+
+# seq_along() (not 1:length) so the loop is skipped cleanly when there
+# are no cleaned files to simplify.
+for(i in seq_along(out2)){
+  if(!file.exists(out2[i])){
+    # Simplify to `num`% of vertices with mapshaper (keep-shapes prevents
+    # small polygons from vanishing), then convert to GPKG and drop the
+    # intermediate geojson.
+    system(paste0('node  --max-old-space-size=16000 `which mapshaper` ', all[i], ' -simplify ',num, '% keep-shapes -o ', out[i]))
+    system(paste('ogr2ogr', out2[i], out[i], "-nln catchments"))
+    unlink(out[i])
+  }
+
+  log_info("Simplified: ", i ,  " of ", length(out2))
+}
+
+```
+
+# Step 4: Rectify
+
+```{r}
+# Pairs of adjacent vector processing units whose shared borders must be
+# rectified (overlaps erased, gaps reassigned).
+topos = pu_adjanceny()
+
+files       = list.files(simplified_dir, full.names = TRUE, pattern = ".gpkg$")
+
+# seq_len() so an empty adjacency table skips the loop cleanly.
+for(i in seq_len(nrow(topos))){
+
+  # pu_adjanceny() returns generic columns PU1/PU2 (it supports VPUs and
+  # RPUs), not VPU1/VPU2 -- topos$VPU1 silently returned NULL.
+  VPU1 = topos$PU1[i]
+  VPU2 = topos$PU2[i]
+
+  # NOTE(review): `new_dir` is not defined in this script or the sourced
+  # config -- confirm it is set before this chunk runs.
+  v_path_1 = find_file_path(VPU1, files, new_dir)
+  v_path_2 = find_file_path(VPU2, files, new_dir)
+
+  log_info('\tRead in touching pairs')
+    v1 = read_sf(v_path_1) |>
+      st_transform(5070) |>
+      st_make_valid() |>
+      rename_geometry('geometry')
+
+    v2 = read_sf(v_path_2) |>
+      st_transform(5070) |>
+      st_make_valid() |>
+      rename_geometry('geometry')
+
+  log_info('\tIsolate "edge" catchments')
+    old_1 = st_filter(v1, v2)
+    old_2 = st_filter(v2, v1)
+
+  log_info('\tErase fragments of OVERLAP')
+    new_1 = st_difference(old_1, st_union(st_combine(old_2)))
+    new_2 = st_difference(old_2, st_union(st_combine(old_1)))
+
+    u1 = sf_remove_holes(st_union(new_1))
+    u2 = sf_remove_holes(st_union(new_2))
+
+   log_info('\tBuild Fragments')
+
+    # Gaps along the seam: the combined footprint minus the catchments
+    # themselves. Buffered slightly so slivers intersect their neighbors
+    # in the assignment step below.
+    base_cats = bind_rows(new_1, new_2)
+    base_union = sf_remove_holes(st_union(c(u1,u2)))
+
+    frags = st_difference(base_union, st_union(st_combine(base_cats))) |>
+      st_cast("MULTIPOLYGON") |>
+      st_cast("POLYGON") |>
+      st_as_sf() |>
+      mutate(id = 1:n()) %>%
+      rename_geometry('geometry') |>
+      st_buffer(.0001)
+
+    out = tryCatch({
+      suppressWarnings({
+        st_intersection(frags, base_cats) %>%
+          st_collection_extract("POLYGON")
+      })
+    }, error = function(e) { NULL })
+
+  # Fail loudly with context instead of erroring obscurely on NULL below.
+  if(is.null(out)){
+    stop("st_intersection failed for VPU pair ", VPU1, "-", VPU2)
+  }
+
+  # Assign each fragment to the catchment it overlaps most (by area).
+  ints = out %>%
+    mutate(l = st_area(.)) %>%
+    group_by(.data$id) %>%
+    slice_max(.data$l, with_ties = FALSE) %>%
+    ungroup() %>%
+    select(.data$featureid, .data$id, .data$l) %>%
+    st_drop_geometry()
+
+  tj = right_join(frags,
+                  ints,
+                  by = "id") %>%
+    bind_rows(base_cats) %>%
+    dplyr::select(-.data$id) %>%
+    group_by(.data$featureid) %>%
+    mutate(n = n()) %>%
+    ungroup()
+
+  # Dissolve fragment + parent catchment pairs back into single features.
+  in_cat = suppressWarnings({
+    hyRefactor::union_polygons_geos(filter(tj, .data$n > 1) , 'featureid') %>%
+      bind_rows(dplyr::select(filter(tj, .data$n == 1), .data$featureid)) %>%
+      mutate(tmpID = 1:n()) |>
+      st_make_valid()
+  })
+
+  log_info('\tReassemble VPUS')
+
+    inds = in_cat$featureid[in_cat$featureid %in% v1$featureid]
+
+    to_keep_1 = bind_rows( filter(v1, !featureid %in% inds),
+                           filter(in_cat, featureid %in% inds)) |>
+      select(names(v1)) %>%
+      mutate(areasqkm = hyRefactor::add_areasqkm(.))
+
+    inds2 = in_cat$featureid[in_cat$featureid %in% v2$featureid]
+
+    to_keep_2 = bind_rows( filter(v2, !featureid %in% inds2),
+                           filter(in_cat, featureid %in% inds2)) |>
+      select(names(v1)) %>%
+      mutate(areasqkm = hyRefactor::add_areasqkm(.))
+
+    log_info('\tWrite VPUS')
+    # Overwrite the staged per-VPU files in place; later pairs pick up
+    # the rectified borders.
+    write_sf(to_keep_1, v_path_1, "catchments", overwrite = TRUE)
+    write_sf(to_keep_2, v_path_2, "catchments", overwrite = TRUE)
+
+    log_info('Finished: ', i , " of ", nrow(topos))
+
+}
+
+if(purge){
+  unlink(cleaned_dir, recursive = TRUE)
+}
+
+```
+
+# Step 5: Merge CONUS
+
+```{r}
+
+gpkgs <- list.files(simplified_dir, full.names = TRUE, pattern = ".gpkg$")
+
+# Recover the VPU id from each filename, e.g. ".../NHDPlus03N_t20.gpkg" -> "03N".
+vpu <- gsub(".gpkg", "", gsub(paste0(simplified_dir, "/NHDPlus"), "", gpkgs))
+vpu <- sapply(strsplit(vpu, "_"),"[[",1)
+
+rpuid <- get_vaa("rpuid")
+
+output <- list()
+
+# seq_along() (not 1:length) so an empty directory skips the loop cleanly.
+for (i in seq_along(gpkgs)) {
+
+  tst <- read_sf(gpkgs[i]) |>
+    st_cast("POLYGON") %>%
+    mutate(areasqkm = add_areasqkm(.)) |>
+    mutate(tmpID = 1:n()) |>
+    rename_geometry("geometry")
+
+  # Casting to POLYGON can split one catchment into several rows; any
+  # featureid now appearing more than once must be re-dissolved.
+  dups <- tst$featureid[which(duplicated(tst$featureid))]
+
+  if (length(dups) != 0) {
+
+    # The smallest piece of each duplicated featureid is treated as a
+    # sliver ("bad") and merged into the neighbor it overlaps most.
+    bad <- filter(tst, featureid %in% dups) |>
+      group_by(featureid) |>
+      slice_min(areasqkm) |>
+      ungroup()
+
+    good <- filter(tst, !tmpID %in% bad$tmpID)
+
+    out <- tryCatch(
+      {
+        suppressWarnings({
+          st_intersection(bad, select(good, tmpID)) %>%
+            st_collection_extract("POLYGON") |>
+            filter(tmpID != tmpID.1)
+        })
+      },
+      error = function(e) {
+        NULL
+      }
+    )
+
+    # Fail loudly with context instead of erroring obscurely on NULL below.
+    if (is.null(out)) {
+      stop("st_intersection failed while deduplicating ", gpkgs[i])
+    }
+
+    # For each sliver, pick the neighbor with the largest shared area.
+    ints <- out %>%
+      mutate(l = st_area(.)) %>%
+      group_by(.data$tmpID) %>%
+      slice_max(.data$l, with_ties = FALSE) %>%
+      ungroup() %>%
+      select(.data$tmpID.1, .data$tmpID, .data$l) %>%
+      st_drop_geometry()
+
+    tj <- right_join(bad,
+      ints,
+      by = "tmpID"
+    ) %>%
+      select(badID = tmpID, tmpID = tmpID.1, featureid)
+
+    tmp <- bind_rows(tj, filter(good, tmpID %in% tj$tmpID)) |>
+      hyRefactor::union_polygons_geos("tmpID") |>
+      select(tmpID)
+
+    # Explicit join key (was an implicit natural join on tmpID, which
+    # also emitted a "Joining, by" message on every iteration).
+    tmp2 <- filter(good, tmpID %in% tmp$tmpID) |>
+      st_drop_geometry() |>
+      right_join(tmp, by = "tmpID")
+
+    tst = bind_rows(
+      filter(good, !tmpID %in% tmp2$tmpID),
+      tmp2
+    )
+  }
+
+  # Tag with VPU and raster processing unit, then project to the
+  # fac/fdr grid CRS for downstream use.
+  output[[i]] <- tst |>
+    mutate(vpuid = vpu[i]) |>
+    left_join(rpuid, by = c("featureid" = "comid")) |>
+    rename_geometry("geometry") |>
+    select(-gridcode, -sourcefc, -tmpID) |>
+    st_transform(facfdr_crs)
+
+  message(vpu[i])
+}
+
+
+all <- bind_rows(output) |>
+  st_make_valid()
+
+
+# Replace any previous output before writing the CONUS geopackage.
+unlink("data/reference_catchments.gpkg")
+write_sf(all, "data/reference_catchments.gpkg")
+
+if(purge){
+  unlink(simplified_dir, recursive = TRUE)
+}
+
+```
+
+# Step 6: Upload to Science Base
+
+```{r}
+# Authenticate interactively with ScienceBase, then attach the final
+# CONUS geopackage to the target item.
+# NOTE(review): the item ID is hard-coded -- confirm it is still the
+# intended release item before re-running.
+sbtools::authenticate_sb()
+sbtools::item_append_files("61295190d34e40dd9c06bcd7", "data/reference_catchments.gpkg")
+```
+
+
+
+
diff --git a/workspace/reference_catchments/catchments_build_config.R b/workspace/reference_catchments/catchments_build_config.R
new file mode 100644
index 0000000000000000000000000000000000000000..4c455744b1ac88f606a816c9e78299ff950dc150
--- /dev/null
+++ b/workspace/reference_catchments/catchments_build_config.R
@@ -0,0 +1,66 @@
+# Setup
+# Shared configuration for build_catchments.Rmd. Expects `base_dir` to be
+# defined by the caller before this file is source()d.
+library(dplyr)
+library(rmapshaper)
+library(nhdplusTools)
+library(hyRefactor)
+library(sf)
+library(units)
+library(logger)
+library(aws.s3)
+library(archive)
+
+# Use planar (GEOS) geometry operations rather than spherical s2.
+sf_use_s2(FALSE)
+
+# Public EPA bucket holding the NHDPlus2.1 catchment archives.
+epa_bucket = 'edap-ow-data-commons'
+# Staging directories for each pipeline step, rooted at base_dir.
+epa_download   = paste0(base_dir, '/01_EPA_downloads/')
+catchments_dir = paste0(base_dir, '/02_Catchments/')
+cleaned_dir    = paste0(base_dir, '/03_cleaned_catchments/')
+simplified_dir = paste0(base_dir, '/04_simplified_catchments/')
+
+
+dir.create(epa_download, showWarnings = FALSE)
+dir.create(catchments_dir, showWarnings = FALSE)
+dir.create(cleaned_dir, showWarnings = FALSE)
+dir.create(simplified_dir, showWarnings = FALSE)
+dir.create('data/', showWarnings = FALSE)
+
+# Albers Equal Area CRS used by Step 5 -- presumably matches the NHDPlus
+# fac/fdr grids (hence the name); confirm against those rasters.
+facfdr_crs = '+proj=aea +lat_0=23 +lon_0=-96 +lat_1=29.5 +lat_2=45.5 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs'
+
+# Percent of vertices retained by mapshaper simplification (Step 3).
+num        = 20
+
+
+# Build a table of adjacent (intersecting) processing-unit pairs.
+#
+# type: "vpu" (default) uses nhdplusTools::vpu_boundaries; any other
+#       value uses nhdplusTools::rpu_boundaries.
+# Returns a data.frame with columns PU1/PU2 holding the ids of each
+# touching pair; each unordered pair appears once and self-pairs are
+# removed. Callers must use the generic names PU1/PU2.
+# NOTE(review): the function name keeps the original misspelling
+# ("adjanceny") so existing callers continue to work.
+pu_adjanceny = function(type = "vpu"){
+  
+  if(type == "vpu"){
+    pu = nhdplusTools::vpu_boundaries
+    id = "VPUID"
+  } else {
+    pu = nhdplusTools::rpu_boundaries
+    id = "RPUID"
+  }
+  
+  dt = sf::st_make_valid(pu)
+  
+  # Dense intersection matrix -> (row, col) index pairs of touching PUs.
+  # TRUE spelled out (not T, which is reassignable).
+  x = as.data.frame(which(st_intersects(dt, sparse = FALSE), arr.ind = TRUE))
+  
+  # Sort each index pair so (a, b) and (b, a) collapse to one row;
+  # seq_len() is safe if the matrix were ever empty.
+  vars = lapply(seq_len(nrow(x)), function(y){
+    A <- as.numeric(x[y, ])
+    A[order(A)]
+  })
+  
+  do.call('rbind', vars)[!duplicated(vars),] |>
+    data.frame() |>
+    setNames(c("PU1", "PU2")) |>
+    filter(PU1 != PU2) |>
+    mutate(PU1 = dt[[id]][PU1],
+           PU2 = dt[[id]][PU2]) 
+}
+
+# Locate the simplified geopackage for a given VPU and stage a working
+# copy of it in `new_dir`, returning the path of that copy.
+#
+# The staged name is the part of the source filename before the first
+# underscore (e.g. "NHDPlus01_t20.gpkg" -> "NHDPlus01.gpkg").
+# NOTE(review): grep(VPU, files) is an unanchored pattern match and
+# assumes the VPU string identifies exactly one file -- confirm for VPU
+# ids that are substrings of one another.
+find_file_path = function(VPU, files, new_dir){
+  src  = grep(VPU, files, value = TRUE)
+  dest = paste0(new_dir, "/", gsub("\\_.*", "", basename(src)), ".gpkg")
+  
+  # Copy only once; later calls reuse the staged file, which Step 4
+  # rewrites in place as neighboring VPUs are rectified.
+  if(!file.exists(dest)){
+    file.copy(src, dest)
+  }
+  
+  dest
+}