diff --git a/workspace/00_get_data.Rmd b/workspace/00_get_data.Rmd
index 9c8b496866baf5e911b85c2dbf3a936dfb9e1247..0e5779524ea69edae35573d10786450417e3a563 100644
--- a/workspace/00_get_data.Rmd
+++ b/workspace/00_get_data.Rmd
@@ -110,11 +110,11 @@ dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)
 
 get_sb_file("5dbc53d4e4b06957974eddae", 
             "2015_TE_Model_Estimates_lat.long_COMIDs.7z",
-            TE_points_path, unzip = TRUE)
+            TE_points_path)
 
 get_sb_file("63adc826d34e92aad3ca5af4", 
             "galanter_and_others_2023.zip",
-            TE_points_path, unzip = TRUE)
+            TE_points_path)
 
 out_list <- c(out_list, list(TE_points_path = TE_points_path))
 
@@ -132,22 +132,12 @@ water use withdrawals.
 #   Attributes and Modified Routing for NHDPlus Version 2.1 Flowlines: U.S. 
 #   Geological Survey data release, https://doi.org/10.5066/P986KZEM.
 
-USGS_IT_path <- file.path(data_dir, "USGS_IT")
-
-if(!file.exists(USGS_IT_path)) {
-  dir.create(USGS_IT_path, recursive = TRUE, showWarnings = FALSE)
-  
-  ITzip <- "supplemental_files.zip"
-  check_auth()
-  sbtools::item_file_download("5d16509ee4b0941bde5d8ffe", names = ITzip, 
-                              destinations = file.path(USGS_IT_path, ITzip))
-  
-  unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path)
-  
-  rm(ITzip)
-}
-
-out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path))
+out_list <- c(
+  out_list, 
+  list(USGS_IT_path = 
+         get_sb_file("5d16509ee4b0941bde5d8ffe", 
+                     "supplemental_files.zip",
+                     file.path(data_dir, "USGS_IT"))))
 ```
 
 Two datasets relate hydro location information from the National Inventory of
@@ -169,29 +159,16 @@ natural flow (Wieczorek and others, 2021).
 
 NID_points_path <- file.path(data_dir, "NID_points")
 
-if(!file.exists(NID_points_path)) {
-  dir.create(NID_points_path, recursive = TRUE, showWarnings = FALSE)
-  
-  NIDtxt <- "NID_attributes_20170612.txt"
-  check_auth()
-  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDtxt, 
-                              destinations = file.path(NID_points_path, 
-                                                       NIDtxt))
-  
-  NIDzip <- "Final_NID_2018.zip"
-  check_auth()
-  sbtools::item_file_download("5fb7e483d34eb413d5e14873", names = NIDzip, 
-                              destinations = file.path(NID_points_path, 
-                                                       NIDzip))
+get_sb_file("5dbc53d4e4b06957974eddae",
+            "NID_attributes_20170612.txt",
+            NID_points_path)
 
-   unzip(file.path(NID_points_path, NIDzip), files = NULL, 
-         exdir = NID_points_path)
-  
-  
-  rm(NIDtxt, NIDzip)
-}
+get_sb_file("5fb7e483d34eb413d5e14873",
+            "Final_NID_2018.zip",
+            NID_points_path)
 
 out_list <- c(out_list, list(NID_points_path = NID_points_path))
+
 if(mapview)(mapview(read_sf(out_list$NID_points_path)))
 ```
 
@@ -213,83 +190,43 @@ efforts. These include:
 #    Units: U.S. Geological Survey data release, 
 #    https://doi.org/10.5066/P9CFXHGT.
 
+epa_data_root <- "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/"
+
 nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
-nhdplus_gdb <- file.path(data_dir, 
-                         "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
+nhdplus_gdb <- file.path(nhdplus_dir, "NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
 
 islands_dir <- file.path(data_dir, "islands")
-islands_gdb <- file.path(islands_dir, 
-                         "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")
+islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")
 
 rpu <- file.path(nhdplus_dir, "NHDPlusGlobalData", "BoundaryUnit.shp")
 
-if(!file.exists(nhdplus_dir)) {
-  message("downloading NHDPlus...")
-  
-  dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE)
-  dir.create(islands_dir, recursive = TRUE, showWarnings = FALSE)
+get_sb_file("5dbc53d4e4b06957974eddae", "NHDPlusV21_NationalData_GageLoc_05.7z", nhdplus_dir)
 
-  gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z"
-  check_auth()
-  sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz, 
-                              destinations = file.path(nhdplus_dir, gLz))
-  
-  system(paste0(sevenz, " e -o", nhdplus_dir, " ", 
-                file.path(nhdplus_dir, gLz)))
-  
-  xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z"
-  
-  sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk, 
-                              destinations = file.path(nhdplus_dir, xWalk))
-  check_auth()
-  system(paste0(sevenz, " e -o", nhdplus_dir, " ", 
-                file.path(nhdplus_dir, xWalk)))
-  
-  x <- tryCatch(
-    download_nhdplusv2(data_dir, url = "https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
-    # Quiet the download, overwrite existing files
-    error =  function(e)
-  {system(paste0(sevenz, " x ", file.path(data_dir, 
-                                          "NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z")
-                       , " -o", data_dir), ignore.stdout = T)}
-  )
-
-  x <- tryCatch(
-    download_nhdplusv2(islands_dir,  
-                       url = paste0("https://dmap-data-commons-ow.s3.amazonaws.com/NHDPlusV21/",
-               "Data/NationalData/NHDPlusV21_NationalData_Seamless", 
-               "_Geodatabase_HI_PR_VI_PI_03.7z")),
-    
-    # Quiet the download, overwrite existing files
-    error =  function(e)
-    {system(paste0(sevenz, " x ", file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z")
-                     , " -o", islands_dir), ignore.stdout = T)}
-  )
-  
-  rm(gLz, xWalk)
-  
-  HUC12 <- read_sf(nhdplus_gdb, layer = "HUC12") %>% 
-    st_make_valid() %>% 
-    st_transform(., crs = proj_crs)
+get_sb_file("5c86a747e4b09388244b3da1", "CrosswalkTable_NHDplus_HU12_CSV.7z", nhdplus_dir)
 
-  saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds"))
- 
-  gagelocgf <- "GageLocGFinfo.dbf"
-  check_auth()
-  sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf, 
-                              destinations = file.path(data_dir, gagelocgf))
-  
-  u <- "http://www.horizon-systems.com/NHDPlusData/NHDPlusV21/Data/GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z"
-  
-  out_f <-  file.path(nhdplus_dir, basename(u))
-  
-  download.file(u, destfile = out_f, mode = "wb")
-  
-  system(paste0(sevenz,  " x ", out_f, " -o", nhdplus_dir), 
-         ignore.stdout = TRUE, intern = TRUE)
-   
+# will download the 7z and unzip into the folder structure in nhdplus_gdb path
+download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
+              out_path = data_dir, check_path = nhdplus_gdb)
+
+download_file(paste0(epa_data_root, "NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
+              out_path = islands_dir, check_path = islands_gdb)
+
+# cache the huc12 layer in rds format
+hu12_rds <- file.path(nhdplus_dir, "HUC12.rds")
+
+if(!file.exists(hu12_rds)) {
+  read_sf(nhdplus_gdb, layer = "HUC12") |>
+    st_make_valid() |>
+    st_transform(crs = proj_crs) |> 
+    # TODO: convert this to gpkg
+    saveRDS(file = hu12_rds)
 }
 
+get_sb_file("5dcd5f96e4b069579760aedb", "GageLocGFinfo.dbf", data_dir)
+
+download_file(paste0(epa_data_root, "GlobalData/NHDPlusV21_NHDPlusGlobalData_03.7z"),
+              out_path = nhdplus_dir, check_path = rpu)
+
 out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, 
                              nhdplus_gdb = nhdplus_gdb, 
                              islands_dir = islands_dir, 
@@ -314,64 +251,22 @@ ref_cat <- file.path(ref_fab_path, "reference_catchments.gpkg")
 ref_fl <- file.path(ref_fab_path, "reference_flowline.gpkg")
 nwm_fl <- file.path(ref_fab_path, "nwm_network.gpkg")
 
-if(!dir.exists(ref_fab_path)) {
-  dir.create(ref_fab_path, recursive = TRUE, showWarnings = FALSE)
-}
-
-if(!file.exists(ref_cat)){
-  possible_vpu <- c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", 
-                  "03S", "03W", "03N", "10U", "11", "12", "13", "14",  "16", 
-                  "17", "18")
-
-  for (vpu in possible_vpu){
-    vpu_gpkg <- paste0(vpu, "_reference_features.gpkg")
-    check_auth()
-    sbtools::item_file_download("6317a72cd34e36012efa4e8a", names = vpu_gpkg, 
-                                destinations = file.path(ref_fab_path, vpu_gpkg))
-  }
-  check_auth()
-  sbtools::item_file_download("61295190d34e40dd9c06bcd7", 
-                              names = "reference_catchments.gpkg", 
-                              destinations = ref_cat)
-  check_auth()
-  sbtools::item_file_download("61295190d34e40dd9c06bcd7", 
-                              names = "reference_flowline.gpkg", 
-                              destinations = ref_fl)
-  check_auth()
-  sbtools::item_file_download("61295190d34e40dd9c06bcd7", 
-                              names = "nwm_network.gpkg",
-                              destinations = nwm_fl)
+for (vpu in c("01", "08", "10L", "15", "02", "04", "05", "06", "07", "09", 
+              "03S", "03W", "03N", "10U", "11", "12", "13", "14",  "16", 
+              "17", "18")) {
+  
+  get_sb_file("6317a72cd34e36012efa4e8a", 
+              paste0(vpu, "_reference_features.gpkg"), 
+              ref_fab_path)
 }
 
+get_sb_file("61295190d34e40dd9c06bcd7",
+            c("reference_catchments.gpkg", "reference_flowline.gpkg", "nwm_network.gpkg"), 
+            out_destination = ref_fab_path)
 
-out_list <- c(out_list, list(ref_fab_path = ref_fab_path, ref_cat = ref_cat, 
-                             ref_fl = ref_fl, nwm_fl = nwm_fl))
-```
-
-NHDPlus Value-added attributes used to characterize the network and get 
-information such as velocity and mean streamflow.
-
-```{r NHDPlusV2 VAA}
-#  Waterbodies - derived after downloading and post-processing 
-#  NHDPlus Seamless National Geodatabase
-#  Compacted here into a GDB
-VAA_fst_path <- file.path(nhdplus_dir, "nhdplusVAA.fst")
-VAA_rds_path <- file.path(nhdplus_dir, "nhd_vaa.rds")
-#TODO pull attributes needed for threshold POIs
-
-if(!file.exists(VAA_rds_path)) {
-  message("downloading NHDPlusVAA...")
 
-  download_vaa(VAA_fst_path)
-  
-  # Extract other attributes
-  nhdpv2_vaa <- read_sf(nhdplus_gdb, "NHDFlowline_Network") %>%
-    st_drop_geometry()
-  
-  saveRDS(nhdpv2_vaa, file.path(nhdplus_dir, "nhd_vaa.rds"))
-}
-
-out_list <- c(out_list, list(VAA_fst = VAA_fst_path, VAA_rds = VAA_rds_path))
+out_list <- c(out_list, list(ref_fab_path = ref_fab_path, 
+                             ref_cat = ref_cat, ref_fl = ref_fl, nwm_fl = nwm_fl))
 ```
 
 NHDPlus Waterbody and Area Polygons converted to an RDS file for easier 
@@ -385,21 +280,17 @@ waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")
 
 if(!file.exists(waterbodies_path)) {
   message("formatting NHDPlus watebodies...")
-
-  # Read the feature class
-  wb_sf <- read_sf(nhdplus_gdb, "NHDWaterbody") %>%
-    st_transform(proj_crs) %>%
-    mutate(layer = "NHDWaterbody")
-  
-  wbarea_sf <- read_sf(nhdplus_gdb, "NHDArea") %>%
-    st_transform(proj_crs) %>%
-    mutate(layer = "NHDArea")
   
-  wb_fc <- data.table::rbindlist(list(wb_sf, wbarea_sf), fill = TRUE) %>%
-    st_as_sf()
+  data.table::rbindlist(list(
+    read_sf(out_list$nhdplus_gdb, "NHDWaterbody") |>
+      st_transform(proj_crs) |>
+      mutate(layer = "NHDWaterbody"), 
+    read_sf(out_list$nhdplus_gdb, "NHDArea") |>
+      st_transform(proj_crs) |>
+      mutate(layer = "NHDArea")), fill = TRUE) |>
+    st_as_sf() |>
+    saveRDS(waterbodies_path)
   
-  # Convert to simple feature and save out
-  saveRDS(wb_fc, waterbodies_path)
 }
 
 out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
diff --git a/workspace/R/1_get_data.R b/workspace/R/1_get_data.R
index 19dd825c09acd180855122093277737325701211..53e00d97a4ab2ddfb75186537546499fd1e28da5 100644
--- a/workspace/R/1_get_data.R
+++ b/workspace/R/1_get_data.R
@@ -12,6 +12,8 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) {
     item_files <- sbtools::item_list_files(item)$fname
   }
   
+  if(!dir.exists(out_destination)) dir.create(out_destination, showWarnings = FALSE, recursive = TRUE)
+  
   out_files <- file.path(out_destination, item_files)
   
   missing <- out_files[!file.exists(out_files)]
@@ -24,14 +26,40 @@ get_sb_file <- function(item, item_files, out_destination, unzip = TRUE) {
   
   if(unzip) {
     
-    un7zip_fs <- missing[grepl("7z$", missing)]
-    
+    un7zip_fs <- missing[grepl("7z$|zip$", missing)]
+
     for(f in un7zip_fs) {
       system(paste0(sevenz, " e -o", out_destination, " ", f))
     }
     
   }
   
-  out_destination
+  invisible(out_destination)
   
 }
+
+#' download a file
+#' @param url character url where the file can be retrieved
+#' @param out_path character path the directory to save the downloaded file
+#' @param check_path character path to check that should exist when done
+#' @param unzip logical whether to try to unzip the file
+download_file <- function(url, out_path, check_path = NULL, unzip = TRUE) {
+
+  if(file.exists(check_path)) return(invisible(check_path))
+
+  dir.create(out_path, showWarnings = FALSE, recursive = TRUE)
+  
+  out <- file.path(out_path, basename(url))
+  
+  if(!file.exists(out)) {
+    httr::RETRY("GET", url, httr::write_disk(out))
+  
+    if(unzip & grepl("7z$|zip$", out))
+      system(paste0(sevenz, " x ", out, " -o", out_path))
+    
+  }
+  
+  if(file.exists(check_path)) return(invisible(check_path))
+  
+  stop("did not create ", check_file)
+}
\ No newline at end of file