Skip to content
Snippets Groups Projects
00_get_data.Rmd 15 KiB
Newer Older
  • Learn to ignore specific revisions
  • Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ---
    title: "GFv2 Get Data"
    output: html_document
    ---
    
    This notebook pulls data from a number of sources and populates the GFv2 data directory. Any new data requirements should be added as code chunks here. 
    
    Each code chunk should create a path to the file you want to use in a process step, check if that path exists, and put the data there if it does not. All paths are stored in a list that is saved to the `cache` directory. If changes are made to the output of this notebook, they should be checked in.
    
    **If resources need to be downloaded from ScienceBase, run this R Markdown document from RStudio so that username and password authentication will work.**
    
    ```{r}
    
    # Packages used throughout this notebook. nhdplusTools and sbtools are
    # attached because later chunks call their functions unqualified
    # (e.g. download_nhdplusv2(), authenticate_sb()).
    library(nhdplusTools)
    library(sbtools)
    library(jsonlite)
    library(hyRefactor)
    library(sf)

    # Root directory that every chunk builds its data paths from.
    # NOTE(review): the assignment of `data_dir` was not visible in the source;
    # "data" matches the directory that was being created here -- confirm.
    data_dir <- "data"

    if (!dir.exists(data_dir)) {
      dir.create(data_dir)
    }
    if (!dir.exists("bin")) {
      dir.create("bin")
    }

    # Running list of data paths; it is written to `out_file` in the last chunk.
    out_list <- list("data_dir" = data_dir)
    out_file <- file.path("cache", "data_paths.json")
    
    
    # Pick the 7-Zip executable used by later chunks: "7z" when the check below
    # succeeds, otherwise a standalone copy downloaded into bin/.
    sevenz <- "7z"
    check_7z <- try(nhdplusTools:::check7z(), silent = TRUE)
    if (inherits(check_7z, "try-error")) {
      message("trying to download 7z -- it's not on your path")
      # Download command-line 7-Zip
      # NOTE(review): 7za.exe looks like a Windows-only binary -- confirm the
      # fallback is only expected to work on Windows.
      if (!file.exists("bin/7za.exe")) {
        # mode = "wb" writes the archive in binary mode so it is not corrupted
        download.file("https://www.7-zip.org/a/7za920.zip",
                      destfile = "bin/7za920.zip", mode = "wb")
        unzip("bin/7za920.zip", exdir = "bin")
      }
      sevenz <- "bin/7za.exe"
    }
    
    ```
    
    ```{r HUC12 outlets}
    # HUC12 outlets - derived by Dave Blodgett's code (in progress); currently hosted at GFv2 POI Source data page
    
    
    hu12_points_path <- file.path(data_dir, "hu_outlets.gpkg")

    # Fetch the HUC12 outlet points from ScienceBase only when the file is absent.
    if (!file.exists(hu12_points_path)) {
      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                                  names = "hu_outlets.gpkg",
                                  destinations = hu12_points_path)
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(hu12_points_path = hu12_points_path))
    ```
    
    
    ```{r GagesIII}
    # GagesIII - data release in progress by Chase and others, currently hosted at GFv2 POI Source data page
    
    
    gagesiii_points_path <- file.path(data_dir, "GAGESIII_gages")

    # Download and unpack the GAGESIII archive only when its folder is absent.
    if (!file.exists(gagesiii_points_path)) {
      dir.create(gagesiii_points_path, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      g3z <- "GAGESIII_gages.zip"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = g3z,
                                  destinations = file.path(gagesiii_points_path, g3z))

      unzip(file.path(gagesiii_points_path, g3z), exdir = gagesiii_points_path)

      rm(g3z)

      # NOTE(review): `g3gf` is not assigned anywhere in the visible source. It
      # must hold the name of the file to fetch from this ScienceBase item --
      # confirm the file name and define it before this call.
      sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = g3gf,
                                  destinations = file.path(data_dir, g3gf))
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(gagesiii_points_path = gagesiii_points_path))
    ```
    
    
    ```{r Thermoelectric Facilities}
    # Thermoelectric plants - data compiled by Melissa Harris and Amy Galanter (multiple sources), 
    # Currently hosted at GFv2 POI Source data page
    #-----------------------------------------------------
    #   Harris, Melissa A. and Diehl, Timothy H., 2017. A Comparison of Three Federal Datasets for Thermoelectric Water
    #   Withdrawals in the United States for 2010. Journal of the American Water Resources Association 
    #   (JAWRA) 53( 5): 1062– 1080. https://doi.org/10.1111/1752-1688.12551
    
    
    TE_points_path <- file.path(data_dir, "TE_points")

    # Download and unpack the thermoelectric archive only when its folder is absent.
    if (!file.exists(TE_points_path)) {
      dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      TEz <- "2015_TE_Model_Estimates_lat.long_COMIDs.7z"
      TE_archive <- file.path(TE_points_path, TEz)

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = TEz,
                                  destinations = TE_archive)

      # Build the 7-Zip call first, then run it: extract the archive into
      # TE_points_path.
      TE_extract_cmd <- paste0(sevenz, " e -o", TE_points_path, " ", TE_archive)
      system(TE_extract_cmd)

      # Drop the temporaries so the notebook environment stays clean.
      rm(TEz, TE_archive, TE_extract_cmd)
    }

    out_list <- c(out_list, list(TE_points_path = TE_points_path))
    ```
    
    
    ```{r Interbasin transfers and diversions - USGS}
    # USGS IT and diversion points on NHDPlus v2 network -
    #------------------------------------------------------
    #   Schwarz, G.E., 2019, E2NHDPlusV2_us: Database of Ancillary Hydrologic Attributes and Modified Routing for 
    #   NHDPlus Version 2.1 Flowlines: U.S. Geological Survey data release, https://doi.org/10.5066/P986KZEM.
    
    
    USGS_IT_path <- file.path(data_dir, "USGS_IT")

    # Download and unzip the supplemental files only when the folder is absent.
    if (!file.exists(USGS_IT_path)) {
      dir.create(USGS_IT_path, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      ITzip <- "supplemental_files.zip"

      sbtools::item_file_download("5d16509ee4b0941bde5d8ffe", names = ITzip,
                                  destinations = file.path(USGS_IT_path, ITzip))

      unzip(file.path(USGS_IT_path, ITzip), exdir = USGS_IT_path)

      rm(ITzip)
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(USGS_IT_path = USGS_IT_path))
    ```
    
    
    ```{r Interbasin transfers - KSU}
    # KSU IT Facilities - data compiled by Kerim Dickson (Kansas State University)
    #-----------------------------------------------------------------------------
    #   Dickson, Kerim E. and Dzombak, David A., 2017. Inventory of Interbasin Transfers in the United States. 
    #   Journal of the American Water Resources Association (JAWRA) 53(5): 1121-1132.
    #   https://doi.org/10.1111/1752-1688.12561
    
    
    KSU_IT_path <- file.path(data_dir, "KSU_IT")

    # Download the interbasin-transfer workbook only when the folder is absent.
    if (!file.exists(KSU_IT_path)) {
      dir.create(KSU_IT_path, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      IT <- "IBT_Database.xlsx"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = IT,
                                  destinations = file.path(KSU_IT_path, IT))

      rm(IT)
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(KSU_IT_path = KSU_IT_path))
    ```
    
    
    ```{r National Inventory of Dams}
    # National Inventory of Dams data coupled with NHDPlus COMIDs
    #-----------------------------------------------------------------------------
    #    Wieczorek, M.E., Jackson, S.E., and Schwarz, G.E., 2018, Select Attributes for NHDPlus Version 2.1 Reach Catchments 
    #    and Modified Network Routed Upstream Watersheds for the Conterminous United States (ver. 2.0, November 2019): 
    #    U.S. Geological Survey data release, https://doi.org/10.5066/F7765D7V.
    #
    #    https://nid.sec.usace.army.mil/ords/f?p=105%3A1%3A%3A%3A%3A%3A%3A
    
    
    NID_points_path <- file.path(data_dir, "NID_points")

    # Download the two NID files only when the folder is absent.
    if (!file.exists(NID_points_path)) {
      dir.create(NID_points_path, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      NIDtxt <- "NID_attributes_20170612.txt"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDtxt,
                                  destinations = file.path(NID_points_path, NIDtxt))

      NIDsnap <- "NAWQA_NID_snap.rds"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = NIDsnap,
                                  destinations = file.path(NID_points_path, NIDsnap))

      rm(NIDtxt, NIDsnap)
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(NID_points_path = NID_points_path))
    ```
    
    
    ```{r VPU outlets}
    # VPU Outlets for subsetting - Derived by Team Spatial for subsetting VPUs (hydrologic regions)
    
    VPU_outlets <- file.path(data_dir, "RegOutlets.json")

    # Download the VPU outlet file only when it is absent.
    if (!file.exists(VPU_outlets)) {

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      VPU_out <- "RegOutlets.json"

      # VPU_outlets is already the full destination path.
      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = VPU_out,
                                  destinations = VPU_outlets)

      rm(VPU_out)
    }

    # Record the path whether or not a download was needed.
    out_list <- c(out_list, list(VPU_outlets = VPU_outlets))
    ```
    
    ```{r NHDPlusV2}
    # NHDPlus Seamless National Data -  pulled from NHDPlus national data server; post-processed to RDS files by NHDPlusTools
    # GageLoc - Gages snapped to NHDPlusv2 flowlines (QAQC not verified)
    # NHDPlus HUC12 crosswalk 
    #------------------------
    #    Moore, R.B., Johnston, C.M., and Hayes, L., 2019, Crosswalk Table Between NHDPlus V2.1 and 
    #    its Accompanying WBD Snapshot of 12-Digit Hydrologic Units: U.S. Geological Survey data release, 
    #    https://doi.org/10.5066/P9CFXHGT.
    
    nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
    nhdplus_gdb <- file.path(data_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")

    islands_dir <- file.path(data_dir, "islands")
    islands_gdb <- file.path(islands_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_HI_PR_VI_PI.gdb/")

    # Everything below runs only once, when the NHDPlus folder does not exist yet.
    if (!file.exists(nhdplus_dir)) {
      message("downloading NHDPlus...")

      dir.create(nhdplus_dir, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      # GageLoc archive: download, then extract into nhdplus_dir.
      gLz <- "NHDPlusV21_NationalData_GageLoc_05.7z"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = gLz,
                                  destinations = file.path(nhdplus_dir, gLz))

      system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, gLz)))

      # HUC12 crosswalk archive: same download-then-extract steps.
      xWalk <- "CrosswalkTable_NHDplus_HU12_CSV.7z"

      sbtools::item_file_download("5c86a747e4b09388244b3da1", names = xWalk,
                                  destinations = file.path(nhdplus_dir, xWalk))

      # The 7-Zip command and -o switch must be spelled out, as for GageLoc above.
      system(paste0(sevenz, " e -o", nhdplus_dir, " ", file.path(nhdplus_dir, xWalk)))

      # Lower-48 seamless geodatabase. If download_nhdplusv2() errors, fall back
      # to extracting the archive in data_dir with the 7-Zip executable chosen
      # earlier (quiet output).
      x <- tryCatch(
        download_nhdplusv2(data_dir),
        error = function(e) {
          system(paste0(sevenz, " x ",
                        file.path(data_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_Lower48_07.7z"),
                        " -o", data_dir),
                 ignore.stdout = TRUE)
        }
      )

      nhdplus_path(nhdplus_gdb)

      suppressWarnings(staged_nhdplus <- stage_national_data())

      # Islands (HI/PR/VI/PI) geodatabase, same pattern. The fallback extracts
      # into islands_dir because islands_gdb is defined underneath it.
      x <- tryCatch(
        download_nhdplusv2(islands_dir, "https://s3.amazonaws.com/edap-nhdplus/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
        error = function(e) {
          system(paste0(sevenz, " x ",
                        file.path(islands_dir, "NHDPlusV21_NationalData_Seamless_Geodatabase_HI_PR_VI_PI_03.7z"),
                        " -o", islands_dir),
                 ignore.stdout = TRUE)
        }
      )

      nhdplus_path(islands_gdb)

      suppressWarnings(staged_nhdplus_islands <- stage_national_data())

      rm(gLz, xWalk)

      # Read HUC12 straight from the geodatabase path defined in this chunk,
      # repair invalid geometries, and reproject to EPSG:5070.
      HUC12 <- read_sf(nhdplus_gdb, layer = "HUC12") %>%
        st_make_valid() %>%
        st_transform(crs = 5070)

      saveRDS(HUC12, file = file.path(nhdplus_dir, "HUC12.rds"))

      gagelocgf <- "GageLocGFinfo.dbf"

      sbtools::item_file_download("5dcd5f96e4b069579760aedb", names = gagelocgf,
                                  destinations = file.path(data_dir, gagelocgf))
    }

    # Record the paths whether or not a download was needed.
    out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, nhdplus_gdb = nhdplus_gdb, islands_dir = islands_dir, islands_gdb = islands_gdb))
    
    ```
    
    ```{r NHDPlusV2 Waterbodies}
    # Waterbodies - derived after downloading and post-processing NHDPlus Seamless National Geodatabase
    
    
    waterbodies_path <- file.path(nhdplus_dir, "nhdplus_waterbodies.rds")

    # Build the waterbody RDS only when it does not exist yet.
    if (!file.exists(waterbodies_path)) {
      message("formatting NHDPlus watebodies...")

      nhdplus_path(nhdplus_gdb)

      # Read the feature class from the geodatabase path defined in this
      # notebook and reproject to EPSG:5070. read_sf() already returns an sf
      # object, so no st_as_sf() conversion is needed before saving.
      wbSF <- read_sf(nhdplus_gdb, "NHDWaterbody") %>%
        st_transform(crs = 5070)

      saveRDS(wbSF, waterbodies_path)
    }

    out_list <- c(out_list, list(waterbodies_path = waterbodies_path))
    ```
    
    ```{r NHDPlusV2 FDR_FAC}
    
    # NHDPlus FDR/FAC grids available by raster processing unit
    
    fdr_fac_dir <- file.path(data_dir, "fdrfac")
    dir.create(fdr_fac_dir, recursive = TRUE, showWarnings = FALSE)
    # Will need to make this work for all the FdrFac data -- maybe just shell out to wget.

    download_fdr_fac(fdr_fac_dir)

    # Every directory under fdr_fac_dir that holds files, split into those
    # ending in "/fdr" and those ending in "/fac".
    dirs <- unique(dirname(list.files(fdr_fac_dir, recursive = TRUE, full.names = TRUE)))
    fdr <- dirs[grepl(".*/fdr$", dirs)]
    fac <- dirs[grepl(".*/fac$", dirs)]

    # The raster processing unit code is the three characters immediately before
    # the trailing "/fdr" or "/fac" of each path.
    rpu_code <- function(path) {
      substr(path, nchar(path) - 6, nchar(path) - 4)
    }

    rpu <- rpu_code(fdr)

    # Name each entry from its own path so fdr and fac stay correctly labelled
    # even if the two listings differ in length or order.
    out <- list(
      fdr = as.list(setNames(fdr, paste0("rpu_", rpu))),
      fac = as.list(setNames(fac, paste0("rpu_", rpu_code(fac))))
    )

    out_list <- c(out_list, out)
    ```
    
    
    ```{r WBD}
    # Current version of National WBD
    
    
    wbd_dir <- file.path(data_dir, "wbd")

    # Download the national WBD geodatabase only when the folder is absent.
    if (!dir.exists(wbd_dir)) {
      dir.create(wbd_dir, recursive = TRUE)

      wbd <- download_wbd(wbd_dir, "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip")
    }

    wbd_rds <- file.path(wbd_dir, "WBD.rds")

    # `pattern` is a regular expression, so anchor on a literal ".gdb" suffix.
    # full.names = TRUE returns the path already prefixed with wbd_dir.
    out <- list(latest_wbd = list.files(wbd_dir, pattern = "\\.gdb$",
                                        full.names = TRUE))

    if (!file.exists(wbd_rds)) {

      # Read the HU12 feature class. out$latest_wbd already includes wbd_dir,
      # so it is passed as-is rather than being joined to wbd_dir again.
      wbdfc <- sf::read_sf(out$latest_wbd, "WBDHU12") %>%
        st_transform(crs = 5070)

      saveRDS(wbdfc, wbd_rds)
    }

    out_rds <- list(latest_wbd_rds = wbd_rds)

    out_list <- c(out_list, out, out_rds)
    
    ```
    
    ```{r MERIT HydroDEM}
    # MERIT HydroDEM - used for AK Geospatial Fabric, and potentially Mexico portion of R13
    #-----------------------------------------------------------------------------
    #    Yamazaki, D., Ikeshima, D., Sosa, J., Bates, P. D., Allen, G. H., & Pavelsky, T. M. ( 2019). 
    #    MERIT Hydro: a high‐resolution global hydrography map based on latest topography dataset.
    #    Water Resources Research, 55, 5053– 5073. https://doi.org/10.1029/2019WR024873
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro")

    # Download and unzip the MERIT archive only when its folder is absent.
    if (!dir.exists(merit_dir)) {

      dir.create(merit_dir, recursive = TRUE, showWarnings = FALSE)

      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }

      m <- "merged_AK_MERIT_Hydro.zip"

      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = m,
                                  destinations = file.path(data_dir, m))

      unzip(file.path(data_dir, m), exdir = data_dir)

      rm(m)
    }

    # Record the shapefile paths on every run, including the run that performs
    # the download, so the saved path list always contains them.
    out <- list(merit_catchments = file.path(merit_dir,
                                             "cat_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"),
                merit_rivers = file.path(merit_dir,
                                         "riv_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp"))
    out_list <- c(out_list, out)
    ```
    
    
    ```{r e2nhd}
    
    zip_file <- "e2nhdplusv2_us_csv.zip"
    out_zip <- file.path(data_dir, zip_file)

    out <- list(e2nhd_network = file.path(data_dir, "e2nhdplusv2_us.csv"))

    # Download and unpack the archive only when the CSV is absent.
    if (!file.exists(out$e2nhd_network)) {

      sbtools::item_file_download("5d16509ee4b0941bde5d8ffe",
                                  names = zip_file,
                                  destinations = out_zip)

      # Unzip the downloaded archive (out_zip), not the list of output paths.
      zip::unzip(out_zip, exdir = data_dir)

    }

    out_list <- c(out_list, out)
    
    ```
    
    ```{r nwm_topology}
    
    sb_f <- "NWM_v2_1_CONUS_Topology.csv"
    out <- list(nwm_network = file.path(data_dir, sb_f))

    # Download the topology CSV only when it is absent.
    if (!file.exists(out$nwm_network)) {

      # Pass the destination as a character path rather than the whole list.
      sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                                  names = sb_f,
                                  destinations = out$nwm_network)
    }

    out_list <- c(out_list, out)
    
    ```
    
    
    ```{r nhdplus_attributes}
    
    sb_f <- "enhd_nhdplusatts.csv"
    out <- list(new_nhdp_atts = file.path("cache", sb_f))

    # Download the attribute table into cache/ only when it is absent.
    if (!file.exists(out$new_nhdp_atts)) {
      # Pass the destination as a character path rather than the whole list.
      sbtools::item_file_download("5dcd5f96e4b069579760aedb",
                                  names = sb_f,
                                  destinations = out$new_nhdp_atts)
    }

    out_list <- c(out_list, out)
    ```
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ```{r}
    # Persist the collected paths. Create the target directory first so the
    # write does not fail when it is missing.
    dir.create(dirname(out_file), recursive = TRUE, showWarnings = FALSE)
    write_json(out_list, path = out_file, pretty = TRUE, auto_unbox = TRUE)
    ```