Skip to content
Snippets Groups Projects
get_data.Rmd 4.52 KiB
Newer Older
  • Learn to ignore specific revisions
  • Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ---
    title: "GFv2 Get Data"
    output: html_document
    ---
    
    This notebook pulls data from a number of sources and populates the GFv2 data directory. Any new data requirements should be added as code chunks here. 
    
    Each code chunk should create a path to the file you want to use in a process step, check if that path exists, and put the data there if it does not. All paths are stored in a list that is saved to the `cache` directory. If changes are made to the output of this notebook, they should be checked in.
    
    **If resources from ScienceBase need to be downloaded, this R Markdown document should be run from RStudio so that username and password authentication will work.**
    
    ```{r}
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    library(jsonlite)
    
    library(hyRefactor)
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    # File that will receive the collected paths as JSON.
    out_file <- file.path("cache", "data_paths.json")
    # Running list of data locations, seeded with the data directory itself.
    out_list <- list(data_dir = data_dir)
    ```
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    
    ```{r}
    
    # Local copy of hu_points.gpkg; fetched from ScienceBase only when absent.
    hu12_points_path <- file.path(data_dir, "hu_points.gpkg")
    
    if (!file.exists(hu12_points_path)) {
      # Reuse the active ScienceBase session when there is one, otherwise log in.
      # `sb` must be assigned on both paths because it is passed to the download
      # call below; assigning it only after a fresh login left it undefined for
      # users who were already authenticated.
      sb <- sbtools::current_session()
      if (is.null(sb)) {
        sb <- authenticate_sb()
      }
      
      sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                                  names = "hu_points.gpkg",
                                  destinations = hu12_points_path,
                                  session = sb)
    }
    
    # Record the path for downstream processing steps.
    out_list <- c(out_list, list(hu12_points_path = hu12_points_path))
    ```
    
    ```{r}
    
    # Directory that receives the unpacked contents of GAGESIII_gages.zip.
    gagesiii_points_path <- file.path(data_dir, "GAGESIII_gages")
    
    # An existing directory is taken to mean the data is already in place.
    if (!file.exists(gagesiii_points_path)) {
      dir.create(gagesiii_points_path, recursive = TRUE, showWarnings = FALSE)
      
      # Log in to ScienceBase only when no session is active yet.
      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }
      
      # Full destination of the downloaded archive inside the target directory.
      g3z <- file.path(gagesiii_points_path, "GAGESIII_gages.zip")
      
      sbtools::item_file_download("5dbc53d4e4b06957974eddae",
                                  names = basename(g3z),
                                  destinations = g3z)
      
      unzip(g3z, exdir = gagesiii_points_path)
      
      # Drop the temporary so it does not linger in the notebook environment.
      rm(g3z)
    }
    
    # Record the path for downstream processing steps.
    out_list <- c(out_list, list(gagesiii_points_path = gagesiii_points_path))
    ```
    
    
    ```{r}
    # Directory that receives the contents of the 2015_TE_Model_Estimates archive.
    TE_points_path <- file.path(data_dir, "TE_points")
    
    # An existing directory is taken to mean the data is already in place.
    if (!file.exists(TE_points_path)) {
      dir.create(TE_points_path, recursive = TRUE, showWarnings = FALSE)
      
      # Log in to ScienceBase only when no session is active yet.
      if (is.null(sbtools::current_session())) {
        authenticate_sb()
      }
      
      TEz <- "2015_TE_Model_Estimates_lat.long_COMIDs.7z"
      TE_archive <- file.path(TE_points_path, TEz)
      
      sbtools::item_file_download("5dbc53d4e4b06957974eddae", names = TEz,
                                  destinations = TE_archive)
      
      # The archive is in 7-Zip format, which utils::unzip() cannot read (it only
      # handles zip files), so extraction is delegated to a 7-Zip command-line
      # tool when one is available on the PATH.
      sevenz <- Sys.which(c("7z", "7za", "7zr"))
      sevenz <- sevenz[nzchar(sevenz)]
      
      if (length(sevenz) == 0) {
        warning("No 7-Zip executable found on the PATH; extract ", TE_archive,
                " into ", TE_points_path, " manually.", call. = FALSE)
      } else if (system2(sevenz[[1]],
                         c("x", "-y",
                           paste0("-o", shQuote(TE_points_path)),
                           shQuote(TE_archive))) != 0L) {
        # Keep the notebook running, but make the failed extraction visible.
        warning("7-Zip failed to extract ", TE_archive, call. = FALSE)
      }
      
      # Drop the temporaries so they do not linger in the notebook environment.
      rm(TEz, TE_archive, sevenz)
    }
    
    # Record the path for downstream processing steps.
    out_list <- c(out_list, list(TE_points_path = TE_points_path))
    ```
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ```{r}
    
    # Directory and geodatabase expected for the national NHDPlus download.
    nhdplus_dir <- file.path(data_dir, "NHDPlusNationalData")
    
    nhdplus_gdb <- file.path(data_dir, "NHDPlusNationalData/NHDPlusV21_National_Seamless_Flattened_Lower48.gdb")
    
    # Download only when the directory is missing. The conditional is closed
    # right after the download so that the steps below run on every execution,
    # whether or not a download was needed.
    if (!file.exists(nhdplus_dir)) {
      message("downloading NHDPlus...")
      
      suppressMessages(nhdplus_gdb <- download_nhdplusv2(data_dir))
    }
    
    # Hand the geodatabase location to nhdplus_path() before staging.
    nhdplus_path(nhdplus_gdb)
    
    suppressWarnings(staged_nhdplus <- stage_national_data())
    
    # Record both locations for downstream processing steps.
    out_list <- c(out_list, list(nhdplus_dir = nhdplus_dir, nhdplus_gdb = nhdplus_gdb))
    
    ```
    
    ```{r}
    # Directory that holds the flow direction (fdr) / flow accumulation (fac) data.
    fdr_fac_dir <- file.path(data_dir, "fdrfac")
    dir.create(fdr_fac_dir, recursive = TRUE, showWarnings = FALSE)
    # Will need to make this work for all the FdrFac data -- maybe just shell out to wget.
    
    download_fdr_fac(fdr_fac_dir, regions = c("01", "06", "03"))
    
    # Every directory under fdr_fac_dir that contains at least one file.
    dirs <- unique(dirname(list.files(fdr_fac_dir, recursive = TRUE, full.names = TRUE)))
    fdr <- dirs[grepl(".*/fdr$", dirs)]
    fac <- dirs[grepl(".*/fac$", dirs)]
    
    # The three characters that precede the trailing "/fdr" or "/fac" are used as
    # the rpu code. Each set of paths is labelled from its own paths, so a
    # missing or extra directory on one side cannot mislabel the other.
    rpu <- substr(fdr, (nchar(fdr) - 6), (nchar(fdr) - 4))
    fac_rpu <- substr(fac, (nchar(fac) - 6), (nchar(fac) - 4))
    
    # sprintf() returns a zero-length result for zero-length input, so an empty
    # download yields empty lists instead of a names-length error.
    out <- list(
      fdr = as.list(setNames(fdr, sprintf("rpu_%s", rpu))),
      fac = as.list(setNames(fac, sprintf("rpu_%s", fac_rpu)))
    )
    
    rm(fac_rpu)
    
    # Record the per-rpu directories for downstream processing steps.
    out_list <- c(out_list, out)
    ```
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ```{r}
    # Directory expected to hold the merged AK MERIT data. Nothing is downloaded
    # in this chunk: the paths are recorded only when the directory is present.
    merit_dir <- file.path(data_dir, "merged_AK_MERIT_Hydro")
    
    if (dir.exists(merit_dir)) {
      # Build both shapefile paths in one call, then label them.
      out <- as.list(file.path(
        merit_dir,
        c("cat_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp",
          "riv_pfaf_78_81_82_MERIT_Hydro_v07_Basins_v01.shp")
      ))
      names(out) <- c("merit_catchments", "merit_rivers")
      
      out_list <- c(out_list, out)
    } else {
      message("Talk to John Hammond to get the merged AK MERIT data.")
    }
    ```
    
    
    ```{r get_latest_wbd}
    # Directory for the national WBD geodatabase; downloaded only when the
    # directory does not exist yet.
    wbd_dir <- file.path(data_dir, "wbd")
    if (!dir.exists(wbd_dir)) {
      dir.create(wbd_dir, recursive = TRUE)
      
      wbd <- download_wbd(wbd_dir, "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip")
      
    }
    
    # list.files() takes a regular expression, not a glob, so match names that
    # end in ".gdb" explicitly rather than anything containing "gdb".
    out_list <- c(out_list, latest_wbd = file.path(wbd_dir, list.files(wbd_dir, pattern = "\\.gdb$")))
    ```
    
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    Don't edit the following chunk.
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ```{r}
    # Save the accumulated list of data paths to `out_file` as pretty-printed JSON.
    write_json(out_list, path = out_file, pretty = TRUE, auto_unbox = TRUE)
    
    Blodgett, David L.'s avatar
    Blodgett, David L. committed
    ```