Skip to content
Snippets Groups Projects
data_utils.R 4.83 KiB
#' @title get census data of interest
#' @description pull census data of interest with set geography, variable of interest, and year. 
#' @param geography the geography of your data
#' @param variable Character string or vector of character strings of variable IDs. tidycensus automatically returns the estimate and the margin of error associated with the variable.
#' @param states, An optional vector of states for which you are requesting data. State names, postal codes, and FIPS codes are accepted. Defaults to NULL.
#' @param year The year, or endyear, of the ACS sample. 5-year ACS data is available from 2009 through 2021; 1-year ACS data is available from 2005 through 2021, with the exception of 2020. Defaults to 2021.
#' @param, survey_var Character, The ACS contains one-year, three-year, and five-year surveys expressed as "acs1", "acs3", and "acs5". The default selection is "acs5." See `?get_acs()` for more information.
#' @param proj Set projection
#' @param percent_rename if else statement where if TRUE, Rename tidycensus default estimate column name to percent
#' @return a dataframe with census data 
get_census_data <- function(geography, variable, states, year, proj, survey_var, percent_rename) {
  var_name <- pluck(variable, 1)
  
  # Your code for fetching ACS data and processing it
  df <- get_acs(
    geography = geography,
    variable = var_name,
    state = states,
    year = year,
    geometry = TRUE,
    survey = survey_var
  ) |>
    janitor::clean_names() |>
    st_transform(proj)
  
  if (percent_rename == TRUE) {
    
    df_rename <- df |>
      st_sf() |> rename(percent = estimate)
    return(df_rename) 
  
  } else {
    return(df) 
  }
}

#' @title Join total population data with total_[variable] data to get percents of X population
#' @param tot_pop df of total population by county in conus
#' @param tot_var sf of estimated totals of X population by county in conus
#' @return a dataframe with additional `percent` column  
process_perc <- function(tot_pop, tot_var){
  joined_df <- left_join(tot_var, tot_pop, by = "geoid")
  
  joined_perc_df <- joined_df |> 
    mutate(percent = (as.numeric(estimate) / tot_pop) * 100)
  
  return(joined_perc_df)}
prep_tree_data <- function(data) {
  
}

## data processing for population density raster map
#' @title Fetch conus for raster processing 
#' @param states, chr string with long name of states of interest to filter by
fetch_conus_sf <- function(states){
  
  conus_sf <- tigris::states(cb = TRUE) |> 
    dplyr::filter(NAME %in% states
    ) |> 
    rmapshaper::ms_simplify(keep = 0.2) 
}

#' @title Process population density raster data for plotting
#' @param in_raster character string - .tif file path of raster data for specified variable of interest
#' @param conus_sf, sf of conus states outline
#' @param conus_proj, projected sf of conus states outline
#' @param outfile_path, outfile path for processed tifs 
process_pop_dens <- function(in_raster, conus_sf, conus_proj, outfile_path){
  
  raw_data <- rast(in_raster)

  # crop population data to area of interest 
  terra::window(raw_data) <- terra::ext(conus_sf) 
  
  # project population data
  conus_sf_rast <- rast(conus_proj, resolution = c(1000, 1000)) 
  pop_usa_proj <- project(raw_data, conus_sf_rast)
  # match boundaries of population data to conus data
  pop_usa_mask <- terra::mask(pop_usa_proj, terra::vect(conus_proj))
  
  # change to tibble using tidy terra for processing into log scale 
  usa_dat <- as_tibble(pop_usa_mask, xy = TRUE)
  usa_dat$pop <- ifelse(usa_dat$gpw_v4_population_count_rev11_2020_30_sec < 1 
                        & usa_dat$gpw_v4_population_count_rev11_2020_30_sec > 0, 0.1, 
                        usa_dat$gpw_v4_population_count_rev11_2020_30_sec)
  usa_dat$pop_log10 <- log10(usa_dat$pop)
  usa_dat <- usa_dat |> 
    dplyr::select(-gpw_v4_population_count_rev11_2020_30_sec, -pop)
  # convert back into raster
  usa_dat_rast <- as_spatraster(usa_dat) 
  
  writeRaster(usa_dat_rast, filename = outfile_path, overwrite=TRUE)
  
  return(outfile_path)
}


#' @title Process impervous surfaces raster data for plotting
#' @param in_raster character string - .tif file path of raster data for specified variable of interest
#' @param conus_proj, projected sf of conus states outline
#' @param outfile_path, outfile path for processed tifs 
process_imp_surf <- function(in_raster, conus_proj, outfile_path){
  
  imp_surf_raw <- rast(in_raster)
  
  # crop population data to area of interest
  terra::window(imp_surf_raw) <- terra::ext(conus_proj)
  
  # project population data
  conus_sf_rast <- rast(conus_proj, resolution = c(1000, 1000)) 
  imp_surf_proj <- project(imp_surf_raw, conus_sf_rast)
  
  # match boundaries of population data to conus data
  imp_surf_mask <- terra::mask(imp_surf_proj, terra::vect(conus_proj))
  
  writeRaster(imp_surf_mask, filename = outfile_path, overwrite=TRUE)
  
  return(outfile_path)
}