Skip to content
Snippets Groups Projects
data_utils.R 4.02 KiB
Newer Older
  • Learn to ignore specific revisions
  • Azadpour, Elmera's avatar
    Azadpour, Elmera committed
    #' @title get census data of interest
    #' @description pull census data of interest with set geography, variable of interest, and year. 
    #' @param geography the geography of your data
    #' @param variable Character string or vector of character strings of variable IDs. tidycensus automatically returns the estimate and the margin of error associated with the variable.
    #' @param states, An optional vector of states for which you are requesting data. State names, postal codes, and FIPS codes are accepted. Defaults to NULL.
    #' @param year The year, or endyear, of the ACS sample. 5-year ACS data is available from 2009 through 2021; 1-year ACS data is available from 2005 through 2021, with the exception of 2020. Defaults to 2021.
    
    #' @param, survey_var Character, The ACS contains one-year, three-year, and five-year surveys expressed as "acs1", "acs3", and "acs5". The default selection is "acs5." See `?get_acs()` for more information.
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
    #' @param proj Set projection
    
    #' @param percent_rename if else statement where if TRUE, Rename tidycensus default estimate column name to percent
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
    #' @return a dataframe with census data 
    
    get_census_data <- function(geography, variable, states, year, proj, survey_var, percent_rename) {
    
      var_name <- pluck(variable, 1)
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
      # Your code for fetching ACS data and processing it
      df <- get_acs(
        geography = geography,
    
        variable = var_name,
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
        state = states,
        year = year,
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
      ) |>
        janitor::clean_names() |>
        st_transform(proj)
    
      
      if (percent_rename == TRUE) {
        
        df_rename <- df |>
          st_sf() |> rename(percent = estimate)
        return(df_rename) 
      
      } else {
        return(df) 
      }
    
    Azadpour, Elmera's avatar
    Azadpour, Elmera committed
    }
    
    #' @title Join total population data with total_[variable] data to get percents of X population
    #' @param tot_pop df of total population by county in conus
    #' @param tot_var sf of estimated totals of X population by county in conus
    #' @return a dataframe with additional `percent` column  
    process_perc <- function(tot_pop, tot_var){
      joined_df <- left_join(tot_var, tot_pop, by = "geoid")
      
      joined_perc_df <- joined_df |> 
        mutate(percent = (as.numeric(estimate) / tot_pop) * 100)
      
    
      return(joined_perc_df)}
    
    Cee Nell's avatar
    Cee Nell committed
    prep_tree_data <- function(data) {
      
    
    }
    
    ## data processing for population density raster map
    
    fetch_conus_sf <- function(){
      
      conus_sf <- tigris::states(cb = TRUE) |> 
        dplyr::filter(NAME %in% c('Washington', 'Oregon', 'California', 'Idaho', 'Nevada',
                                  'Utah', 'Arizona', 'Montana', 'Wyoming', 'Colorado',
                                  'New Mexico', 'North Dakota', 'South Dakota', 'Nebraska', 'Kansas',
                                  'Oklahoma', 'Texas', 'Minnesota', 'Iowa', 'Missouri',
                                  'Arkansas', 'Louisiana')
        ) |> 
        rmapshaper::ms_simplify(keep = 0.2) 
    }
    
    #' @title Process raster data for ploting
    #' @param 
    process_raster <- function(in_raster, proj, conus, conus_proj, outfile_path){
      # in_raster = p1_pop_density_raster, proj = p1_proj, conus = p2_conus_sf, conus_proj = p2_conus_sf_proj
      raw_data <- rast(in_raster)
    
      # crop population data to area of interest
      terra::window(raw_data) <- terra::ext(conus)
      
      # project population data
      conus_sf_rast <- rast(conus_proj, resolution = c(1000, 1000)) 
      pop_usa_proj <- project(raw_data, conus_sf_rast)
      # match boundaries of population data to conus data
      pop_usa_mask <- terra::mask(pop_usa_proj, terra::vect(conus_proj))
      
      # change to tibble using tidy terra for processing into log scale 
      usa_dat <- as_tibble(pop_usa_mask, xy = TRUE)
      usa_dat$pop <- ifelse(usa_dat$gpw_v4_population_count_rev11_2020_30_sec < 1 
                            & usa_dat$gpw_v4_population_count_rev11_2020_30_sec > 0, 0.1, 
                            usa_dat$gpw_v4_population_count_rev11_2020_30_sec)
      usa_dat$pop_log10 <- log10(usa_dat$pop)
      usa_dat <- usa_dat |> 
        dplyr::select(-gpw_v4_population_count_rev11_2020_30_sec, -pop)
      # convert back into raster
      usa_dat_rast <- as_spatraster(usa_dat) 
      
      writeRaster(usa_dat_rast, filename=outfile_path, overwrite=TRUE)
    }