source("2_process/src/data_utils.R")

# Phase 2 ("process") targets: aggregate the uncertainty statistics in
# `p1_unc_stats`, write the determinant/indicator uncertainty CSVs, pull and
# process census (ACS 5-year) layers, and process the raster inputs.
p2_targets <- list(

  # Uncertainty / evidence summaries ----

  # Confirming raw data matches `p1_unc_stats` from SB.
  # Per (dimension, determinant): sum every column whose name contains
  # "related", "unknown", "significant" or "direction" (output columns get a
  # `_total` suffix), then add up the four direction totals as `evidence_val`.
  tar_target(
    p2_unc_agg_summary,
    p1_unc_stats |>
      group_by(dimension, determinant) |>
      summarize(
        across(
          c(contains("related"), contains("unknown"),
            contains("significant"), contains("direction")),
          list(total = ~sum(.x, na.rm = TRUE))
        )
      ) |>
      mutate(
        evidence_val = pos_related_total + neg_related_total +
          unrelated_total + unk_direction_total
      )
  ),

  # Same aggregation as above, one level finer:
  # per (dimension, determinant, indicator).
  tar_target(
    p2_unc_agg_ind_summary,
    p1_unc_stats |>
      group_by(dimension, determinant, indicator) |>
      summarize(
        across(
          c(contains("related"), contains("unknown"),
            contains("significant"), contains("direction")),
          list(total = ~sum(.x, na.rm = TRUE))
        )
      ) |>
      mutate(
        evidence_val = pos_related_total + neg_related_total +
          unrelated_total + unk_direction_total
      )
  ),

  # Based on metadata:
  # Amount of evidence: Small = total_studies < 5; Medium = total_studies 5-9;
  #   Large = total_studies > 9
  # Amount of agreement: Low = < 50% of models; Medium = >50% & <74% of models;
  #   High = >74% of models; NA if the level of agreement could not be
  #   calculated as indicator was measured only once.

  # Dimension and determinant level.
  # For each row, `sig_value` is the largest of the four direction totals,
  # i.e. the number of studies in the most common significance category.
  tar_target(
    p2_top_trend_stats,
    p2_unc_agg_summary |>
      dplyr::select(
        dimension, determinant,
        pos_related_total, neg_related_total,
        unrelated_total, unk_direction_total
      ) |>
      group_by(determinant) |>
      mutate(
        sig_value = pmax(pos_related_total, neg_related_total,
                         unrelated_total, unk_direction_total)
      )
  ),

  # Indicator level.
  # Reshape the four direction totals to long form and keep, per
  # (dimension, determinant, indicator), the row(s) with the largest value.
  # `slice_max()` keeps ties, so an indicator can contribute several rows;
  # they are de-duplicated in `p2_unc_agg_summary_ind_csv`.
  tar_target(
    p2_top_trend_ind_stats,
    p2_unc_agg_ind_summary |>
      dplyr::select(
        dimension, determinant, indicator,
        pos_related_total, neg_related_total,
        unrelated_total, unk_direction_total
      ) |>
      pivot_longer(!c(dimension, determinant, indicator)) |>
      group_by(dimension, determinant, indicator) |>
      slice_max(value) |>
      rename(sig_name = name, sig_value = value)
  ),

  # Join `p2_unc_agg_summary` to top trends to get percentages of agreement
  # and evidence for determinant and nested dimension.
  # NOTE(review): `readr::write_csv()` returns the data frame it was given, so
  # this target stores the data, not the file path, and the CSV itself is not
  # tracked with `format = "file"`.
  tar_target(
    p2_unc_agg_summary_csv,
    p2_unc_agg_summary |>
      # No `by`: joins on all columns the two tables share.
      left_join(p2_top_trend_stats) |>
      dplyr::mutate(
        # level of agreement is the max percent of studies in agreement
        level_agreement = 100 * (sig_value / evidence_val),
        evidence_bin = case_when(
          evidence_val < 5 ~ "Small",
          between(evidence_val, 5, 9) ~ "Medium",
          evidence_val >= 10 ~ "Large"
        ),
        # Bins are contiguous: the previous `between(level_agreement, 51, 74)`
        # left values in [50, 51) (e.g. exactly 50%) unmatched, giving NA.
        agreement_bin = case_when(
          level_agreement < 50 ~ "Low",
          level_agreement >= 50 & level_agreement <= 74 ~ "Medium",
          level_agreement > 74 ~ "High"
        )
      ) |>
      # distinct(determinant, .keep_all = TRUE) |>
      readr::write_csv("public/determinant_uncertainty.csv")
  ),

  # commented out for now so we don't overwrite spanish names
  # tar_target(p2_unc_determinant_json,
  #   read_csv(p2_unc_agg_summary_csv) |>
  #     toJSON(pretty = TRUE) |>
  #     write("public/determinant_uncertainty.json")
  # ),

  # Indicator-level counterpart of `p2_unc_agg_summary_csv`; only the
  # evidence columns and `level_agreement` are written out.
  tar_target(
    p2_unc_agg_summary_ind_csv,
    p2_unc_agg_ind_summary |>
      # No `by`: joins on all columns the two tables share.
      left_join(p2_top_trend_ind_stats) |>
      dplyr::mutate(
        # level of agreement is the max percent of studies in agreement
        level_agreement = 100 * (sig_value / evidence_val),
        evidence_bin = case_when(
          evidence_val < 5 ~ "Small",
          between(evidence_val, 5, 9) ~ "Medium",
          evidence_val >= 10 ~ "Large"
        ),
        # Bins are contiguous: the previous `between(level_agreement, 51, 74)`
        # left values in [50, 51) (e.g. exactly 50%) unmatched, giving NA.
        agreement_bin = case_when(
          level_agreement < 50 ~ "Low",
          level_agreement >= 50 & level_agreement <= 74 ~ "Medium",
          level_agreement > 74 ~ "High"
        )
      ) |>
      # Drops the duplicate rows produced by ties in `slice_max()` upstream.
      distinct(indicator, .keep_all = TRUE) |>
      dplyr::select(
        dimension, determinant, indicator,
        evidence_val, evidence_bin, level_agreement
      ) |>
      readr::write_csv("public/indicator_uncertainty.csv")
  ),

  # Unique (dimension, determinant, indicator) combinations.
  tar_target(
    p2_indicators,
    p1_unc_stats |>
      distinct(dimension, determinant, indicator)
  ),

  # Census data ----

  # Process census data for variables of interest
  # B01003_001 = Total Population
  # B19013_001 = Median Household Income in the Past 12 Months
  #              (in 2022 Inflation-Adjusted Dollars)
  # B02001_003 = Estimate!!Total:!!Black or African American alone
  # B03001_003 = Estimate!!Total:!!Hispanic or Latino:
  # B01001_002 = Estimate!!Total:!!Male:
  # B01001_026 = Estimate!!Total:!!Female:
  tar_target(
    p2_census_acs5_layers,
    list("B01003_001", "B19013_001", "B02001_003",
         "B03001_003", "B01001_002", "B01001_026")
  ),
  # One dynamic branch per variable code, results kept as a list.
  tar_target(
    p2_census_acs5_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_layers),
    iteration = "list"
  ),
  # Total population table: first element of `p2_census_acs5_data`
  # (B01003_001 is listed first above), geometry dropped.
  tar_target(
    p2_tot_pop,
    p2_census_acs5_data[[1]] |>
      st_drop_geometry() |>
      rename(tot_pop = estimate)
  ),
  # Add % of total population col to each census layer
  tar_target(
    p2_perc_census_acs5_layers_sf,
    process_perc(tot_var = p2_census_acs5_data, tot_pop = p2_tot_pop),
    pattern = map(p2_census_acs5_data),
    iteration = "list"
  ),

  # Disaggregated census data
  # The subject tables include the following geographies: nation, all states
  # (including DC and Puerto Rico), all metropolitan areas, all congressional
  # districts, all counties, all places and all tracts. Subject tables provide
  # an overview of the estimates available in a particular topic. The data are
  # presented as both counts and percentages. There are over 66,000 variables
  # in this dataset.
  # More info here: https://api.census.gov/data/2019/acs/acs5.html
  # load_variables(2022, "acs5/subject", cache = TRUE)

  # Age related variables
  # S0101_C02_022 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!Under 18 years
  # S0101_C02_023 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!18 to 24 years
  # S0101_C02_024 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!15 to 44 years
  # S0101_C02_028 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!60 years and over
  tar_target(
    p2_census_acs5sub_age_layers,
    c("S0101_C02_022", "S0101_C02_023", "S0101_C02_024", "S0101_C02_028")
  ),
  tar_target(
    p2_census_acs5sub_age_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5sub_age_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = TRUE
    ),
    pattern = map(p2_census_acs5sub_age_layers),
    iteration = "list"
  ),

  # income related variables
  # S1901_C01_014 = Estimate!!Households!!PERCENT ALLOCATED!!Household income in the past 12 months
  tar_target(
    p2_census_acs5sub_income_layers,
    c("S1901_C01_014")
  ),
  tar_target(
    p2_census_acs5sub_income_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5sub_income_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = TRUE
    ),
    pattern = map(p2_census_acs5sub_income_layers),
    iteration = "list"
  ),

  # education related variables
  # S1501_C01_003 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency)
  # S1501_C01_009 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency)
  tar_target(
    p2_census_acs5sub_education_layers,
    c("S1501_C01_003", "S1501_C01_009")
  ),
  tar_target(
    p2_census_acs5sub_education_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5sub_education_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5sub_education_layers),
    iteration = "list"
  ),

  # household and rent related variables
  # B25010_001 = Estimate!!Average household size --!!Total:Average Household Size of Occupied Housing Units by Tenure
  # B25064_001 = Estimate!!Median gross rent
  tar_target(
    p2_census_acs5_household_layers,
    c("B25010_001", "B25064_001")
  ),
  tar_target(
    p2_census_acs5sub_household_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5_household_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_household_layers),
    iteration = "list"
  ),

  # percent households variable
  # DP04_0002P = Percent!!HOUSING OCCUPANCY!!Total housing units!!Occupied housing units
  # this does not have geometry, so we will join using tigris::counties()
  tar_target(
    p2_census_acs5profile_household_layers,
    c("DP04_0002P")
  ),
  tar_target(
    p2_census_acs5profile_household_data,
    get_acs(
      geography = "county",
      variables = p2_census_acs5profile_household_layers,
      year = 2022,
      survey = "acs5"
    ) |>
      # Keep the text after the last ", " in NAME as the state name,
      # then restrict to the states in `p1_census_states`.
      mutate(state_name = sub(".*, ", "", NAME)) |>
      filter(state_name %in% p1_census_states)
  ),
  # County polygons, reprojected to `p1_proj` and simplified.
  tar_target(
    p2_counties_sf,
    tigris::counties(cb = TRUE) |>
      st_transform(crs = p1_proj) |>
      ms_simplify(keep = 0.2)
  ),
  # Join counties spatial to households dataframe
  tar_target(
    p2_census_acs5profile_household_sf,
    p2_counties_sf |>
      inner_join(p2_census_acs5profile_household_data, by = "GEOID")
  ),

  # Median Household Income in the Past 12 Months (in 2022 Inflation-Adjusted
  # Dollars) for white only, Black or African American Alone, American Indian
  # and Alaska Native Alone, Asian Alone, Native Hawaiian and Other Pacific
  # Islander Alone, Hispanic or Latino
  tar_target(
    p2_census_acs5_income_by_race_layers,
    c("B19013A_001", "B19013B_001", "B19013C_001",
      "B19013D_001", "B19013E_001", "B19013I_001")
  ),
  tar_target(
    p2_census_acs5sub_income_by_race_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5_income_by_race_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_income_by_race_layers),
    iteration = "list"
  ),

  # Disability status
  # S1810_C03_001: Estimate!!Percent with a disability!!Total civilian noninstitutionalized population
  # S1810_C02_001: Estimate!!With a disability!!Total civilian noninstitutionalized population
  tar_target(
    p2_census_acs5_disability_layers,
    c("S1810_C03_001", "S1810_C02_001")
  ),
  tar_target(
    p2_census_acs5sub_disability_data,
    get_census_data(
      geography = "county",
      variable = p2_census_acs5_disability_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_disability_layers),
    iteration = "list"
  ),

  # Raster data ----

  # process population density raster data
  tar_target(
    p2_conus_sf,
    fetch_conus_sf()
  ),
  tar_target(
    p2_conus_sf_proj,
    p2_conus_sf |>
      st_transform(p1_proj)
  ),
  tar_target(
    p2_conus_inner,
    rmapshaper::ms_innerlines(p2_conus_sf_proj)
  ),
  tar_target(
    p2_pop_density_processed,
    process_pop_dens_raster(
      in_raster = p1_pop_density_raster_tif,
      # proj = p1_proj,
      conus = p2_conus_sf,
      conus_proj = p2_conus_sf_proj,
      outfile_path = "2_process/out/pop_density.tif"
    ),
    format = "file"
  ),

  # process impervious surfaces raster data
  tar_target(
    p2_imp_surf_processed,
    process_imp_surf(
      in_raster = p1_imp_surf_tif,
      conus_proj = p2_conus_sf_proj,
      outfile_path = "2_process/out/imp_surfaces.tif"
    ),
    format = "file"
  )
)