source('2_process/src/data_utils.R')

# `p2_targets`: list of `tar_target()` definitions for the `p2_*` targets.
# Two groups of targets are defined here:
#   1. Summaries of the uncertainty/evidence table (`p1_unc_agg`,
#      `p1_unc_stats`), including a CSV export.
#   2. County-level census (ACS 5-year, 2022) downloads, one dynamic branch
#      per variable code, via `get_census_data()`.
# Upstream objects prefixed `p1_` and the helpers `get_census_data()` and
# `process_perc()` are defined outside this file section.
p2_targets <- list(

  # Confirming raw data matches `p1_unc_stats` from SB
  # Sums every column whose name contains 'related', 'unknown' or
  # 'significant' within each dimension/determinant pair (output columns get
  # a `_total` suffix), then adds `evidence_val` as the sum of the four
  # direction totals.
  tar_target(
    p2_unc_agg_summary,
    p1_unc_agg |>
      group_by(dimension, determinant) |>
      summarize(
        across(
          c(contains('related'), contains('unknown'), contains('significant')),
          list(total = ~sum(.x, na.rm = TRUE))
        ),
        # Spell out the regrouping summarize() applies by default here
        # (result stays grouped by `dimension`); this only silences the
        # informational message.
        .groups = "drop_last"
      ) |>
      mutate(
        evidence_val = positively_related_total + negatively_related_total +
          unrelated_total + unknown_direction_total
      )
  ),

  # Based on metadata:
  # Amt of evidence: Small = total_studies < 5; Medium = total_studies 5-9;
  #   Large = total_studies > 9
  # Amt of agreement: Low = < 50% of models; Medium = >50% & <74% of models;
  #   High = >74% of models; NA if the level of agreement could not be
  #   calculated as indicator was measured only once.
  tar_target(
    p2_top_trend_stats,
    p2_unc_agg_summary |>
      dplyr::select(
        dimension, determinant, # indicator,
        positively_related_total, negatively_related_total,
        unrelated_total, unknown_direction_total
      ) |>
      pivot_longer(!c(dimension, determinant)) |>
      group_by(dimension, determinant) |>
      # For each dimension/determinant pair keep the direction category with
      # the largest total.
      # NOTE(review): slice_max() keeps ties by default, so a pair whose top
      # categories tie yields more than one row here (and after the join
      # below) -- confirm that is intended.
      slice_max(value) |>
      rename(sig_name = name, sig_value = value)
  ),

  # Join `p2_unc_agg_summary` to top trends to get percentages of agreement
  # and evidence
  # NOTE(review): readr::write_csv() returns its input invisibly, so this
  # target stores the data frame rather than the file path; the CSV is not
  # declared with `format = "file"` -- confirm whether file tracking is wanted.
  tar_target(
    p2_unc_agg_summary_csv,
    p2_unc_agg_summary |>
      # Explicit keys: these are the only columns the two tables share.
      left_join(p2_top_trend_stats, by = c("dimension", "determinant")) |>
      # level of agreement is the max percent of studies in agreement
      dplyr::mutate(level_agreement = 100 * (sig_value / evidence_val)) |>
      readr::write_csv('2_process/out/indicator_uncertainty.csv')
  ),

  # Unique dimension/determinant/indicator combinations
  tar_target(
    p2_indicators,
    p1_unc_stats |>
      distinct(dimension, determinant, indicator)
  ),

  # Process census data for variables of interest
  # B01003_001 = Total Population
  # B19013_001 = Median Household Income in the Past 12 Months (in 2022
  #              Inflation-Adjusted Dollars)
  # B02001_003 = Estimate!!Total:!!Black or African American alone
  # B03001_003 = Estimate!!Total:!!Hispanic or Latino:
  # B01001_002 = Estimate!!Total:!!Male:
  # B01001_026 = Estimate!!Total:!!Female:
  # NOTE(review): declared with list() while the other `*_layers` targets use
  # c(); left unchanged so the value passed to get_census_data() per branch
  # stays the same -- confirm whether a character vector is preferred.
  tar_target(
    p2_census_acs5_layers,
    list(
      "B01003_001", "B19013_001", "B02001_003",
      "B03001_003", "B01001_002", "B01001_026"
    )
  ),
  tar_target(
    p2_census_acs5_data,
    get_census_data(
      geography = 'county',
      variable = p2_census_acs5_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_layers),
    iteration = "list"
  ),

  # Total population: taken from the first branch of `p2_census_acs5_data`,
  # i.e. this relies on "B01003_001" staying first in `p2_census_acs5_layers`.
  tar_target(
    p2_tot_pop,
    p2_census_acs5_data[[1]] |>
      st_drop_geometry() |>
      rename(tot_pop = estimate)
  ),

  # Add % of total population col to each census layer
  tar_target(
    p2_perc_census_acs5_layers_sf,
    process_perc(tot_var = p2_census_acs5_data, tot_pop = p2_tot_pop),
    pattern = map(p2_census_acs5_data),
    iteration = "list"
  ),

  # Disaggregated census data
  # The subject tables include the following geographies: nation, all states
  # (including DC and Puerto Rico), all metropolitan areas, all congressional
  # districts, all counties, all places and all tracts. Subject tables provide
  # an overview of the estimates available in a particular topic. The data are
  # presented as both counts and percentages. There are over 66,000 variables
  # in this dataset.
  # More info here: https://api.census.gov/data/2019/acs/acs5.html
  # load_variables(2022, "acs5/subject", cache = TRUE)

  # Age related variables
  # S0101_C02_022 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!Under 18 years
  # S0101_C02_023 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!18 to 24 years
  # S0101_C02_024 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!15 to 44 years
  # S0101_C02_028 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!60 years and over
  tar_target(
    p2_census_acs5sub_age_layers,
    c("S0101_C02_022", "S0101_C02_023", "S0101_C02_024", "S0101_C02_028")
  ),
  tar_target(
    p2_census_acs5sub_age_data,
    get_census_data(
      geography = 'county',
      variable = p2_census_acs5sub_age_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = TRUE
    ),
    pattern = map(p2_census_acs5sub_age_layers),
    iteration = "list"
  ),

  # income related variables
  # S1901_C01_014 = Estimate!!Households!!PERCENT ALLOCATED!!Household income in the past 12 months
  tar_target(
    p2_census_acs5sub_income_layers,
    c("S1901_C01_014")
  ),
  tar_target(
    p2_census_acs5sub_income_data,
    get_census_data(
      geography = 'county',
      variable = p2_census_acs5sub_income_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = TRUE
    ),
    pattern = map(p2_census_acs5sub_income_layers),
    iteration = "list"
  ),

  # education related variables
  # S1501_C01_003 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency)
  # S1501_C01_009 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency)
  tar_target(
    p2_census_acs5sub_education_layers,
    c("S1501_C01_003", "S1501_C01_009")
  ),
  tar_target(
    p2_census_acs5sub_education_data,
    get_census_data(
      geography = 'county',
      variable = p2_census_acs5sub_education_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5sub_education_layers),
    iteration = "list"
  ),

  # household related variables
  # S1101_C01_001 = Estimate!!Total!!HOUSEHOLDS!!Total households
  # S1101_C04_001 = Estimate!!Female householder, no spouse present, family household!!HOUSEHOLDS!!Total households
  # B25064_001 = Estimate!!Median gross rent
  tar_target(
    p2_census_acs5_household_layers,
    c("S1101_C01_001", "S1101_C04_001", "B25064_001")
  ),
  tar_target(
    p2_census_acs5sub_household_data,
    get_census_data(
      geography = 'county',
      variable = p2_census_acs5_household_layers,
      states = p1_census_states,
      year = 2022,
      proj = p1_proj,
      survey_var = "acs5",
      percent_rename = FALSE
    ),
    pattern = map(p2_census_acs5_household_layers),
    iteration = "list"
  )
)