Newer
Older
p2_targets <- list(
# Confirming raw data matches `p1_unc_stats` from SB
# Target `p2_unc_agg_summary`: sums (NAs dropped) every column whose name
# contains "related", "unknown" or "significant" into `<column>_total`
# columns, then adds `evidence_val` as the sum of the four direction totals
# (positively related, negatively related, unrelated, unknown direction).
# NOTE(review): as written, summarize() is given no data frame. The upstream
# input (presumably `p1_unc_stats`, possibly grouped) is not visible here;
# confirm against the repository before running.
tar_target(p2_unc_agg_summary,
summarize(across(c(contains('related'),
contains('unknown'),
contains('significant')),
list(total = ~sum(.x, na.rm=TRUE)))) |>
mutate(evidence_val = positively_related_total + negatively_related_total +
unrelated_total + unknown_direction_total)
),
# Based on metadata:
# Amt of evidence: Small = total_studies < 5; Medium = total_studies 5-9; Large = total_studies > 9
# Amt of agreement: Low = < 50% of models; Medium = >50% & <74% of models; High = >74% of models; NA if the level of agreement could not be calculated because the indicator was measured only once.
# Target `p2_top_trend_stats`: starting from `p2_unc_agg_summary`, reshapes
# the direction totals to long form (keeping `dimension` and `determinant` as
# id columns), groups by dimension/determinant, keeps the row(s) with the
# largest `value` in each group, and renames `name`/`value` to
# `sig_name`/`sig_value`. No ungroup() follows, so the result stays grouped.
# NOTE(review): the two lines after `p2_unc_agg_summary |>` are the tail of a
# call's argument list, but the line that opens that call is not visible here
# (the `)` after `unknown_direction_total` has no matching opener). It was
# presumably a column selection; restore it before running.
tar_target(p2_top_trend_stats,
p2_unc_agg_summary |>
positively_related_total, negatively_related_total, unrelated_total,
unknown_direction_total) |>
pivot_longer(!c(dimension,determinant)) |>
group_by(dimension, determinant) |>
# for each indicator find the maximum % of studies in agreement
# across the significance categories.
slice_max(value) |>
rename(sig_name = name, sig_value = value)
),
# Join `p2_unc_agg_summary` to top trends to get percentages of agreement and evidence
# NOTE(review): this pipeline sits directly inside `list(` with no enclosing
# tar_target(...) and no comma separating it from the next element; the
# wrapper appears to be missing from this view. Restore it before running.
# left_join() is given no `by`, so the join uses every column name the two
# tables share.
p2_unc_agg_summary |>
left_join(p2_top_trend_stats) |>
# level of agreement is the max percent of studies in agreement
dplyr::mutate(level_agreement = 100*(sig_value/evidence_val)) |>
# Writes the joined table to a CSV under 2_process/out/.
readr::write_csv('2_process/out/indicator_uncertainty.csv')
# Unique dimension / determinant / indicator combinations found in
# `p1_unc_stats`.
tar_target(
  name = p2_indicators,
  command = distinct(p1_unc_stats, dimension, determinant, indicator)
),
# Process census data for variables of interest
# B01003_001 = Total Population
# B19013_001 = Median Household Income in the Past 12 Months (in 2022 Inflation-Adjusted Dollars)
# B02001_003 = Estimate!!Total:!!Black or African American alone
# B03001_003 = Estimate!!Total:!!Hispanic or Latino:
# B01001_002 = Estimate!!Total:!!Male:
# B01001_026 = Estimate!!Total:!!Female:

Azadpour, Elmera
committed
# ACS 5-year variable codes to download, one list element per variable.
# NOTE(review): this target is a list() while the acs5sub layer targets are
# character vectors built with c(); confirm whether the difference is
# intentional.
tar_target(
  name = p2_census_acs5_layers,
  command = list(
    "B01003_001", # total population
    "B19013_001", # median household income
    "B02001_003", # Black or African American alone
    "B03001_003", # Hispanic or Latino
    "B01001_002", # male
    "B01001_026"  # female
  )
),

Azadpour, Elmera
committed
tar_target(p2_census_acs5_data,
get_census_data(geography = 'county', variable = p2_census_acs5_layers,
states = p1_census_states, year = 2022, proj = p1_proj,
survey_var = "acs5", percent_rename = FALSE),
pattern = map(p2_census_acs5_layers),

Azadpour, Elmera
committed
p2_census_acs5_data[[1]] |>
# Add % of total population col to each census layer

Azadpour, Elmera
committed
tar_target(p2_perc_census_acs5_layers_sf,
process_perc(tot_var = p2_census_acs5_data,

Azadpour, Elmera
committed
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
pattern = map(p2_census_acs5_data),
iteration = "list"),
# Disaggregated census data
# The subject tables include the following geographies: nation, all states (including DC and Puerto Rico), all metropolitan areas, all congressional districts, all counties, all places and all tracts. Subject tables provide an overview of the estimates available in a particular topic. The data are presented as both counts and percentages. There are over 66,000 variables in this dataset.
# More info here: https://api.census.gov/data/2019/acs/acs5.html
# load_variables(2022, "acs5/subject", cache = TRUE)
# Age related variables
# S0101_C02_022 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!Under 18 years
# S0101_C02_023 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!18 to 24 years
# S0101_C02_024 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!15 to 44 years
# S0101_C02_028 = Estimate!!Percent!!Total population!!SELECTED AGE CATEGORIES!!60 years and over
# ACS 5-year subject-table codes for the age categories listed above.
tar_target(
  name = p2_census_acs5sub_age_layers,
  command = c(
    "S0101_C02_022", # under 18 years
    "S0101_C02_023", # 18 to 24 years
    "S0101_C02_024", # 15 to 44 years
    "S0101_C02_028"  # 60 years and over
  )
),
# County-level 2022 data for each age variable. `pattern = map(...)` creates
# one dynamic branch per element of `p2_census_acs5sub_age_layers`, and
# `iteration = "list"` keeps the branches as separate list elements.
tar_target(
  name = p2_census_acs5sub_age_data,
  command = get_census_data(
    geography = "county",
    variable = p2_census_acs5sub_age_layers,
    states = p1_census_states,
    year = 2022,
    proj = p1_proj,
    survey_var = "acs5",
    percent_rename = TRUE
  ),
  pattern = map(p2_census_acs5sub_age_layers),
  iteration = "list"
),
# income related variables
# S1901_C01_014 = Estimate!!Households!!PERCENT ALLOCATED!!Household income in the past 12 months
# S1901_C01_015 = Estimate!!Households!!PERCENT ALLOCATED!!Family income in the past 12 months
# S1901_C01_016 = Estimate!!Households!!PERCENT ALLOCATED!!Nonfamily income in the past 12 months
# ACS 5-year subject-table codes for the income variables listed above.
tar_target(
  name = p2_census_acs5sub_income_layers,
  command = c(
    "S1901_C01_014", # household income
    "S1901_C01_015", # family income
    "S1901_C01_016"  # nonfamily income
  )
),
# County-level 2022 data for each income variable. `pattern = map(...)`
# creates one dynamic branch per element of
# `p2_census_acs5sub_income_layers`, and `iteration = "list"` keeps the
# branches as separate list elements.
tar_target(
  name = p2_census_acs5sub_income_data,
  command = get_census_data(
    geography = "county",
    variable = p2_census_acs5sub_income_layers,
    states = p1_census_states,
    year = 2022,
    proj = p1_proj,
    survey_var = "acs5",
    percent_rename = TRUE
  ),
  pattern = map(p2_census_acs5sub_income_layers),
  iteration = "list"
),
# education related variables
# S1501_C01_003 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 18 to 24 years!!High school graduate (includes equivalency)
# S1501_C01_009 = Estimate!!Total!!AGE BY EDUCATIONAL ATTAINMENT!!Population 25 years and over!!High school graduate (includes equivalency)
# ACS 5-year subject-table codes for the education variables listed above.
tar_target(
  name = p2_census_acs5sub_education_layers,
  command = c(
    "S1501_C01_003", # population 18 to 24 years, high school graduate
    "S1501_C01_009"  # population 25 years and over, high school graduate
  )
),
# County-level 2022 data for each education variable. `pattern = map(...)`
# creates one dynamic branch per element of
# `p2_census_acs5sub_education_layers`, and `iteration = "list"` keeps the
# branches as separate list elements. Unlike the age and income targets,
# this one passes `percent_rename = FALSE`.
tar_target(
  name = p2_census_acs5sub_education_data,
  command = get_census_data(
    geography = "county",
    variable = p2_census_acs5sub_education_layers,
    states = p1_census_states,
    year = 2022,
    proj = p1_proj,
    survey_var = "acs5",
    percent_rename = FALSE
  ),
  pattern = map(p2_census_acs5sub_education_layers),
  iteration = "list"
)
)