Created
February 4, 2026 22:28
-
-
Save alexpghayes/9c10f9e53c3e521c2aceea1fa4b68f9e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ``` r | |
| library(glue) | |
| library(stringr) | |
| library(tidycensus) | |
| library(tidyverse) | |
| # Adapted from https://github.com/kuriwaki/ccesMRPprep/blob/main/data-raw/specify_ACS-codes.R | |
| # Determines which census variables to pull and gives them helpful names. | |
| # Build a lookup table of ACS variables tagged with demographic categories. | |
| # | |
| # Loads the variable dictionary for the requested release with | |
| # `load_variables()`, keeps the rows whose label mentions "Total", strips the | |
| # "Estimate!!Total" prefix from the label, and extracts the gender, age, | |
| # education and race category named in each label (race falls back to the | |
| # table concept when the label has none). | |
| # | |
| # @param year   Passed to `load_variables()` as `year`. | |
| # @param survey Passed to `load_variables()` as `dataset`. | |
| # @return The filtered variable table with `variable`, `table`, `concept`, | |
| #   `num`, `label`, every other column `load_variables()` returned, and the | |
| #   extracted `gender`, `age`, `educ` and `race` (NA where nothing matched). | |
| make_acscodes_df <- function(year = 2023, survey = "acs5") { | |
|   # Collapse a vector of regex fragments into a single "(a|b|c)" alternation. | |
|   as_alternation <- function(patterns) { | |
|     paste0("(", paste(patterns, collapse = "|"), ")") | |
|   } | |
|   # Coarse brackets first, then the finer ones ("35 to 44 years" is shared by | |
|   # both sets and therefore listed once). | |
|   ages <- c( | |
|     "18 to 24 years", | |
|     "25 to 34 years", | |
|     "35 to 44 years", | |
|     "45 to 64 years", | |
|     "65 years and over", | |
|     "18 and 19 years", | |
|     "20 to 24 years", | |
|     "25 to 29 years", | |
|     "30 to 34 years", | |
|     "45 to 54 years", | |
|     "55 to 64 years", | |
|     "65 to 74 years", | |
|     "75 to 84 years", | |
|     "85 years and over" | |
|   ) | |
|   # Alternation is ordered: at a given position the first alternative that | |
|   # matches wins. A pattern that is a prefix of another must come AFTER the | |
|   # longer one, otherwise the longer category can never be extracted. | |
|   education <- c( | |
|     "Less than high school diploma", | |
|     "Nursery to 4th grade", | |
|     "No schooling completed", | |
|     "Less than 9th grade", | |
|     "5th and 6th grade", | |
|     "7th and 8th grade", | |
|     "9th grade", | |
|     "10th grade", | |
|     "11th grade", | |
|     "12th grade, no diploma", | |
|     "Less than high school graduate", | |
|     "9th to 12th grade,? no diploma", | |
|     "High school graduate \\(includes equivalency\\)", | |
|     "High school graduate, GED, or alternative", | |
|     # Must precede "Some college", which is a prefix of it. | |
|     "Some college or associate's degree", | |
|     "Some college", | |
|     "Associate's degree", | |
|     # Anchored so it does not shadow "Bachelor's degree or higher". | |
|     "Bachelor's degree$", | |
|     "Doctorate degree", | |
|     "Master's degree", | |
|     "Professional school degree", | |
|     "Graduate or professional degree", | |
|     "Bachelor's degree or higher" | |
|   ) | |
|   # NOTE(review): text reading "Not Hispanic or Latino" that is not preceded by | |
|   # "White alone," also matches the "Hispanic or Latino" alternative -- confirm | |
|   # before relying on `race` for tables that contain such labels. | |
|   races <- c( | |
|     "White alone, not Hispanic or Latino", | |
|     "Hispanic or Latino", | |
|     "Black or African American alone", | |
|     "American Indian and Alaska Native alone", | |
|     "Asian alone", | |
|     "Native Hawaiian and Other Pacific Islander alone", | |
|     "Some other race alone", | |
|     "Two or more races" | |
|   ) | |
|   ages_regex <- as_alternation(ages) | |
|   edu_regex <- as_alternation(education) | |
|   # Race is matched case-insensitively in both the label and the concept. | |
|   races_regex <- regex(as_alternation(races), ignore_case = TRUE) | |
|   # The pipeline is the last expression so the result is returned visibly. | |
|   load_variables(year = year, dataset = survey, cache = TRUE) |> | |
|     mutate(variable = name) |> | |
|     separate(name, sep = "_", into = c("table", "num")) |> | |
|     select(variable, table, concept, num, label, everything()) |> | |
|     filter(str_detect(label, "Total")) |> | |
|     mutate( | |
|       # `label` is rewritten first; the extractions below see the stripped text. | |
|       label = str_remove(label, "Estimate!!Total"), | |
|       gender = str_extract(label, "(Male|Female)"), | |
|       age = str_extract(label, ages_regex), | |
|       educ = str_extract(label, edu_regex), | |
|       race = coalesce( | |
|         str_extract(label, races_regex), | |
|         str_extract(concept, races_regex) | |
|       ) | |
|     ) | |
| } | |
| # ACS release to query; reused for the variable dictionary and the downloads. | |
| year <- 2023 | |
| survey <- "acs5" | |
| # Annotated variable dictionary for that release. | |
| acscodes_df <- make_acscodes_df(year, survey) | |
| # Codes tagged with sex, age and education, restricted to tables starting in "B". | |
| acscodes_age_sex_educ <- acscodes_df |> | |
|   drop_na(gender, age, educ) |> | |
|   filter(str_detect(table, "^B")) |> | |
|   pull(variable) | |
| # Codes from tables matching B01001[B-I] that are tagged with sex, age and race. | |
| acscodes_age_sex_race <- acscodes_df |> | |
|   filter(str_detect(table, "B01001[B-I]")) |> | |
|   drop_na(gender, age, race) |> | |
|   pull(variable) | |
| # Codes tagged with education, sex and race, from any table. | |
| acscodes_sex_educ_race <- acscodes_df |> | |
|   drop_na(educ, gender, race) |> | |
|   pull(variable) | |
| # Download ACS estimates and attach the descriptive columns for each variable. | |
| # | |
| # @param geography       Passed to `get_acs()` as `geography`. | |
| # @param variables       Variable codes passed to `get_acs()` as `variables`. | |
| # @param variable_labels Data frame with a `variable` column, expected to hold | |
| #   at most one row per code; its other columns are appended to the result. | |
| # @param year            Passed to `get_acs()` as `year`. | |
| # @param survey          Passed to `get_acs()` as `survey`. | |
| # @return The `get_acs()` result (requested with `geometry = FALSE`) left-joined | |
| #   to `variable_labels` on `variable`. | |
| # | |
| # NOTE(review): columns of `variable_labels` are appended as-is, so a | |
| # `geography` column in the result presumably comes from that table rather than | |
| # from the `geography` argument -- confirm before interpreting it. | |
| get_acs_helper <- function( | |
|   geography, | |
|   variables, | |
|   variable_labels, | |
|   year, | |
|   survey | |
| ) { | |
|   # No trailing comma after the last argument: it would pass an empty argument | |
|   # into `...` of `get_acs()`. | |
|   # The pipeline is the last expression so the result is returned visibly. | |
|   get_acs( | |
|     geography = geography, | |
|     variables = variables, | |
|     year = year, | |
|     survey = survey, | |
|     geometry = FALSE | |
|   ) |> | |
|     left_join( | |
|       variable_labels, | |
|       by = join_by(variable), | |
|       # Fail loudly instead of duplicating ACS rows if a code is listed twice. | |
|       relationship = "many-to-one" | |
|     ) | |
| } | |
| # Geographic level of the downloads (other levels can be substituted here). | |
| geography <- "state" | |
| # Sex x age x education cells. | |
| age_sex_educ_df <- get_acs_helper( | |
|   geography, | |
|   acscodes_age_sex_educ, | |
|   variable_labels = acscodes_df, | |
|   year = year, | |
|   survey = survey | |
| ) | |
| #> Getting data from the 2019-2023 5-year ACS | |
| # Sex x age x race cells. | |
| age_sex_race_df <- get_acs_helper( | |
|   geography, | |
|   acscodes_age_sex_race, | |
|   variable_labels = acscodes_df, | |
|   year = year, | |
|   survey = survey | |
| ) | |
| #> Getting data from the 2019-2023 5-year ACS | |
| # Sex x education x race cells. | |
| sex_educ_race_df <- get_acs_helper( | |
|   geography, | |
|   acscodes_sex_educ_race, | |
|   variable_labels = acscodes_df, | |
|   year = year, | |
|   survey = survey | |
| ) | |
| #> Getting data from the 2019-2023 5-year ACS | |
| # Print the first result for inspection. | |
| age_sex_educ_df | |
| #> # A tibble: 3,640 × 14 | |
| #> GEOID NAME variable estimate moe table concept num label geography | |
| #> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> | |
| #> 1 01 Alabama B15001_004 3584 588 B15001 Sex by … 004 :!!M… tract | |
| #> 2 01 Alabama B15001_005 28336 1407 B15001 Sex by … 005 :!!M… tract | |
| #> 3 01 Alabama B15001_006 95200 2634 B15001 Sex by … 006 :!!M… tract | |
| #> 4 01 Alabama B15001_007 82856 2289 B15001 Sex by … 007 :!!M… tract | |
| #> 5 01 Alabama B15001_008 10896 854 B15001 Sex by … 008 :!!M… tract | |
| #> 6 01 Alabama B15001_009 16384 1343 B15001 Sex by … 009 :!!M… tract | |
| #> 7 01 Alabama B15001_010 1455 350 B15001 Sex by … 010 :!!M… tract | |
| #> 8 01 Alabama B15001_012 8839 919 B15001 Sex by … 012 :!!M… tract | |
| #> 9 01 Alabama B15001_013 24695 1231 B15001 Sex by … 013 :!!M… tract | |
| #> 10 01 Alabama B15001_014 108139 2513 B15001 Sex by … 014 :!!M… tract | |
| #> # ℹ 3,630 more rows | |
| #> # ℹ 4 more variables: gender <chr>, age <chr>, educ <chr>, race <chr> | |
| ``` | |
| <sup>Created on 2026-02-04 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment