Skip to content

Instantly share code, notes, and snippets.

@alexpghayes
Created February 4, 2026 22:28
Show Gist options
  • Select an option

  • Save alexpghayes/9c10f9e53c3e521c2aceea1fa4b68f9e to your computer and use it in GitHub Desktop.

Select an option

Save alexpghayes/9c10f9e53c3e521c2aceea1fa4b68f9e to your computer and use it in GitHub Desktop.
``` r
library(glue)
library(stringr)
library(tidycensus)
library(tidyverse)
# Adapted from https://github.com/kuriwaki/ccesMRPprep/blob/main/data-raw/specify_ACS-codes.R
# Determines which census variables to pull and gives them helpful names.
#' Build a lookup table of ACS variables tagged with demographic categories
#'
#' Loads the variable dictionary for one ACS release with
#' `load_variables()`, keeps the variables whose label mentions "Total",
#' and tags each one with the gender, age bracket, education level and race
#' found in its label (race falls back to the table concept when the label
#' has none).
#'
#' @param year Passed to `load_variables()` as `year`.
#' @param survey Passed to `load_variables()` as `dataset`, e.g. `"acs5"`.
#' @return A data frame with one row per retained variable. Columns:
#'   `variable`, `table`, `concept`, `num`, `label`, any remaining columns
#'   from `load_variables()`, then `gender`, `age`, `educ` and `race`
#'   (`NA` wherever no category matched).
make_acscodes_df <- function(year = 2023, survey = "acs5") {
  # Age brackets as they are spelled in ACS labels (coarse and fine cuts).
  ages <- c(
    "18 to 24 years",
    "25 to 34 years",
    "35 to 44 years",
    "45 to 64 years",
    "65 years and over",
    "18 and 19 years",
    "20 to 24 years",
    "25 to 29 years",
    "30 to 34 years",
    "45 to 54 years",
    "55 to 64 years",
    "65 to 74 years",
    "75 to 84 years",
    "85 years and over"
  )

  # Education levels. These are regex fragments, not literal strings.
  # Alternation picks the first alternative that matches, so a pattern that
  # is a prefix of another must come AFTER the longer one.
  education <- c(
    "Less than high school diploma",
    "Nursery to 4th grade",
    "No schooling completed",
    "Less than 9th grade",
    "5th and 6th grade",
    "7th and 8th grade",
    "9th grade",
    "10th grade",
    "11th grade",
    "12th grade, no diploma",
    "Less than high school graduate",
    "9th to 12th grade,? no diploma",
    "High school graduate \\(includes equivalency\\)",
    "High school graduate, GED, or alternative",
    # Must precede "Some college", otherwise it can never be extracted.
    "Some college or associate's degree",
    "Some college",
    "Associate's degree",
    "Bachelor's degree$",
    "Doctorate degree",
    "Master's degree",
    "Professional school degree",
    "Graduate or professional degree",
    "Bachelor's degree or higher"
  )

  races <- c(
    "White alone, not Hispanic or Latino",
    "Hispanic or Latino",
    "Black or African American alone",
    "American Indian and Alaska Native alone",
    "Asian alone",
    "Native Hawaiian and Other Pacific Islander alone",
    "Some other race alone",
    "Two or more races"
  )

  # Collapse a vector of patterns into one "(a|b|c)" alternation.
  as_alternation <- function(patterns) {
    str_c("(", str_c(patterns, collapse = "|"), ")")
  }

  ages_regex <- as_alternation(ages)
  edu_regex <- as_alternation(education)
  # Race is matched case-insensitively in both the label and the concept.
  races_regex <- regex(as_alternation(races), ignore_case = TRUE)

  vars_raw <- load_variables(
    year = year,
    dataset = survey,
    cache = TRUE
  )

  # The pipeline is the last expression, so the result is returned visibly.
  vars_raw |>
    # Keep the full variable id; `name` itself is split into table + number.
    mutate(variable = name) |>
    separate(name, sep = "_", into = c("table", "num")) |>
    select(variable, table, concept, num, label, everything()) |>
    filter(str_detect(label, "Total")) |>
    # Only the "Estimate!!Total" prefix is stripped; whatever follows it
    # (such as a ":" separator) stays in the label.
    mutate(label = str_remove(label, "Estimate!!Total")) |>
    mutate(
      gender = str_extract(label, "(Male|Female)"),
      age = str_extract(label, ages_regex),
      educ = str_extract(label, edu_regex),
      race = coalesce(
        str_extract(label, races_regex),
        str_extract(concept, races_regex)
      )
    )
}
# ACS release used for every query in this script.
survey <- "acs5"
year <- 2023

# Variable dictionary tagged with gender / age / educ / race.
acscodes_df <- make_acscodes_df(year = year, survey = survey)

# Variables broken down by age, sex and education (tables starting with "B").
acscodes_age_sex_educ <- acscodes_df |>
  filter(
    str_detect(table, "^B"),
    !is.na(gender),
    !is.na(age),
    !is.na(educ)
  ) |>
  pull(variable)

# Variables broken down by age, sex and race (table ids matching B01001B-I).
acscodes_age_sex_race <- acscodes_df |>
  filter(
    str_detect(table, "B01001[B-I]"),
    !is.na(gender),
    !is.na(age),
    !is.na(race)
  ) |>
  pull(variable)

# Variables broken down by sex, education and race (any table).
acscodes_sex_educ_race <- acscodes_df |>
  filter(
    !is.na(educ),
    !is.na(gender),
    !is.na(race)
  ) |>
  pull(variable)
#' Download ACS estimates and attach variable metadata
#'
#' Calls `get_acs()` (without geometry) for the requested variables and
#' left-joins the rows of `variable_labels` onto the result by `variable`.
#'
#' @param geography Passed to `get_acs()` as `geography`, e.g. `"state"`.
#' @param variables Character vector of ACS variable ids to download.
#' @param variable_labels Data frame with a `variable` column and one row per
#'   variable id. All of its other columns are appended to the output as-is.
#' @param year Passed to `get_acs()` as `year`.
#' @param survey Passed to `get_acs()` as `survey`, e.g. `"acs5"`.
#' @return The `get_acs()` result with the columns of `variable_labels`
#'   joined on. Rows are never dropped; unmatched variables get `NA` metadata.
get_acs_helper <- function(
  geography,
  variables,
  variable_labels,
  year,
  survey
) {
  estimates <- get_acs(
    geography = geography,
    variables = variables,
    year = year,
    survey = survey,
    geometry = FALSE
  )

  # Many estimate rows (one per geography) share one label row. Failing on
  # duplicate label rows avoids silently duplicating population counts.
  left_join(
    estimates,
    variable_labels,
    by = join_by(variable),
    relationship = "many-to-one"
  )
}
# Geography level for every download below; other levels work here too.
geography <- "state"

# Age x sex x education counts.
age_sex_educ_df <- get_acs_helper(
  geography,
  acscodes_age_sex_educ,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
)
#> Getting data from the 2019-2023 5-year ACS

# Age x sex x race counts.
age_sex_race_df <- get_acs_helper(
  geography,
  acscodes_age_sex_race,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
)
#> Getting data from the 2019-2023 5-year ACS

# Sex x education x race counts.
sex_educ_race_df <- get_acs_helper(
  geography,
  acscodes_sex_educ_race,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
)
#> Getting data from the 2019-2023 5-year ACS

# Print the first table.
age_sex_educ_df
#> # A tibble: 3,640 × 14
#> GEOID NAME variable estimate moe table concept num label geography
#> <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 01 Alabama B15001_004 3584 588 B15001 Sex by … 004 :!!M… tract
#> 2 01 Alabama B15001_005 28336 1407 B15001 Sex by … 005 :!!M… tract
#> 3 01 Alabama B15001_006 95200 2634 B15001 Sex by … 006 :!!M… tract
#> 4 01 Alabama B15001_007 82856 2289 B15001 Sex by … 007 :!!M… tract
#> 5 01 Alabama B15001_008 10896 854 B15001 Sex by … 008 :!!M… tract
#> 6 01 Alabama B15001_009 16384 1343 B15001 Sex by … 009 :!!M… tract
#> 7 01 Alabama B15001_010 1455 350 B15001 Sex by … 010 :!!M… tract
#> 8 01 Alabama B15001_012 8839 919 B15001 Sex by … 012 :!!M… tract
#> 9 01 Alabama B15001_013 24695 1231 B15001 Sex by … 013 :!!M… tract
#> 10 01 Alabama B15001_014 108139 2513 B15001 Sex by … 014 :!!M… tract
#> # ℹ 3,630 more rows
#> # ℹ 4 more variables: gender <chr>, age <chr>, educ <chr>, race <chr>
```
<sup>Created on 2026-02-04 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment