alexpghayes · February 4, 2026 22:28
diff --git a/tidycensus-pull-mrp-crosstabs.R b/tidycensus-pull-mrp-crosstabs.R
 ``` r
 library(glue)
 library(stringr)
 library(tidycensus)
 library(tidyverse)

 # ripped from https://github.com/kuriwaki/ccesMRPprep/blob/main/data-raw/specify_ACS-codes.R
 # determines names of census variables to pull // gives them helpful names
 #
 # Loads the variable dictionary for `year` / `survey` with load_variables(),
 # keeps the rows whose label contains "Total", and tags each one with the
 # demographic categories found in its text:
 #   * `variable` is the full variable name; `table` and `num` are the pieces
 #     of that name on either side of the "_"
 #   * `gender`, `age` and `educ` hold the matching category from the label
 #   * `race` holds the matching category from the label or, failing that,
 #     from the table concept (matched case-insensitively)
 # A tag is NA when none of its categories occurs. Returns the tagged tibble.
 make_acscodes_df <- function(year = 2023, survey = "acs5") {
  # Collapse a vector of patterns into one capturing alternation: "(a|b|c)".
  as_alternation <- function(patterns) {
    str_c("(", str_c(patterns, collapse = "|"), ")")
  }

  ages <- c(
    "18 to 24 years",
    "25 to 34 years",
    "35 to 44 years",
    "45 to 64 years",
    "65 years and over",
    "18 and 19 years",
    "20 to 24 years",
    "25 to 29 years",
    "30 to 34 years",
    "45 to 54 years",
    "55 to 64 years",
    "65 to 74 years",
    "75 to 84 years",
    "85 years and over"
  )

  # Alternatives are tried in the order listed and the first one that matches
  # wins, so a category that extends another ("Some college or associate's
  # degree" vs. "Some college") has to be listed before the shorter one;
  # otherwise the longer category is never extracted.
  education <- c(
    "Less than high school diploma",
    "Nursery to 4th grade",
    "No schooling completed",
    "Less than 9th grade",
    "5th and 6th grade",
    "7th and 8th grade",
    "9th grade",
    "10th grade",
    "11th grade",
    "12th grade, no diploma",
    "Less than high school graduate",
    "9th to 12th grade,? no diploma",
    "High school graduate \\(includes equivalency\\)",
    "High school graduate, GED, or alternative",
    "Some college or associate's degree",
    "Some college",
    "Associate's degree",
    "Bachelor's degree$",
    "Doctorate degree",
    "Master's degree",
    "Professional school degree",
    "Graduate or professional degree",
    "Bachelor's degree or higher"
  )

  races <- c(
    "White alone, not Hispanic or Latino",
    "Hispanic or Latino",
    "Black or African American alone",
    "American Indian and Alaska Native alone",
    "Asian alone",
    "Native Hawaiian and Other Pacific Islander alone",
    "Some other race alone",
    "Two or more races"
  )

  ages_regex <- as_alternation(ages)
  edu_regex <- as_alternation(education)
  races_regex <- as_alternation(races)

  # The pipeline is the function's last expression (not an assignment), so
  # the result prints when the function is called without assigning it.
  load_variables(
    year = year,
    dataset = survey,
    cache = TRUE
  ) |>
    # keep the full name before it is split into table / num
    mutate(variable = name) |>
    separate(name, sep = "_", into = c("table", "num")) |>
    select(variable, table, concept, num, label, everything()) |>
    filter(str_detect(label, "Total")) |>
    mutate(label = str_remove(label, "Estimate!!Total")) |>
    mutate(
      gender = str_extract(label, "(Male|Female)"),
      age = str_extract(label, ages_regex),
      educ = str_extract(label, edu_regex),
      # race may appear in the label or only in the table concept
      race = coalesce(
        str_extract(label, regex(races_regex, ignore_case = TRUE)),
        str_extract(concept, regex(races_regex, ignore_case = TRUE))
      )
    )
 }

 year <- 2023
 survey <- "acs5"

 # Annotated variable dictionary for the chosen ACS release.
 acscodes_df <- make_acscodes_df(year = year, survey = survey)

 # Variable codes tagged with sex, age and education, restricted to tables
 # whose ID starts with "B".
 acscodes_age_sex_educ <- acscodes_df |>
  drop_na(gender, age, educ) |>
  filter(str_starts(table, "B")) |>
  pull(variable)

 # Variable codes tagged with sex, age and race from tables B01001B-B01001I.
 acscodes_age_sex_race <- acscodes_df |>
  filter(
    str_detect(table, "B01001[B-I]"),
    !is.na(gender),
    !is.na(age),
    !is.na(race)
  ) |>
  pull(variable)

 # Variable codes tagged with education, sex and race (any table).
 acscodes_sex_educ_race <- acscodes_df |>
  drop_na(educ, gender, race) |>
  pull(variable)

 # Pull ACS estimates with get_acs() and attach the variable annotations.
 #
 # `geography`, `variables`, `year` and `survey` are handed to get_acs()
 # unchanged; the request is made with geometry = FALSE.
 # `variable_labels` is a data frame with a `variable` column; it is
 # left-joined onto the downloaded rows by `variable`, so its remaining
 # columns are appended to every matching row.
 # Returns the joined data frame.
 get_acs_helper <- function(
  geography,
  variables,
  variable_labels,
  year,
  survey
 ) {
  # The pipeline is the last expression (no assignment to a throwaway
  # local), so the result is returned visibly.
  get_acs(
    geography = geography,
    variables = variables,
    year = year,
    survey = survey,
    geometry = FALSE
  ) |>
    left_join(variable_labels, by = join_by(variable))
 }

 geography <- "state"

 # various geographies work here
 # The three pulls differ only in the set of variable codes requested.
 age_sex_educ_df <- get_acs_helper(
  geography,
  acscodes_age_sex_educ,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
 )
 #> Getting data from the 2019-2023 5-year ACS

 age_sex_race_df <- get_acs_helper(
  geography,
  acscodes_age_sex_race,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
 )
 #> Getting data from the 2019-2023 5-year ACS

 sex_educ_race_df <- get_acs_helper(
  geography,
  acscodes_sex_educ_race,
  variable_labels = acscodes_df,
  year = year,
  survey = survey
 )
 #> Getting data from the 2019-2023 5-year ACS

 # Show the age x sex x education pull.
 print(age_sex_educ_df)
 #> # A tibble: 3,640 × 14
 #>    GEOID NAME    variable   estimate   moe table  concept  num   label geography
 #>    <chr> <chr>   <chr>         <dbl> <dbl> <chr>  <chr>    <chr> <chr> <chr>
 #>  1 01    Alabama B15001_004     3584   588 B15001 Sex by … 004   :!!M… tract
 #>  2 01    Alabama B15001_005    28336  1407 B15001 Sex by … 005   :!!M… tract
 #>  3 01    Alabama B15001_006    95200  2634 B15001 Sex by … 006   :!!M… tract
 #>  4 01    Alabama B15001_007    82856  2289 B15001 Sex by … 007   :!!M… tract
 #>  5 01    Alabama B15001_008    10896   854 B15001 Sex by … 008   :!!M… tract
 #>  6 01    Alabama B15001_009    16384  1343 B15001 Sex by … 009   :!!M… tract
 #>  7 01    Alabama B15001_010     1455   350 B15001 Sex by … 010   :!!M… tract
 #>  8 01    Alabama B15001_012     8839   919 B15001 Sex by … 012   :!!M… tract
 #>  9 01    Alabama B15001_013    24695  1231 B15001 Sex by … 013   :!!M… tract
 #> 10 01    Alabama B15001_014   108139  2513 B15001 Sex by … 014   :!!M… tract
 #> # ℹ 3,630 more rows
 #> # ℹ 4 more variables: gender <chr>, age <chr>, educ <chr>, race <chr>
 ```

 <sup>Created on 2026-02-04 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
	``` r
	library(glue)
	library(stringr)
	library(tidycensus)
	library(tidyverse)

	# ripped from https://github.com/kuriwaki/ccesMRPprep/blob/main/data-raw/specify_ACS-codes.R
	# determines names of census variables to pull // gives them helpful names
	#
	# Loads the variable dictionary for `year` / `survey` with load_variables(),
	# keeps the rows whose label contains "Total", and tags each one with the
	# demographic categories found in its text:
	#   * `variable` is the full variable name; `table` and `num` are the pieces
	#     of that name on either side of the "_"
	#   * `gender`, `age` and `educ` hold the matching category from the label
	#   * `race` holds the matching category from the label or, failing that,
	#     from the table concept (matched case-insensitively)
	# A tag is NA when none of its categories occurs. Returns the tagged tibble.
	make_acscodes_df <- function(year = 2023, survey = "acs5") {
	  # Collapse a vector of patterns into one capturing alternation: "(a|b|c)".
	  as_alternation <- function(patterns) {
	    str_c("(", str_c(patterns, collapse = "|"), ")")
	  }

	  ages <- c(
	    "18 to 24 years",
	    "25 to 34 years",
	    "35 to 44 years",
	    "45 to 64 years",
	    "65 years and over",
	    "18 and 19 years",
	    "20 to 24 years",
	    "25 to 29 years",
	    "30 to 34 years",
	    "45 to 54 years",
	    "55 to 64 years",
	    "65 to 74 years",
	    "75 to 84 years",
	    "85 years and over"
	  )

	  # Alternatives are tried in the order listed and the first one that matches
	  # wins, so a category that extends another ("Some college or associate's
	  # degree" vs. "Some college") has to be listed before the shorter one;
	  # otherwise the longer category is never extracted.
	  education <- c(
	    "Less than high school diploma",
	    "Nursery to 4th grade",
	    "No schooling completed",
	    "Less than 9th grade",
	    "5th and 6th grade",
	    "7th and 8th grade",
	    "9th grade",
	    "10th grade",
	    "11th grade",
	    "12th grade, no diploma",
	    "Less than high school graduate",
	    "9th to 12th grade,? no diploma",
	    "High school graduate \\(includes equivalency\\)",
	    "High school graduate, GED, or alternative",
	    "Some college or associate's degree",
	    "Some college",
	    "Associate's degree",
	    "Bachelor's degree$",
	    "Doctorate degree",
	    "Master's degree",
	    "Professional school degree",
	    "Graduate or professional degree",
	    "Bachelor's degree or higher"
	  )

	  races <- c(
	    "White alone, not Hispanic or Latino",
	    "Hispanic or Latino",
	    "Black or African American alone",
	    "American Indian and Alaska Native alone",
	    "Asian alone",
	    "Native Hawaiian and Other Pacific Islander alone",
	    "Some other race alone",
	    "Two or more races"
	  )

	  ages_regex <- as_alternation(ages)
	  edu_regex <- as_alternation(education)
	  races_regex <- as_alternation(races)

	  # The pipeline is the function's last expression (not an assignment), so
	  # the result prints when the function is called without assigning it.
	  load_variables(
	    year = year,
	    dataset = survey,
	    cache = TRUE
	  ) |>
	    # keep the full name before it is split into table / num
	    mutate(variable = name) |>
	    separate(name, sep = "_", into = c("table", "num")) |>
	    select(variable, table, concept, num, label, everything()) |>
	    filter(str_detect(label, "Total")) |>
	    mutate(label = str_remove(label, "Estimate!!Total")) |>
	    mutate(
	      gender = str_extract(label, "(Male|Female)"),
	      age = str_extract(label, ages_regex),
	      educ = str_extract(label, edu_regex),
	      # race may appear in the label or only in the table concept
	      race = coalesce(
	        str_extract(label, regex(races_regex, ignore_case = TRUE)),
	        str_extract(concept, regex(races_regex, ignore_case = TRUE))
	      )
	    )
	}

	year <- 2023
	survey <- "acs5"

	# Annotated variable dictionary for the chosen ACS release.
	acscodes_df <- make_acscodes_df(year = year, survey = survey)

	# Variable codes tagged with sex, age and education, restricted to tables
	# whose ID starts with "B".
	acscodes_age_sex_educ <- acscodes_df |>
	  filter(!is.na(gender), !is.na(age), !is.na(educ)) |>
	  filter(str_detect(table, "^B")) |>
	  pull(variable)

	# Variable codes tagged with sex, age and race from tables B01001B-B01001I.
	acscodes_age_sex_race <- acscodes_df |>
	  filter(str_detect(table, "B01001[B-I]")) |>
	  filter(!is.na(gender), !is.na(age), !is.na(race)) |>
	  pull(variable)

	# Variable codes tagged with education, sex and race (any table).
	acscodes_sex_educ_race <- acscodes_df |>
	  filter(!is.na(educ), !is.na(gender), !is.na(race)) |>
	  pull(variable)

	# Pull ACS estimates with get_acs() and attach the variable annotations.
	#
	# `geography`, `variables`, `year` and `survey` are handed to get_acs()
	# unchanged; the request is made with geometry = FALSE.
	# `variable_labels` is a data frame with a `variable` column; it is
	# left-joined onto the downloaded rows by `variable`, so its remaining
	# columns are appended to every matching row.
	# Returns the joined data frame.
	get_acs_helper <- function(
	  geography,
	  variables,
	  variable_labels,
	  year,
	  survey
	) {
	  # The pipeline is the last expression (no assignment to a throwaway
	  # local), so the result is returned visibly.
	  get_acs(
	    geography = geography,
	    variables = variables,
	    year = year,
	    survey = survey,
	    geometry = FALSE
	  ) |>
	    left_join(variable_labels, by = join_by(variable))
	}

	geography <- "state"

	# various geographies work here
	# The three pulls differ only in the set of variable codes requested.
	age_sex_educ_df <- get_acs_helper(
	  geography,
	  acscodes_age_sex_educ,
	  variable_labels = acscodes_df,
	  year = year,
	  survey = survey
	)
	#> Getting data from the 2019-2023 5-year ACS

	age_sex_race_df <- get_acs_helper(
	  geography,
	  acscodes_age_sex_race,
	  variable_labels = acscodes_df,
	  year = year,
	  survey = survey
	)
	#> Getting data from the 2019-2023 5-year ACS

	sex_educ_race_df <- get_acs_helper(
	  geography,
	  acscodes_sex_educ_race,
	  variable_labels = acscodes_df,
	  year = year,
	  survey = survey
	)
	#> Getting data from the 2019-2023 5-year ACS

	# Show the age x sex x education pull.
	print(age_sex_educ_df)
	#> # A tibble: 3,640 × 14
	#>    GEOID NAME    variable   estimate   moe table  concept  num   label geography
	#>    <chr> <chr>   <chr>         <dbl> <dbl> <chr>  <chr>    <chr> <chr> <chr>
	#>  1 01    Alabama B15001_004     3584   588 B15001 Sex by … 004   :!!M… tract
	#>  2 01    Alabama B15001_005    28336  1407 B15001 Sex by … 005   :!!M… tract
	#>  3 01    Alabama B15001_006    95200  2634 B15001 Sex by … 006   :!!M… tract
	#>  4 01    Alabama B15001_007    82856  2289 B15001 Sex by … 007   :!!M… tract
	#>  5 01    Alabama B15001_008    10896   854 B15001 Sex by … 008   :!!M… tract
	#>  6 01    Alabama B15001_009    16384  1343 B15001 Sex by … 009   :!!M… tract
	#>  7 01    Alabama B15001_010     1455   350 B15001 Sex by … 010   :!!M… tract
	#>  8 01    Alabama B15001_012     8839   919 B15001 Sex by … 012   :!!M… tract
	#>  9 01    Alabama B15001_013    24695  1231 B15001 Sex by … 013   :!!M… tract
	#> 10 01    Alabama B15001_014   108139  2513 B15001 Sex by … 014   :!!M… tract
	#> # ℹ 3,630 more rows
	#> # ℹ 4 more variables: gender <chr>, age <chr>, educ <chr>, race <chr>
	```

	<sup>Created on 2026-02-04 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
No results found