Skip to content

Instantly share code, notes, and snippets.

@ctesta01
Created February 10, 2026 17:42
Show Gist options
  • Select an option

  • Save ctesta01/568293f8391117b0d2e525bc7449a5e6 to your computer and use it in GitHub Desktop.

Select an option

Save ctesta01/568293f8391117b0d2e525bc7449a5e6 to your computer and use it in GitHub Desktop.
Bringing in data dictionary labels to many dataframes
# Read In Data Tables ----------------------------------------------------
# Lookup of list-element name -> CSV file inside `data_dir`. Judging by the
# file names, `_untx` entries are the "UnTx" exports and `_tx` entries are the
# "Transfused" exports of the same table.
raw_transfusion_data <- c(
  dti_untx = "DTI_UnTx_V2.1.csv",
  dti_tx = "Transfused_DTI_50_V2.csv",
  o2_vitals_untx = "Vitals_O2_UnTx_V2.1.csv",
  o2_vitals_tx = "Transfused_02_Vitals_V2.csv",
  blood_type_untx = "ABO_UnTx_V2.csv",
  blood_type_tx = "Transfused_ABO_V2.1.csv",
  encounter_untx = "Encounter_UnTx_V2.csv",
  encounter_tx = "Transfused_Encounter_V2.csv"
) |>
  # lapply() keeps the vector's names, so the result is a named list with
  # one data frame per file, in the order listed above
  lapply(\(csv_name) readr::read_csv(here(data_dir, csv_name)))
# Read in Data Dictionary -------------------------------------------------
# Lookup of list-element name -> worksheet in "Data Dictionary.xlsx". Every
# sheet is read from the same workbook inside `data_dir`.
data_dictionary <- c(
  dti = "DTI Table",
  o2_vitals = "Vitals_O2 Table",
  encounter = "Encounter Table",
  blood_type = "ABO_Table",
  icd = "ICD Table",
  labs = "Labs Table",
  patient = "Patient Table"
) |>
  # lapply() keeps the vector's names, giving a named list of one data
  # frame per dictionary sheet
  lapply(
    \(sheet_name) {
      readxl::read_excel(
        here(data_dir, "Data Dictionary.xlsx"),
        sheet = sheet_name
      )
    }
  )
# apply data dictionary to columns ----------------------------------------
# Turn one data dictionary sheet into a lookup vector of column labels.
#
# dict_df: a data frame with `Column Name` and `Description` columns.
# Returns a named vector with one element per dictionary row: the names are
# the `Column Name` values and the elements are the `Description` values,
# both coerced to character.
make_datadict_label_mapping <- function(dict_df) {
  # keep only the two columns the mapping is built from
  dict_columns <- select(dict_df, `Column Name`, `Description`)
  name_label_pairs <- transmute(
    dict_columns,
    name = as.character(`Column Name`),
    label = as.character(Description)
  )
  # deframe() converts a two-column data frame into a named vector
  # (first column -> names, second column -> values)
  deframe(name_label_pairs)
}
# Attach variable labels (via labelled::var_label) to the columns of a data
# frame.
#
# df:        the data frame to label.
# label_map: a named vector of labels, named by column.
# Only columns of `df` whose name also appears in `names(label_map)` are
# touched; all other columns are left as they are. Returns the labelled `df`.
apply_labels <- function(df, label_map) {
  shared_cols <- intersect(names(df), names(label_map))
  # explicit form of `labelled::var_label(df[cols]) <- labels`: label the
  # shared columns, then write them back into `df`
  labelled_cols <- labelled::`var_label<-`(
    df[shared_cols],
    value = label_map[shared_cols]
  )
  df[shared_cols] <- labelled_cols
  df
}
# create label maps: one named vector of column descriptions per data
# dictionary sheet
datadict_label_maps <- purrr::map(data_dictionary, make_datadict_label_mapping)
# label every table with the dictionary entry that shares its name once the
# `_untx` / `_tx` suffix is stripped (e.g. "dti_tx" -> "dti",
# "blood_type_untx" -> "blood_type")
raw_transfusion_data <- purrr::imap(
  raw_transfusion_data,
  \(tbl, tbl_name) {
    dict_key <- sub("_(untx|tx)$", "", tbl_name)
    apply_labels(tbl, datadict_label_maps[[dict_key]])
  }
)
@ctesta01
Copy link
Author

The end result is that all of the underlying tables have descriptive column labels, which appear when you run, for example, `View(raw_transfusion_data$dti_tx)` in RStudio:

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment