Skip to content

Instantly share code, notes, and snippets.

@damonbayer
Last active August 18, 2025 21:47
Show Gist options
  • Select an option

  • Save damonbayer/23ed3c4b2213d7f820a3204e45cc2b6c to your computer and use it in GitHub Desktop.

Select an option

Save damonbayer/23ed3c4b2213d7f820a3204e45cc2b6c to your computer and use it in GitHub Desktop.
library(tidyverse)
library(arrow)
library(fs)
library(withr)
library(here)
# Location (under the project root) where the hub repository gets cloned.
flu_hub_path <- here("FluSight-forecast-hub")

# The hub's published time series, fetched straight from GitHub. Further down
# the script its column names define the layout of the rebuilt table.
flu_ts_original <- read_csv(
  file = "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/refs/heads/main/target-data/time-series.csv"
)
# Shell script that clones the hub without blobs and then dumps every committed
# version of target-hospital-admissions.csv into
# FluSight-forecast-hub/tha_versions/<commit hash>.csv.
#
# The clone, the `cd`, and each `git show` abort the script on failure:
# otherwise a failed clone (e.g. a leftover directory from an earlier run)
# would let the remaining commands run in the wrong directory, and a failed
# `git show` would leave a truncated/empty CSV behind.
create_tha_versions_script <- r"(
FILE="target-data/target-hospital-admissions.csv"
git clone --filter=blob:none --no-checkout https://github.com/cdcepi/FluSight-forecast-hub.git || exit 1
cd FluSight-forecast-hub || exit 1
git sparse-checkout init --cone
git sparse-checkout set "$FILE"
git checkout main
DEST_DIR="tha_versions"
mkdir -p "$DEST_DIR" || exit 1
git log --format="%H" -- "$FILE" | while read -r commit; do
git show "$commit:$FILE" > "$DEST_DIR/$commit.csv" || exit 1
done
)"

# Run from the project root so the clone lands at `flu_hub_path`; system()
# returns the shell's exit status, which must be checked explicitly.
clone_status <- with_dir(here(), system(create_tha_versions_script))
if (clone_status != 0L) {
  stop(
    "Cloning the hub or extracting file versions failed (exit status ",
    clone_status,
    "). Remove any partial 'FluSight-forecast-hub' directory under ",
    here(),
    " and re-run.",
    call. = FALSE
  )
}
# Commit history of the target file: one row per commit, in `git log` order
# (newest first), with columns commit_hash, commit_date, commit_author and
# commit_message.
#
# Fields are separated by tabs (%x09) instead of commas, and quote handling is
# switched off: commit subjects and author names can contain commas and double
# quotes, which would mis-split or swallow rows when parsed as CSV.
flu_git_history <- with_dir(
  flu_hub_path,
  system(
    'git log --follow --pretty=format:"%H%x09%ad%x09%an%x09%s" --date=short -- target-data/target-hospital-admissions.csv',
    intern = TRUE
  )
) |>
  I() |> # mark the captured lines as literal data, not a file path
  read_tsv(
    col_names = c("hash", "date", "author", "message"),
    col_types = cols(
      hash = col_character(),
      date = col_date(),
      author = col_character(),
      message = col_character()
    ),
    quote = ""
  ) |>
  rename_with(\(x) str_c("commit_", x))
# The history is logged with day resolution (`--date=short`), so several
# commits can carry the same date. Joining all of them would pool rows from
# different file versions under one `as_of`. Keep one commit per day instead:
# `distinct()` retains the first row per date, which is the newest commit
# provided `flu_git_history` is still in `git log` (newest-first) order.
latest_commit_per_day <- flu_git_history |>
  distinct(commit_date, .keep_all = TRUE) |>
  select(commit_hash, as_of = commit_date)

# Every extracted file version, stacked and labelled with the date (`as_of`)
# of the commit it came from.
flu_ts_raw <- path(flu_hub_path, "tha_versions") |>
  dir_ls() |>
  map(\(version_file) {
    read_csv(
      version_file,
      col_types = cols(
        date = col_date(),
        location = col_character(),
        location_name = col_character(),
        value = col_double(),
        weekly_rate = col_double()
      ),
      id = "file_name",
      # some versions carry an unnamed index column; drop it when present
      col_select = -any_of("...1")
    )
  }) |>
  bind_rows() |>
  mutate(
    # files are named <commit hash>.csv
    commit_hash = file_name |>
      path_file() |>
      path_ext_remove()
  ) |>
  select(-file_name) |>
  # inner join: rows from same-day superseded commits (and from any hash that
  # is missing from the history) are dropped rather than kept ambiguously
  inner_join(latest_commit_per_day, by = "commit_hash") |>
  select(-commit_hash) |>
  arrange(as_of) |>
  # collapse consecutive identical snapshots: keep only the earliest `as_of`
  # at which each distinct version of the table appeared
  nest(.by = as_of) |>
  distinct(data, .keep_all = TRUE) |>
  unnest(data)
# Reshape the versioned data to the column layout of the hub's own
# time-series file, then persist it and clean up the temporary clone.
hub_columns <- colnames(flu_ts_original)

flu_ts <- flu_ts_raw |>
  mutate(target = "wk inc flu hosp") |>
  rename(target_end_date = date, observation = value) |>
  select(all_of(hub_columns))

write_parquet(x = flu_ts, sink = here("time-series.parquet"))

# The clone (including the extracted per-commit CSVs) is no longer needed.
dir_delete(path = flu_hub_path)
This file has been truncated, but you can view the full file.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment