Skip to content

Instantly share code, notes, and snippets.

@samuelkordik
Created August 12, 2025 08:01
Show Gist options
  • Select an option

  • Save samuelkordik/e9d096d8234c1b94060b271b5c531e32 to your computer and use it in GitHub Desktop.

Select an option

Save samuelkordik/e9d096d8234c1b94060b271b5c531e32 to your computer and use it in GitHub Desktop.
Better Histograms
#' A Better Histogram
#'
#' An opinionated histogram, using improved binwidth calculations and
#' embracing an opinionated sense of style.
#'
#' Calculates binwidth using Freedman-Diaconis method.
#'
#' @param .data dataframe
#' @param aes_x aesthestics x argument for `ggplot()`
#' @param label_args Arguments for `labs`
#' @param ... Additional arguments passed to `geom_histogram`
#'
#' @returns ggplot2 plot object
#' @export
#'
better_hist <- function(.data,
aes_x,
label_args=list(),
add_mean = TRUE,
add_sd = NULL,
add_density = FALSE,
lower_bound = NULL,
upper_bound = NULL,
...
) {
if (is.null(label_args$title))
label_args$title <- paste0(rlang::as_string(expr(.data)), " Histogram")
if (is.null(label_args$y))
label_args$y <- "count"
aes_x <- rlang::enquo(aes_x)
# Calculate summary stats
x_vals <- .data[[rlang::as_name(aes_x)]]
x_vals <- x_vals[!is.na(x_vals)]
mu <- mean(x_vals, na.rm=TRUE)
sigma <- sd(x_vals, na.rm=TRUE)
# Calculate binwidth
fd_binwidth <- 2 * IQR(x_vals, na.rm=TRUE) / length(x_vals)^(1/3)
# Get density values
density_data <- density(.data[[rlang::as_name(aes_x)]])
max_density <- max(density_data$y)
hist_info <- ggplot_build(
ggplot(.data, aes(x = !!aes_x)) +
geom_histogram(binwidth = fd_binwidth)
)$data[[1]]
max_count <- max(hist_info$count)
scale_factor <- 0.7*max_count/max_density
message(paste0("scale_factor = 0.7*",max_count,"/",round(max_density,4),"= ", round(scale_factor,4)))
# Set up colors
hist_fill <- dfr_colors$omd_navy
#hist_lines <- ifelse(add_density, dfr_colors$dark_navy, "white")
hist_lines <- "white"
stat_lines <- dfr_colors$aqua
# Get names
yaxis <- ifelse(is.null(label_args$y), "Count", label_args$y)
secaxis <- ifelse(is.null(label_args[["sec"]]), "Density", label_args[["sec"]])
# Set bounds
if (is.null(lower_bound)) lower_bound <- 0
if (is.null(upper_bound)) {
upper_bound <- quantile(x_vals, 0.999, na.rm=TRUE)
message("Setting upper bound at 0.9999 quantile.")
}
message(paste0("Limiting x axis to (", lower_bound, ", ", upper_bound, ")"))
# Annotations
locate_y_height <- function(x) {
hist_info[hist_info$xmin <= x & hist_info$xmax > x,]["y"]
}
annotations <- tibble::tribble(
~x, ~y, ~label,
round(min(x_vals),2), hist_info[1,]$ymax + 3, "Min:",
round(mu, 2), locate_y_height(mu) |>
dplyr::pull("y")+ 5, "Mean:",
round(max(x_vals), 2), hist_info[length(hist_info)-1,]$ymax + 3, "Max"
)
if (!is.null(add_sd)) {
for (sd_x in add_sd) {
xsd <- mu - sd_x * sigma
xsdz <- mu + sd_x * sigma
annotations |> add_row(
x = xsd,
y = locate_y_height(xsd) |> dplyr::pull(y) + 5,
label = paste0("-", sd_x, "๐œŽ")
) |> add_row(
x = xsdz,
y = locate_y_height(xsd) |> dplyr::pull(y) + 5,
label = paste0(sd_x, "๐œŽ")
) -> annotations
}
}
.data |>
ggplot2::ggplot(aes(x = !!aes_x)) +
ggplot2::geom_histogram(fill = hist_fill, color = hist_lines, binwidth = fd_binwidth,
...) +
hrbrthemes::theme_ipsum_rc(grid="Y") -> p
if (add_density) {
p <- p +
ggplot2::geom_density(aes(y = ..density.. * scale_factor),
fill = dfr_colors$yellow,
color = dfr_colors$omd_navy,
alpha = 0.5) +
hrbrthemes::scale_y_comma(name = yaxis,
sec.axis = sec_axis(~ . / scale_factor, name = secaxis),
expand = expansion(mult = c(0, 0.1)))
} else {
p <- p + hrbrthemes::scale_y_comma(expand = expansion(mult = c(0, 0.1)))
}
if (length(label_args) > 0) {
p <- p + do.call(ggplot2::labs, label_args)
}
if (add_mean) {
p <- p + ggplot2::geom_vline(xintercept = mu, color = stat_lines, size=1)
}
if (!is.null(add_sd)) {
for (sd_x in add_sd) {
p <- p +
ggplot2::geom_vline(xintercept = mu - sd_x * sigma,
color = stat_lines, size=.9, linetype = "dashed") +
ggplot2::geom_vline(xintercept = mu + sd_x * sigma,
color = stat_lines, size=.9, linetype = "dashed")
}
}
p <- p + ggplot2::xlim(lower_bound, upper_bound) +
ggplot2::geom_label(data = annotations,
aes(x = x, y = y, label = paste(label, x),
),
fontface = "bold")
p
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment