Skip to content

Instantly share code, notes, and snippets.

@mschauer
Last active December 19, 2025 13:01
Show Gist options
  • Select an option

  • Save mschauer/33798d5fd103d7cd45376cb828df0e2b to your computer and use it in GitHub Desktop.

Select an option

Save mschauer/33798d5fd103d7cd45376cb828df0e2b to your computer and use it in GitHub Desktop.
Simple Gaussian DAG hill-climber (score-based) in Julia
using Random
rng = MersenneTwister(0)
n = 2000
# True SEM: X -> Y -> Z
true_adj = Bool[
0 1 0
0 0 1
0 0 0]
epsX = randn(rng, n)
epsY = randn(rng, n)
epsZ = randn(rng, n)
X = epsX
Y = 1.2 .* X .+ epsY
Z = -0.8 .* Y .+ epsZ
data = hcat(X, Y, Z)
adj, score, hist = hillclimb_gaussian_dag(data; use_bic=true, verbose=true)
println("\nFinal adjacency (i->j if true):")
display(Matrix(adj))
println("Final score: ", score)
# Simple Gaussian DAG hill-climber (score-based) in idiomatic Julia.
#
# - Linear-Gaussian SEM local scores via OLS regressions
# - Score = sum local log-likelihoods (MLE σ² = SSE/n) minus optional BIC penalty
# - Greedy hill-climb over single-edge add/delete/reverse moves that preserve acyclicity
#
# This is for experimentation / teaching, not speed.
using LinearAlgebra
using Statistics
# ---------- Utilities ----------
"Return true iff adjacency matrix adj (p×p, adj[i,j]=true means i→j) is acyclic."
function is_acyclic(adj::AbstractMatrix{Bool})::Bool
p = size(adj, 1)
indeg = [count(@view adj[:, j]) for j in 1:p] # in-degree
stack = Int[]
for v in 1:p
indeg[v] == 0 && push!(stack, v)
end
seen = 0
indeg2 = copy(indeg)
while !isempty(stack)
v = pop!(stack)
seen += 1
@inbounds for w in 1:p
if adj[v, w]
indeg2[w] -= 1
indeg2[w] == 0 && push!(stack, w)
end
end
end
return seen == p
end
"Indices of parents of node j (1-based)."
parents(adj::AbstractMatrix{Bool}, j::Int) = findall(@view adj[:, j])
# ---------- Scoring ----------
"""
Local Gaussian log-likelihood for y ~ N(Xβ, σ² I), using OLS and MLE σ² = SSE/n.
If X has 0 columns, fits mean-only model (intercept-only) when include_intercept=true.
Returns (loglik, sse, sigma2).
"""
function local_gaussian_loglik(X::AbstractMatrix{<:Real},
y::AbstractVector{<:Real};
include_intercept::Bool = true)
n = length(y)
yy = Vector{Float64}(y)
if size(X, 2) == 0
if include_intercept
μ = mean(yy)
r = yy .- μ
sse = dot(r, r)
kβ = 1
else
# no regressors and no intercept: mean fixed at 0
sse = dot(yy, yy)
kβ = 0
end
else
Z = Matrix{Float64}(X)
if include_intercept
Z = hcat(ones(n), Z)
kβ = size(Z, 2)
else
kβ = size(Z, 2)
end
β = Z \ yy
r = yy - Z * β
sse = dot(r, r)
end
σ2 = max(sse / n, 1e-12)
ll = -0.5 * n * (log(2π) + log(σ2) + 1.0)
return ll, sse, σ2, kβ
end
"""
Score a DAG for data (n×p) with adjacency adj (p×p Bool).
Score = sum_j log p(x_j | parents) - 0.5*k*log(n) (if use_bic=true)
where k counts regression coefficients + variance parameters across nodes.
"""
function score_dag_gaussian(data::AbstractMatrix{<:Real},
adj::AbstractMatrix{Bool};
use_bic::Bool = true,
include_intercept::Bool = true)
X = Matrix{Float64}(data)
n, p = size(X)
size(adj, 1) == p && size(adj, 2) == p || error("adj must be p×p")
total_ll = 0.0
total_k = 0
for j in 1:p
pa = parents(adj, j)
y = @view X[:, j]
if isempty(pa)
ll, _, _, kβ = local_gaussian_loglik(zeros(n, 0), y; include_intercept=include_intercept)
total_ll += ll
total_k += kβ + 1 # betas + variance
else
Z = @view X[:, pa]
ll, _, _, kβ = local_gaussian_loglik(Z, y; include_intercept=include_intercept)
total_ll += ll
total_k += kβ + 1 # betas + variance
end
end
return use_bic ? (total_ll - 0.5 * total_k * log(n)) : total_ll
end
# ---------- Neighborhood moves ----------
"""
Generate all single-edge moves (add/delete/reverse) that keep acyclicity.
Returns a vector of (new_adj, move_description).
"""
function neighbors_hillclimb(adj::AbstractMatrix{Bool})
p = size(adj, 1)
moves = Vector{Tuple{BitMatrix, String}}()
for i in 1:p, j in 1:p
i == j && continue
if !adj[i, j] && !adj[j, i]
# add i -> j
new = BitMatrix(adj)
new[i, j] = true
is_acyclic(new) && push!(moves, (new, "add $i->$j"))
end
if adj[i, j]
# delete i -> j
new = BitMatrix(adj)
new[i, j] = false
push!(moves, (new, "del $i->$j"))
# reverse i -> j to j -> i
if !adj[j, i]
new2 = BitMatrix(adj)
new2[i, j] = false
new2[j, i] = true
is_acyclic(new2) && push!(moves, (new2, "rev $i->$j to $j->$i"))
end
end
end
return moves
end
# ---------- Hill-climber ----------
"""
Greedy hill-climb over DAGs with Gaussian score.
Returns (adj, score, history), where history is a vector of (score, move).
"""
function hillclimb_gaussian_dag(data::AbstractMatrix{<:Real};
start_adj::Union{Nothing, AbstractMatrix{Bool}} = nothing,
use_bic::Bool = true,
include_intercept::Bool = true,
max_steps::Int = 10_000,
tol::Float64 = 1e-10,
verbose::Bool = false)
n, p = size(data)
adj = start_adj === nothing ? falses(p, p) : BitMatrix(start_adj)
is_acyclic(adj) || error("start_adj must be acyclic")
cur_score = score_dag_gaussian(data, adj; use_bic=use_bic, include_intercept=include_intercept)
history = Vector{Tuple{Float64, String}}()
push!(history, (cur_score, "start"))
steps = 0
while steps < max_steps
best_score = cur_score
best_adj = nothing
best_move = ""
for (cand_adj, move) in neighbors_hillclimb(adj)
s = score_dag_gaussian(data, cand_adj; use_bic=use_bic, include_intercept=include_intercept)
if s > best_score + tol
best_score = s
best_adj = cand_adj
best_move = move
end
end
best_adj === nothing && break
adj = best_adj
cur_score = best_score
steps += 1
push!(history, (cur_score, best_move))
verbose && println("[$steps] $best_move -> score $(round(cur_score, digits=6))")
end
return adj, cur_score, history
end
@mschauer
Copy link
Author

Output. Compare with true_adj.

[1] add 2->3 -> score -9369.288832
[2] add 1->2 -> score -8473.519351

Final adjacency (i->j if true):
3×3 Matrix{Bool}:
 0  1  0
 0  0  1
 0  0  0
Final score: -8473.519350540599

Compare with true adjacency:




Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment