Last active
December 19, 2025 13:01
-
-
Save mschauer/33798d5fd103d7cd45376cb828df0e2b to your computer and use it in GitHub Desktop.
Simple Gaussian DAG hill-climber (score-based) in Julia
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using Random | |
| rng = MersenneTwister(0) | |
| n = 2000 | |
| # True SEM: X -> Y -> Z | |
| true_adj = Bool[ | |
| 0 1 0 | |
| 0 0 1 | |
| 0 0 0] | |
| epsX = randn(rng, n) | |
| epsY = randn(rng, n) | |
| epsZ = randn(rng, n) | |
| X = epsX | |
| Y = 1.2 .* X .+ epsY | |
| Z = -0.8 .* Y .+ epsZ | |
| data = hcat(X, Y, Z) | |
| adj, score, hist = hillclimb_gaussian_dag(data; use_bic=true, verbose=true) | |
| println("\nFinal adjacency (i->j if true):") | |
| display(Matrix(adj)) | |
| println("Final score: ", score) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Simple Gaussian DAG hill-climber (score-based) in idiomatic Julia. | |
| # | |
| # - Linear-Gaussian SEM local scores via OLS regressions | |
| # - Score = sum local log-likelihoods (MLE σ² = SSE/n) minus optional BIC penalty | |
| # - Greedy hill-climb over single-edge add/delete/reverse moves that preserve acyclicity | |
| # | |
| # This is for experimentation / teaching, not speed. | |
| using LinearAlgebra | |
| using Statistics | |
| # ---------- Utilities ---------- | |
| "Return true iff adjacency matrix adj (p×p, adj[i,j]=true means i→j) is acyclic." | |
| function is_acyclic(adj::AbstractMatrix{Bool})::Bool | |
| p = size(adj, 1) | |
| indeg = [count(@view adj[:, j]) for j in 1:p] # in-degree | |
| stack = Int[] | |
| for v in 1:p | |
| indeg[v] == 0 && push!(stack, v) | |
| end | |
| seen = 0 | |
| indeg2 = copy(indeg) | |
| while !isempty(stack) | |
| v = pop!(stack) | |
| seen += 1 | |
| @inbounds for w in 1:p | |
| if adj[v, w] | |
| indeg2[w] -= 1 | |
| indeg2[w] == 0 && push!(stack, w) | |
| end | |
| end | |
| end | |
| return seen == p | |
| end | |
| "Indices of parents of node j (1-based)." | |
| parents(adj::AbstractMatrix{Bool}, j::Int) = findall(@view adj[:, j]) | |
| # ---------- Scoring ---------- | |
| """ | |
| Local Gaussian log-likelihood for y ~ N(Xβ, σ² I), using OLS and MLE σ² = SSE/n. | |
| If X has 0 columns, fits mean-only model (intercept-only) when include_intercept=true. | |
| Returns (loglik, sse, sigma2). | |
| """ | |
| function local_gaussian_loglik(X::AbstractMatrix{<:Real}, | |
| y::AbstractVector{<:Real}; | |
| include_intercept::Bool = true) | |
| n = length(y) | |
| yy = Vector{Float64}(y) | |
| if size(X, 2) == 0 | |
| if include_intercept | |
| μ = mean(yy) | |
| r = yy .- μ | |
| sse = dot(r, r) | |
| kβ = 1 | |
| else | |
| # no regressors and no intercept: mean fixed at 0 | |
| sse = dot(yy, yy) | |
| kβ = 0 | |
| end | |
| else | |
| Z = Matrix{Float64}(X) | |
| if include_intercept | |
| Z = hcat(ones(n), Z) | |
| kβ = size(Z, 2) | |
| else | |
| kβ = size(Z, 2) | |
| end | |
| β = Z \ yy | |
| r = yy - Z * β | |
| sse = dot(r, r) | |
| end | |
| σ2 = max(sse / n, 1e-12) | |
| ll = -0.5 * n * (log(2π) + log(σ2) + 1.0) | |
| return ll, sse, σ2, kβ | |
| end | |
| """ | |
| Score a DAG for data (n×p) with adjacency adj (p×p Bool). | |
| Score = sum_j log p(x_j | parents) - 0.5*k*log(n) (if use_bic=true) | |
| where k counts regression coefficients + variance parameters across nodes. | |
| """ | |
| function score_dag_gaussian(data::AbstractMatrix{<:Real}, | |
| adj::AbstractMatrix{Bool}; | |
| use_bic::Bool = true, | |
| include_intercept::Bool = true) | |
| X = Matrix{Float64}(data) | |
| n, p = size(X) | |
| size(adj, 1) == p && size(adj, 2) == p || error("adj must be p×p") | |
| total_ll = 0.0 | |
| total_k = 0 | |
| for j in 1:p | |
| pa = parents(adj, j) | |
| y = @view X[:, j] | |
| if isempty(pa) | |
| ll, _, _, kβ = local_gaussian_loglik(zeros(n, 0), y; include_intercept=include_intercept) | |
| total_ll += ll | |
| total_k += kβ + 1 # betas + variance | |
| else | |
| Z = @view X[:, pa] | |
| ll, _, _, kβ = local_gaussian_loglik(Z, y; include_intercept=include_intercept) | |
| total_ll += ll | |
| total_k += kβ + 1 # betas + variance | |
| end | |
| end | |
| return use_bic ? (total_ll - 0.5 * total_k * log(n)) : total_ll | |
| end | |
| # ---------- Neighborhood moves ---------- | |
| """ | |
| Generate all single-edge moves (add/delete/reverse) that keep acyclicity. | |
| Returns a vector of (new_adj, move_description). | |
| """ | |
| function neighbors_hillclimb(adj::AbstractMatrix{Bool}) | |
| p = size(adj, 1) | |
| moves = Vector{Tuple{BitMatrix, String}}() | |
| for i in 1:p, j in 1:p | |
| i == j && continue | |
| if !adj[i, j] && !adj[j, i] | |
| # add i -> j | |
| new = BitMatrix(adj) | |
| new[i, j] = true | |
| is_acyclic(new) && push!(moves, (new, "add $i->$j")) | |
| end | |
| if adj[i, j] | |
| # delete i -> j | |
| new = BitMatrix(adj) | |
| new[i, j] = false | |
| push!(moves, (new, "del $i->$j")) | |
| # reverse i -> j to j -> i | |
| if !adj[j, i] | |
| new2 = BitMatrix(adj) | |
| new2[i, j] = false | |
| new2[j, i] = true | |
| is_acyclic(new2) && push!(moves, (new2, "rev $i->$j to $j->$i")) | |
| end | |
| end | |
| end | |
| return moves | |
| end | |
| # ---------- Hill-climber ---------- | |
| """ | |
| Greedy hill-climb over DAGs with Gaussian score. | |
| Returns (adj, score, history), where history is a vector of (score, move). | |
| """ | |
| function hillclimb_gaussian_dag(data::AbstractMatrix{<:Real}; | |
| start_adj::Union{Nothing, AbstractMatrix{Bool}} = nothing, | |
| use_bic::Bool = true, | |
| include_intercept::Bool = true, | |
| max_steps::Int = 10_000, | |
| tol::Float64 = 1e-10, | |
| verbose::Bool = false) | |
| n, p = size(data) | |
| adj = start_adj === nothing ? falses(p, p) : BitMatrix(start_adj) | |
| is_acyclic(adj) || error("start_adj must be acyclic") | |
| cur_score = score_dag_gaussian(data, adj; use_bic=use_bic, include_intercept=include_intercept) | |
| history = Vector{Tuple{Float64, String}}() | |
| push!(history, (cur_score, "start")) | |
| steps = 0 | |
| while steps < max_steps | |
| best_score = cur_score | |
| best_adj = nothing | |
| best_move = "" | |
| for (cand_adj, move) in neighbors_hillclimb(adj) | |
| s = score_dag_gaussian(data, cand_adj; use_bic=use_bic, include_intercept=include_intercept) | |
| if s > best_score + tol | |
| best_score = s | |
| best_adj = cand_adj | |
| best_move = move | |
| end | |
| end | |
| best_adj === nothing && break | |
| adj = best_adj | |
| cur_score = best_score | |
| steps += 1 | |
| push!(history, (cur_score, best_move)) | |
| verbose && println("[$steps] $best_move -> score $(round(cur_score, digits=6))") | |
| end | |
| return adj, cur_score, history | |
| end |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output. Compare with
true_adj.Compare with true adjacency: