Skip to content

Instantly share code, notes, and snippets.

@Basten7
Created September 10, 2025 14:58
Show Gist options
  • Select an option

  • Save Basten7/926a38f0fc1bf21db5011fef7b32bb03 to your computer and use it in GitHub Desktop.

Select an option

Save Basten7/926a38f0fc1bf21db5011fef7b32bb03 to your computer and use it in GitHub Desktop.

llama.cpp Metal mgpu overlay (env shim + optional hooks)

This overlay adds a tiny Objective‑C helper that:

  1. Lets you specify Metal device(s) via GGML_METAL_DEVICES="3,4,5"

    • If GGML_METAL_DEVICE_INDEX is not set, it will be derived from the first index in GGML_METAL_DEVICES
    • Example log:
      [metal-env-shim] derived GGML_METAL_DEVICE_INDEX='3' from GGML_METAL_DEVICES='3,4'
  2. Provides weak optional hooks:

    • void ggml_mgpu_helper_on_init(id<MTLDevice> dev);
    • void ggml_mgpu_helper_on_layer(int layer_index);

    The helper ships weak no-op definitions of both hooks, so they do nothing unless you link your own .m file that defines them. Upstream ggml-metal.m remains untouched.

This keeps the overlay small and robust across upstream changes.

Bootstrap (apply to your fork or a local clone)

# Clone your fork (replace <you> with your GitHub user name) or upstream
git clone https://github.com/<you>/llama.cpp.git
cd llama.cpp

# (Optional) Check out a known build; uncomment the next line to use it
# (the example commit matches your logs)
# git checkout d35a1e8c

# Add this overlay (run from the repository root you just cd'ed into)
bash ./scripts/bootstrap-overlay.sh  # copies helper + applies the CMake patch

# Configure, then build. OpenMP_ROOT is set to opt/libomp under the prefix
# printed by `brew --prefix`; the parallel job count is whatever
# `sysctl -n hw.ncpu` prints.
# NOTE(review): the CMake snippet shown in this overlay is guarded only by
# `if (APPLE)`; confirm whether anything actually consumes -DGGML_METAL_MGPU.
cmake -B build -DLLAMA_CURL=1 -DGGML_METAL=ON -DGGML_METAL_MGPU=ON \
  -DGGML_BACKEND_ACCELERATE=ON -DGGML_K_QUANTS=ON \
  -DCMAKE_BUILD_TYPE=Release -DOpenMP_ROOT=$(brew --prefix)/opt/libomp
cmake --build build --config Release -j $(sysctl -n hw.ncpu)
#!/usr/bin/env bash
# Installs the Metal mgpu overlay into a llama.cpp checkout: writes the
# Objective-C helper under ext/, writes a CMake patch under patches/, and
# wires the helper into ggml/src/ggml-metal/CMakeLists.txt.
#
# Usage: run from the root of a llama.cpp checkout; takes no arguments.
# Exit status: 1 when the current directory is not a llama.cpp root.
set -euo pipefail

# Refuse to run anywhere but a llama.cpp root so nothing is written into an
# unrelated directory.
if [[ ! -f "CMakeLists.txt" || ! -d "ggml/src/ggml-metal" ]]; then
  # Diagnostics go to stderr so stdout stays clean for callers.
  printf '%s\n' "Run this from the root of your llama.cpp repo." >&2
  exit 1
fi

# Create the directories the overlay uses (no-op when they already exist).
mkdir -p ext patches scripts
# Write the Objective-C helper unless one already exists, so local edits to it
# survive re-running this script.
if [[ ! -f "ext/ggml-metal-mgpu-ctor-ext.m" ]]; then
  # Quoted delimiter: the heredoc body is written verbatim, with no shell
  # expansion inside the C source.
  cat > ext/ggml-metal-mgpu-ctor-ext.m <<'EOF'
// Metal env shim + optional hooks for ggml-metal.
#import <Metal/Metal.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

// Optional hooks. Declared weak so the no-op definitions at the bottom of this
// file can be replaced by strong definitions linked in from another file.
void ggml_mgpu_helper_on_init(id<MTLDevice> dev) __attribute__((weak));
void ggml_mgpu_helper_on_layer(int layer_index) __attribute__((weak));

// Runs at load time (constructor attribute). When GGML_METAL_DEVICES is set and
// GGML_METAL_DEVICE_INDEX is unset or empty, exports the first index of the
// list as GGML_METAL_DEVICE_INDEX. A non-empty GGML_METAL_DEVICE_INDEX is
// always left untouched.
__attribute__((constructor))
static void ggml_metal_env_ctor(void) {
    const char *list = getenv("GGML_METAL_DEVICES");
    if (!list || !*list) return;

    const char *idx = getenv("GGML_METAL_DEVICE_INDEX");
    if (idx && *idx) {
        fprintf(stderr, "[metal-env-shim] GGML_METAL_DEVICE_INDEX already set to '%s' – leaving as-is.\n", idx);
        return;
    }

    // Parse the leading non-negative integer of the list ("3,4,5" -> 3).
    const char *p = list;
    while (*p == ' ' || *p == '\t') ++p;
    char *endp = NULL;
    long v = strtol(p, &endp, 10);
    if (endp != p && v >= 0) {
        char buf[32];
        snprintf(buf, sizeof(buf), "%ld", v);
        // Overwrite must be enabled: we only get here when the variable is
        // unset or set to the empty string, and with overwrite=0 an
        // empty-but-set value would silently stay in place.
        if (setenv("GGML_METAL_DEVICE_INDEX", buf, 1) != 0) {
            fprintf(stderr, "[metal-env-shim] failed to set GGML_METAL_DEVICE_INDEX='%s'\n", buf);
            return;
        }
        fprintf(stderr, "[metal-env-shim] derived GGML_METAL_DEVICE_INDEX='%s' from GGML_METAL_DEVICES='%s'\n", buf, list);
    } else {
        fprintf(stderr, "[metal-env-shim] GGML_METAL_DEVICES present but no leading index could be parsed: '%s'\n", list);
    }
}

// Default no-op hook bodies (weak, per the declarations above).
void ggml_mgpu_helper_on_init(id<MTLDevice> dev) { (void)dev; }
void ggml_mgpu_helper_on_layer(int layer_index) { (void)layer_index; }
EOF
fi
# Wire the helper into the ggml-metal CMake target.
#
# The helper lives in <repo>/ext/, and CMAKE_CURRENT_LIST_DIR for the target is
# <repo>/ggml/src/ggml-metal, so the repository root is exactly three levels up
# (../../..). A fourth ".." would point outside the checkout.
patch_file="patches/0001-ggml-metal-add-ctor-env-shim-and-hooks-cmake.patch"
cmake_list="ggml/src/ggml-metal/CMakeLists.txt"
helper_name="ggml-metal-mgpu-ctor-ext.m"
# \$ keeps ${CMAKE_CURRENT_LIST_DIR} literal so CMake, not the shell, expands it.
helper_line=" target_sources(ggml-metal PRIVATE \${CMAKE_CURRENT_LIST_DIR}/../../../ext/${helper_name})"

# Write the CMake patch unless one already exists.
# NOTE(review): the hunk header advertises 5 old / 10 new lines while the hunk
# body carries fewer, so `patch` is expected to reject it and the append branch
# below does the real work — confirm whether that is intentional.
if [[ ! -f "$patch_file" ]]; then
  cat > "$patch_file" <<'EOF'
diff --git a/ggml/src/ggml-metal/CMakeLists.txt b/ggml/src/ggml-metal/CMakeLists.txt
index 1111111..2222222 100644
--- a/ggml/src/ggml-metal/CMakeLists.txt
+++ b/ggml/src/ggml-metal/CMakeLists.txt
@@ -1,5 +1,10 @@
# ggml-metal CMake
# (context differs between revisions; we only append our file)
+
+# --- mgpu env shim + weak hooks
+if (APPLE)
+ target_sources(ggml-metal PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../../../ext/ggml-metal-mgpu-ctor-ext.m)
+endif()
EOF
fi

# Check for an existing reference first so re-running the script never adds the
# helper twice and never re-attempts a patch that has already been applied.
if grep -Fq -- "$helper_name" "$cmake_list"; then
  echo "Helper already referenced in $cmake_list; leaving it unchanged."
elif patch -p1 --dry-run < "$patch_file" >/dev/null 2>&1; then
  # Dry run succeeded, so the real application is safe.
  patch -p1 < "$patch_file"
else
  # Fall back: append the guarded target_sources block to the end of the file.
  printf "\n# mgpu env shim + weak hooks\nif (APPLE)\n%s\nendif()\n" "$helper_line" >> "$cmake_list"
fi
echo "Overlay applied. Proceed to build with CMake."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment