#!/usr/bin/env bash
set -euo pipefail
renice -n 19 $$ >/dev/null || true
export TMPDIR="${TMPDIR:-$(pwd)/tmp}"
mkdir -p "${TMPDIR}" || true
# ============================================================
# Native build of PyTorch + vLLM + FlashInfer + LMCache
# Target GPU: GH100 NVL (Grace Hopper, SM 9.0)
# System: Python 3.13, CUDA 13.0
# ============================================================
PYTHON_VERSION=3.13
CUDA_FULL_VERSION="13.0"
CUDA_VERSION="13"
VENV_PATH=".venv"
# ---- Source versions ----------------------------------------
PYTORCH_TAG="v2.9.0"
VLLM_VERSION="0.11.0"
FLASHINFER_VERSION="0.4.1"
LMCACHE_VERSION="0.3.8"
PYTORCH_VERSION="${PYTORCH_TAG#v}"
# ---- Build state (initial state before detection) -----------
CUDA_AVAILABLE=0
VLLM_INSTALLED=0
FLASHINFER_INSTALLED=0
export VLLM_INSTALLED FLASHINFER_INSTALLED
NCCL_AVAILABLE=0
CUDNN_AVAILABLE=0
CUDSS_AVAILABLE=0
CUSPARSELT_AVAILABLE=0
export NCCL_AVAILABLE CUDNN_AVAILABLE CUDSS_AVAILABLE CUSPARSELT_AVAILABLE
# ---- Compiler / CUDA config ---------------------------------
export CC=gcc
export CXX=g++
export MAX_JOBS="40"
export TORCH_CUDA_ARCH_LIST="9.0" # GH100 (Grace Hopper)
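# NOTE: TORCH_CUDA_ARCH_LIST also accepts multiple architectures and a "+PTX"
# suffix for forward compatibility, e.g. "8.0 9.0" or "9.0+PTX" (illustrative
# values); pinning just "9.0" keeps the binaries Hopper-only and the build small.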
usage() {
  local exit_code="${1:-0}"
  cat <<EOF_USAGE
Usage: $(basename "$0") [--clean]

Options:
  --clean      Remove existing virtual environment and build artifacts before rebuilding.
  -h, --help   Show this help message.
EOF_USAGE
  exit "${exit_code}"
}
log_step() { echo "[+] $*"; }
log_skip() { echo "[=] $*"; }
log_info() { echo "[i] $*"; }
detect_cuda_home() {
  local requested="${CUDA_HOME:-}"
  if [[ -n "${requested}" && -d "${requested}" ]]; then
    echo "${requested}"
    return
  fi
  # Check the pinned major version and the unversioned symlink first, then
  # fall back to the newest versioned toolkit under /usr/local.
  local candidate
  while IFS= read -r candidate; do
    [[ -z "${candidate}" ]] && continue
    if [[ -d "${candidate}" ]]; then
      echo "${candidate}"
      return
    fi
  done < <(
    printf '%s\n' "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda
    ls -d /usr/local/cuda-* 2>/dev/null | sort -Vr || true
  )
}
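# Example (illustrative): with CUDA 13 installed, the lookup resolves in the
# priority order above, so
#   CUDA_HOME="$(detect_cuda_home)"   # -> /usr/local/cuda-13 if present,
#                                     #    else /usr/local/cuda, else newest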
setup_cuda_env() {
  local detected
  detected=$(detect_cuda_home || true)
  if [[ -n "${detected}" ]]; then
    export CUDA_HOME="${detected}"
    export PATH="${CUDA_HOME}/bin:${PATH}"
    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH:-}"
    log_step "Using CUDA toolkit at ${CUDA_HOME}"
    local nvcc_path
    nvcc_path=$(command -v nvcc || true)
    if [[ -z "${nvcc_path}" && -x "${CUDA_HOME}/bin/nvcc" ]]; then
      nvcc_path="${CUDA_HOME}/bin/nvcc"
    fi
    if [[ -n "${nvcc_path}" ]]; then
      CUDA_AVAILABLE=1
    else
      CUDA_AVAILABLE=0
      log_info "nvcc not found; CUDA-dependent builds will be skipped"
    fi
  else
    log_info "CUDA toolkit not found; proceeding with CPU-only build"
    unset CUDA_HOME
    CUDA_AVAILABLE=0
  fi
}
prepend_unique_path() {
  local var="$1"
  local path="$2"
  [[ -z "${path}" ]] && return
  if [[ ! -d "${path}" ]]; then
    return
  fi
  local current="${!var-}"
  if [[ -z "${current:-}" ]]; then
    printf -v "${var}" "%s" "${path}"
  elif [[ ":${current}:" != *":${path}:"* ]]; then
    printf -v "${var}" "%s" "${path}:${current}"
  else
    printf -v "${var}" "%s" "${current}"
  fi
  export "${var}"
}
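# Example (illustrative; /opt/example/lib is a made-up path): calls are
# idempotent, so running
#   prepend_unique_path LD_LIBRARY_PATH /opt/example/lib
# twice leaves a single "/opt/example/lib" entry, and non-existent
# directories are ignored entirely.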
locate_python_module_path() {
  local module="$1"
  local header="${2:-}"
  python - "$module" "$header" <<'PY'
import importlib.util, os, sys
module, header = sys.argv[1], sys.argv[2]
spec = importlib.util.find_spec(module)
if not spec or not spec.submodule_search_locations:
    raise SystemExit(0)
for path in spec.submodule_search_locations:
    if not path:
        continue
    real = os.path.realpath(path)
    if not os.path.isdir(real):
        continue
    if not header:
        print(real)
        break
    include_dir = os.path.join(real, "include")
    if os.path.isdir(include_dir) and os.path.exists(os.path.join(include_dir, header)):
        print(real)
        break
PY
}
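# Example (illustrative): locate the cuDNN wheel payload, requiring its header:
#   locate_python_module_path nvidia.cudnn cudnn.h
# prints <site-packages>/nvidia/cudnn only when include/cudnn.h exists inside
# it; with no header argument, the first existing package directory wins.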
symlink_unversioned_lib() {
  local dir="$1"
  local lib="$2"
  [[ -z "${dir}" || -z "${lib}" ]] && return 1
  local target="${dir}/${lib}"
  if [[ -e "${target}" ]]; then
    return 0
  fi
  local versioned
  versioned="$(python - "$dir" "$lib" <<'PY'
import pathlib, sys
libdir = pathlib.Path(sys.argv[1])
base = sys.argv[2]
pattern = base + ".*"
for candidate in sorted(libdir.glob(pattern), reverse=True):
    if candidate.is_file():
        print(candidate.name)
        break
PY
  )"
  if [[ -z "${versioned}" ]]; then
    return 1
  fi
  ln -sf "${versioned}" "${target}"
}
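# Example (illustrative version number): if the lib dir holds libnccl.so.2.28.3
# but no bare libnccl.so, then
#   symlink_unversioned_lib "${lib_dir}" libnccl.so
# creates libnccl.so -> libnccl.so.2.28.3, which linkers passed "-lnccl" need.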
setup_cuda_component() {
  local friendly="$1"
  local module="$2"
  local header="$3"
  local lib_names="${4:-}"
  local prefix="$5"
  if [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
    log_skip "${friendly} setup skipped (CUDA unavailable)"
    return 1
  fi
  local base_path
  base_path="$(locate_python_module_path "${module}" "${header}" || true)"
  if [[ -z "${base_path}" ]]; then
    return 1
  fi
  local include_dir="${base_path}/include"
  local lib_dir=""
  for candidate in lib64 lib; do
    if [[ -d "${base_path}/${candidate}" ]]; then
      lib_dir="${base_path}/${candidate}"
      break
    fi
  done
  if [[ -z "${lib_dir}" ]]; then
    return 1
  fi
  if [[ ! -d "${include_dir}" ]]; then
    return 1
  fi
  local IFS=';'
  local libs=()
  read -r -a libs <<<"${lib_names:-}"
  IFS=$' \t\n'
  for lib in "${libs[@]}"; do
    [[ -z "${lib}" ]] && continue
    if ! symlink_unversioned_lib "${lib_dir}" "${lib}"; then
      log_info "${friendly}: missing ${lib} in ${lib_dir}"
      return 1
    fi
  done
  prepend_unique_path "LD_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_INCLUDE_PATH" "${include_dir}"
  export "${prefix}_ROOT=${base_path}"
  export "${prefix}_INCLUDE_DIR=${include_dir}"
  export "${prefix}_INCLUDE_DIRS=${include_dir}"
  export "${prefix}_INCLUDE_PATH=${include_dir}"
  export "${prefix}_LIB_DIR=${lib_dir}"
  export "${prefix}_LIBRARY_DIR=${lib_dir}"
  if [[ ${#libs[@]} -gt 0 ]]; then
    local main_lib="${libs[0]}"
    if [[ -n "${main_lib}" ]]; then
      local lib_path="${lib_dir}/${main_lib}"
      if [[ -e "${lib_path}" ]]; then
        export "${prefix}_LIBRARY=${lib_path}"
        export "${prefix}_LIBRARIES=${lib_path}"
        export "${prefix}_LIBRARY_PATH=${lib_path}"
      fi
    fi
  fi
  log_step "Configured ${friendly} from Python package at ${base_path}"
  return 0
}
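# Example (illustrative): a successful
#   setup_cuda_component "NCCL" nvidia.nccl nccl.h libnccl.so NCCL
# exports NCCL_ROOT, NCCL_INCLUDE_DIR(S)/NCCL_INCLUDE_PATH, NCCL_LIB_DIR,
# NCCL_LIBRARY_DIR and (if the library file exists) NCCL_LIBRARY/NCCL_LIBRARIES
# -- the usual hint variables that CMake find modules commonly consult.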
setup_nccl_env() {
  if setup_cuda_component "NCCL" "nvidia.nccl" "nccl.h" "libnccl.so" "NCCL"; then
    NCCL_AVAILABLE=1
  else
    log_info "NCCL Python package not located; relying on system detection"
    NCCL_AVAILABLE=0
  fi
}
setup_cudnn_env() {
  if setup_cuda_component "cuDNN" "nvidia.cudnn" "cudnn.h" "libcudnn.so" "CUDNN"; then
    CUDNN_AVAILABLE=1
  else
    log_info "cuDNN package not located; ensure system cuDNN is available"
    CUDNN_AVAILABLE=0
  fi
}
setup_cudss_env() {
  if setup_cuda_component "cuDSS" "nvidia.cu${CUDA_VERSION}" "cudss.h" "libcudss.so" "CUDSS"; then
    CUDSS_AVAILABLE=1
  else
    log_info "cuDSS package not located; ensure system cuDSS is available"
    CUDSS_AVAILABLE=0
  fi
}
setup_cusparselt_env() {
  if setup_cuda_component "cuSPARSELt" "nvidia.cusparselt" "cusparseLt.h" "libcusparseLt.so" "CUSPARSELT"; then
    CUSPARSELT_AVAILABLE=1
  else
    log_info "cuSPARSELt package not located; ensure system cuSPARSELt is available"
    CUSPARSELT_AVAILABLE=0
  fi
}
python_include_dir() {
  python - <<'PY'
import sysconfig
include = sysconfig.get_path("include") or sysconfig.get_config_var("INCLUDEPY")
if include:
    print(include)
PY
}
python_library_path() {
  python - <<'PY'
import os, sysconfig
libdir = sysconfig.get_config_var("LIBDIR")
libname = sysconfig.get_config_var("LDLIBRARY")
if not libdir or not libname:
    raise SystemExit(1)
candidates = []
candidate = os.path.join(libdir, libname)
candidates.append(candidate)
suffix = sysconfig.get_config_var("SHLIB_SUFFIX") or ".so"
if not candidate.endswith(suffix):
    candidates.append(candidate + suffix)
if candidate.endswith(".so") and not candidate.endswith(".so.1.0"):
    candidates.append(candidate + ".1.0")
for path in candidates:
    if os.path.exists(path):
        print(path)
        break
else:
    raise SystemExit(1)
PY
}
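# Example (illustrative): for a shared-library CPython 3.13 build this
# typically prints something like <LIBDIR>/libpython3.13.so, or the
# .so.1.0-suffixed file some distributions install instead (handled by the
# alias logic further below).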
ensure_python_headers() {
  local include_dir="$1"
  if [[ -f "${include_dir}/patchlevel.h" ]]; then
    echo "${include_dir}"
    return
  fi
  local full_version
  full_version=$(python - <<'PY'
import platform
print(platform.python_version())
PY
  )
  local headers_root="build/python-headers-${full_version}"
  local headers_abs="${PWD}/${headers_root}"
  local target_include="${headers_abs}/Include"
  if [[ ! -f "${target_include}/patchlevel.h" ]]; then
    log_step "Preparing CPython headers ${full_version}" >&2
    mkdir -p "${headers_abs}"
    local tarball="build/Python-${full_version}.tgz"
    if [[ ! -f "${tarball}" ]]; then
      curl -LsSf "https://www.python.org/ftp/python/${full_version}/Python-${full_version}.tgz" -o "${tarball}"
    fi
    local src_dir="build/Python-${full_version}"
    if [[ ! -d "${src_dir}" ]]; then
      tar -xzf "${tarball}" -C build
    fi
    rm -rf "${target_include}"
    cp -R "${src_dir}/Include" "${target_include}"
    mkdir -p "${target_include}/internal"
    cp -R "${src_dir}/Include/internal/"* "${target_include}/internal/" 2>/dev/null || true
    if [[ -f "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" ]]; then
      cp "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" "${target_include}/pyconfig.h"
    fi
  fi
  echo "${target_include}"
}
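# Rationale: patchlevel.h serves as the sentinel for a complete CPython header
# set. When the venv's interpreter ships without full headers, the matching
# CPython source headers are staged under build/, and the distro's
# pyconfig-64.h (when present) supplies the platform configuration header.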
ensure_repo() {
  local name="$1"
  local dir="$2"
  local url="$3"
  local ref="$4"
  if [ -d "${dir}/.git" ]; then
    log_skip "${name} repository already present at ${dir}"
  else
    log_step "Cloning ${name} ${ref}"
    git clone --recursive "${url}" "${dir}"
  fi
  pushd "${dir}" >/dev/null
  if ! git fetch --tags --quiet; then
    log_info "Skipping tag fetch for ${name}; using existing refs"
  fi
  if [ -n "${ref}" ]; then
    local target current
    target=$(git rev-parse "${ref}^{commit}")
    current=$(git rev-parse HEAD)
    if [ "${current}" != "${target}" ]; then
      log_step "Checking out ${name} ${ref}"
      git checkout "${ref}"
      current=$(git rev-parse HEAD)
    else
      log_skip "${name} already at ${ref}"
    fi
  fi
  git submodule update --init --recursive
  popd >/dev/null
}
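# Example (illustrative): ensure_repo is idempotent --
#   ensure_repo "PyTorch" pytorch https://github.com/pytorch/pytorch.git v2.9.0
# clones on the first run; later runs just fetch tags, re-pin HEAD to the tag
# if it drifted, and sync submodules.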
patch_vllm_repo() {
  local repo_dir="vllm"
  if [[ ! -d "${repo_dir}" ]]; then
    return
  fi
  log_step "Patching vLLM build configuration for CUDA ${CUDA_FULL_VERSION}"
  pushd "${repo_dir}" >/dev/null
  python - <<'PY'
from pathlib import Path
# Relax vLLM's torch pins so the locally built torch 2.9.0 satisfies them.
# The "+cu128" variant must be replaced before the bare pin: str.replace is
# sequential, so the bare pin would otherwise rewrite its prefix first and
# leave a dangling "+cu128" local-version suffix in the requirement.
replacements = {
    Path("pyproject.toml"): [
        ("torch == 2.8.0", "torch >= 2.9.0a0"),
    ],
    Path("requirements/build.txt"): [
        ("torch==2.8.0", "torch>=2.9.0a0"),
    ],
    Path("requirements/cuda.txt"): [
        ("torch==2.8.0+cu128", "torch>=2.9.0a0"),
        ("torch==2.8.0", "torch>=2.9.0a0"),
        ("torchaudio==2.8.0", "torchaudio>=2.8.0"),
        ("torchvision==0.23.0", "torchvision>=0.23.0"),
    ],
}
patched = False
for path, mapping in replacements.items():
    if not path.exists():
        continue
    text = path.read_text()
    new_text = text
    for old, new in mapping:
        new_text = new_text.replace(old, new)
    if new_text != text:
        path.write_text(new_text)
        print(f"Patched {path}")
        patched = True
if not patched:
    print("No vLLM files required patching.")
PY
  popd >/dev/null
}
python_module_version() {
  local module="$1"
  python - "${module}" <<'PY'
import importlib, sys
module = sys.argv[1]
try:
    mod = importlib.import_module(module)
except Exception:
    sys.exit(1)
version = getattr(mod, "__version__", None)
if version is None:
    sys.exit(2)
print(version)
PY
}
python_module_version_matches() {
  local module="$1"
  local expected_prefix="$2"
  local version
  if ! version=$(python_module_version "${module}" 2>/dev/null); then
    return 1
  fi
  [[ -z "${expected_prefix}" || "${version}" == "${expected_prefix}"* ]]
}
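# Example (illustrative): the prefix match tolerates pre-release and local
# suffixes, so python_module_version_matches torch "2.9.0" accepts both a
# plain "2.9.0" and a source build reporting "2.9.0a0+gitXXXXXXX".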
python_torch_cuda_version() {
  python - <<'PY'
try:
    import torch
except Exception:
    raise SystemExit(1)
cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
if not cuda_version:
    raise SystemExit(2)
print(cuda_version)
PY
}
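# Example (illustrative): prints "13.0" for a CUDA 13.0 torch build; exits
# non-zero for a CPU-only torch, which callers treat as "unknown CUDA".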
CLEAN=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --clean)
      CLEAN=1
      ;;
    -h|--help)
      usage 0
      ;;
    *)
      log_info "Unknown argument: $1"
      usage 1
      ;;
  esac
  shift
done
if [[ "${CLEAN}" -eq 1 ]]; then
log_step "Cleaning previous build artifacts"
rm -rf -- "${VENV_PATH}" build
fi
setup_cuda_env
if [ ! -d "${VENV_PATH}" ]; then
log_step "Creating virtual environment at ${VENV_PATH} (Python ${PYTHON_VERSION})"
uv venv "${VENV_PATH}" --python "${PYTHON_VERSION}"
else
log_skip "Virtual environment already exists at ${VENV_PATH}"
fi
VENV_ABS_PATH=$(python - <<PY
import os
print(os.path.abspath("${VENV_PATH}"))
PY
)
source "${VENV_PATH}/bin/activate"
PY_INCLUDE=$(python_include_dir || true)
if [[ -n "${PY_INCLUDE}" ]]; then
PY_INCLUDE=$(ensure_python_headers "${PY_INCLUDE}")
fi
PY_LIB=$(python_library_path || true)
mapfile -t PY_SITE_DIRS < <(python - <<'PY'
import sysconfig
for key in ("purelib", "platlib"):
    path = sysconfig.get_paths().get(key)
    if path:
        print(path)
PY
)
if [[ -n "${PY_INCLUDE}" ]]; then
PY_INCLUDE_DIRS="${PY_INCLUDE}"
if [[ -d "${PY_INCLUDE}/internal" ]]; then
PY_INCLUDE_DIRS="${PY_INCLUDE_DIRS};${PY_INCLUDE}/internal"
fi
export Python_INCLUDE_DIR="${PY_INCLUDE}"
export Python_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
export Python3_INCLUDE_DIR="${PY_INCLUDE}"
export Python3_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
fi
if [[ -n "${PY_LIB}" ]]; then
if [[ "${PY_LIB}" == *.so.1.0 ]]; then
alias_name="$(basename "${PY_LIB%.1.0}")"
alias_path="${VENV_ABS_PATH}/lib/${alias_name}"
mkdir -p "${VENV_ABS_PATH}/lib"
ln -sf "${PY_LIB}" "${alias_path}"
PY_LIB="${alias_path}"
fi
export PYTHON_USER_LIBDIR="$(dirname "${PY_LIB}")"
export PYTHON_USER_LDLIBRARY="$(basename "${PY_LIB}")"
export Python_LIBRARY="${PY_LIB}"
export Python_LIBRARIES="${PY_LIB}"
export Python3_LIBRARY="${PY_LIB}"
export Python3_LIBRARIES="${PY_LIB}"
export LD_LIBRARY_PATH="$(dirname "${PY_LIB}"):${LD_LIBRARY_PATH:-}"
export CMAKE_LIBRARY_PATH="$(dirname "${PY_LIB}"):${CMAKE_LIBRARY_PATH:-}"
for sitecustomize_path in "${PY_SITE_DIRS[@]}"; do
sitecustomize_file="${sitecustomize_path}/sitecustomize.py"
mkdir -p "${sitecustomize_path}"
cat <<'PY' > "${sitecustomize_file}"
import os
import sysconfig
libdir = os.environ.get("PYTHON_USER_LIBDIR")
libname = os.environ.get("PYTHON_USER_LDLIBRARY")
if libdir:
for key in ("LIBDIR", "LIBPL"):
sysconfig.get_config_vars()[key] = libdir
if libname:
for key in ("LDLIBRARY", "LIBRARY"):
sysconfig.get_config_vars()[key] = libname
PY
done
fi
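# NOTE: sitecustomize.py is imported automatically at interpreter startup
# (unless Python runs with -S), so every later python invocation in this
# script sees sysconfig LIBDIR/LDLIBRARY pointing at the venv's libpython --
# steering CMake-based builds that query sysconfig toward the right library.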
if [[ -n "${PY_INCLUDE}" ]]; then
export CMAKE_INCLUDE_PATH="${PY_INCLUDE}:${CMAKE_INCLUDE_PATH:-}"
fi
log_step "Ensuring build tooling is up to date"
uv pip install --upgrade --no-build-isolation \
  pip setuptools wheel \
  ninja cmake packaging pyyaml \
  typing_extensions sympy six \
  appdirs setuptools-scm jinja2 regex build
log_step "Installing CUDA toolkit and dependencies via NVIDIA PyIndex"
uv pip install --upgrade --no-build-isolation \
  nvidia-pyindex
uv pip install --upgrade --no-build-isolation \
  "cuda-toolkit[all]~=${CUDA_FULL_VERSION}" \
  "cupy-cuda${CUDA_VERSION}x" \
  "nvidia-cudss-cu${CUDA_VERSION}" \
  "nvidia-nccl-cu${CUDA_VERSION}" \
  "nvidia-cudnn-cu${CUDA_VERSION}" \
  "nvidia-cusparselt-cu${CUDA_VERSION}"
setup_nccl_env
setup_cudnn_env
setup_cudss_env
setup_cusparselt_env
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
missing_components=()
[[ "${CUDNN_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDNN")
[[ "${CUDSS_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDSS")
[[ "${CUSPARSELT_AVAILABLE}" -ne 1 ]] && missing_components+=("cuSPARSELt")
if (( ${#missing_components[@]} )); then
log_info "Missing required CUDA libraries: ${missing_components[*]}"
exit 1
fi
fi
if [ ! -d build ]; then
  log_step "Creating build directory"
  mkdir -p build
else
  log_skip "Build directory already exists"
fi
pushd build >/dev/null
# ============================================================
# 1. Build PyTorch from source
# ============================================================
ensure_repo "PyTorch" "pytorch" "https://github.com/pytorch/pytorch.git" "${PYTORCH_TAG}"
build_pytorch=1
existing_torch_version=""
existing_torch_cuda=""
if existing_torch_version=$(python_module_version torch 2>/dev/null); then
  existing_torch_cuda=$(python_torch_cuda_version 2>/dev/null) || existing_torch_cuda=""
  if [[ "${existing_torch_version}" == "${PYTORCH_VERSION}"* && "${existing_torch_cuda}" == "${CUDA_FULL_VERSION}" ]]; then
    log_skip "PyTorch ${PYTORCH_TAG} already installed for CUDA ${existing_torch_cuda}"
    build_pytorch=0
  else
    log_info "Existing PyTorch ${existing_torch_version:-<unknown>} built for CUDA ${existing_torch_cuda:-<unknown>}; expected CUDA ${CUDA_FULL_VERSION}. Rebuilding from source."
    python -m pip uninstall -y torch torchvision torchaudio >/dev/null 2>&1 || true
  fi
fi
if [[ "${build_pytorch}" -eq 1 ]]; then
log_step "Building and installing PyTorch ${PYTORCH_TAG} (all cores, full optimization)"
pushd pytorch >/dev/null
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
export USE_CUDA=1
export USE_NCCL="${NCCL_AVAILABLE}"
export USE_SYSTEM_NCCL="${NCCL_AVAILABLE}"
else
export USE_CUDA=0
export USE_NCCL=0
export USE_SYSTEM_NCCL=0
fi
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
export USE_CUDNN="${CUDNN_AVAILABLE}"
export USE_CUDSS="${CUDSS_AVAILABLE}"
export USE_CUSPARSELT="${CUSPARSELT_AVAILABLE}"
else
export USE_CUDNN=0
export USE_CUDSS=0
export USE_CUSPARSELT=0
fi
export USE_MKLDNN=1
export BUILD_TEST=0
export BUILD_CAFFE2=0
export USE_MPS=0
export USE_FBGEMM=1
export USE_QNNPACK=0
export USE_XNNPACK=1
PY_CMAKE_ARGS=(
"-DPython3_EXECUTABLE=${VENV_ABS_PATH}/bin/python"
"-DPython3_LIBRARY=${PY_LIB}"
"-DPython3_INCLUDE_DIR=${PY_INCLUDE}"
"-DPython_LIBRARY=${PY_LIB}"
"-DPython_INCLUDE_DIR=${PY_INCLUDE}"
"-DPython3_FIND_STRATEGY=LOCATION"
"-DPython3_ROOT_DIR=${VENV_ABS_PATH}"
)
OLD_CMAKE_ARGS="${CMAKE_ARGS:-}"
export CMAKE_ARGS="${PY_CMAKE_ARGS[*]} ${OLD_CMAKE_ARGS}"
OLD_CFLAGS="${CFLAGS:-}"
OLD_CPPFLAGS="${CPPFLAGS:-}"
export CFLAGS="-I${PY_INCLUDE} ${OLD_CFLAGS}"
export CPPFLAGS="-I${PY_INCLUDE} ${OLD_CPPFLAGS}"
python setup.py install
export CMAKE_ARGS="${OLD_CMAKE_ARGS}"
export CFLAGS="${OLD_CFLAGS}"
export CPPFLAGS="${OLD_CPPFLAGS}"
popd >/dev/null
fi
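# NOTE: this step relies on PyTorch's build picking the Python hints up from
# the CMAKE_ARGS/CFLAGS/CPPFLAGS environment (an assumption of this script,
# not a documented PyTorch contract); the OLD_* copies restore the caller's
# environment afterwards.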
# ============================================================
# 2. Verify torch build
# ============================================================
log_step "Verifying PyTorch build"
python - <<'EOF_VERIFY'
import torch
print("torch:", torch.__version__, "CUDA:", torch.version.cuda)
print("Devices:", torch.cuda.device_count(), "Arch list:", torch.cuda.get_arch_list())
EOF_VERIFY
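# Expected shape of the output on a successful GH100 CUDA build
# (illustrative values):
#   torch: 2.9.0a0+gitXXXXXXX CUDA: 13.0
#   Devices: 1 Arch list: ['sm_90']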
# ============================================================
# 3. Build vLLM
# ============================================================
ensure_repo "vLLM" "vllm" "https://github.com/vllm-project/vllm.git" "v${VLLM_VERSION}"
patch_vllm_repo
if python_module_version_matches vllm "${VLLM_VERSION}"; then
  log_skip "vLLM ${VLLM_VERSION} already installed"
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping vLLM build because nvcc tooling is unavailable"
else
  log_step "Building and installing vLLM ${VLLM_VERSION}"
  pushd vllm >/dev/null
  export VLLM_BUILD_CUDA_EXT=1
  python -m pip install -v --no-build-isolation --no-deps .
  popd >/dev/null
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
fi
# cwd is build/ at this point, so the vLLM checkout is at ./vllm
uv pip install -r vllm/requirements/common.txt
uv pip install numba
# ============================================================
# 4. Build FlashInfer (core only)
# ============================================================
ensure_repo "FlashInfer" "flashinfer" "https://github.com/flashinfer-ai/flashinfer.git" "v${FLASHINFER_VERSION}"
if python_module_version_matches flashinfer "${FLASHINFER_VERSION}"; then
  log_skip "FlashInfer ${FLASHINFER_VERSION} already installed"
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping FlashInfer build because nvcc tooling is unavailable"
else
  log_step "Building and installing FlashInfer ${FLASHINFER_VERSION}"
  pushd flashinfer >/dev/null
  python -m pip install -v .
  popd >/dev/null
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
fi
# ============================================================
# 5. Install LMCache
# ============================================================
if python_module_version_matches lmcache "${LMCACHE_VERSION}"; then
  log_skip "LMCache ${LMCACHE_VERSION} already installed"
else
  log_step "Installing LMCache ${LMCACHE_VERSION}"
  uv pip install -U --prerelease=allow --no-deps "lmcache==${LMCACHE_VERSION}"
fi
# ============================================================
# 6. Sanity check
# ============================================================
log_step "Verifying installation"
python - <<'EOF_SANITY'
import importlib
import os
def show_status(name, module=None, extra=""):
status = f"{name}:"
if module is None:
status += f" {extra}" if extra else " not installed"
else:
version = getattr(module, "__version__", "<unknown>")
status += f" {version}"
if extra:
status += f" {extra}"
print(status)
torch = importlib.import_module("torch")
show_status("torch", torch, f"(CUDA {torch.version.cuda})")
if os.environ.get("VLLM_INSTALLED") == "1":
vllm = importlib.import_module("vllm")
show_status("vllm", vllm)
else:
show_status("vllm", extra="skipped (nvcc unavailable)")
if os.environ.get("FLASHINFER_INSTALLED") == "1":
flashinfer = importlib.import_module("flashinfer")
show_status("flashinfer", flashinfer)
else:
show_status("flashinfer", extra="skipped (nvcc unavailable)")
lmcache = importlib.import_module("lmcache")
show_status("lmcache", lmcache)
EOF_SANITY
popd >/dev/null
echo
echo "[✓] Native build complete. Environment in ${VENV_PATH}"
echo "[i] Activate it with: source ${VENV_PATH}/bin/activate"
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
echo "[→] Built for GH100 (SM 9.0) using all ${MAX_JOBS} cores."
else
echo "[→] Completed CPU-only build (CUDA toolchain not detected)."
fi