#!/usr/bin/env bash
set -euo pipefail

renice -n 19 $$ >/dev/null || true

export TMPDIR="${TMPDIR:-$(pwd)/tmp}"
mkdir -p "${TMPDIR}" || true

# ============================================================
# Native build of PyTorch + vLLM + FlashInfer + LMCache
# Target GPU: GH100 NVL (Grace Hopper, SM 9.0)
# System: Python 3.13, CUDA 13.0
# ============================================================

PYTHON_VERSION=3.13
CUDA_FULL_VERSION="13.0"
CUDA_VERSION="13"
VENV_PATH=".venv"

# ---- Source versions ----------------------------------------
PYTORCH_TAG="v2.9.0"
VLLM_VERSION="0.11.0"
FLASHINFER_VERSION="0.4.1"
LMCACHE_VERSION="0.3.8"
PYTORCH_VERSION="${PYTORCH_TAG#v}"

# ---- Build state (initial state before detection) -----------
CUDA_AVAILABLE=0
VLLM_INSTALLED=0
FLASHINFER_INSTALLED=0
export VLLM_INSTALLED FLASHINFER_INSTALLED
NCCL_AVAILABLE=0
CUDNN_AVAILABLE=0
CUDSS_AVAILABLE=0
CUSPARSELT_AVAILABLE=0
export NCCL_AVAILABLE CUDNN_AVAILABLE CUDSS_AVAILABLE CUSPARSELT_AVAILABLE

# ---- Compiler / CUDA config ---------------------------------
export CC=gcc
export CXX=g++
export MAX_JOBS="40"
export TORCH_CUDA_ARCH_LIST="9.0" # GH100 (Grace Hopper)
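# Note: a single-entry arch list ("9.0" with no "+PTX" suffix) compiles
# real sm_90 kernels only, which speeds up the build but ties the
# resulting binaries to Hopper-class GPUs.
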
usage() {
  local exit_code="${1:-0}"
  cat <<EOF_USAGE
Usage: $(basename "$0") [--clean]
Options:
  --clean     Remove existing virtual environment and build artifacts before rebuilding.
  -h, --help  Show this help message.
EOF_USAGE
  exit "${exit_code}"
}
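
# Example invocations (script name is illustrative):
#   ./build.sh            # incremental run, reusing .venv/ and build/
#   ./build.sh --clean    # wipe .venv/ and build/ before rebuilding
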
log_step() { echo "[+] $*"; }
log_skip() { echo "[=] $*"; }
log_info() { echo "[i] $*"; }
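
# detect_cuda_home: honor an explicit, existing $CUDA_HOME first;
# otherwise pick the newest /usr/local/cuda* install (sort -Vr ranks
# e.g. cuda-13.0 ahead of cuda-12.6). Prints nothing if no toolkit
# directory is found.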
detect_cuda_home() {
  local requested="${CUDA_HOME:-}"
  if [[ -n "${requested}" && -d "${requested}" ]]; then
    echo "${requested}"
    return
  fi
  local best=""
  local candidate_list
  candidate_list=$(ls -d /usr/local/cuda-${CUDA_VERSION} /usr/local/cuda /usr/local/cuda-* 2>/dev/null | sort -Vr || true)
  if [[ -n "${candidate_list}" ]]; then
    while IFS= read -r candidate; do
      [[ -z "${candidate}" ]] && continue
      if [[ -d "${candidate}" ]]; then
        best="${candidate}"
        break
      fi
    done <<<"${candidate_list}"
  fi
  if [[ -n "${best}" ]]; then
    echo "${best}"
  fi
}

setup_cuda_env() {
  local detected
  detected=$(detect_cuda_home || true)
  if [[ -n "${detected}" ]]; then
    export CUDA_HOME="${detected}"
    export PATH="${CUDA_HOME}/bin:${PATH}"
    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH:-}"
    log_step "Using CUDA toolkit at ${CUDA_HOME}"
    local nvcc_path
    nvcc_path=$(command -v nvcc || true)
    if [[ -z "${nvcc_path}" && -x "${CUDA_HOME}/bin/nvcc" ]]; then
      nvcc_path="${CUDA_HOME}/bin/nvcc"
    fi
    if [[ -n "${nvcc_path}" ]]; then
      CUDA_AVAILABLE=1
    else
      CUDA_AVAILABLE=0
      log_info "nvcc not found; CUDA-dependent builds will be skipped"
    fi
  else
    log_info "CUDA toolkit not found; proceeding with CPU-only build"
    unset CUDA_HOME
    CUDA_AVAILABLE=0
  fi
}
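
# prepend_unique_path VAR DIR: prepend DIR to the colon-separated list
# in $VAR only if it is not already present, e.g.
#   prepend_unique_path LD_LIBRARY_PATH /opt/nccl/lib
# Repeated calls with the same DIR are no-ops, so the component setup
# below can run safely more than once.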
prepend_unique_path() {
  local var="$1"
  local path="$2"
  [[ -z "${path}" ]] && return
  if [[ ! -d "${path}" ]]; then
    return
  fi
  local current="${!var-}"
  if [[ -z "${current:-}" ]]; then
    printf -v "${var}" "%s" "${path}"
  elif [[ ":${current}:" != *":${path}:"* ]]; then
    printf -v "${var}" "%s" "${path}:${current}"
  else
    printf -v "${var}" "%s" "${current}"
  fi
  export "${var}"
}
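
# locate_python_module_path MODULE [HEADER]: print the directory of a
# pip-installed package, optionally requiring include/HEADER beneath it;
# e.g. "locate_python_module_path nvidia.cudnn cudnn.h" would print
# something like <venv>/lib/python3.13/site-packages/nvidia/cudnn.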
locate_python_module_path() {
  local module="$1"
  local header="${2:-}"
  python - "$module" "$header" <<'PY'
import importlib.util, os, sys
module, header = sys.argv[1], sys.argv[2]
spec = importlib.util.find_spec(module)
if not spec or not spec.submodule_search_locations:
    raise SystemExit(0)
for path in spec.submodule_search_locations:
    if not path:
        continue
    real = os.path.realpath(path)
    if not os.path.isdir(real):
        continue
    if not header:
        print(real)
        break
    include_dir = os.path.join(real, "include")
    if os.path.isdir(include_dir) and os.path.exists(os.path.join(include_dir, header)):
        print(real)
        break
PY
}
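
# symlink_unversioned_lib DIR LIB: NVIDIA wheels typically ship only
# versioned sonames (e.g. libnccl.so.2); create the bare LIB symlink so
# "-lnccl"-style link lines resolve at build time.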
symlink_unversioned_lib() {
  local dir="$1"
  local lib="$2"
  [[ -z "${dir}" || -z "${lib}" ]] && return 1
  local target="${dir}/${lib}"
  if [[ -e "${target}" ]]; then
    return 0
  fi
  local versioned
  versioned="$(python - "$dir" "$lib" <<'PY'
import pathlib, sys
libdir = pathlib.Path(sys.argv[1])
base = sys.argv[2]
pattern = base + ".*"
for candidate in sorted(libdir.glob(pattern), reverse=True):
    if candidate.is_file():
        print(candidate.name)
        break
PY
)"
  if [[ -z "${versioned}" ]]; then
    return 1
  fi
  ln -sf "${versioned}" "${target}"
}
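
# setup_cuda_component FRIENDLY MODULE HEADER "LIB1;LIB2" PREFIX: wire a
# pip-provided CUDA library into the build by exporting the
# <PREFIX>_ROOT/_INCLUDE_DIR/_LIBRARY... hints that CMake find modules
# commonly honor, and by extending the LD_LIBRARY_PATH / CMAKE_*_PATH
# search lists. Returns non-zero when the package or its libs are absent.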
setup_cuda_component() {
  local friendly="$1"
  local module="$2"
  local header="$3"
  local lib_names="${4:-}"
  local prefix="$5"
  if [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
    log_skip "${friendly} setup skipped (CUDA unavailable)"
    return 1
  fi
  local base_path
  base_path="$(locate_python_module_path "${module}" "${header}" || true)"
  if [[ -z "${base_path}" ]]; then
    return 1
  fi
  local include_dir="${base_path}/include"
  local lib_dir=""
  for candidate in lib64 lib; do
    if [[ -d "${base_path}/${candidate}" ]]; then
      lib_dir="${base_path}/${candidate}"
      break
    fi
  done
  if [[ -z "${lib_dir}" ]]; then
    return 1
  fi
  if [[ ! -d "${include_dir}" ]]; then
    return 1
  fi
  local IFS=';'
  local libs=()
  read -r -a libs <<<"${lib_names:-}"
  IFS=$' \t\n'
  for lib in "${libs[@]}"; do
    [[ -z "${lib}" ]] && continue
    if ! symlink_unversioned_lib "${lib_dir}" "${lib}"; then
      log_info "${friendly}: missing ${lib} in ${lib_dir}"
      return 1
    fi
  done
  prepend_unique_path "LD_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_INCLUDE_PATH" "${include_dir}"
  export "${prefix}_ROOT=${base_path}"
  export "${prefix}_INCLUDE_DIR=${include_dir}"
  export "${prefix}_INCLUDE_DIRS=${include_dir}"
  export "${prefix}_INCLUDE_PATH=${include_dir}"
  export "${prefix}_LIB_DIR=${lib_dir}"
  export "${prefix}_LIBRARY_DIR=${lib_dir}"
  if [[ ${#libs[@]} -gt 0 ]]; then
    local main_lib="${libs[0]}"
    if [[ -n "${main_lib}" ]]; then
      local lib_path="${lib_dir}/${main_lib}"
      if [[ -e "${lib_path}" ]]; then
        export "${prefix}_LIBRARY=${lib_path}"
        export "${prefix}_LIBRARIES=${lib_path}"
        export "${prefix}_LIBRARY_PATH=${lib_path}"
      fi
    fi
  fi
  log_step "Configured ${friendly} from Python package at ${base_path}"
  return 0
}

setup_nccl_env() {
  if setup_cuda_component "NCCL" "nvidia.nccl" "nccl.h" "libnccl.so" "NCCL"; then
    NCCL_AVAILABLE=1
  else
    log_info "NCCL Python package not located; relying on system detection"
    NCCL_AVAILABLE=0
  fi
}

setup_cudnn_env() {
  if setup_cuda_component "cuDNN" "nvidia.cudnn" "cudnn.h" "libcudnn.so" "CUDNN"; then
    CUDNN_AVAILABLE=1
  else
    log_info "cuDNN package not located; ensure system cuDNN is available"
    CUDNN_AVAILABLE=0
  fi
}

setup_cudss_env() {
  if setup_cuda_component "cuDSS" "nvidia.cu${CUDA_VERSION}" "cudss.h" "libcudss.so" "CUDSS"; then
    CUDSS_AVAILABLE=1
  else
    log_info "cuDSS package not located; ensure system cuDSS is available"
    CUDSS_AVAILABLE=0
  fi
}

setup_cusparselt_env() {
  if setup_cuda_component "cuSPARSELt" "nvidia.cusparselt" "cusparseLt.h" "libcusparseLt.so" "CUSPARSELT"; then
    CUSPARSELT_AVAILABLE=1
  else
    log_info "cuSPARSELt package not located; ensure system cuSPARSELt is available"
    CUSPARSELT_AVAILABLE=0
  fi
}

python_include_dir() {
  python - <<'PY'
import sysconfig
include = sysconfig.get_path("include") or sysconfig.get_config_var("INCLUDEPY")
if include:
    print(include)
PY
}
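
# python_library_path: print the interpreter's shared library; the
# ".so.1.0" probe matches the LDLIBRARY naming used by some distro
# builds of CPython.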
python_library_path() {
  python - <<'PY'
import os, sysconfig
libdir = sysconfig.get_config_var("LIBDIR")
libname = sysconfig.get_config_var("LDLIBRARY")
if not libdir or not libname:
    raise SystemExit(1)
candidates = []
candidate = os.path.join(libdir, libname)
candidates.append(candidate)
suffix = sysconfig.get_config_var("SHLIB_SUFFIX") or ".so"
if not candidate.endswith(suffix):
    candidates.append(candidate + suffix)
if candidate.endswith(".so") and not candidate.endswith(".so.1.0"):
    candidates.append(candidate + ".1.0")
for path in candidates:
    if os.path.exists(path):
        print(path)
        break
else:
    raise SystemExit(1)
PY
}
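
# ensure_python_headers DIR: if DIR lacks patchlevel.h (distros that
# split headers into a -devel package; the pyconfig-64.h path below
# suggests Fedora/RHEL), stage the Include/ tree from the matching
# CPython source tarball and use that instead.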
ensure_python_headers() {
  local include_dir="$1"
  if [[ -f "${include_dir}/patchlevel.h" ]]; then
    echo "${include_dir}"
    return
  fi
  local full_version
  full_version=$(python - <<'PY'
import platform
print(platform.python_version())
PY
)
  local headers_root="build/python-headers-${full_version}"
  local headers_abs="${PWD}/${headers_root}"
  local target_include="${headers_abs}/Include"
  if [[ ! -f "${target_include}/patchlevel.h" ]]; then
    log_step "Preparing CPython headers ${full_version}" >&2
    mkdir -p "${headers_abs}"
    local tarball="build/Python-${full_version}.tgz"
    if [[ ! -f "${tarball}" ]]; then
      curl -LsSf "https://www.python.org/ftp/python/${full_version}/Python-${full_version}.tgz" -o "${tarball}"
    fi
    local src_dir="build/Python-${full_version}"
    if [[ ! -d "${src_dir}" ]]; then
      tar -xzf "${tarball}" -C build
    fi
    rm -rf "${target_include}"
    cp -R "${src_dir}/Include" "${target_include}"
    mkdir -p "${target_include}/internal"
    cp -R "${src_dir}/Include/internal/"* "${target_include}/internal/" 2>/dev/null || true
    if [[ -f "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" ]]; then
      cp "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" "${target_include}/pyconfig.h"
    fi
  fi
  echo "${target_include}"
}
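
# ensure_repo NAME DIR URL REF: idempotent clone + checkout, e.g.
#   ensure_repo "PyTorch" pytorch https://github.com/pytorch/pytorch.git v2.9.0
# Re-runs leave an already-checked-out repository untouched.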
ensure_repo() {
  local name="$1"
  local dir="$2"
  local url="$3"
  local ref="$4"
  if [ -d "${dir}/.git" ]; then
    log_skip "${name} repository already present at ${dir}"
  else
    log_step "Cloning ${name} ${ref}"
    git clone --recursive "${url}" "${dir}"
  fi
  pushd "${dir}" >/dev/null
  if ! git fetch --tags --quiet; then
    log_info "Skipping tag fetch for ${name}; using existing refs"
  fi
  if [ -n "${ref}" ]; then
    local target current
    target=$(git rev-parse "${ref}^{commit}")
    current=$(git rev-parse HEAD)
    if [ "${current}" != "${target}" ]; then
      log_step "Checking out ${name} ${ref}"
      git checkout "${ref}"
      current=$(git rev-parse HEAD)
    else
      log_skip "${name} already at ${ref}"
    fi
  fi
  git submodule update --init --recursive
  popd >/dev/null
}
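
# patch_vllm_repo: vLLM 0.11.0 pins torch==2.8.0; relax the pins to
# ">= 2.9.0a0" so the locally built PyTorch 2.9.0 satisfies them instead
# of dragging in a prebuilt torch wheel.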
patch_vllm_repo() {
  local repo_dir="vllm"
  if [[ ! -d "${repo_dir}" ]]; then
    return
  fi
  log_step "Patching vLLM build configuration for CUDA ${CUDA_FULL_VERSION}"
  pushd "${repo_dir}" >/dev/null
  python - <<'PY'
from pathlib import Path
replacements = {
    Path("pyproject.toml"): [
        ("torch == 2.8.0", "torch >= 2.9.0a0"),
    ],
    Path("requirements/build.txt"): [
        ("torch==2.8.0", "torch>=2.9.0a0"),
    ],
    Path("requirements/cuda.txt"): [
        # Replace the longer "+cu128" pin first so the plain pin below
        # cannot partially rewrite it and leave a stray "+cu128" suffix.
        ("torch==2.8.0+cu128", "torch>=2.9.0a0"),
        ("torch==2.8.0", "torch>=2.9.0a0"),
        ("torchaudio==2.8.0", "torchaudio>=2.8.0"),
        ("torchvision==0.23.0", "torchvision>=0.23.0"),
    ],
}
patched = False
for path, mapping in replacements.items():
    if not path.exists():
        continue
    text = path.read_text()
    new_text = text
    for old, new in mapping:
        new_text = new_text.replace(old, new)
    if new_text != text:
        path.write_text(new_text)
        print(f"Patched {path}")
        patched = True
if not patched:
    print("No vLLM files required patching.")
PY
  popd >/dev/null
}
python_module_version() {
  local module="$1"
  python - "${module}" <<'PY'
import importlib, sys
module = sys.argv[1]
try:
    mod = importlib.import_module(module)
except Exception:
    sys.exit(1)
version = getattr(mod, "__version__", None)
if version is None:
    sys.exit(2)
print(version)
PY
}

python_module_version_matches() {
  local module="$1"
  local expected_prefix="$2"
  local version
  if ! version=$(python_module_version "${module}" 2>/dev/null); then
    return 1
  fi
  [[ -z "${expected_prefix}" || "${version}" == "${expected_prefix}"* ]]
}

python_torch_cuda_version() {
  python - <<'PY'
try:
    import torch
except Exception:
    raise SystemExit(1)
cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
if not cuda_version:
    raise SystemExit(2)
print(cuda_version)
PY
}

CLEAN=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --clean)
      CLEAN=1
      ;;
    -h|--help)
      usage 0
      ;;
    *)
      log_info "Unknown argument: $1"
      usage 1
      ;;
  esac
  shift
done

if [[ "${CLEAN}" -eq 1 ]]; then
  log_step "Cleaning previous build artifacts"
  rm -rf -- "${VENV_PATH}" build
fi

setup_cuda_env

if [ ! -d "${VENV_PATH}" ]; then
  log_step "Creating virtual environment at ${VENV_PATH} (Python ${PYTHON_VERSION})"
  uv venv "${VENV_PATH}" --python "${PYTHON_VERSION}"
else
  log_skip "Virtual environment already exists at ${VENV_PATH}"
fi

VENV_ABS_PATH=$(python - <<PY
import os
print(os.path.abspath("${VENV_PATH}"))
PY
)
source "${VENV_PATH}/bin/activate"

PY_INCLUDE=$(python_include_dir || true)
if [[ -n "${PY_INCLUDE}" ]]; then
  PY_INCLUDE=$(ensure_python_headers "${PY_INCLUDE}")
fi
PY_LIB=$(python_library_path || true)
mapfile -t PY_SITE_DIRS < <(python - <<'PY'
import sysconfig
for key in ("purelib", "platlib"):
    path = sysconfig.get_paths().get(key)
    if path:
        print(path)
PY
)

if [[ -n "${PY_INCLUDE}" ]]; then
  PY_INCLUDE_DIRS="${PY_INCLUDE}"
  if [[ -d "${PY_INCLUDE}/internal" ]]; then
    PY_INCLUDE_DIRS="${PY_INCLUDE_DIRS};${PY_INCLUDE}/internal"
  fi
  export Python_INCLUDE_DIR="${PY_INCLUDE}"
  export Python_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
  export Python3_INCLUDE_DIR="${PY_INCLUDE}"
  export Python3_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
fi

if [[ -n "${PY_LIB}" ]]; then
  if [[ "${PY_LIB}" == *.so.1.0 ]]; then
    alias_name="$(basename "${PY_LIB%.1.0}")"
    alias_path="${VENV_ABS_PATH}/lib/${alias_name}"
    mkdir -p "${VENV_ABS_PATH}/lib"
    ln -sf "${PY_LIB}" "${alias_path}"
    PY_LIB="${alias_path}"
  fi
  export PYTHON_USER_LIBDIR="$(dirname "${PY_LIB}")"
  export PYTHON_USER_LDLIBRARY="$(basename "${PY_LIB}")"
  export Python_LIBRARY="${PY_LIB}"
  export Python_LIBRARIES="${PY_LIB}"
  export Python3_LIBRARY="${PY_LIB}"
  export Python3_LIBRARIES="${PY_LIB}"
  export LD_LIBRARY_PATH="$(dirname "${PY_LIB}"):${LD_LIBRARY_PATH:-}"
  export CMAKE_LIBRARY_PATH="$(dirname "${PY_LIB}"):${CMAKE_LIBRARY_PATH:-}"
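
  # Drop a sitecustomize.py into each site dir so any child interpreter
  # (e.g. one spawned during the PyTorch build to query sysconfig)
  # reports the venv's libpython location instead of the distro defaults.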
  for sitecustomize_path in "${PY_SITE_DIRS[@]}"; do
    sitecustomize_file="${sitecustomize_path}/sitecustomize.py"
    mkdir -p "${sitecustomize_path}"
    cat <<'PY' > "${sitecustomize_file}"
import os
import sysconfig
libdir = os.environ.get("PYTHON_USER_LIBDIR")
libname = os.environ.get("PYTHON_USER_LDLIBRARY")
if libdir:
    for key in ("LIBDIR", "LIBPL"):
        sysconfig.get_config_vars()[key] = libdir
if libname:
    for key in ("LDLIBRARY", "LIBRARY"):
        sysconfig.get_config_vars()[key] = libname
PY
  done
fi

if [[ -n "${PY_INCLUDE}" ]]; then
  export CMAKE_INCLUDE_PATH="${PY_INCLUDE}:${CMAKE_INCLUDE_PATH:-}"
fi

log_step "Ensuring build tooling is up to date"
uv pip install --upgrade --no-build-isolation \
  pip setuptools wheel \
  ninja cmake packaging pyyaml \
  typing_extensions sympy six \
  appdirs setuptools-scm jinja2 regex build
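
# The nvidia-* wheels below unpack their headers and shared libraries
# under site-packages/nvidia/<component>/ (that layout is what the
# setup_*_env helpers above assume when wiring the trees into CMake and
# the dynamic linker).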
| log_step "Installing CUDA toolkit and dependencies via NVIDIA PyIndex" | |
| uv pip install --upgrade --no-build-isolation \ | |
| nvidia-pyindex | |
| uv pip install --upgrade --no-build-isolation \ | |
| cuda-toolkit[all]~=${CUDA_FULL_VERSION} \ | |
| cupy-cuda${CUDA_VERSION}x \ | |
| nvidia-cudss-cu${CUDA_VERSION} \ | |
| nvidia-nccl-cu${CUDA_VERSION} \ | |
| nvidia-cudnn-cu${CUDA_VERSION} \ | |
| nvidia-cusparselt-cu${CUDA_VERSION} | |
| setup_nccl_env | |
| setup_cudnn_env | |
| setup_cudss_env | |
| setup_cusparselt_env | |
| if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then | |
| missing_components=() | |
| [[ "${CUDNN_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDNN") | |
| [[ "${CUDSS_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDSS") | |
| [[ "${CUSPARSELT_AVAILABLE}" -ne 1 ]] && missing_components+=("cuSPARSELt") | |
| if (( ${#missing_components[@]} )); then | |
| log_info "Missing required CUDA libraries: ${missing_components[*]}" | |
| exit 1 | |
| fi | |
| fi | |
| if [ ! -d build ]; then | |
| log_step "Creating build directory" | |
| mkdir -p build | |
| else | |
| log_skip "Build directory already exists" | |
| fi | |
| pushd build >/dev/null | |
| # ============================================================ | |
| # 1. Build PyTorch from source | |
| # ============================================================ | |
| ensure_repo "PyTorch" "pytorch" "https://github.com/pytorch/pytorch.git" "${PYTORCH_TAG}" | |
| build_pytorch=1 | |
| existing_torch_version="" | |
| existing_torch_cuda="" | |
| if existing_torch_version=$(python_module_version torch 2>/dev/null); then | |
| if existing_torch_cuda=$(python_torch_cuda_version 2>/dev/null); then | |
| : | |
| else | |
| existing_torch_cuda="" | |
| fi | |
| if [[ "${existing_torch_version}" == "${PYTORCH_VERSION}"* && "${existing_torch_cuda}" == "${CUDA_FULL_VERSION}" ]]; then | |
| log_skip "PyTorch ${PYTORCH_TAG} already installed for CUDA ${existing_torch_cuda}" | |
| build_pytorch=0 | |
| else | |
| log_info "Existing PyTorch ${existing_torch_version:-<unknown>} built for CUDA ${existing_torch_cuda:-<unknown>}; expected CUDA ${CUDA_FULL_VERSION}. Rebuilding from source." | |
| python -m pip uninstall -y torch torchvision torchaudio >/dev/null 2>&1 || true | |
| fi | |
| fi | |
| if [[ "${build_pytorch}" -eq 1 ]]; then | |
| log_step "Building and installing PyTorch ${PYTORCH_TAG} (all cores, full optimization)" | |
| pushd pytorch >/dev/null | |
| if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then | |
| export USE_CUDA=1 | |
| export USE_NCCL="${NCCL_AVAILABLE}" | |
| export USE_SYSTEM_NCCL="${NCCL_AVAILABLE}" | |
| else | |
| export USE_CUDA=0 | |
| export USE_NCCL=0 | |
| export USE_SYSTEM_NCCL=0 | |
| fi | |
| if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then | |
| export USE_CUDNN="${CUDNN_AVAILABLE}" | |
| export USE_CUDSS="${CUDSS_AVAILABLE}" | |
| export USE_CUSPARSELT="${CUSPARSELT_AVAILABLE}" | |
| else | |
| export USE_CUDNN=0 | |
| export USE_CUDSS=0 | |
| export USE_CUSPARSELT=0 | |
| fi | |
| export USE_MKLDNN=1 | |
| export BUILD_TEST=0 | |
| export BUILD_CAFFE2=0 | |
| export USE_MPS=0 | |
| export USE_FBGEMM=1 | |
| export USE_QNNPACK=0 | |
| export USE_XNNPACK=1 | |
| PY_CMAKE_ARGS=( | |
| "-DPython3_EXECUTABLE=${VENV_ABS_PATH}/bin/python" | |
| "-DPython3_LIBRARY=${PY_LIB}" | |
| "-DPython3_INCLUDE_DIR=${PY_INCLUDE}" | |
| "-DPython_LIBRARY=${PY_LIB}" | |
| "-DPython_INCLUDE_DIR=${PY_INCLUDE}" | |
| "-DPython3_FIND_STRATEGY=LOCATION" | |
| "-DPython3_ROOT_DIR=${VENV_ABS_PATH}" | |
| ) | |
| OLD_CMAKE_ARGS="${CMAKE_ARGS:-}" | |
| export CMAKE_ARGS="${PY_CMAKE_ARGS[*]} ${OLD_CMAKE_ARGS}" | |
| OLD_CFLAGS="${CFLAGS:-}" | |
| OLD_CPPFLAGS="${CPPFLAGS:-}" | |
| export CFLAGS="-I${PY_INCLUDE} ${OLD_CFLAGS}" | |
| export CPPFLAGS="-I${PY_INCLUDE} ${OLD_CPPFLAGS}" | |
| python setup.py install | |
| export CMAKE_ARGS="${OLD_CMAKE_ARGS}" | |
| export CFLAGS="${OLD_CFLAGS}" | |
| export CPPFLAGS="${OLD_CPPFLAGS}" | |
| popd >/dev/null | |
| fi | |
| # ============================================================ | |
| # 2. Verify torch build | |
| # ============================================================ | |
| log_step "Verifying PyTorch build" | |
| python - <<'EOF_VERIFY' | |
| import torch | |
| print("torch:", torch.__version__, "CUDA:", torch.version.cuda) | |
| print("Devices:", torch.cuda.device_count(), "Arch list:", torch.cuda.get_arch_list()) | |
| EOF_VERIFY | |
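
# On a successful CUDA build the output should resemble (exact values
# vary; source builds often report a version like 2.9.0a0+git<sha>):
#   torch: 2.9.0a0+git<sha> CUDA: 13.0
#   Devices: 1 Arch list: ['sm_90']
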
# ============================================================
# 3. Build vLLM
# ============================================================
ensure_repo "vLLM" "vllm" "https://github.com/vllm-project/vllm.git" "v${VLLM_VERSION}"
patch_vllm_repo

if python_module_version_matches vllm "${VLLM_VERSION}"; then
  log_skip "vLLM ${VLLM_VERSION} already installed"
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping vLLM build because nvcc tooling is unavailable"
else
  log_step "Building and installing vLLM ${VLLM_VERSION}"
  pushd vllm >/dev/null
  export VLLM_BUILD_CUDA_EXT=1
  python -m pip install -v --no-build-isolation --no-deps .
  popd >/dev/null
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
fi
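
# vLLM was installed with --no-deps above; install its runtime
# dependencies separately so the resolver cannot swap the source-built
# torch for a prebuilt wheel.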
uv pip install -r vllm/requirements/common.txt
uv pip install numba
# ============================================================
# 4. Build FlashInfer (core only)
# ============================================================
ensure_repo "FlashInfer" "flashinfer" "https://github.com/flashinfer-ai/flashinfer.git" "v${FLASHINFER_VERSION}"

if python_module_version_matches flashinfer "${FLASHINFER_VERSION}"; then
  log_skip "FlashInfer ${FLASHINFER_VERSION} already installed"
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping FlashInfer build because nvcc tooling is unavailable"
else
  log_step "Building and installing FlashInfer ${FLASHINFER_VERSION}"
  pushd flashinfer >/dev/null
  python -m pip install -v .
  popd >/dev/null
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
fi

# ============================================================
# 5. Install LMCache
# ============================================================
if python_module_version_matches lmcache "${LMCACHE_VERSION}"; then
  log_skip "LMCache ${LMCACHE_VERSION} already installed"
else
  log_step "Installing LMCache ${LMCACHE_VERSION}"
  uv pip install -U --prerelease=allow --no-deps "lmcache==${LMCACHE_VERSION}"
fi

# ============================================================
# 6. Sanity check
# ============================================================
log_step "Verifying installation"
python - <<'EOF_SANITY'
import importlib
import os

def show_status(name, module=None, extra=""):
    status = f"{name}:"
    if module is None:
        status += f" {extra}" if extra else " not installed"
    else:
        version = getattr(module, "__version__", "<unknown>")
        status += f" {version}"
        if extra:
            status += f" {extra}"
    print(status)

torch = importlib.import_module("torch")
show_status("torch", torch, f"(CUDA {torch.version.cuda})")
if os.environ.get("VLLM_INSTALLED") == "1":
    vllm = importlib.import_module("vllm")
    show_status("vllm", vllm)
else:
    show_status("vllm", extra="skipped (nvcc unavailable)")
if os.environ.get("FLASHINFER_INSTALLED") == "1":
    flashinfer = importlib.import_module("flashinfer")
    show_status("flashinfer", flashinfer)
else:
    show_status("flashinfer", extra="skipped (nvcc unavailable)")
lmcache = importlib.import_module("lmcache")
show_status("lmcache", lmcache)
EOF_SANITY

popd >/dev/null

echo
echo "[✓] Native build complete. Environment in ${VENV_PATH}"
echo "[i] Activate it with: source ${VENV_PATH}/bin/activate"
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
  echo "[→] Built for GH100 (SM 9.0) using all ${MAX_JOBS} cores."
else
  echo "[→] Completed CPU-only build (CUDA toolchain not detected)."
fi