#!/usr/bin/env bash
set -euo pipefail
renice -n 19 $$ >/dev/null || true
export TMPDIR="${TMPDIR:-$(pwd)/tmp}"
mkdir -p "${TMPDIR}" || true
# ============================================================
# Native build of PyTorch + vLLM + FlashInfer + LMCache
# Target GPU: GH100 NVL (Grace Hopper, SM 9.0)
# System: Python 3.13, CUDA 13.0
# ============================================================
PYTHON_VERSION=3.13
CUDA_FULL_VERSION="13.0"
CUDA_VERSION="13"
VENV_PATH=".venv"
# ---- Source versions ----------------------------------------
PYTORCH_TAG="v2.9.0"
VLLM_VERSION="0.11.0"
FLASHINFER_VERSION="0.4.1"
LMCACHE_VERSION="0.3.8"
PYTORCH_VERSION="${PYTORCH_TAG#v}"
# ---- Build state (initial state before detection) -----------
CUDA_AVAILABLE=0
VLLM_INSTALLED=0
FLASHINFER_INSTALLED=0
export VLLM_INSTALLED FLASHINFER_INSTALLED
NCCL_AVAILABLE=0
CUDNN_AVAILABLE=0
CUDSS_AVAILABLE=0
CUSPARSELT_AVAILABLE=0
export NCCL_AVAILABLE CUDNN_AVAILABLE CUDSS_AVAILABLE CUSPARSELT_AVAILABLE
# ---- Compiler / CUDA config ---------------------------------
export CC=gcc
export CXX=g++
export MAX_JOBS="40"
export TORCH_CUDA_ARCH_LIST="9.0" # GH100 (Grace Hopper)
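# NOTE: TORCH_CUDA_ARCH_LIST also accepts multiple architectures and a "+PTX"
# suffix for forward compatibility, e.g. "8.0 9.0" or "9.0+PTX" (illustrative
# values); pinning just "9.0" keeps the binaries Hopper-only and the build small.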
usage() {
  local exit_code="${1:-0}"
  cat <<EOF_USAGE
Usage: $(basename "$0") [--clean]

Options:
  --clean      Remove existing virtual environment and build artifacts before rebuilding.
  -h, --help   Show this help message.
EOF_USAGE
  exit "${exit_code}"
}
log_step() { echo "[+] $*"; }
log_skip() { echo "[=] $*"; }
log_info() { echo "[i] $*"; }
detect_cuda_home() {
  local requested="${CUDA_HOME:-}"
  if [[ -n "${requested}" && -d "${requested}" ]]; then
    echo "${requested}"
    return
  fi
  # Check the pinned major version and the unversioned symlink first, then
  # fall back to the newest versioned toolkit under /usr/local.
  local candidate
  while IFS= read -r candidate; do
    [[ -z "${candidate}" ]] && continue
    if [[ -d "${candidate}" ]]; then
      echo "${candidate}"
      return
    fi
  done < <(
    printf '%s\n' "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda
    ls -d /usr/local/cuda-* 2>/dev/null | sort -Vr || true
  )
}
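# Example (illustrative): with CUDA 13 installed, the lookup resolves in the
# priority order above, so
#   CUDA_HOME="$(detect_cuda_home)"   # -> /usr/local/cuda-13 if present,
#                                     #    else /usr/local/cuda, else newest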
setup_cuda_env() {
  local detected
  detected=$(detect_cuda_home || true)
  if [[ -n "${detected}" ]]; then
    export CUDA_HOME="${detected}"
    export PATH="${CUDA_HOME}/bin:${PATH}"
    export LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH:-}"
    log_step "Using CUDA toolkit at ${CUDA_HOME}"
    local nvcc_path
    nvcc_path=$(command -v nvcc || true)
    if [[ -z "${nvcc_path}" && -x "${CUDA_HOME}/bin/nvcc" ]]; then
      nvcc_path="${CUDA_HOME}/bin/nvcc"
    fi
    if [[ -n "${nvcc_path}" ]]; then
      CUDA_AVAILABLE=1
    else
      CUDA_AVAILABLE=0
      log_info "nvcc not found; CUDA-dependent builds will be skipped"
    fi
  else
    log_info "CUDA toolkit not found; proceeding with CPU-only build"
    unset CUDA_HOME
    CUDA_AVAILABLE=0
  fi
}
prepend_unique_path() {
  local var="$1"
  local path="$2"
  [[ -z "${path}" ]] && return
  if [[ ! -d "${path}" ]]; then
    return
  fi
  local current="${!var-}"
  if [[ -z "${current:-}" ]]; then
    printf -v "${var}" "%s" "${path}"
  elif [[ ":${current}:" != *":${path}:"* ]]; then
    printf -v "${var}" "%s" "${path}:${current}"
  else
    printf -v "${var}" "%s" "${current}"
  fi
  export "${var}"
}
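# Example (illustrative; /opt/example/lib is a made-up path): calls are
# idempotent, so running
#   prepend_unique_path LD_LIBRARY_PATH /opt/example/lib
# twice leaves a single "/opt/example/lib" entry, and non-existent
# directories are ignored entirely.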
locate_python_module_path() {
  local module="$1"
  local header="${2:-}"
  python - "$module" "$header" <<'PY'
import importlib.util, os, sys
module, header = sys.argv[1], sys.argv[2]
spec = importlib.util.find_spec(module)
if not spec or not spec.submodule_search_locations:
    raise SystemExit(0)
for path in spec.submodule_search_locations:
    if not path:
        continue
    real = os.path.realpath(path)
    if not os.path.isdir(real):
        continue
    if not header:
        print(real)
        break
    include_dir = os.path.join(real, "include")
    if os.path.isdir(include_dir) and os.path.exists(os.path.join(include_dir, header)):
        print(real)
        break
PY
}
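# Example (illustrative): locate the cuDNN wheel payload, requiring its header:
#   locate_python_module_path nvidia.cudnn cudnn.h
# prints <site-packages>/nvidia/cudnn only when include/cudnn.h exists inside
# it; with no header argument, the first existing package directory wins.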
symlink_unversioned_lib() {
  local dir="$1"
  local lib="$2"
  [[ -z "${dir}" || -z "${lib}" ]] && return 1
  local target="${dir}/${lib}"
  if [[ -e "${target}" ]]; then
    return 0
  fi
  local versioned
  versioned="$(python - "$dir" "$lib" <<'PY'
import pathlib, sys
libdir = pathlib.Path(sys.argv[1])
base = sys.argv[2]
pattern = base + ".*"
for candidate in sorted(libdir.glob(pattern), reverse=True):
    if candidate.is_file():
        print(candidate.name)
        break
PY
  )"
  if [[ -z "${versioned}" ]]; then
    return 1
  fi
  ln -sf "${versioned}" "${target}"
}
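# Example (illustrative version number): if the lib dir holds libnccl.so.2.28.3
# but no bare libnccl.so, then
#   symlink_unversioned_lib "${lib_dir}" libnccl.so
# creates libnccl.so -> libnccl.so.2.28.3, which linkers passed "-lnccl" need.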
setup_cuda_component() {
  local friendly="$1"
  local module="$2"
  local header="$3"
  local lib_names="${4:-}"
  local prefix="$5"
  if [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
    log_skip "${friendly} setup skipped (CUDA unavailable)"
    return 1
  fi
  local base_path
  base_path="$(locate_python_module_path "${module}" "${header}" || true)"
  if [[ -z "${base_path}" ]]; then
    return 1
  fi
  local include_dir="${base_path}/include"
  local lib_dir=""
  for candidate in lib64 lib; do
    if [[ -d "${base_path}/${candidate}" ]]; then
      lib_dir="${base_path}/${candidate}"
      break
    fi
  done
  if [[ -z "${lib_dir}" ]]; then
    return 1
  fi
  if [[ ! -d "${include_dir}" ]]; then
    return 1
  fi
  local IFS=';'
  local libs=()
  read -r -a libs <<<"${lib_names:-}"
  IFS=$' \t\n'
  for lib in "${libs[@]}"; do
    [[ -z "${lib}" ]] && continue
    if ! symlink_unversioned_lib "${lib_dir}" "${lib}"; then
      log_info "${friendly}: missing ${lib} in ${lib_dir}"
      return 1
    fi
  done
  prepend_unique_path "LD_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_LIBRARY_PATH" "${lib_dir}"
  prepend_unique_path "CMAKE_INCLUDE_PATH" "${include_dir}"
  export "${prefix}_ROOT=${base_path}"
  export "${prefix}_INCLUDE_DIR=${include_dir}"
  export "${prefix}_INCLUDE_DIRS=${include_dir}"
  export "${prefix}_INCLUDE_PATH=${include_dir}"
  export "${prefix}_LIB_DIR=${lib_dir}"
  export "${prefix}_LIBRARY_DIR=${lib_dir}"
  if [[ ${#libs[@]} -gt 0 ]]; then
    local main_lib="${libs[0]}"
    if [[ -n "${main_lib}" ]]; then
      local lib_path="${lib_dir}/${main_lib}"
      if [[ -e "${lib_path}" ]]; then
        export "${prefix}_LIBRARY=${lib_path}"
        export "${prefix}_LIBRARIES=${lib_path}"
        export "${prefix}_LIBRARY_PATH=${lib_path}"
      fi
    fi
  fi
  log_step "Configured ${friendly} from Python package at ${base_path}"
  return 0
}
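# Example (illustrative): a successful
#   setup_cuda_component "NCCL" nvidia.nccl nccl.h libnccl.so NCCL
# exports NCCL_ROOT, NCCL_INCLUDE_DIR(S)/NCCL_INCLUDE_PATH, NCCL_LIB_DIR,
# NCCL_LIBRARY_DIR and (if the library file exists) NCCL_LIBRARY/NCCL_LIBRARIES
# -- the usual hint variables that CMake find modules commonly consult.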
setup_nccl_env() {
  if setup_cuda_component "NCCL" "nvidia.nccl" "nccl.h" "libnccl.so" "NCCL"; then
    NCCL_AVAILABLE=1
  else
    log_info "NCCL Python package not located; relying on system detection"
    NCCL_AVAILABLE=0
  fi
}
setup_cudnn_env() {
  if setup_cuda_component "cuDNN" "nvidia.cudnn" "cudnn.h" "libcudnn.so" "CUDNN"; then
    CUDNN_AVAILABLE=1
  else
    log_info "cuDNN package not located; ensure system cuDNN is available"
    CUDNN_AVAILABLE=0
  fi
}
setup_cudss_env() {
  if setup_cuda_component "cuDSS" "nvidia.cu${CUDA_VERSION}" "cudss.h" "libcudss.so" "CUDSS"; then
    CUDSS_AVAILABLE=1
  else
    log_info "cuDSS package not located; ensure system cuDSS is available"
    CUDSS_AVAILABLE=0
  fi
}
setup_cusparselt_env() {
  if setup_cuda_component "cuSPARSELt" "nvidia.cusparselt" "cusparseLt.h" "libcusparseLt.so" "CUSPARSELT"; then
    CUSPARSELT_AVAILABLE=1
  else
    log_info "cuSPARSELt package not located; ensure system cuSPARSELt is available"
    CUSPARSELT_AVAILABLE=0
  fi
}
python_include_dir() {
  python - <<'PY'
import sysconfig
include = sysconfig.get_path("include") or sysconfig.get_config_var("INCLUDEPY")
if include:
    print(include)
PY
}
python_library_path() {
  python - <<'PY'
import os, sysconfig
libdir = sysconfig.get_config_var("LIBDIR")
libname = sysconfig.get_config_var("LDLIBRARY")
if not libdir or not libname:
    raise SystemExit(1)
candidates = []
candidate = os.path.join(libdir, libname)
candidates.append(candidate)
suffix = sysconfig.get_config_var("SHLIB_SUFFIX") or ".so"
if not candidate.endswith(suffix):
    candidates.append(candidate + suffix)
if candidate.endswith(".so") and not candidate.endswith(".so.1.0"):
    candidates.append(candidate + ".1.0")
for path in candidates:
    if os.path.exists(path):
        print(path)
        break
else:
    raise SystemExit(1)
PY
}
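# Example (illustrative): for a shared-library CPython 3.13 build this
# typically prints something like <LIBDIR>/libpython3.13.so, or the
# .so.1.0-suffixed file some distributions install instead (handled by the
# alias logic further below).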
ensure_python_headers() {
  local include_dir="$1"
  if [[ -f "${include_dir}/patchlevel.h" ]]; then
    echo "${include_dir}"
    return
  fi
  local full_version
  full_version=$(python - <<'PY'
import platform
print(platform.python_version())
PY
  )
  local headers_root="build/python-headers-${full_version}"
  local headers_abs="${PWD}/${headers_root}"
  local target_include="${headers_abs}/Include"
  if [[ ! -f "${target_include}/patchlevel.h" ]]; then
    log_step "Preparing CPython headers ${full_version}" >&2
    mkdir -p "${headers_abs}"
    local tarball="build/Python-${full_version}.tgz"
    if [[ ! -f "${tarball}" ]]; then
      curl -LsSf "https://www.python.org/ftp/python/${full_version}/Python-${full_version}.tgz" -o "${tarball}"
    fi
    local src_dir="build/Python-${full_version}"
    if [[ ! -d "${src_dir}" ]]; then
      tar -xzf "${tarball}" -C build
    fi
    rm -rf "${target_include}"
    cp -R "${src_dir}/Include" "${target_include}"
    mkdir -p "${target_include}/internal"
    cp -R "${src_dir}/Include/internal/"* "${target_include}/internal/" 2>/dev/null || true
    if [[ -f "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" ]]; then
      cp "/usr/include/python${PYTHON_VERSION}/pyconfig-64.h" "${target_include}/pyconfig.h"
    fi
  fi
  echo "${target_include}"
}
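# Rationale: patchlevel.h serves as the sentinel for a complete CPython header
# set. When the venv's interpreter ships without full headers, the matching
# CPython source headers are staged under build/, and the distro's
# pyconfig-64.h (when present) supplies the platform configuration header.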
ensure_repo() {
  local name="$1"
  local dir="$2"
  local url="$3"
  local ref="$4"
  if [ -d "${dir}/.git" ]; then
    log_skip "${name} repository already present at ${dir}"
  else
    log_step "Cloning ${name} ${ref}"
    git clone --recursive "${url}" "${dir}"
  fi
  pushd "${dir}" >/dev/null
  if ! git fetch --tags --quiet; then
    log_info "Skipping tag fetch for ${name}; using existing refs"
  fi
  if [ -n "${ref}" ]; then
    local target current
    target=$(git rev-parse "${ref}^{commit}")
    current=$(git rev-parse HEAD)
    if [ "${current}" != "${target}" ]; then
      log_step "Checking out ${name} ${ref}"
      git checkout "${ref}"
      current=$(git rev-parse HEAD)
    else
      log_skip "${name} already at ${ref}"
    fi
  fi
  git submodule update --init --recursive
  popd >/dev/null
}
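# Example (illustrative): ensure_repo is idempotent --
#   ensure_repo "PyTorch" pytorch https://github.com/pytorch/pytorch.git v2.9.0
# clones on the first run; later runs just fetch tags, re-pin HEAD to the tag
# if it drifted, and sync submodules.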
patch_vllm_repo() {
  local repo_dir="vllm"
  if [[ ! -d "${repo_dir}" ]]; then
    return
  fi
  log_step "Patching vLLM build configuration for CUDA ${CUDA_FULL_VERSION}"
  pushd "${repo_dir}" >/dev/null
  python - <<'PY'
from pathlib import Path
# Relax vLLM's torch pins so the locally built torch 2.9.0 satisfies them.
# The "+cu128" variant must be replaced before the bare pin: str.replace is
# sequential, so the bare pin would otherwise rewrite its prefix first and
# leave a dangling "+cu128" local-version suffix in the requirement.
replacements = {
    Path("pyproject.toml"): [
        ("torch == 2.8.0", "torch >= 2.9.0a0"),
    ],
    Path("requirements/build.txt"): [
        ("torch==2.8.0", "torch>=2.9.0a0"),
    ],
    Path("requirements/cuda.txt"): [
        ("torch==2.8.0+cu128", "torch>=2.9.0a0"),
        ("torch==2.8.0", "torch>=2.9.0a0"),
        ("torchaudio==2.8.0", "torchaudio>=2.8.0"),
        ("torchvision==0.23.0", "torchvision>=0.23.0"),
    ],
}
patched = False
for path, mapping in replacements.items():
    if not path.exists():
        continue
    text = path.read_text()
    new_text = text
    for old, new in mapping:
        new_text = new_text.replace(old, new)
    if new_text != text:
        path.write_text(new_text)
        print(f"Patched {path}")
        patched = True
if not patched:
    print("No vLLM files required patching.")
PY
  popd >/dev/null
}
python_module_version() {
  local module="$1"
  python - "${module}" <<'PY'
import importlib, sys
module = sys.argv[1]
try:
    mod = importlib.import_module(module)
except Exception:
    sys.exit(1)
version = getattr(mod, "__version__", None)
if version is None:
    sys.exit(2)
print(version)
PY
}
python_module_version_matches() {
  local module="$1"
  local expected_prefix="$2"
  local version
  if ! version=$(python_module_version "${module}" 2>/dev/null); then
    return 1
  fi
  [[ -z "${expected_prefix}" || "${version}" == "${expected_prefix}"* ]]
}
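# Example (illustrative): the prefix match tolerates pre-release and local
# suffixes, so python_module_version_matches torch "2.9.0" accepts both a
# plain "2.9.0" and a source build reporting "2.9.0a0+gitXXXXXXX".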
python_torch_cuda_version() {
  python - <<'PY'
try:
    import torch
except Exception:
    raise SystemExit(1)
cuda_version = getattr(getattr(torch, "version", None), "cuda", None)
if not cuda_version:
    raise SystemExit(2)
print(cuda_version)
PY
}
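# Example (illustrative): prints "13.0" for a CUDA 13.0 torch build; exits
# non-zero for a CPU-only torch, which callers treat as "unknown CUDA".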
CLEAN=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --clean)
      CLEAN=1
      ;;
    -h|--help)
      usage 0
      ;;
    *)
      log_info "Unknown argument: $1"
      usage 1
      ;;
  esac
  shift
done
if [[ "${CLEAN}" -eq 1 ]]; then
log_step "Cleaning previous build artifacts"
rm -rf -- "${VENV_PATH}" build
fi
setup_cuda_env
if [ ! -d "${VENV_PATH}" ]; then
log_step "Creating virtual environment at ${VENV_PATH} (Python ${PYTHON_VERSION})"
uv venv "${VENV_PATH}" --python "${PYTHON_VERSION}"
else
log_skip "Virtual environment already exists at ${VENV_PATH}"
fi
VENV_ABS_PATH=$(python - <<PY
import os
print(os.path.abspath("${VENV_PATH}"))
PY
)
source "${VENV_PATH}/bin/activate"
PY_INCLUDE=$(python_include_dir || true)
if [[ -n "${PY_INCLUDE}" ]]; then
PY_INCLUDE=$(ensure_python_headers "${PY_INCLUDE}")
fi
PY_LIB=$(python_library_path || true)
mapfile -t PY_SITE_DIRS < <(python - <<'PY'
import sysconfig
for key in ("purelib", "platlib"):
    path = sysconfig.get_paths().get(key)
    if path:
        print(path)
PY
)
if [[ -n "${PY_INCLUDE}" ]]; then
PY_INCLUDE_DIRS="${PY_INCLUDE}"
if [[ -d "${PY_INCLUDE}/internal" ]]; then
PY_INCLUDE_DIRS="${PY_INCLUDE_DIRS};${PY_INCLUDE}/internal"
fi
export Python_INCLUDE_DIR="${PY_INCLUDE}"
export Python_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
export Python3_INCLUDE_DIR="${PY_INCLUDE}"
export Python3_INCLUDE_DIRS="${PY_INCLUDE_DIRS}"
fi
if [[ -n "${PY_LIB}" ]]; then
if [[ "${PY_LIB}" == *.so.1.0 ]]; then
alias_name="$(basename "${PY_LIB%.1.0}")"
alias_path="${VENV_ABS_PATH}/lib/${alias_name}"
mkdir -p "${VENV_ABS_PATH}/lib"
ln -sf "${PY_LIB}" "${alias_path}"
PY_LIB="${alias_path}"
fi
export PYTHON_USER_LIBDIR="$(dirname "${PY_LIB}")"
export PYTHON_USER_LDLIBRARY="$(basename "${PY_LIB}")"
export Python_LIBRARY="${PY_LIB}"
export Python_LIBRARIES="${PY_LIB}"
export Python3_LIBRARY="${PY_LIB}"
export Python3_LIBRARIES="${PY_LIB}"
export LD_LIBRARY_PATH="$(dirname "${PY_LIB}"):${LD_LIBRARY_PATH:-}"
export CMAKE_LIBRARY_PATH="$(dirname "${PY_LIB}"):${CMAKE_LIBRARY_PATH:-}"
for sitecustomize_path in "${PY_SITE_DIRS[@]}"; do
sitecustomize_file="${sitecustomize_path}/sitecustomize.py"
mkdir -p "${sitecustomize_path}"
cat <<'PY' > "${sitecustomize_file}"
import os
import sysconfig
libdir = os.environ.get("PYTHON_USER_LIBDIR")
libname = os.environ.get("PYTHON_USER_LDLIBRARY")
if libdir:
for key in ("LIBDIR", "LIBPL"):
sysconfig.get_config_vars()[key] = libdir
if libname:
for key in ("LDLIBRARY", "LIBRARY"):
sysconfig.get_config_vars()[key] = libname
PY
done
fi
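# NOTE: sitecustomize.py is imported automatically at interpreter startup
# (unless Python runs with -S), so every later python invocation in this
# script sees sysconfig LIBDIR/LDLIBRARY pointing at the venv's libpython --
# steering CMake-based builds that query sysconfig toward the right library.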
if [[ -n "${PY_INCLUDE}" ]]; then
export CMAKE_INCLUDE_PATH="${PY_INCLUDE}:${CMAKE_INCLUDE_PATH:-}"
fi
log_step "Ensuring build tooling is up to date"
uv pip install --upgrade --no-build-isolation \
  pip setuptools wheel \
  ninja cmake packaging pyyaml \
  typing_extensions sympy six \
  appdirs setuptools-scm jinja2 regex build
log_step "Installing CUDA toolkit and dependencies via NVIDIA PyIndex"
uv pip install --upgrade --no-build-isolation \
  nvidia-pyindex
uv pip install --upgrade --no-build-isolation \
  "cuda-toolkit[all]~=${CUDA_FULL_VERSION}" \
  "cupy-cuda${CUDA_VERSION}x" \
  "nvidia-cudss-cu${CUDA_VERSION}" \
  "nvidia-nccl-cu${CUDA_VERSION}" \
  "nvidia-cudnn-cu${CUDA_VERSION}" \
  "nvidia-cusparselt-cu${CUDA_VERSION}"
setup_nccl_env
setup_cudnn_env
setup_cudss_env
setup_cusparselt_env
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
missing_components=()
[[ "${CUDNN_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDNN")
[[ "${CUDSS_AVAILABLE}" -ne 1 ]] && missing_components+=("cuDSS")
[[ "${CUSPARSELT_AVAILABLE}" -ne 1 ]] && missing_components+=("cuSPARSELt")
if (( ${#missing_components[@]} )); then
log_info "Missing required CUDA libraries: ${missing_components[*]}"
exit 1
fi
fi
if [ ! -d build ]; then
  log_step "Creating build directory"
  mkdir -p build
else
  log_skip "Build directory already exists"
fi
pushd build >/dev/null
# ============================================================
# 1. Build PyTorch from source
# ============================================================
ensure_repo "PyTorch" "pytorch" "https://github.com/pytorch/pytorch.git" "${PYTORCH_TAG}"
build_pytorch=1
existing_torch_version=""
existing_torch_cuda=""
if existing_torch_version=$(python_module_version torch 2>/dev/null); then
  existing_torch_cuda=$(python_torch_cuda_version 2>/dev/null) || existing_torch_cuda=""
  if [[ "${existing_torch_version}" == "${PYTORCH_VERSION}"* && "${existing_torch_cuda}" == "${CUDA_FULL_VERSION}" ]]; then
    log_skip "PyTorch ${PYTORCH_TAG} already installed for CUDA ${existing_torch_cuda}"
    build_pytorch=0
  else
    log_info "Existing PyTorch ${existing_torch_version:-<unknown>} built for CUDA ${existing_torch_cuda:-<unknown>}; expected CUDA ${CUDA_FULL_VERSION}. Rebuilding from source."
    python -m pip uninstall -y torch torchvision torchaudio >/dev/null 2>&1 || true
  fi
fi
if [[ "${build_pytorch}" -eq 1 ]]; then
log_step "Building and installing PyTorch ${PYTORCH_TAG} (all cores, full optimization)"
pushd pytorch >/dev/null
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
export USE_CUDA=1
export USE_NCCL="${NCCL_AVAILABLE}"
export USE_SYSTEM_NCCL="${NCCL_AVAILABLE}"
else
export USE_CUDA=0
export USE_NCCL=0
export USE_SYSTEM_NCCL=0
fi
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
export USE_CUDNN="${CUDNN_AVAILABLE}"
export USE_CUDSS="${CUDSS_AVAILABLE}"
export USE_CUSPARSELT="${CUSPARSELT_AVAILABLE}"
else
export USE_CUDNN=0
export USE_CUDSS=0
export USE_CUSPARSELT=0
fi
export USE_MKLDNN=1
export BUILD_TEST=0
export BUILD_CAFFE2=0
export USE_MPS=0
export USE_FBGEMM=1
export USE_QNNPACK=0
export USE_XNNPACK=1
PY_CMAKE_ARGS=(
"-DPython3_EXECUTABLE=${VENV_ABS_PATH}/bin/python"
"-DPython3_LIBRARY=${PY_LIB}"
"-DPython3_INCLUDE_DIR=${PY_INCLUDE}"
"-DPython_LIBRARY=${PY_LIB}"
"-DPython_INCLUDE_DIR=${PY_INCLUDE}"
"-DPython3_FIND_STRATEGY=LOCATION"
"-DPython3_ROOT_DIR=${VENV_ABS_PATH}"
)
OLD_CMAKE_ARGS="${CMAKE_ARGS:-}"
export CMAKE_ARGS="${PY_CMAKE_ARGS[*]} ${OLD_CMAKE_ARGS}"
OLD_CFLAGS="${CFLAGS:-}"
OLD_CPPFLAGS="${CPPFLAGS:-}"
export CFLAGS="-I${PY_INCLUDE} ${OLD_CFLAGS}"
export CPPFLAGS="-I${PY_INCLUDE} ${OLD_CPPFLAGS}"
python setup.py install
export CMAKE_ARGS="${OLD_CMAKE_ARGS}"
export CFLAGS="${OLD_CFLAGS}"
export CPPFLAGS="${OLD_CPPFLAGS}"
popd >/dev/null
fi
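# NOTE: this step relies on PyTorch's build picking the Python hints up from
# the CMAKE_ARGS/CFLAGS/CPPFLAGS environment (an assumption of this script,
# not a documented PyTorch contract); the OLD_* copies restore the caller's
# environment afterwards.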
# ============================================================
# 2. Verify torch build
# ============================================================
log_step "Verifying PyTorch build"
python - <<'EOF_VERIFY'
import torch
print("torch:", torch.__version__, "CUDA:", torch.version.cuda)
print("Devices:", torch.cuda.device_count(), "Arch list:", torch.cuda.get_arch_list())
EOF_VERIFY
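# Expected shape of the output on a successful GH100 CUDA build
# (illustrative values):
#   torch: 2.9.0a0+gitXXXXXXX CUDA: 13.0
#   Devices: 1 Arch list: ['sm_90']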
# ============================================================
# 3. Build vLLM
# ============================================================
ensure_repo "vLLM" "vllm" "https://github.com/vllm-project/vllm.git" "v${VLLM_VERSION}"
patch_vllm_repo
if python_module_version_matches vllm "${VLLM_VERSION}"; then
  log_skip "vLLM ${VLLM_VERSION} already installed"
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping vLLM build because nvcc tooling is unavailable"
else
  log_step "Building and installing vLLM ${VLLM_VERSION}"
  pushd vllm >/dev/null
  export VLLM_BUILD_CUDA_EXT=1
  python -m pip install -v --no-build-isolation --no-deps .
  popd >/dev/null
  VLLM_INSTALLED=1
  export VLLM_INSTALLED
fi
# cwd is build/ at this point, so the vLLM checkout is at ./vllm
uv pip install -r vllm/requirements/common.txt
uv pip install numba
# ============================================================
# 4. Build FlashInfer (core only)
# ============================================================
ensure_repo "FlashInfer" "flashinfer" "https://github.com/flashinfer-ai/flashinfer.git" "v${FLASHINFER_VERSION}"
if python_module_version_matches flashinfer "${FLASHINFER_VERSION}"; then
  log_skip "FlashInfer ${FLASHINFER_VERSION} already installed"
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
elif [[ "${CUDA_AVAILABLE}" -eq 0 ]]; then
  log_info "Skipping FlashInfer build because nvcc tooling is unavailable"
else
  log_step "Building and installing FlashInfer ${FLASHINFER_VERSION}"
  pushd flashinfer >/dev/null
  python -m pip install -v .
  popd >/dev/null
  FLASHINFER_INSTALLED=1
  export FLASHINFER_INSTALLED
fi
# ============================================================
# 5. Install LMCache
# ============================================================
if python_module_version_matches lmcache "${LMCACHE_VERSION}"; then
  log_skip "LMCache ${LMCACHE_VERSION} already installed"
else
  log_step "Installing LMCache ${LMCACHE_VERSION}"
  uv pip install -U --prerelease=allow --no-deps "lmcache==${LMCACHE_VERSION}"
fi
# ============================================================
# 6. Sanity check
# ============================================================
log_step "Verifying installation"
python - <<'EOF_SANITY'
import importlib
import os
def show_status(name, module=None, extra=""):
status = f"{name}:"
if module is None:
status += f" {extra}" if extra else " not installed"
else:
version = getattr(module, "__version__", "<unknown>")
status += f" {version}"
if extra:
status += f" {extra}"
print(status)
torch = importlib.import_module("torch")
show_status("torch", torch, f"(CUDA {torch.version.cuda})")
if os.environ.get("VLLM_INSTALLED") == "1":
vllm = importlib.import_module("vllm")
show_status("vllm", vllm)
else:
show_status("vllm", extra="skipped (nvcc unavailable)")
if os.environ.get("FLASHINFER_INSTALLED") == "1":
flashinfer = importlib.import_module("flashinfer")
show_status("flashinfer", flashinfer)
else:
show_status("flashinfer", extra="skipped (nvcc unavailable)")
lmcache = importlib.import_module("lmcache")
show_status("lmcache", lmcache)
EOF_SANITY
popd >/dev/null
echo
echo "[✓] Native build complete. Environment in ${VENV_PATH}"
echo "[i] Activate it with: source ${VENV_PATH}/bin/activate"
if [[ "${CUDA_AVAILABLE}" -eq 1 ]]; then
echo "[→] Built for GH100 (SM 9.0) using all ${MAX_JOBS} cores."
else
echo "[→] Completed CPU-only build (CUDA toolchain not detected)."
fi