Install dependencies, clone and build:
# Install build dependencies. ccache is a compiler cache; the libcurl
# development package goes with the -DLLAMA_CURL=ON option used below.
sudo apt install ccache libcurl4-openssl-dev
# Fetch the Vulkan headers and install them under /usr/local.
# Using -S/-B instead of `cd` keeps the shell in the starting directory, so
# llama.cpp is cloned next to Vulkan-Headers rather than inside it.
git clone https://github.com/KhronosGroup/Vulkan-Headers.git
cmake -S Vulkan-Headers -B Vulkan-Headers/build -DCMAKE_INSTALL_PREFIX=/usr/local
sudo cmake --build Vulkan-Headers/build --target install
# Clone llama.cpp over HTTPS (works without a GitHub SSH key) and configure
# it with the Vulkan backend, pointing CMake at the headers installed above.
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
cmake -B build -DGGML_VULKAN=ON -DLLAMA_CURL=ON -DGGML_JINJA=ON -DVulkan_INCLUDE_DIR=/usr/local/include
# Compile the configured tree in Release mode, one job per available CPU.
cmake --build build --config Release -j"$(nproc)"

# Start llama-server with the Qwen2.5-Coder 3B model from Hugging Face,
# listening on every interface (0.0.0.0) on port 8080.
# NOTE(review): a CMake build normally leaves the binary in build/bin/, so
# either add that directory to PATH or invoke ./build/bin/llama-server.
llama-server -hf bartowski/Qwen2.5-Coder-3B-Instruct-GGUF \
  --model Qwen2.5-Coder-3B-Instruct-Q4_K_M.gguf \
  --host 0.0.0.0 \
  --port 8080 \
  -c 8192 \
  -ngl 99

Install llama-vscode