The following Docker command, tested on NVIDIA H100 GPUs, deploys Hugging Face LLMs for serving with vLLM.
docker run --runtime nvidia --gpus all \
-v ~/.cache/huggingface:/root/.cache/huggingface \
--env "HUGGING_FACE_HUB_TOKEN=<token>" \
-p 8002:8000 \
--ipc=host \