Output from deploying kubernetes-sigs/wg-ai-gateway#31:
$> curl -s http://172.18.255.240/v1/models | jq
{
"object": "list",
"data": [
{
"id": "gpt-4-external",
"object": "model",
"created": 1770960766,
"owned_by": "vllm",
"root": "gpt-4-external",
"parent": null
}
]
}
$> curl -s -X POST http://172.18.255.240/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model":"gpt-4-external","messages":[{"role":"user","content":"Hello"}],"max_tokens":10}' | jq
{
"id": "chatcmpl-a962f048-5ac2-4302-9cbb-8dc12543f9bb",
"created": 1770960776,
"model": "gpt-4-external",
"usage": {
"prompt_tokens": 1,
"completion_tokens": 8,
"total_tokens": 9
},
"object": "chat.completion",
"do_remote_decode": false,
"do_remote_prefill": false,
"remote_block_ids": null,
"remote_engine_id": "",
"remote_host": "",
"remote_port": 0,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "Testing@, #testing 1$ ,"
}
}
]
}
$> curl -s http://172.18.255.240/get | jq
{
"args": {},
"headers": {
"Accept": "*/*",
"Host": "httpbin.org",
"User-Agent": "curl/8.5.0",
"X-Amzn-Trace-Id": "Root=1-698eb792-6a4bb1e81d27b52660623096",
"X-Envoy-Expected-Rq-Timeout-Ms": "15000"
},
"origin": "13.217.18.80",
"url": "http://httpbin.org/get"
}