Applies a TokenRateLimitPolicy (TRLP) to the MaaS gateway for the vSR route https://${MAAS_HOST}/v1/chat/completions.
$> export MAAS_HOST="maas.$(oc get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')"
export ACCESS_TOKEN=$(curl -sSk --oauth2-bearer "$(oc whoami -t)" \
--json '{"expiration": "10m"}' \
"https://${MAAS_HOST}/maas-api/v1/tokens" | jq -r .token)
# Test with auth (expect 200)
curl -sSk -X POST "https://${MAAS_HOST}/v1/chat/completions" \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H "Content-Type: application/json" \
-d '{"model":"auto","messages":[{"role":"user","content":"Hello"}]}' | jq .
{
"id": "chatcmpl-c17e2327-9497-491a-a696-a622da3d1d74",
"created": 1770618393,
"model": "Model-B",
"usage": {
"prompt_tokens": 1,
"completion_tokens": 42,
"total_tokens": 43
},
"object": "chat.completion",
"do_remote_decode": false,
"do_remote_prefill": false,
"remote_block_ids": null,
"remote_engine_id": "",
"remote_host": "",
"remote_port": 0,
"choices": [
{
"index": 0,
"finish_reason": "stop",
"message": {
"role": "assistant",
"content": "To be or not to be that is the question. Alas, poor Yorick! I knew him, Horatio: A fellow of infinite jest Today it is partially cloudy and raining. Today is a nice sunny day."
}
}
]
}
$> # Test without auth (expect 401)
curl -sSk -w "\nHTTP Status: %{http_code}\n" \
-X POST "https://${MAAS_HOST}/v1/chat/completions" \
-H "Content-Type: application/json" \
-d '{"model":"auto","messages":[{"role":"user","content":"test"}]}'
HTTP Status: 401
$> # Test TRLP (expect 429 once the token budget is exhausted; free tier = 100 tokens/min)
for i in $(seq 1 10); do
curl -sSk -o /dev/null -w "Request $i: %{http_code}\n" \
-X POST "https://${MAAS_HOST}/v1/chat/completions" \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H "Content-Type: application/json" \
-d '{"model":"auto","messages":[{"role":"user","content":"Write a long story about dragons"}]}'
done
Request 1: 200
Request 2: 200
Request 3: 429
Request 4: 429
Request 5: 429
Request 6: 429
Request 7: 429
Request 8: 429
Request 9: 429
Request 10: 429