Endpoint Examples
LLM
Chat
Sends a chat request to the specified provider and returns the model's response
POST /api/engine/chat/{provider}
curl --request POST \
--url http://localhost:8000/api/engine/chat/{provider} \
--header 'Content-Type: application/json' \
--data '{
"api_key": "<string>",
"model": "<string>",
"chat_input": "<string>",
"parameters": "<any>",
"is_stream": "<any>"
}'
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}
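api_key: API key to access the provider
model: Name of the provider's model to use (e.g. "gpt-3.5-turbo")
chat_input: Input message to send to the model (e.g. "Hello! Who are you?")
parameters: Model parameters for the request (e.g. temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
is_stream: Whether the response should be streamed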
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}
curl --request POST \
--url http://localhost:8000/api/engine/chat/{provider} \
--header 'Content-Type: application/json' \
--data '{
"api_key": "<string>",
"model": "<string>",
"chat_input": "<string>",
"parameters": "<any>",
"is_stream": "<any>"
}'
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}