Endpoint Examples
LLM
Chat
Sends a chat input to the specified provider's model and returns the generated chat completion
POST /api/engine/chat/{provider}
Copy
curl --request POST \
--url http://localhost:8000/api/engine/chat/{provider} \
--header 'Content-Type: application/json' \
--data '{
"api_key": "<string>",
"model": "<string>",
"chat_input": "<string>",
"parameters": "<any>",
"is_stream": "<any>"
}'
Copy
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}
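api_key: API key to access the provider
model: Name of the provider's model to use (for example, "gpt-3.5-turbo")
chat_input: The input message to send to the model
parameters: Model parameters for the request (for example, temperature, max_tokens, top_p, frequency_penalty, presence_penalty)
is_stream: Whether the response should be streamed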
Copy
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}
Copy
curl --request POST \
--url http://localhost:8000/api/engine/chat/{provider} \
--header 'Content-Type: application/json' \
--data '{
"api_key": "<string>",
"model": "<string>",
"chat_input": "<string>",
"parameters": "<any>",
"is_stream": "<any>"
}'
Copy
{
"id": "72f34d3b-f254-4950-9d6f-9f66e082fd2f",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"role": "assistant",
"function_call": null,
"tool_calls": null
}
}
],
"created": 1718622757,
"model": "gpt-3.5-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": null,
"session_id": null,
"chat_input": "Hello! Who are you?",
"chat_output": "Hello! I am a virtual assistant here to help with any questions or tasks you may have. How can I assist you today?",
"context": [
{
"role": "user",
"content": "Hello! Who are you?"
}
],
"provider": "openai",
"timestamp": 1718622757.612226,
"parameters": {
"temperature": 1,
"max_tokens": 2048,
"top_p": 1,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metrics": {
"input_tokens": 6,
"output_tokens": 26,
"total_tokens": 32,
"cost_usd": 0.000061,
"latency_s": 1.0556859970092773,
"time_to_first_token_s": 0.8302950859069824,
"inter_token_latency_s": 0.008317514702125831,
"tokens_per_second": 26.523038175483098
}
}
Assistant
Responses are generated using AI and may contain mistakes.