Batch
This quickstart walks you through generating your first batch completions with Siraya AI.
Create a message batch
Create a batch of messages for asynchronous processing. All usage is charged at 50% of the standard API prices.
A Batch is composed of a list of requests. Each individual request consists of:
- A unique `custom_id` for identifying the Messages request
- A `params` object with the standard Messages API parameters
You can create a batch by passing this list into the requests parameter:
import requests
import json

# Siraya AI batch endpoint and auth headers.
API_URL = "https://llm.siraya.pro/v1/batches"
headers = {
    "Authorization": "Bearer <<API_KEY>>",
    "Content-Type": "application/json"
}


def build_batch_request(custom_id, prompt):
    """Build one batch entry: a unique custom_id plus standard Messages params.

    All three example requests share identical params except for the user
    prompt, so this helper removes the triplicated dict literal.
    """
    return {
        "custom_id": custom_id,
        "params": {
            "model": "openai/gpt-4o-mini-batch",
            "max_tokens": 1024,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "metadata": {"ANY_ADDITIONAL_PROPERTY": "text"},
            "stop_sequences": ["text"],
            "system": "text",
            "temperature": 1,
            "tool_choice": None,
            "tools": [],
            "top_k": 1,
            "top_p": 1,
            # Extended-thinking budget; presumably only honored by models
            # that support it — confirm against the provider's docs.
            "thinking": {"budget_tokens": 1024, "type": "enabled"}
        }
    }


# Map each unique custom_id to its prompt and build the request list in one pass.
prompts = {
    "my-request-01": "How to learn nestjs?",
    "my-request-02": "How to learn Reactjs?",
    "my-request-03": "How to learn Nextjs?",
}
data = {"requests": [build_batch_request(cid, q) for cid, q in prompts.items()]}

# json= lets requests serialize the payload itself (no manual json.dumps);
# timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(API_URL, headers=headers, json=data, timeout=30)
data = response.json()
print("Batch created:", json.dumps(data, indent=2, ensure_ascii=False))
In this example, three separate requests are batched together for asynchronous processing. Each request has a unique custom_id and contains the standard parameters you'd use for a Messages API call.
{
'batch': {
'cancelled_at': None,
'cancelling_at': None,
'completed_at': None,
'completion_window': '24h',
'created_at': 1765972352,
'endpoint': '',
'error_file_id': '',
'errors': None,
'expired_at': None,
'expires_at': 1766058749,
'failed_at': None,
'finalizing_at': None,
'id': 'batch_a34c321b-ed4b-4e91-ae29-7f02939d8962',
'in_progress_at': None,
'input_file_id': 'file-142b17fbff7d4a06a88ec9205ae143c9',
'metadata': None,
'object': 'batch',
'output_file_id': '',
'request_counts': {
'completed': 0,
'failed': 0,
'total': 0
},
'status': 'validating'
},
'batch_id': 'batch_a34c321b-ed4b-4e91-ae29-7f02939d8962',
'file': {
'bytes': 802,
'created_at': 1765972347,
'filename': 'batch.jsonl',
'id': 'file-142b17fbff7d4a06a88ec9205ae143c9',
'object': 'file',
'purpose': 'batch',
'status': 'processed'
},
'file_id': 'file-142b17fbff7d4a06a88ec9205ae143c9',
'task_id': 2,
'task_status': 'NOT_START'
}
Get status or results of a specific message batch
Get the batch status while it is still in progress, or stream the results in JSONL format once it has completed.
import requests
import json

# Insert your batch_id here
batch_id = "batch_a34c321b-ed4b-4e91-ae29-7f02939d8962"

headers = {
    "Authorization": "Bearer <<API_KEY>>",
    "Content-Type": "application/json"
}

# BUG FIX: the URL must be an f-string — without the f prefix the literal
# text "{batch_id}" was sent to the server instead of the real batch id.
response = requests.get(
    f"https://llm.siraya.pro/v1/batches/{batch_id}",
    headers=headers,
    timeout=30,  # avoid hanging forever on a stalled connection
)
print("Raw response:\n", response.text[:500])

# A completed batch streams its results as JSONL (one JSON object per line),
# while an in-progress batch returns a single JSON status object. Try the
# JSONL interpretation first, then fall back to plain JSON.
try:
    data = [json.loads(line) for line in response.text.splitlines() if line.strip()]
    print("\n✅ Parsed JSONL:")
    print(json.dumps(data, indent=2))
except json.JSONDecodeError:
    try:
        data = response.json()
        print("\n✅ Parsed JSON:")
        print(json.dumps(data, indent=2))
    except Exception as e:
        print("\n⚠️ Could not parse response:", e)
Cancel a specific batch
You can cancel a Batch that is currently processing using the cancel endpoint. Immediately after cancellation, a batch's processing_status will be canceling. Canceled batches end up with a status of ended and may contain partial results for requests that were processed before cancellation.
import requests
import json

batch_id = "batch_a34c321b-ed4b-4e91-ae29-7f02939d8962"

headers = {
    "Authorization": "Bearer <<API_KEY>>",
    "Content-Type": "application/json"
}

# POST to the cancel endpoint; timeout keeps the script from hanging
# forever on a stalled connection.
response = requests.post(
    f"https://llm.siraya.pro/v1/batches/{batch_id}/cancel",
    headers=headers,
    timeout=30,
)

# Both the success and failure paths print the JSON body, so parse and
# print it once — only the heading line differs by status code.
if response.status_code == 200:
    print("Batch canceled successfully:")
else:
    print(f"Failed to cancel batch ({response.status_code}):")
data = response.json()
print(json.dumps(data, indent=2, ensure_ascii=False))