Streaming
The OpenResponses API supports streaming to receive tokens as they're generated instead of waiting for the complete response. Set stream: true in your request, then read the response body as a stream of server-sent events. Each event contains a response chunk that you can display incrementally.
import requests
import json

# Endpoint and auth for the OpenResponses API.
API_URL = "https://llm.siraya.pro/v1/responses"
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>",
}

# stream=True asks the server to emit server-sent events (SSE)
# instead of one buffered JSON response.
PAYLOAD = {
    "model": "google/gemini-3-flash",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": "Write a haiku about debugging code.",
        }
    ],
    "stream": True,
}

# stream=True on the requests side keeps the connection open so lines
# can be consumed as they arrive; the `with` block closes it afterwards.
with requests.post(API_URL, headers=HEADERS, json=PAYLOAD, stream=True) as resp:
    resp.raise_for_status()
    for raw_line in resp.iter_lines(decode_unicode=True):
        # SSE data lines look like "data: {...}"; skip keep-alives/blanks.
        if not raw_line or not raw_line.startswith("data:"):
            continue
        body = raw_line[len("data:"):].strip()
        # "[DONE]" is the end-of-stream sentinel, not JSON.
        if not body or body == "[DONE]":
            continue
        event = json.loads(body)
        if event.get("type") == "response.output_text.delta":
            # Print each text delta as it arrives, without buffering.
            print(event.get("delta"), end="", flush=True)
// Stream a response from the OpenResponses API as server-sent events.
const response = await fetch('https://llm.siraya.pro/v1/responses', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: `Bearer <API_KEY>`,
  },
  body: JSON.stringify({
    model: 'google/gemini-3-flash',
    input: [
      {
        type: 'message',
        role: 'user',
        content: 'Write a haiku about debugging code.',
      },
    ],
    stream: true,
  }),
});

const reader = response.body.getReader();
const decoder = new TextDecoder();
// Carry-over buffer: a network read can end in the middle of an SSE line,
// so we only parse complete lines and keep the remainder for the next read.
let buffer = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // { stream: true } keeps partial multi-byte UTF-8 sequences pending
  // instead of decoding them as replacement characters.
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  // The last element may be an incomplete line; hold it back.
  buffer = lines.pop();
  for (const line of lines) {
    if (!line.startsWith('data:')) continue;
    // slice(5) removes exactly "data:"; trim() tolerates an optional space,
    // so both "data: {...}" and "data:{...}" parse correctly.
    const data = line.slice(5).trim();
    // "[DONE]" is the end-of-stream sentinel, not JSON — don't parse it.
    if (!data || data === '[DONE]') continue;
    const event = JSON.parse(data);
    if (event.type === 'response.output_text.delta') {
      process.stdout.write(event.delta);
    }
  }
}
Streaming events
- `response.created` — Response initialized
- `response.output_text.delta` — Text chunk received
- `response.output_text.done` — Text generation complete
- `response.completed` — Full response complete with usage stats