Skip to content

Streaming

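The OpenResponses API supports streaming, so you can receive tokens as they are generated instead of waiting for the complete response. Set `stream: true` in the request body, then read the response body as a stream of server-sent events (SSE). Each `data:` line carries one JSON-encoded event; events of type `response.output_text.delta` contain the text chunks that you can display incrementally. The examples below show the same request first in Python and then in JavaScript.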

import requests
import json

# Endpoint that creates a response.
url = "https://llm.siraya.pro/v1/responses"

# Replace the <API_KEY> placeholder with a real key before running.
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer <API_KEY>",
}

# The single user message that makes up the request input.
user_message = {
    "type": "message",
    "role": "user",
    "content": "Write a haiku about debugging code.",
}

# "stream": True asks the API to answer with server-sent events instead of
# one complete JSON document.
data = {
    "model": "google/gemini-3-flash",
    "input": [user_message],
    "stream": True,
}

# Stream the response and print each text delta as soon as it arrives.
# stream=True stops requests from buffering the whole body up front, and the
# `with` block releases the connection even if parsing fails midway.
with requests.post(url, headers=headers, json=data, stream=True) as response:
    # Fail fast on 4xx/5xx: an error body is not an event stream.
    response.raise_for_status()
    # Server-sent events are always UTF-8. Without this, requests falls back
    # to ISO-8859-1 for text/* responses that omit a charset (garbling
    # non-ASCII text), or yields raw bytes when it cannot infer an encoding,
    # which would make the str comparison below raise TypeError.
    response.encoding = "utf-8"
    for line in response.iter_lines(decode_unicode=True):
        # Ignore blank separator lines and any line that is not a data field.
        if not line or not line.startswith("data:"):
            continue
        data_str = line[5:].strip()  # 5 == len("data:")
        # A literal "[DONE]" payload is an end-of-stream marker, not JSON.
        if not data_str or data_str == "[DONE]":
            continue
        event = json.loads(data_str)
        if event.get("type") == "response.output_text.delta":
            # Fall back to "" so an event without a delta never prints "None".
            print(event.get("delta") or "", end="", flush=True)
// JavaScript version of the same request. Assumes an ES module context
// (top-level await) and a runtime that provides a global fetch().
// `stream: true` asks the API to answer with server-sent events.
const response = await fetch('https://llm.siraya.pro/v1/responses', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    // Replace the <API_KEY> placeholder with a real key before running.
    Authorization: `Bearer <API_KEY>`,
  },
  body: JSON.stringify({
    model: 'google/gemini-3-flash',
    input: [
      {
        type: 'message',
        role: 'user',
        content: 'Write a haiku about debugging code.',
      },
    ],
    stream: true,
  }),
});

// fetch() only rejects on network failure, so HTTP errors must be checked
// explicitly; an error body is not an event stream and would otherwise be
// fed to the SSE parser.
if (!response.ok) {
  const detail = await response.text();
  throw new Error(`Request failed with HTTP ${response.status}: ${detail}`);
}

// Read the SSE body incrementally and print each text delta as it arrives.
const reader = response.body.getReader();
const decoder = new TextDecoder();

// Holds the trailing partial line between reads: network chunks do not align
// with line boundaries, so a single event line may be split across two reads.
let buffer = '';

// Parse one complete SSE line and write its text delta, if it carries one.
const handleLine = (line) => {
  if (!line.startsWith('data:')) return;
  // 'data:' is 5 characters long; trim() drops the optional space after the
  // colon as well as any trailing '\r' from CRLF line endings.
  const data = line.slice(5).trim();
  // A literal '[DONE]' payload is an end-of-stream marker, not JSON.
  if (!data || data === '[DONE]') return;
  const event = JSON.parse(data);
  if (event.type === 'response.output_text.delta') {
    // write() throws on undefined, so fall back to an empty string.
    process.stdout.write(event.delta ?? '');
  }
};

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  // { stream: true } keeps multi-byte UTF-8 sequences that straddle two
  // chunks intact instead of emitting replacement characters.
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  // The last element is an incomplete line (or '' if the chunk ended on a
  // newline); keep it for the next read.
  buffer = lines.pop();

  for (const line of lines) {
    handleLine(line);
  }
}

// Flush any bytes still held by the decoder and handle a final line that
// arrived without a trailing newline.
buffer += decoder.decode();
if (buffer) handleLine(buffer);

Streaming events

  • response.created - Response initialized
  • response.output_text.delta - Text chunk received
  • response.output_text.done - Text generation complete
  • response.completed - Full response complete with usage stats