WebSocket Flushing

Python
Python (Async)
TypeScript

def tts_websocket_flushing(client: Cartesia) -> None:
    """Demonstrate manual flushing to separate audio from different transcripts.

    Pushes two transcripts on a single WebSocket context with an explicit
    flush between them, then writes the received audio chunks to one ``.pcm``
    file per ``flush_id`` (relative to the current working directory) and
    prints an ``ffplay`` command for each file.

    Args:
        client: Cartesia client used to open the TTS WebSocket connection.
    """
    import datetime

    from typing_extensions import IO

    transcripts = ["Stay hungry, ", "stay foolish."]

    with client.tts.websocket_connect() as ws:
        ctx = ws.context(
            model_id="sonic-latest",
            voice={"mode": "id", "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b"},
            output_format={"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
            language="en",
        )  # Auto-generates context_id

        # 1. Send first transcript
        print("Sending first transcript...")
        ctx.push(transcripts[0])

        # 2. Flush! This forces all buffered audio for the first transcript to be generated
        # and increments the flush_id counter on the server.
        print("Flushing...")
        ctx.push("", flush=True)

        # 3. Send second transcript
        print("Sending second transcript...")
        ctx.push(transcripts[1])

        ctx.no_more_inputs()

        # A shared timestamp keeps the files from one run grouped together.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # We'll save audio to separate files based on flush_id
        files: dict[int, IO[bytes]] = {}

        try:
            for response in ctx.receive():
                if response.type == "chunk" and response.audio:
                    # Get flush_id from response (defaults to 0 if not present)
                    flush_id = response.flush_id or 0

                    # Files are opened lazily, the first time a flush_id is seen.
                    if flush_id not in files:
                        filename = f"tts_flush_{flush_id}_{timestamp}.pcm"
                        files[flush_id] = open(filename, "wb")
                        print(f"Created new file for flush_id {flush_id}: {filename}")

                    files[flush_id].write(response.audio)

                elif response.type == "flush_done":
                    print(f"Flush done received for flush_id: {response.flush_id}")

                elif response.type == "error":
                    print(f"error: {response.message or response.title}")
        finally:
            # Close every opened file even if receiving or writing fails
            # part-way through, so no file handle is leaked.
            for f in files.values():
                f.close()

        print("\nFinished.")
        print("You can play the generated audio files with these commands:")
        # ``f.name`` is still readable after the file has been closed.
        for flush_id, f in files.items():
            print(f"  Flush ID {flush_id}: ffplay -f s16le -ar 44100 {f.name}")

From cartesia-python/examples/examples.py:264

async def tts_websocket_flushing_async(client: AsyncCartesia) -> None:
    """Demonstrate manual flushing with the async client.

    Pushes two transcripts on a single WebSocket context with an explicit
    flush between them, then writes the received audio chunks to one ``.pcm``
    file per ``flush_id`` (relative to the current working directory) and
    prints an ``ffplay`` command for each file.

    Args:
        client: Async Cartesia client used to open the TTS WebSocket connection.
    """
    import datetime

    from typing_extensions import IO

    transcripts = ["First transcript.", "Second transcript."]

    async with client.tts.websocket_connect() as ws:
        ctx = ws.context(
            model_id="sonic-latest",
            voice={"mode": "id", "id": "6ccbfb76-1fc6-48f7-b71d-91ac6298247b"},
            output_format={"container": "raw", "encoding": "pcm_s16le", "sample_rate": 44100},
            language="en",
        )

        # 1. Send first transcript
        print("Sending first transcript...")
        await ctx.push(transcripts[0])

        # 2. Flush!
        print("Flushing...")
        await ctx.push("", flush=True)

        # 3. Send second transcript
        print("Sending second transcript...")
        await ctx.push(transcripts[1])

        await ctx.no_more_inputs()

        # A shared timestamp keeps the files from one run grouped together.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # Audio is saved to separate files keyed by flush_id.
        files: dict[int, IO[bytes]] = {}

        try:
            async for response in ctx.receive():
                if response.type == "chunk" and response.audio:
                    # A missing flush_id is treated as 0.
                    flush_id = response.flush_id or 0

                    # Files are opened lazily, the first time a flush_id is seen.
                    if flush_id not in files:
                        filename = f"tts_flush_async_{flush_id}_{timestamp}.pcm"
                        files[flush_id] = open(filename, "wb")
                        print(f"Created new file for flush_id {flush_id}: {filename}")

                    files[flush_id].write(response.audio)

                elif response.type == "flush_done":
                    print(f"Flush done received for flush_id: {response.flush_id}")

                elif response.type == "error":
                    print(f"error: {response.message or response.title}")
        finally:
            # Close every opened file even if receiving or writing fails
            # part-way through, so no file handle is leaked.
            for f in files.values():
                f.close()

        print("\nFinished.")
        print("You can play the generated audio files with these commands:")
        # ``f.name`` is still readable after the file has been closed.
        for flush_id, f in files.items():
            print(f"  Flush ID {flush_id}: ffplay -f s16le -ar 44100 {f.name}")

From cartesia-python/examples/async_examples.py:181

/**
 * Manual-flush example: pushes two transcripts on one WebSocket context with
 * a flush in between, then writes the received audio chunks to one `.pcm`
 * file per `flush_id` and prints an `ffplay` command for each file.
 *
 * Every received event is logged (with audio payloads redacted). An `error`
 * event aborts the run by throwing; the output streams and the socket are
 * released in either case.
 */
async function ttsWebsocketFlushing(client: Cartesia): Promise<void> {
  const socket = await client.tts.websocket();
  socket.on('error', (err) => console.error('WS error:', err.message));

  // One output stream per flush_id, created the first time that id is seen.
  const streamsByFlushId = new Map<number, fs.WriteStream>();

  try {
    const context = socket.context({
      model_id: 'sonic-latest',
      voice: { mode: 'id', id: '6ccbfb76-1fc6-48f7-b71d-91ac6298247b' },
      output_format: { container: 'raw', encoding: 'pcm_f32le', sample_rate: 44100 },
      language: 'en',
    });

    // Step 1: first transcript.
    console.log('Sending first transcript...');
    await context.push({ transcript: 'Stay hungry, ' });

    // Step 2: flush, so all buffered audio for the first transcript is generated.
    console.log('Flushing...');
    await context.flush();

    // Step 3: second transcript.
    console.log('Sending second transcript...');
    await context.push({ transcript: 'stay foolish.' });

    await context.no_more_inputs();

    // A single timestamp shared by all files written during this run.
    const runStamp = timestamp();

    for await (const event of context.receive()) {
      // Log a shallow copy of each event with bulky payload fields masked out.
      const redacted: Record<string, unknown> = { ...event };
      for (const key of ['data', 'audio']) {
        if (redacted[key]) redacted[key] = '[...]';
      }
      console.log('Event:', JSON.stringify(redacted));

      if (event.type === 'error') {
        throw new Error(`${event.title}: ${event.message}`);
      }
      if (event.type !== 'chunk' || !event.audio) {
        continue;
      }

      // Chunks without a flush_id are grouped under 0.
      const id = event.flush_id ?? 0;
      let out = streamsByFlushId.get(id);
      if (out === undefined) {
        out = fs.createWriteStream(`tts_flush_${id}_${runStamp}.pcm`);
        streamsByFlushId.set(id, out);
      }
      out.write(event.audio);
    }

    console.log('\nFinished. Play the generated audio files with:');
    streamsByFlushId.forEach((stream, id) => {
      console.log(`  Flush ID ${id}: ffplay -f f32le -ar 44100 ${stream.path}`);
    });
  } finally {
    // Always end the output streams and close the socket, even after a failure.
    streamsByFlushId.forEach((stream) => {
      stream.end();
    });
    socket.close();
  }
}

From cartesia-js/examples/node_examples.ts:203

Run this example

Python
Python (Async)
TypeScript

git clone --branch v3.2.0 https://github.com/cartesia-ai/cartesia-python
cd cartesia-python
uv sync
CARTESIA_API_KEY=YOUR_KEY uv run examples/examples.py tts_websocket_flushing

git clone --branch v3.2.0 https://github.com/cartesia-ai/cartesia-python
cd cartesia-python
uv sync
CARTESIA_API_KEY=YOUR_KEY uv run examples/async_examples.py tts_websocket_flushing_async

git clone --branch v3.2.0 https://github.com/cartesia-ai/cartesia-js
cd cartesia-js
pnpm i
CARTESIA_API_KEY=YOUR_KEY pnpm tsn examples/node_examples.ts ttsWebsocketFlushing

STT

TTS Generate

TTS WebSocket

TTS SSE

Voices

Other

Browser

Next.js

Run this example

Run this example

Run this example