Skip to main content
async function ttsWebsocketStreamAudio(client: Cartesia): Promise<void> {
  /**
   * Generate speech over a Cartesia TTS WebSocket, buffer the raw PCM
   * chunks, then combine them into a single AudioBuffer and play it via
   * the Web Audio API.
   *
   * NOTE(review): despite the original "real-time" wording, playback only
   * starts after the *entire* transcript has been synthesized; genuinely
   * streaming playback would have to schedule each chunk as it arrives.
   *
   * @param client - Authenticated Cartesia SDK client.
   */
  const sampleRate = 44100;
  const audioCtx = new AudioContext({ sampleRate });

  const ws = await client.tts.websocket();

  const chunks: Float32Array[] = [];

  try {
    for await (const event of ws.generate({
      model_id: 'sonic-3',
      transcript: 'This is being streamed in real time from a WebSocket connection.',
      voice: { mode: 'id', id: '6ccbfb76-1fc6-48f7-b71d-91ac6298247b' },
      output_format: { container: 'raw', encoding: 'pcm_f32le', sample_rate: sampleRate },
    })) {
      if (event.type === 'chunk' && event.audio) {
        // event.audio is raw little-endian f32 PCM. The view below shares
        // the transport's ArrayBuffer, which may be reused or detached once
        // the next message arrives — so copy the samples out with .slice().
        const view = new Float32Array(
          event.audio.buffer,
          event.audio.byteOffset,
          // Math.floor guards against a payload whose byte length is not a
          // multiple of 4 (a fractional length would throw a RangeError).
          Math.floor(event.audio.byteLength / 4),
        );
        chunks.push(view.slice());
      }
    }
  } finally {
    // Close the socket even if generation throws, so the connection
    // is never leaked on the error path.
    ws.close();
  }

  // Combine all chunks into a single mono AudioBuffer and play it.
  const totalSamples = chunks.reduce((sum, c) => sum + c.length, 0);
  if (totalSamples === 0) {
    // createBuffer(1, 0, …) throws NotSupportedError; nothing to play.
    return;
  }

  const audioBuffer = audioCtx.createBuffer(1, totalSamples, sampleRate);
  const channelData = audioBuffer.getChannelData(0);

  let offset = 0;
  for (const chunk of chunks) {
    channelData.set(chunk, offset);
    offset += chunk.length;
  }

  const source = audioCtx.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioCtx.destination);
  source.start();
}
From cartesia-js/examples/browser_examples.ts:78

Run this example

This example runs in the browser. See the Next.js example for a working setup.