Skip to main content
/**
 * Synthesizes a fixed sentence over a Cartesia TTS WebSocket, collects the
 * raw `pcm_f32le` audio chunks, and plays them through the Web Audio API as a
 * single mono buffer.
 *
 * The returned promise resolves once playback has been *started* (or once it
 * is known that there is nothing to play); it does not wait for the audio to
 * finish. The `AudioContext` created here is closed when playback ends, when
 * no audio was received, or when any step fails.
 *
 * @param client - Cartesia client used to open the TTS WebSocket.
 * @throws Re-throws any error raised while opening the socket, sending the
 *   transcript, receiving events, or building the audio buffer.
 */
async function ttsWebsocketStreamAudio(client: Cartesia): Promise<void> {
  const sampleRate = 44100;
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;

  // Created before any await, exactly as early as the original example did.
  const audioCtx = new AudioContext({ sampleRate });

  // Browsers cap the number of live AudioContexts, so always release this one.
  // close() may reject if the context is already closed; that is harmless here.
  const releaseAudioCtx = (): void => {
    void audioCtx.close().catch(() => undefined);
  };

  try {
    const chunks: Float32Array[] = [];
    const ws = await client.tts.websocket();
    ws.on('error', (err) => console.error(err.message));

    try {
      const ctx = ws.context({
        model_id: 'sonic-latest',
        voice: { mode: 'id', id: '6ccbfb76-1fc6-48f7-b71d-91ac6298247b' },
        output_format: { container: 'raw', encoding: 'pcm_f32le', sample_rate: sampleRate },
        language: 'en',
      });

      await ctx.push({
        transcript: 'This is being streamed in real time from a WebSocket connection.',
      });
      await ctx.no_more_inputs();

      for await (const event of ctx.receive()) {
        if (event.type === 'chunk' && event.audio) {
          // event.audio is a raw byte view of f32le samples.
          const { buffer, byteOffset, byteLength } = event.audio;

          // Only whole samples are usable; trailing partial bytes are dropped.
          const sampleCount = Math.floor(byteLength / bytesPerSample);
          if (sampleCount === 0) {
            continue;
          }

          // Copy the bytes instead of viewing them in place: a Float32Array
          // view requires a byteOffset that is a multiple of 4 (otherwise the
          // constructor throws RangeError), and a copy also stays valid if the
          // underlying buffer is reused for later messages.
          const end = byteOffset + sampleCount * bytesPerSample;
          chunks.push(new Float32Array(buffer.slice(byteOffset, end)));
        } else if (event.type === 'error') {
          console.error(event.title, event.message);
        }
      }
    } finally {
      // Always release the socket, even when sending or receiving fails.
      ws.close();
    }

    const totalSamples = chunks.reduce((sum, c) => sum + c.length, 0);
    if (totalSamples === 0) {
      // createBuffer() rejects a zero-length buffer, so stop here instead.
      console.warn('No audio was received; nothing to play.');
      releaseAudioCtx();
      return;
    }

    // Combine all chunks into a single mono AudioBuffer.
    const audioBuffer = audioCtx.createBuffer(1, totalSamples, sampleRate);
    const channelData = audioBuffer.getChannelData(0);

    let offset = 0;
    for (const chunk of chunks) {
      channelData.set(chunk, offset);
      offset += chunk.length;
    }

    const source = audioCtx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioCtx.destination);
    // Free the audio hardware once the clip has finished playing.
    source.onended = releaseAudioCtx;
    source.start();
  } catch (err) {
    releaseAudioCtx();
    throw err;
  }
}
From cartesia-js/examples/browser_examples.ts:120

Run this example

This example runs in the browser. See the Next.js example for a working setup.