Skip to main content
def tts_websocket_response_handling(client: Cartesia) -> None:
    """Stream one TTS request over a WebSocket and handle each response type.

    Sends a single generation request, then iterates over the connection:
    audio chunks are appended to a timestamped ``.pcm`` file in the current
    working directory, word timestamps are handed to ``process_timestamps``,
    and iteration stops at the first response that signals completion.

    Args:
        client: Cartesia client used to open the TTS WebSocket connection.

    Raises:
        RuntimeError: If the server sends a response whose type is ``error``.
    """
    import datetime

    # Named once so the request and the playback hint below cannot drift apart.
    sample_rate = 44100

    with client.tts.websocket_connect() as connection:
        connection.send({
            "model_id": "sonic-3",
            "transcript": "Hello, world!",
            "voice": {"mode": "id", "id": "voice-id"},
            "output_format": {
                "container": "raw",
                "encoding": "pcm_f32le",
                "sample_rate": sample_rate,
            },
        })

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"tts_websocket_response_handling_{timestamp}.pcm"

        # Write chunks to file as they arrive.
        # You could also send chunks over the network, play them in real-time, etc.
        with open(filename, "wb") as f:
            for response in connection:
                # Errors must be checked before any completion check: an error
                # response may also carry done=True, and testing `done` first
                # would end the loop silently and hide the failure.
                if response.type == "error":
                    raise RuntimeError(response.error)
                if response.type == "chunk" and response.audio:
                    f.write(response.audio)
                elif response.type == "timestamps":
                    process_timestamps(response.word_timestamps)
                elif response.type == "done" or response.done:
                    break

        print(f"Saved audio to {filename}")
        print(f"Play with: ffplay -f f32le -ar {sample_rate} {filename}")
From cartesia-python/examples/examples.py:427

Run this example

cd cartesia-python
CARTESIA_API_KEY=YOUR_KEY python3 examples/examples.py tts_websocket_response_handling