Skip to main content
def infill_create(client: Cartesia, *args: str) -> None:
    """Create infill audio between two clips and save it as a WAV file.

    Args:
        client: Cartesia client whose ``tts.infill`` endpoint is called.
        *args: Positional CLI arguments, in order: the path of the audio
            clip that precedes the infill, the path of the clip that follows
            it, and one or more transcript words. Every argument after the
            first two is joined with single spaces to form the transcript.

    Raises:
        SystemExit: With exit code 1 (after printing usage) when fewer than
            three arguments are supplied.
    """
    from pathlib import Path

    if len(args) < 3:
        # The usage line must name this command, not a different example.
        print("Usage: infill_create <audio_file_before> <audio_file_after> <transcript>")
        sys.exit(1)

    left_file, right_file, *transcript_parts = args
    output_path = "infill_output.wav"

    # Can pass file paths directly (as Path objects)
    response = client.tts.infill(
        model_id="sonic-3",
        language="en",
        transcript=" ".join(transcript_parts),
        left_audio=Path(left_file),
        right_audio=Path(right_file),
        voice_id="6ccbfb76-1fc6-48f7-b71d-91ac6298247b",
        output_format={"container": "wav", "encoding": "pcm_s16le", "sample_rate": 44100},
    )
    response.write_to_file(output_path)
    print(f"Saved audio to {output_path}")
    print(f"Play with: ffplay -f wav {output_path}")
From cartesia-python/examples/examples.py:578

Run this example

git clone --branch v3.2.0 https://github.com/cartesia-ai/cartesia-python
cd cartesia-python
uv sync
CARTESIA_API_KEY=YOUR_KEY uv run examples/examples.py infill_create left.wav right.wav "transcript to infill"