from cartesia import Cartesia
# --- Inputs: fill these in before running the script ---

# API key handed to the Cartesia client.
your_api_key: str = ""

# Embedding vector of the existing voice; used as the source voice when the
# reference audio sample is generated.
your_voice_embedding: list[float] = []

# Language code sent with every TTS request and with the clone request.
language: str = "en"

# Text spoken in both the reference sample and the final sample. The value
# starts and ends with a newline, one sentence per line.
transcript: str = (
    "\n"
    "It's nice to meet you.\n"
    "Hope you're having a great day!\n"
    "Could we reschedule our meeting tomorrow?\n"
    "Please call me back as soon as possible.\n"
)

# Model used to render the reference sample from the embedding.
source_tts_model_id: str = "sonic"
# Single client instance shared by every request in this script.
client = Cartesia(api_key=your_api_key)

# Step 1: generate an audio sample
# Render the transcript with the embedding-based voice on the source model.
# The resulting audio is what the voice gets cloned from in the next step.
print(f"Generating audio sample {source_tts_model_id=}")
source_voice_spec = {"mode": "embedding", "embedding": your_voice_embedding}
source_output_format = {
    "container": "wav",
    "encoding": "pcm_f32le",
    "sample_rate": 44100,
}
# The result is consumed as an iterable of byte chunks later in the script.
source_audio_iterator = client.tts.bytes(
    model_id=source_tts_model_id,
    transcript=transcript,
    voice=source_voice_spec,
    language=language,
    output_format=source_output_format,
)
# Step 2: clone a voice
# The clone endpoint receives the whole reference clip as one bytes object,
# so the chunks produced in step 1 are concatenated first.
print("Cloning a voice")
source_clip = b"".join(source_audio_iterator)
voice = client.voices.clone(
    clip=source_clip,
    name="My Voice",
    language=language,
    mode="similarity",
)
print(f"Cloned voice {voice.id}")
# The cloned voice is addressed by its id from here on. Synthesize the same
# transcript with the target model and save the result next to the script.
migrate_to_model = "sonic-3"
generated_sample_file_name = f"{migrate_to_model}_{voice.id}.wav"

cloned_voice_spec = {"mode": "id", "id": voice.id}
cloned_output_format = {
    "container": "wav",
    "encoding": "pcm_f32le",
    "sample_rate": 44100,
}
cloned_audio_iterator = client.tts.bytes(
    model_id=migrate_to_model,
    transcript=transcript,
    voice=cloned_voice_spec,
    language=language,
    output_format=cloned_output_format,
)

# Write the audio to disk chunk by chunk as the iterator yields it.
with open(generated_sample_file_name, "wb") as wav_file:
    wav_file.writelines(cloned_audio_iterator)

print(f"Listen to your new voice: {generated_sample_file_name}")
# Optional convenience: play the generated sample with ffplay when it is
# available. Playback is best-effort; the WAV file has already been written,
# so a missing player must never abort the script.
import subprocess

# Flags ask ffplay to run quietly, without a display window, and to exit
# once playback finishes.
ffplay_command = [
    "ffplay",
    "-loglevel",
    "quiet",
    "-autoexit",
    "-nodisp",
    generated_sample_file_name,
]
try:
    # Only the launch itself is guarded: subprocess raises FileNotFoundError
    # when the executable cannot be found.
    subprocess.run(ffplay_command, check=False)
except FileNotFoundError:
    # Tell the user why nothing played instead of failing silently.
    print(
        "ffplay was not found; open "
        f"{generated_sample_file_name} in any audio player to listen."
    )