End-to-end Pro Voice Cloning (Python)

Use Cartesia’s REST API to create a Pro Voice Clone.

Prerequisites

  1. You have a Cartesia API key (export it as CARTESIA_API_KEY, the environment variable the script reads).
  2. You have at least 1M credits on your account.
  3. You have a folder called samples/ with one or more .wav files.
"""
End-to-end Pro Voice Cloning example.

Steps
-----
1. Create a dataset.
2. Upload audio files from samples/ to the dataset.
3. Kick off a fine-tune from that dataset.
4. Poll until fine-tune is completed.
5. Get the voices produced by the fine-tune.
"""
12
13import os
14import time
15from pathlib import Path
16
17import requests
18
# Root URL that every endpoint path in this script is appended to.
API_BASE = "https://api.cartesia.ai"

# Headers attached to every request: the pinned API version first, then the
# bearer-token credential. The key is read from the CARTESIA_API_KEY
# environment variable when the module is loaded, so an unset variable
# raises KeyError here rather than at the first request.
API_HEADERS = {"Cartesia-Version": "2025-04-16"}
API_HEADERS["Authorization"] = f"Bearer {os.environ['CARTESIA_API_KEY']}"
24
25
def create_dataset(name: str, description: str) -> str:
    """Create a dataset via POST /datasets and return its id.

    Args:
        name: Value sent as the dataset's ``name`` field.
        description: Value sent as the dataset's ``description`` field.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 60 seconds.
    """
    res = requests.post(
        f"{API_BASE}/datasets",
        headers=API_HEADERS,
        json={"name": name, "description": description},
        # requests applies no timeout by default, so a stalled connection
        # would block forever; 60 s matches create_fine_tune.
        timeout=60,
    )
    res.raise_for_status()
    return res.json()["id"]
35
36
def upload_file_to_dataset(dataset_id: str, path: Path, *, timeout: float = 60.0) -> None:
    """Upload one file via POST /datasets/{dataset_id}/files (multipart/form-data).

    Args:
        dataset_id: Id of the dataset that receives the file.
        path: Local file to upload; it is opened in binary mode and streamed
            as the ``file`` part of the multipart body.
        timeout: Seconds passed to ``requests`` as the request timeout.
            Raise it for very large files or slow links.

    Raises:
        OSError: If ``path`` cannot be opened.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server stalls for longer than ``timeout``.
    """
    with path.open("rb") as fp:
        res = requests.post(
            f"{API_BASE}/datasets/{dataset_id}/files",
            headers=API_HEADERS,
            # A (None, value) tuple makes requests send "purpose" as a plain
            # form field instead of a file part.
            files={"file": fp, "purpose": (None, "fine_tune")},
            # requests applies no timeout by default; without one a stalled
            # upload would hang the script indefinitely.
            timeout=timeout,
        )
    res.raise_for_status()
46
47
def create_fine_tune(dataset_id: str, *, name: str, language: str, model_id: str) -> str:
    """Start a fine-tune via POST /fine-tunes and return its id.

    Args:
        dataset_id: Id of the dataset to train on (sent as ``dataset``).
        name: Value sent as the fine-tune's ``name`` field.
        language: Value sent as the ``language`` field.
        model_id: Value sent as the ``model_id`` field.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    response = requests.post(
        f"{API_BASE}/fine-tunes",
        headers=API_HEADERS,
        json={
            "name": name,
            # The description is fixed for this demo script.
            "description": "Pro Voice Clone demo",
            "language": language,
            "model_id": model_id,
            "dataset": dataset_id,
        },
        timeout=60,
    )
    response.raise_for_status()
    created = response.json()
    return created["id"]
60
61
def wait_for_fine_tune(ft_id: str, every: float = 10.0) -> None:
    """Poll GET /fine-tunes/{id} until its status is "completed".

    After every poll the current status and the elapsed time are printed.

    Args:
        ft_id: Id of the fine-tune to watch.
        every: Seconds to sleep between two polls.

    Raises:
        RuntimeError: If the reported status is "failed".
        requests.HTTPError: If a poll answers with a 4xx/5xx status.
        requests.Timeout: If a single poll gets no response within 60 seconds.
    """
    start = time.monotonic()
    while True:
        res = requests.get(
            f"{API_BASE}/fine-tunes/{ft_id}",
            headers=API_HEADERS,
            # requests applies no timeout by default; without one a single
            # stalled poll would freeze the loop with no output.
            timeout=60,
        )
        res.raise_for_status()
        status = res.json()["status"]
        print(f"fine-tune {ft_id} -> {status}. Elapsed: {time.monotonic() - start:.0f}s")
        if status == "completed":
            return
        if status == "failed":
            raise RuntimeError(f"fine-tune ended with status={status}")
        # Any other status is treated as "still running": wait and poll again.
        time.sleep(every)
75
76
def list_voices(ft_id: str) -> list[dict]:
    """Fetch the voices of a fine-tune via GET /fine-tunes/{id}/voices.

    Args:
        ft_id: Id of the fine-tune whose voices are requested.

    Returns:
        The ``data`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 60 seconds.
    """
    res = requests.get(
        f"{API_BASE}/fine-tunes/{ft_id}/voices",
        headers=API_HEADERS,
        # requests applies no timeout by default, so a stalled connection
        # would block forever; 60 s matches create_fine_tune.
        timeout=60,
    )
    res.raise_for_status()
    return res.json()["data"]
82
83
if __name__ == "__main__":
    # Collect the samples first so that nothing is created remotely (and no
    # paid fine-tune is offered) when there is no audio to train on.
    wav_paths = sorted(Path("samples").glob("*.wav"))
    if not wav_paths:
        raise SystemExit("No .wav files found in samples/; nothing to upload.")

    # Create the dataset
    DATASET_ID = create_dataset("PVC demo", "Samples for a Pro Voice Clone")
    print("Created dataset:", DATASET_ID)

    # Upload .wav files to the dataset
    for wav_path in wav_paths:
        upload_file_to_dataset(DATASET_ID, wav_path)
        print(f"Uploaded {wav_path.name} to dataset {DATASET_ID}")

    # Ask for confirmation before kicking off the fine-tune
    confirmation = input(
        "Are you sure you want to start the fine-tune? It will cost 1M credits upon successful completion (yes/no): "
    )
    # Strip surrounding whitespace so an answer like "yes " is still accepted.
    if confirmation.strip().lower() != "yes":
        print("Fine-tuning cancelled by user.")
        # exit() is a helper injected by the site module for interactive use;
        # raising SystemExit works in every interpreter configuration.
        raise SystemExit(0)

    # Kick off the fine-tune
    FINE_TUNE_ID = create_fine_tune(
        DATASET_ID,
        name="PVC demo",
        language="en",
        model_id="sonic-2",
    )
    print(f"Started fine-tune: {FINE_TUNE_ID}")

    # Wait for training to finish
    wait_for_fine_tune(FINE_TUNE_ID)
    print("Fine-tune completed!")

    # Fetch the voices created by the fine-tune that was just started
    # (FINE_TUNE_ID must not be replaced by a hard-coded id here).
    voices = list_voices(FINE_TUNE_ID)
    print("Voices IDs:")
    for voice in voices:
        print(voice["id"])
118 print("Voices IDs:")
119 for voice in voices:
120 print(voice["id"])