Text-to-Speech (Bytes)

# Synthesize speech for a transcript and save the returned audio.
# The endpoint answers with binary audio (audio/*), so the body is written to a
# file via --output; curl warns and withholds binary output when stdout is a
# terminal. The file name matches the "raw" container requested below.
curl --request POST \
  --url https://api.cartesia.ai/tts/bytes \
  --header 'Cartesia-Version: <cartesia-version>' \
  --header 'Content-Type: application/json' \
  --header 'X-API-Key: <api-key>' \
  --output speech.raw \
  --data '
{
  "model_id": "sonic-3.5",
  "transcript": "<string>",
  "voice": {
    "mode": "id",
    "id": "<string>",
    "__experimental_controls": {
      "speed": 123,
      "emotion": []
    }
  },
  "output_format": {
    "container": "raw",
    "sample_rate": 123,
    "bit_rate": 123
  },
  "duration": 123,
  "speed": "normal"
}
'

import requests

# Endpoint that returns the synthesized speech as audio bytes.
url = "https://api.cartesia.ai/tts/bytes"

# JSON request body. "<string>" entries are placeholders to be replaced.
payload = {
    "model_id": "sonic-3.5",
    "transcript": "<string>",
    "voice": {
        "mode": "id",
        "id": "<string>",
        "__experimental_controls": {
            "speed": 123,
            "emotion": []
        }
    },
    "output_format": {
        "container": "raw",
        "sample_rate": 123,
        "bit_rate": 123
    },
    "duration": 123,
    "speed": "normal"
}
# "<cartesia-version>" and "<api-key>" are placeholders to be replaced.
headers = {
    "Cartesia-Version": "<cartesia-version>",
    "X-API-Key": "<api-key>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

# Fail loudly on 4xx/5xx instead of saving an error body as if it were audio.
response.raise_for_status()

# The body is binary audio (audio/*): use the raw bytes. response.text would
# try to decode them as text and corrupt the data.
with open("speech.raw", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Wrote {len(response.content)} bytes to speech.raw")

// Request settings for the text-to-speech call. Values in angle brackets are
// placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {
    'Cartesia-Version': '<cartesia-version>',
    'X-API-Key': '<api-key>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model_id: 'sonic-3.5',
    transcript: '<string>',
    voice: {mode: 'id', id: '<string>', __experimental_controls: {speed: 123, emotion: []}},
    output_format: {container: 'raw', sample_rate: 123, bit_rate: 123},
    duration: 123,
    speed: 'normal'
  })
};

fetch('https://api.cartesia.ai/tts/bytes', options)
  .then(res => {
    // Surface HTTP errors explicitly; fetch only rejects on network failures.
    if (!res.ok) {
      throw new Error(`Text-to-speech request failed: ${res.status} ${res.statusText}`);
    }
    // The success body is binary audio (audio/*), not JSON, so read it as bytes.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Sends a text-to-speech request to the Cartesia bytes endpoint and echoes
// either the response body or the cURL error message.

// Request body; it is serialized to JSON when the cURL options are set.
$requestBody = [
  'model_id' => 'sonic-3.5',
  'transcript' => '<string>',
  'voice' => [
    'mode' => 'id',
    'id' => '<string>',
    '__experimental_controls' => [
      'speed' => 123,
      'emotion' => [],
    ],
  ],
  'output_format' => [
    'container' => 'raw',
    'sample_rate' => 123,
    'bit_rate' => 123,
  ],
  'duration' => 123,
  'speed' => 'normal',
];

// Header lines sent with the request; bracketed values are placeholders.
$requestHeaders = [
  "Cartesia-Version: <cartesia-version>",
  "Content-Type: application/json",
  "X-API-Key: <api-key>",
];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.cartesia.ai/tts/bytes",
  // Have curl_exec() return the body instead of printing it directly.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($requestBody),
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

$result = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// curl_error() yields an empty string when the transfer succeeded.
echo $failure ? "cURL Error #:" . $failure : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the Cartesia bytes endpoint and
// prints the response body to stdout. Every step's error is checked so that a
// failed request is reported instead of causing a nil-pointer panic.
func main() {

	url := "https://api.cartesia.ai/tts/bytes"

	payload := strings.NewReader("{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Cartesia-Version", "<cartesia-version>")
	req.Header.Add("X-API-Key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil here, so res.Body must not be touched.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	// The body is audio data; Println writes the bytes followed by a newline.
	fmt.Println(string(body))

}

// JSON request body for the text-to-speech call; "<string>" entries are placeholders.
String requestJson = "{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}";

// POST the body to the Cartesia bytes endpoint and read the reply as a String.
// NOTE(review): the endpoint is documented as returning audio bytes; confirm
// that reading the body as a String is what the caller wants.
HttpResponse<String> response = Unirest.post("https://api.cartesia.ai/tts/bytes")
  .header("Cartesia-Version", "<cartesia-version>")
  .header("X-API-Key", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Post a text-to-speech request to the Cartesia bytes endpoint over HTTPS
# and print the response body.
endpoint = URI("https://api.cartesia.ai/tts/bytes")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Header values in angle brackets are placeholders to be replaced.
header_fields = {
  "Cartesia-Version" => '<cartesia-version>',
  "X-API-Key" => '<api-key>',
  "Content-Type" => 'application/json'
}

post = Net::HTTP::Post.new(endpoint)
header_fields.each { |name, value| post[name] = value }
post.body = "{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}"

puts client.request(post).read_body

"<string>"

POST

tts

bytes

Text-to-Speech (Bytes)

# Synthesize speech for a transcript and save the returned audio.
# The endpoint answers with binary audio (audio/*), so the body is written to a
# file via --output; curl warns and withholds binary output when stdout is a
# terminal. The file name matches the "raw" container requested below.
curl --request POST \
  --url https://api.cartesia.ai/tts/bytes \
  --header 'Cartesia-Version: <cartesia-version>' \
  --header 'Content-Type: application/json' \
  --header 'X-API-Key: <api-key>' \
  --output speech.raw \
  --data '
{
  "model_id": "sonic-3.5",
  "transcript": "<string>",
  "voice": {
    "mode": "id",
    "id": "<string>",
    "__experimental_controls": {
      "speed": 123,
      "emotion": []
    }
  },
  "output_format": {
    "container": "raw",
    "sample_rate": 123,
    "bit_rate": 123
  },
  "duration": 123,
  "speed": "normal"
}
'

import requests

# Endpoint that returns the synthesized speech as audio bytes.
url = "https://api.cartesia.ai/tts/bytes"

# JSON request body. "<string>" entries are placeholders to be replaced.
payload = {
    "model_id": "sonic-3.5",
    "transcript": "<string>",
    "voice": {
        "mode": "id",
        "id": "<string>",
        "__experimental_controls": {
            "speed": 123,
            "emotion": []
        }
    },
    "output_format": {
        "container": "raw",
        "sample_rate": 123,
        "bit_rate": 123
    },
    "duration": 123,
    "speed": "normal"
}
# "<cartesia-version>" and "<api-key>" are placeholders to be replaced.
headers = {
    "Cartesia-Version": "<cartesia-version>",
    "X-API-Key": "<api-key>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

# Fail loudly on 4xx/5xx instead of saving an error body as if it were audio.
response.raise_for_status()

# The body is binary audio (audio/*): use the raw bytes. response.text would
# try to decode them as text and corrupt the data.
with open("speech.raw", "wb") as audio_file:
    audio_file.write(response.content)

print(f"Wrote {len(response.content)} bytes to speech.raw")

// Request settings for the text-to-speech call. Values in angle brackets are
// placeholders to be replaced.
const options = {
  method: 'POST',
  headers: {
    'Cartesia-Version': '<cartesia-version>',
    'X-API-Key': '<api-key>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model_id: 'sonic-3.5',
    transcript: '<string>',
    voice: {mode: 'id', id: '<string>', __experimental_controls: {speed: 123, emotion: []}},
    output_format: {container: 'raw', sample_rate: 123, bit_rate: 123},
    duration: 123,
    speed: 'normal'
  })
};

fetch('https://api.cartesia.ai/tts/bytes', options)
  .then(res => {
    // Surface HTTP errors explicitly; fetch only rejects on network failures.
    if (!res.ok) {
      throw new Error(`Text-to-speech request failed: ${res.status} ${res.statusText}`);
    }
    // The success body is binary audio (audio/*), not JSON, so read it as bytes.
    return res.arrayBuffer();
  })
  .then(audio => console.log(`Received ${audio.byteLength} bytes of audio`))
  .catch(err => console.error(err));

<?php

// Sends a text-to-speech request to the Cartesia bytes endpoint and echoes
// either the response body or the cURL error message.

// Request body; it is serialized to JSON when the cURL options are set.
$requestBody = [
  'model_id' => 'sonic-3.5',
  'transcript' => '<string>',
  'voice' => [
    'mode' => 'id',
    'id' => '<string>',
    '__experimental_controls' => [
      'speed' => 123,
      'emotion' => [],
    ],
  ],
  'output_format' => [
    'container' => 'raw',
    'sample_rate' => 123,
    'bit_rate' => 123,
  ],
  'duration' => 123,
  'speed' => 'normal',
];

// Header lines sent with the request; bracketed values are placeholders.
$requestHeaders = [
  "Cartesia-Version: <cartesia-version>",
  "Content-Type: application/json",
  "X-API-Key: <api-key>",
];

$handle = curl_init();

curl_setopt_array($handle, [
  CURLOPT_URL => "https://api.cartesia.ai/tts/bytes",
  // Have curl_exec() return the body instead of printing it directly.
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($requestBody),
  CURLOPT_HTTPHEADER => $requestHeaders,
]);

$result = curl_exec($handle);
$failure = curl_error($handle);

curl_close($handle);

// curl_error() yields an empty string when the transfer succeeded.
echo $failure ? "cURL Error #:" . $failure : $result;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a text-to-speech request to the Cartesia bytes endpoint and
// prints the response body to stdout. Every step's error is checked so that a
// failed request is reported instead of causing a nil-pointer panic.
func main() {

	url := "https://api.cartesia.ai/tts/bytes"

	payload := strings.NewReader("{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("Cartesia-Version", "<cartesia-version>")
	req.Header.Add("X-API-Key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res may be nil here, so res.Body must not be touched.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	// The body is audio data; Println writes the bytes followed by a newline.
	fmt.Println(string(body))

}

// JSON request body for the text-to-speech call; "<string>" entries are placeholders.
String requestJson = "{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}";

// POST the body to the Cartesia bytes endpoint and read the reply as a String.
// NOTE(review): the endpoint is documented as returning audio bytes; confirm
// that reading the body as a String is what the caller wants.
HttpResponse<String> response = Unirest.post("https://api.cartesia.ai/tts/bytes")
  .header("Cartesia-Version", "<cartesia-version>")
  .header("X-API-Key", "<api-key>")
  .header("Content-Type", "application/json")
  .body(requestJson)
  .asString();

require 'uri'
require 'net/http'

# Post a text-to-speech request to the Cartesia bytes endpoint over HTTPS
# and print the response body.
endpoint = URI("https://api.cartesia.ai/tts/bytes")

client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# Header values in angle brackets are placeholders to be replaced.
header_fields = {
  "Cartesia-Version" => '<cartesia-version>',
  "X-API-Key" => '<api-key>',
  "Content-Type" => 'application/json'
}

post = Net::HTTP::Post.new(endpoint)
header_fields.each { |name, value| post[name] = value }
post.body = "{\n  \"model_id\": \"sonic-3.5\",\n  \"transcript\": \"<string>\",\n  \"voice\": {\n    \"mode\": \"id\",\n    \"id\": \"<string>\",\n    \"__experimental_controls\": {\n      \"speed\": 123,\n      \"emotion\": []\n    }\n  },\n  \"output_format\": {\n    \"container\": \"raw\",\n    \"sample_rate\": 123,\n    \"bit_rate\": 123\n  },\n  \"duration\": 123,\n  \"speed\": \"normal\"\n}"

puts client.request(post).read_body

"<string>"

Authorizations

X-API-Key

string

header

required

Headers

Cartesia-Version

enum<string>

required

API version header.

Available options:

2024-11-13

Example:

"2024-11-13"

Body

application/json

model_id

enum<string>

required

The ID of the model to use for the generation. See Models for all options.

Available options:

sonic-3.5,

sonic-3,

sonic-latest

Example:

"sonic-3.5"

transcript

string

required

voice

TTSRequestIdSpecifier · object

required

TTSRequestIdSpecifier
TTSRequestEmbeddingSpecifier

Show child attributes

output_format

RawOutputFormat · object

required

RawOutputFormat
WAVOutputFormat
MP3OutputFormat

Show child attributes

language

enum<string> | null

The language that the given voice should speak the transcript in.

Available options:

en,

fr,

de,

es,

pt,

zh,

ja,

hi,

it,

ko,

nl,

pl,

ru,

sv,

tr

duration

number<double> | null

The maximum duration of the audio in seconds. You do not usually need to specify this. If the duration is not appropriate for the length of the transcript, the output audio may be truncated.

speed

enum<string> | null

default:normal

deprecated

Influences the speed of the generated speech. Faster speeds may reduce hallucination rate.

This feature is experimental and may not work for all voices.

Available options:

slow,

normal,

fast

Response

200 - audio/*

Audio bytes

The response is of type file.

API Status and Version

Text-to-Speech (SSE)

⌘I

Use the API

API Status

TTS

STT

Auth

Voices

Infill

Text-to-Speech (Bytes)

Authorizations

Headers

Body

Response