バッチ音声認識 (STT)

Batch Speech-to-Text

# Batch speech-to-text: upload example-file and request word-level timestamps.
# Each -F flag contributes one multipart/form-data field.
curl -X POST 'https://api.cartesia.ai/stt' \
  -H 'Authorization: Bearer <token>' \
  -H 'Cartesia-Version: <cartesia-version>' \
  -H 'Content-Type: multipart/form-data' \
  -F 'file=@example-file' \
  -F 'model=ink-whisper' \
  -F 'language=en' \
  -F 'timestamp_granularities[]=word'

import requests

# Batch speech-to-text: upload one audio file as multipart/form-data and
# print the raw response body.
url = "https://api.cartesia.ai/stt"

payload = {
    "model": "ink-whisper",
    "language": "en",
    "timestamp_granularities[]": "word"
}
headers = {
    "Cartesia-Version": "<cartesia-version>",
    "Authorization": "Bearer <token>"
}

# Open the audio inside a context manager so the file handle is closed as soon
# as the upload has finished, instead of leaking until interpreter exit.
with open("example-file", "rb") as audio:
    files = { "file": ("example-file", audio) }
    response = requests.post(url, data=payload, files=files, headers=headers)

print(response.text)

// Multipart form fields, appended in this order. The 'file' value is the
// placeholder used by this sample.
const form = new FormData();
const fields = [
  ['file', '<string>'],
  ['model', 'ink-whisper'],
  ['language', 'en'],
  ['timestamp_granularities[]', 'word'],
];
for (const [name, value] of fields) {
  form.append(name, value);
}

// Content-Type is deliberately not set by hand; only the API headers are listed.
const options = {
  method: 'POST',
  headers: {'Cartesia-Version': '<cartesia-version>', Authorization: 'Bearer <token>'},
  body: form,
};

// Send the request, log the parsed JSON, and report any failure on stderr.
(async () => {
  try {
    const res = await fetch('https://api.cartesia.ai/stt', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Batch speech-to-text: upload one audio file as multipart/form-data and echo
// the response body (or the cURL error message on transport failure).
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.cartesia.ai/stt",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Passing an array (instead of a pre-built string) lets cURL encode the
  // fields as multipart/form-data and generate a matching boundary itself.
  // CURLFile uploads the audio from disk; field names are sent verbatim, so
  // the brackets must not be percent-encoded.
  CURLOPT_POSTFIELDS => [
    "file" => new CURLFile("example-file", "application/octet-stream", "example-file"),
    "model" => "ink-whisper",
    "language" => "en",
    "timestamp_granularities[]" => "word",
  ],
  // No Content-Type entry: cURL adds "multipart/form-data; boundary=..." on
  // its own, which a hand-written header without the boundary would not carry.
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Cartesia-Version: <cartesia-version>",
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

// run uploads example-file to the batch speech-to-text endpoint as
// multipart/form-data and prints the response body. It returns the first
// error encountered instead of ignoring it.
func run() error {
	url := "https://api.cartesia.ai/stt"

	audio, err := os.Open("example-file")
	if err != nil {
		return err
	}
	defer audio.Close()

	// Let mime/multipart build the body so the boundary in the payload and the
	// one advertised in the Content-Type header always agree.
	payload := &bytes.Buffer{}
	form := multipart.NewWriter(payload)

	part, err := form.CreateFormFile("file", "example-file")
	if err != nil {
		return err
	}
	if _, err := io.Copy(part, audio); err != nil {
		return err
	}

	fields := [][2]string{
		{"model", "ink-whisper"},
		{"language", "en"},
		{"timestamp_granularities[]", "word"},
	}
	for _, field := range fields {
		if err := form.WriteField(field[0], field[1]); err != nil {
			return err
		}
	}

	// Close writes the terminating boundary; the body is incomplete without it.
	if err := form.Close(); err != nil {
		return err
	}

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return err
	}

	req.Header.Add("Cartesia-Version", "<cartesia-version>")
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", form.FormDataContentType())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		return err
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return err
	}

	fmt.Println(string(body))
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// Batch speech-to-text upload. The form is built with Unirest's field() calls
// so the client produces the multipart/form-data body and its boundary header;
// a hand-written body string would be sent without any boundary declaration.
HttpResponse<String> response = Unirest.post("https://api.cartesia.ai/stt")
  .header("Cartesia-Version", "<cartesia-version>")
  .header("Authorization", "Bearer <token>")
  .field("file", new java.io.File("example-file"))
  .field("model", "ink-whisper")
  .field("language", "en")
  .field("timestamp_granularities[]", "word")
  .asString();

require 'uri'
require 'net/http'

# Batch speech-to-text: upload one audio file as multipart/form-data and print
# the response body.
url = URI("https://api.cartesia.ai/stt")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Cartesia-Version"] = '<cartesia-version>'
request["Authorization"] = 'Bearer <token>'

# set_form builds the multipart body and sets a Content-Type header carrying the
# matching boundary. The file stays open only for the duration of the request.
response = File.open('example-file', 'rb') do |audio|
  request.set_form(
    [
      ['file', audio, { filename: 'example-file', content_type: 'application/octet-stream' }],
      ['model', 'ink-whisper'],
      ['language', 'en'],
      ['timestamp_granularities[]', 'word']
    ],
    'multipart/form-data'
  )
  http.request(request)
end

puts response.read_body

{
  "type": "transcript",
  "text": "<string>",
  "request_id": "<string>",
  "is_final": true,
  "language": "<string>",
  "duration": 123,
  "words": [
    {
      "word": "<string>",
      "start": 123,
      "end": 123
    }
  ]
}

POST

stt

Batch Speech-to-Text

# Batch speech-to-text: upload example-file and request word-level timestamps.
# Each -F flag contributes one multipart/form-data field.
curl -X POST 'https://api.cartesia.ai/stt' \
  -H 'Authorization: Bearer <token>' \
  -H 'Cartesia-Version: <cartesia-version>' \
  -H 'Content-Type: multipart/form-data' \
  -F 'file=@example-file' \
  -F 'model=ink-whisper' \
  -F 'language=en' \
  -F 'timestamp_granularities[]=word'

import requests

# Batch speech-to-text: upload one audio file as multipart/form-data and
# print the raw response body.
url = "https://api.cartesia.ai/stt"

payload = {
    "model": "ink-whisper",
    "language": "en",
    "timestamp_granularities[]": "word"
}
headers = {
    "Cartesia-Version": "<cartesia-version>",
    "Authorization": "Bearer <token>"
}

# Open the audio inside a context manager so the file handle is closed as soon
# as the upload has finished, instead of leaking until interpreter exit.
with open("example-file", "rb") as audio:
    files = { "file": ("example-file", audio) }
    response = requests.post(url, data=payload, files=files, headers=headers)

print(response.text)

// Multipart form fields, appended in this order. The 'file' value is the
// placeholder used by this sample.
const form = new FormData();
const fields = [
  ['file', '<string>'],
  ['model', 'ink-whisper'],
  ['language', 'en'],
  ['timestamp_granularities[]', 'word'],
];
for (const [name, value] of fields) {
  form.append(name, value);
}

// Content-Type is deliberately not set by hand; only the API headers are listed.
const options = {
  method: 'POST',
  headers: {'Cartesia-Version': '<cartesia-version>', Authorization: 'Bearer <token>'},
  body: form,
};

// Send the request, log the parsed JSON, and report any failure on stderr.
(async () => {
  try {
    const res = await fetch('https://api.cartesia.ai/stt', options);
    const parsed = await res.json();
    console.log(parsed);
  } catch (err) {
    console.error(err);
  }
})();

<?php

// Batch speech-to-text: upload one audio file as multipart/form-data and echo
// the response body (or the cURL error message on transport failure).
$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.cartesia.ai/stt",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  // Passing an array (instead of a pre-built string) lets cURL encode the
  // fields as multipart/form-data and generate a matching boundary itself.
  // CURLFile uploads the audio from disk; field names are sent verbatim, so
  // the brackets must not be percent-encoded.
  CURLOPT_POSTFIELDS => [
    "file" => new CURLFile("example-file", "application/octet-stream", "example-file"),
    "model" => "ink-whisper",
    "language" => "en",
    "timestamp_granularities[]" => "word",
  ],
  // No Content-Type entry: cURL adds "multipart/form-data; boundary=..." on
  // its own, which a hand-written header without the boundary would not carry.
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Cartesia-Version: <cartesia-version>",
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"bytes"
	"fmt"
	"io"
	"mime/multipart"
	"net/http"
	"os"
)

// run uploads example-file to the batch speech-to-text endpoint as
// multipart/form-data and prints the response body. It returns the first
// error encountered instead of ignoring it.
func run() error {
	url := "https://api.cartesia.ai/stt"

	audio, err := os.Open("example-file")
	if err != nil {
		return err
	}
	defer audio.Close()

	// Let mime/multipart build the body so the boundary in the payload and the
	// one advertised in the Content-Type header always agree.
	payload := &bytes.Buffer{}
	form := multipart.NewWriter(payload)

	part, err := form.CreateFormFile("file", "example-file")
	if err != nil {
		return err
	}
	if _, err := io.Copy(part, audio); err != nil {
		return err
	}

	fields := [][2]string{
		{"model", "ink-whisper"},
		{"language", "en"},
		{"timestamp_granularities[]", "word"},
	}
	for _, field := range fields {
		if err := form.WriteField(field[0], field[1]); err != nil {
			return err
		}
	}

	// Close writes the terminating boundary; the body is incomplete without it.
	if err := form.Close(); err != nil {
		return err
	}

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		return err
	}

	req.Header.Add("Cartesia-Version", "<cartesia-version>")
	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", form.FormDataContentType())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so it must not be touched here.
		return err
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return err
	}

	fmt.Println(string(body))
	return nil
}

func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// Batch speech-to-text upload. The form is built with Unirest's field() calls
// so the client produces the multipart/form-data body and its boundary header;
// a hand-written body string would be sent without any boundary declaration.
HttpResponse<String> response = Unirest.post("https://api.cartesia.ai/stt")
  .header("Cartesia-Version", "<cartesia-version>")
  .header("Authorization", "Bearer <token>")
  .field("file", new java.io.File("example-file"))
  .field("model", "ink-whisper")
  .field("language", "en")
  .field("timestamp_granularities[]", "word")
  .asString();

require 'uri'
require 'net/http'

# Batch speech-to-text: upload one audio file as multipart/form-data and print
# the response body.
url = URI("https://api.cartesia.ai/stt")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Cartesia-Version"] = '<cartesia-version>'
request["Authorization"] = 'Bearer <token>'

# set_form builds the multipart body and sets a Content-Type header carrying the
# matching boundary. The file stays open only for the duration of the request.
response = File.open('example-file', 'rb') do |audio|
  request.set_form(
    [
      ['file', audio, { filename: 'example-file', content_type: 'application/octet-stream' }],
      ['model', 'ink-whisper'],
      ['language', 'en'],
      ['timestamp_granularities[]', 'word']
    ],
    'multipart/form-data'
  )
  http.request(request)
end

puts response.read_body

{
  "type": "transcript",
  "text": "<string>",
  "request_id": "<string>",
  "is_final": true,
  "language": "<string>",
  "duration": 123,
  "words": [
    {
      "word": "<string>",
      "start": 123,
      "end": 123
    }
  ]
}

承認

Authorization

string

header

必須

A short-lived access token to make API requests from a client.

ヘッダー

Cartesia-Version

enum<string>

必須

API version header.

利用可能なオプション:

2024-06-10

例:

"2024-06-10"

クエリパラメータ

encoding

enum<string> | null

Required when uploading raw PCM data without a container header. If not specified, the audio file will be decoded automatically from its container (e.g. WAV, MP3, FLAC). Must match the actual encoding of your audio. For detailed guidance on each format, see Audio Input.

利用可能なオプション:

pcm_s16le,

pcm_s32le,

pcm_f16le,

pcm_f32le,

pcm_mulaw,

pcm_alaw

sample_rate

integer | null

The sample rate of the audio in Hz.

ボディ

multipart/form-data

file

必須

There's no need to break up your audio file. Long files are intelligently chunked by our server.

Supported audio formats: flac, m4a, mp3, mp4, mpeg, mpga, oga, ogg, wav, webm

model

enum<string>

必須

ID of the model to use for transcription. Must be in the ink-whisper family of models.

利用可能なオプション:

ink-whisper

例:

"ink-whisper"

language

enum<string>

デフォルト:en

The language of the input audio in ISO 639-1 format.

利用可能なオプション:

en,

zh,

de,

es,

ru,

ko,

fr,

ja,

pt,

tr,

pl,

ca,

nl,

ar,

sv,

it,

id,

hi,

fi,

vi,

he,

uk,

el,

ms,

cs,

ro,

da,

hu,

ta,

no,

th,

ur,

hr,

bg,

lt,

la,

mi,

ml,

cy,

sk,

te,

fa,

lv,

bn,

sr,

az,

sl,

kn,

et,

mk,

br,

eu,

is,

hy,

ne,

mn,

bs,

kk,

sq,

sw,

gl,

mr,

pa,

si,

km,

sn,

yo,

so,

af,

oc,

ka,

be,

tg,

sd,

gu,

am,

yi,

lo,

uz,

fo,

ht,

ps,

tk,

nn,

mt,

sa,

lb,

my,

bo,

tl,

mg,

as,

tt,

haw,

ln,

ha,

ba,

jw,

su,

yue

timestamp_granularities[]

enum<string>[]

The granularity of timestamps to include in the response. Currently only word level timestamps are supported, providing start and end times for each word.

利用可能なオプション:

word

レスポンス

200 - application/json

type

enum<string>

必須

The message type. Always "transcript" for a batch transcription response.

利用可能なオプション:

transcript

text

string

必須

The transcribed text.

request_id

string

Unique identifier for this transcription request.

is_final

boolean

非推奨

Not used for batch transcription.

language

string

The specified language of the input audio.

duration

number<double>

The duration of the input audio in seconds.

words

TranscriptionWord · object[]

Word-level timestamps showing the start and end time of each word. Only included when [word] is passed into timestamp_granularities[].

Show child attributes

音声認識 (ストリーミング)

新しいアクセストークンを生成する

⌘I

API の利用

API ステータス

TTS

STT

認証

ボイス

Infill

バッチ音声認識 (STT)

承認

ヘッダー

クエリパラメータ

ボディ

レスポンス