Use this file to discover all available pages before exploring further.
Use Twilio Programmable Voice with Media Streams so a phone call receives audio generated by Cartesia TTS over WebSockets. This walkthrough uses Node.js: a small server bridges Twilio’s stream to Cartesia and plays TTS audio on the callee’s line.
Replace the placeholder values with your actual credentials.
3
Create the Main Script
Create a file named app.js (or any name you prefer) and add the following code:
const twilio = require('twilio');
const WebSocket = require('ws');
const http = require('http');
const ngrok = require('@ngrok/ngrok');
const dotenv = require('dotenv');
const crypto = require('crypto');

// Load environment variables. `override: true` is passed so values from the
// .env file take precedence over variables already set in the environment.
dotenv.config({ override: true });

/**
 * Get a configuration value from an environment variable or, failing that,
 * from a `KEY=value` command line argument.
 *
 * @param {string} key - Name of the environment variable / argument prefix.
 * @param {string} [defaultValue] - Returned when neither source has a non-empty value.
 * @returns {string|undefined} The resolved value, or `defaultValue`.
 */
function getConfig(key, defaultValue = undefined) {
  const prefix = `${key}=`;
  const cliArg = process.argv.find((arg) => arg.startsWith(prefix));
  // Slice off the prefix instead of split('=')[1] so that values which
  // themselves contain '=' are not truncated at the second '='.
  const cliValue = cliArg?.slice(prefix.length);
  // `||` is deliberate: an empty string from either source counts as "not set".
  return process.env[key] || cliValue || defaultValue;
}

// Configuration
const config = {
  TWILIO_ACCOUNT_SID: getConfig('TWILIO_ACCOUNT_SID'),
  TWILIO_AUTH_TOKEN: getConfig('TWILIO_AUTH_TOKEN'),
  CARTESIA_API_KEY: getConfig('CARTESIA_API_KEY'),
  NGROK_AUTHTOKEN: getConfig('NGROK_AUTHTOKEN'),
};

// Validate required configuration: exit with status 1 on the first missing key.
const requiredConfig = ['TWILIO_ACCOUNT_SID', 'TWILIO_AUTH_TOKEN', 'CARTESIA_API_KEY', 'NGROK_AUTHTOKEN'];
for (const key of requiredConfig) {
  if (!config[key]) {
    console.error(`Missing required configuration: ${key}`);
    process.exit(1);
  }
}

const client = twilio(config.TWILIO_ACCOUNT_SID, config.TWILIO_AUTH_TOKEN);
4
Configure Cartesia TTS
In the script, you’ll find a configuration section for Cartesia TTS. Make sure to set the following variables according to your needs:
// Cartesia TTS WebSocket endpoint; the API version is pinned via the query string.
const TTS_WEBSOCKET_URL = `wss://api.cartesia.ai/tts/websocket?cartesia_version=2025-03-01`;

// Model used for every TTS request sent by this script.
const modelId = 'sonic-3';

// Voice selected by ID.
// You can check available voices using the Cartesia API or at https://play.cartesia.ai
const voice = {
  'mode': 'id',
  'id': "e07c00bc-4134-4eae-9ea4-1a55fb45746b"
};

// Text that is synthesized and played to the callee once the media stream starts.
const partialResponse = 'Hi there, my name is Cartesia. I hope youre having a great day!';
5
Set Up Twilio Calling
Configure your Twilio outbound and inbound numbers:
// Phone numbers used when placing the call.
const outbound = "+1234567890"; // Replace with the number you want to call
const inbound = "+1234567890"; // Replace with your Twilio number
6
Implement Main Logic
The main() function orchestrates the entire process:
Connects to the Cartesia TTS WebSocket
Tests the TTS WebSocket
Sets up a Twilio WebSocket server
Creates an ngrok tunnel for the Twilio WebSocket
Initiates the call using Twilio
// Shared state for the single call this script places.
let ttsWebSocket;
let callSid;
let messageComplete = false;
let audioChunksReceived = 0;

/**
 * Print a message to stdout prefixed with an ISO-8601 timestamp.
 *
 * @param {*} message - Value to log (interpolated into a string).
 */
function log(message) {
  console.log(`[${new Date().toISOString()}] ${message}`);
}

/**
 * Open the WebSocket connection to the TTS service and store it in the
 * module-level `ttsWebSocket`.
 *
 * @returns {Promise<WebSocket>} Resolves with the socket once it is open;
 *   rejects if the socket errors or closes before opening.
 */
function connectToTTSWebSocket() {
  return new Promise((resolve, reject) => {
    log('Attempting to connect to TTS WebSocket');
    ttsWebSocket = new WebSocket(TTS_WEBSOCKET_URL, {
      headers: {
        'X-Api-Key': config.CARTESIA_API_KEY
      }
    });

    ttsWebSocket.on('open', () => {
      log('Connected to TTS WebSocket');
      resolve(ttsWebSocket);
    });

    ttsWebSocket.on('error', (error) => {
      log(`TTS WebSocket error: ${error.message}`);
      reject(error);
    });

    // After the promise has settled this reject is a no-op; the log line
    // still records later disconnects.
    ttsWebSocket.on('close', (code, reason) => {
      log(`TTS WebSocket closed. Code: ${code}, Reason: ${reason}`);
      reject(new Error('TTS WebSocket closed unexpectedly'));
    });
  });
}

/**
 * Send one complete transcript to the TTS WebSocket, requesting raw
 * 8 kHz mu-law audio.
 *
 * @param {string} message - Text to synthesize.
 */
function sendTTSMessage(message) {
  const textMessage = {
    'model_id': modelId,
    'transcript': message,
    'voice': voice,
    'output_format': {
      'container': 'raw',
      'encoding': 'pcm_mulaw',
      'sample_rate': 8000
    },
    // create a new context for each message since each is a complete transcript
    'context_id': crypto.randomUUID()
  };
  log(`Sending message to TTS WebSocket: ${message}`);
  ttsWebSocket.send(JSON.stringify(textMessage));
}

/**
 * Smoke-test the TTS connection: send a short transcript and wait for the
 * first message that comes back.
 *
 * @returns {Promise<void>} Resolves on the first message received; rejects
 *   if nothing arrives within 10 seconds.
 */
function testTTSWebSocket() {
  return new Promise((resolve, reject) => {
    const testMessage = 'This is a test message';

    const onFirstMessage = (audioChunk) => {
      clearTimeout(timeout);
      // Detach so this test handler does not keep firing for the audio
      // that is later streamed during the real call.
      ttsWebSocket.removeListener('message', onFirstMessage);
      log(audioChunk);
      log('Received audio chunk from TTS for test message');
      resolve();
    };

    const timeout = setTimeout(() => {
      ttsWebSocket.removeListener('message', onFirstMessage);
      reject(new Error('Timeout: No audio received from TTS WebSocket'));
    }, 10000); // 10 second timeout

    ttsWebSocket.on('message', onFirstMessage);
    sendTTSMessage(testMessage);
  });
}

/**
 * Place the outbound call with TwiML that connects the call's media stream
 * to the given WebSocket URL. Stores the call SID in `callSid`.
 *
 * @param {string} twilioWebsocketUrl - Public wss:// URL of the local WebSocket server.
 * @returns {Promise<void>}
 * @throws Rethrows any error from the Twilio client after logging it.
 */
async function startCall(twilioWebsocketUrl) {
  try {
    log(`Initiating call with WebSocket URL: ${twilioWebsocketUrl}`);
    const call = await client.calls.create({
      twiml: `<Response><Connect><Stream url="${twilioWebsocketUrl}"/></Connect></Response>`,
      to: outbound, // Replace with the phone number you want to call
      from: inbound // Replace with your Twilio phone number
    });
    callSid = call.sid;
    log(`Call initiated. SID: ${callSid}`);
  } catch (error) {
    log(`Error initiating call: ${error.message}`);
    throw error;
  }
}

/**
 * End the active call by setting its status to 'completed'. Failures are
 * logged and not rethrown (best effort).
 *
 * @returns {Promise<void>}
 */
async function hangupCall() {
  if (!callSid) {
    // Nothing to hang up if the call SID was never recorded.
    log('No active call SID; skipping hang up');
    return;
  }
  try {
    log(`Attempting to hang up call: ${callSid}`);
    await client.calls(callSid).update({ status: 'completed' });
    log('Call hung up successfully');
  } catch (error) {
    log(`Error hanging up call: ${error.message}`);
  }
}

/**
 * Start a local HTTP + WebSocket server that receives Twilio's media stream
 * and relays TTS audio back into the call.
 *
 * @returns {Promise<number>} Resolves with the port the server listens on
 *   (chosen by the OS); rejects on an HTTP server error.
 */
function setupTwilioWebSocket() {
  return new Promise((resolve, reject) => {
    const server = http.createServer((req, res) => {
      log(`Received HTTP request: ${req.method} ${req.url}`);
      res.writeHead(200);
      res.end('WebSocket server is running');
    });

    const wss = new WebSocket.Server({ server });
    log('WebSocket server created');

    wss.on('connection', (twilioWs, request) => {
      log(`Twilio WebSocket connection attempt from ${request.socket.remoteAddress}`);
      let streamSid = null;

      // Handle incoming audio chunks from TTS WebSocket
      const onTTSMessage = (audioChunk) => {
        log('Received audio chunk from TTS');
        try {
          if (!streamSid) {
            log('Warning: Received audio chunk but streamSid is not set');
            return;
          }

          const payload = JSON.parse(audioChunk)['data'];
          if (typeof payload !== 'string') {
            // Presumably a status/"done" frame rather than audio - confirm
            // against the Cartesia WebSocket reference. Forwarding it would
            // send Twilio a media event with an undefined payload.
            log('Skipping TTS message without audio data');
            return;
          }

          twilioWs.send(JSON.stringify({
            event: 'media',
            streamSid: streamSid,
            media: {
              payload: payload
            }
          }));
          audioChunksReceived++;
          log(`Audio chunks received: ${audioChunksReceived}`);

          // A fixed chunk count is used as the end-of-message heuristic.
          // Guard with messageComplete so the hang-up timer is scheduled
          // once, not again for every chunk after the 50th.
          if (audioChunksReceived >= 50 && !messageComplete) {
            messageComplete = true;
            log('Message complete, preparing to hang up');
            setTimeout(hangupCall, 2000);
          }
        } catch (error) {
          log(`Error sending audio chunk to Twilio: ${error.message}`);
        }
      };

      twilioWs.on('message', (message) => {
        try {
          const msg = JSON.parse(message);
          log(`Received message from Twilio: ${JSON.stringify(msg)}`);
          if (msg.event === 'start') {
            log('Media stream started');
            streamSid = msg.start.streamSid;
            log(`Stream SID: ${streamSid}`);
            sendTTSMessage(partialResponse);
          } else if (msg.event === 'media' && !messageComplete) {
            log('Received media event');
          } else if (msg.event === 'stop') {
            log('Media stream stopped');
            hangupCall();
          }
        } catch (error) {
          log(`Error processing Twilio message: ${error.message}`);
        }
      });

      twilioWs.on('close', (code, reason) => {
        log(`Twilio WebSocket disconnected. Code: ${code}, Reason: ${reason}`);
        // Stop relaying TTS audio to a socket that is gone; otherwise each
        // connection leaves a listener behind on the shared TTS socket.
        ttsWebSocket.removeListener('message', onTTSMessage);
      });

      twilioWs.on('error', (error) => {
        log(`Twilio WebSocket error: ${error.message}`);
      });

      ttsWebSocket.on('message', onTTSMessage);

      log('Twilio WebSocket connected and handlers set up');
    });

    wss.on('error', (error) => {
      log(`WebSocket server error: ${error.message}`);
    });

    // Port 0 asks the OS for any free port; the actual port is read back below.
    server.listen(0, () => {
      const port = server.address().port;
      log(`Twilio WebSocket server is running on port ${port}`);
      resolve(port);
    });

    server.on('error', (error) => {
      log(`HTTP server error: ${error.message}`);
      reject(error);
    });
  });
}

/**
 * Expose the local server through an ngrok tunnel.
 *
 * @param {number} port - Local port to forward to.
 * @returns {Promise<string>} The tunnel URL with its https:// scheme replaced by wss://.
 * @throws Rethrows any ngrok error after logging it.
 */
async function setupNgrokTunnel(port) {
  try {
    const listener = await ngrok.forward({
      addr: port,
      authtoken: config.NGROK_AUTHTOKEN,
    });
    const wssUrl = listener.url().replace('https://', 'wss://');
    log(`ngrok tunnel established: ${wssUrl}`);
    return wssUrl;
  } catch (error) {
    log(`Error setting up ngrok tunnel: ${error.message}`);
    throw error;
  }
}

/**
 * Orchestrate the run: connect to and test the TTS socket, start the local
 * WebSocket server, open the ngrok tunnel, then place the call. Any error
 * is logged rather than rethrown.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    log('Starting application');

    await connectToTTSWebSocket();
    log('TTS WebSocket connected successfully');

    await testTTSWebSocket();
    log('TTS WebSocket test passed successfully');

    const twilioWebsocketPort = await setupTwilioWebSocket();
    log(`Twilio WebSocket server set up on port ${twilioWebsocketPort}`);

    const twilioWebsocketUrl = await setupNgrokTunnel(twilioWebsocketPort);

    await startCall(twilioWebsocketUrl);
  } catch (error) {
    log(`Error in main function: ${error.message}`);
  }
}

// Run the script
main();
7
Run the Application
To run the application, use the following command: