> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cartesia.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Realtime Speech-to-Text (Auto)

> Realtime speech transcription with built-in turn detection

<Note>
  This endpoint is English only right now.  
  We expect to add more languages in the coming months.
</Note>




## AsyncAPI

````yaml asyncapi.yml /stt/turns/websocket
# Channel identity and connection details for the turn-detecting
# speech-to-text WebSocket.
# - `description` is a literal block scalar (`|`), so its line breaks, the
#   <Note> markup and the trailing spaces after "right now." are kept verbatim.
# - `servers` declares a single production server; protocol + host + `address`
#   together give wss://api.cartesia.ai/stt/turns/websocket.
id: /stt/turns/websocket
title: /stt/turns/websocket
description: |
  Realtime speech transcription with built-in turn detection

  <Note>
    This endpoint is English only right now.  
    We expect to add more languages in the coming months.
  </Note>
servers:
  - id: production
    protocol: wss
    host: api.cartesia.ai
    bindings: []
    variables: []
address: /stt/turns/websocket
# Connection parameters; all four are marked `required: true`.
# The audio message description in this file calls `encoding` and
# `sample_rate` query parameters; presumably `model` and `cartesia_version`
# are passed the same way — TODO confirm.
# Each entry states its type and description twice: once under `jsonSchema`
# and once as the flattened `type` / `description` fields. Keep both in sync.
parameters:
  # Transcription model ID, e.g. `ink-2`.
  - id: model
    jsonSchema:
      type: string
      description: >
        ID of the model to use for transcription, e.g. `ink-2`.


        See [Models](/build-with-cartesia/stt-models/latest) for available
        models.
    description: |
      ID of the model to use for transcription, e.g. `ink-2`.

      See [Models](/build-with-cartesia/stt-models/latest) for available models.
    type: string
    required: true
    deprecated: false
  # Encoding of the raw binary audio the client sends.
  - id: encoding
    jsonSchema:
      type: string
      description: >
        The encoding format of the audio data. This determines how the server
        interprets the raw binary audio data you send.


        Supported encodings: `pcm_s16le`, `pcm_s32le`, `pcm_f16le`, `pcm_f32le`,
        `pcm_mulaw`, `pcm_alaw`.


        For guidance on choosing an encoding, see [Audio
        encodings](/build-with-cartesia/capability-guides/stt-input-encodings).
    description: >
      The encoding format of the audio data. This determines how the server
      interprets the raw binary audio data you send.


      Supported encodings: `pcm_s16le`, `pcm_s32le`, `pcm_f16le`, `pcm_f32le`,
      `pcm_mulaw`, `pcm_alaw`.


      For guidance on choosing an encoding, see [Audio
      encodings](/build-with-cartesia/capability-guides/stt-input-encodings).
    type: string
    required: true
    deprecated: false
  # Audio sample rate in Hz.
  # NOTE(review): typed as `string` although the value is a number of Hz;
  # presumably because it travels in the URL query string — confirm.
  - id: sample_rate
    jsonSchema:
      type: string
      description: |
        The sample rate of the audio in Hz.
    description: |
      The sample rate of the audio in Hz.
    type: string
    required: true
    deprecated: false
  # API version selector, written as a date string (e.g. `2026-03-01`).
  - id: cartesia_version
    jsonSchema:
      type: string
      description: API version, e.g. `2026-03-01`
    description: API version, e.g. `2026-03-01`
    type: string
    required: true
    deprecated: false
# No channel-level bindings are declared.
bindings: []
# Operations on this channel. Anchors (&ref_N) defined here are re-used by
# alias in the index lists at the end of the document, so keep them intact.
operations:
  # Client -> server traffic: raw audio frames plus the JSON close command.
  # NOTE(review): `type: receive` looks like it is written from the server's
  # point of view; this operation is the one aliased under `sendOperations`
  # at the end of the document — confirm the perspective before changing it.
  - &ref_1
    id: sendSTTTurnsAudio
    title: Send STT turns audio
    type: receive
    messages:
      # Binary WebSocket message carrying raw audio in the format given by the
      # `encoding` and `sample_rate` parameters.
      # NOTE(review): the `example` value `{}` is a JSON placeholder even
      # though the message itself is binary; presumably generator output —
      # confirm before relying on it.
      - &ref_3
        id: sttTurnsAudioData
        payload:
          - type: string
            format: binary
            description: >
              Send WebSocket binary messages containing raw audio data as
              specified by the `encoding` and `sample_rate` query parameters.


              Audio Requirements:

              - Send audio in small chunks, e.g. 100 ms

              - Audio format must match the `encoding` and `sample_rate`
              parameters
            x-parser-schema-id: <anonymous-schema-94>
            name: Send Audio Data
        headers: []
        # Same schema as `payload` above (shared x-parser-schema-id), with a
        # shorter description.
        jsonPayloadSchema:
          type: string
          format: binary
          description: >-
            Raw audio data as a binary message in the format specified by the
            `encoding` parameter. Send audio in small chunks, e.g. 100 ms.
          x-parser-schema-id: <anonymous-schema-94>
        title: Send Audio Data
        description: >
          Send WebSocket binary messages containing raw audio data as specified
          by the `encoding` and `sample_rate` query parameters.


          Audio Requirements:

          - Send audio in small chunks, e.g. 100 ms

          - Audio format must match the `encoding` and `sample_rate` parameters
        example: '{}'
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsAudioData
      # JSON text message `{"type": "close"}` that ends the session cleanly;
      # buffered audio is still processed into events.
      - &ref_4
        id: sttTurnsCloseCommand
        payload:
          - name: Close Command
            description: >-
              Send a JSON encoded close command as WebSocket text message to
              close the session cleanly. All buffered audio will be processed by
              the model into events.
            type: object
            properties:
              - name: type
                type: string
                description: >-
                  Command type. Send this as a JSON encoded WebSocket text
                  message to close the session.
                enumValues:
                  - close
                required: true
        headers: []
        # JSON Schema form of the same payload: `type` is required and must be
        # the literal `close`.
        jsonPayloadSchema:
          type: object
          required:
            - type
          properties:
            type:
              type: string
              enum:
                - close
              description: >-
                Command type. Send this as a JSON encoded WebSocket text message
                to close the session.
              x-parser-schema-id: <anonymous-schema-95>
          x-parser-schema-id: STTTurnsCloseCommand
        title: Close Command
        description: >-
          Send a JSON encoded close command as WebSocket text message to close
          the session cleanly. All buffered audio will be processed by the model
          into events.
        example: |-
          {
            "type": "close"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsCloseCommand
    bindings: []
    # Anchored as ref_0 so the second operation can alias the same list.
    extensions: &ref_0
      - id: x-parser-unique-object-id
        value: /stt/turns/websocket
  # Server -> client traffic: the connection acknowledgement, the turn
  # lifecycle events and error reports.
  # NOTE(review): `type: send` looks like it is written from the server's
  # point of view; this operation is the one aliased under `receiveOperations`
  # at the end of the document — confirm the perspective before changing it.
  # Every message states its fields twice: as a flattened `payload` property
  # list and as a JSON Schema under `jsonPayloadSchema`. Keep both in sync.
  - &ref_2
    id: receiveSTTTurnsEvents
    title: Receive STT turns events
    description: >-
      The server sends turn events as the model transcribes. Messages can be of
      type `connected`, `turn.start`, `turn.update`, `turn.eager_end`,
      `turn.resume`, `turn.end`, or `error`. All emitted text is final — the
      model does not revise previous output. The `transcript` field is
      cumulative within a turn.
    type: send
    messages:
      # `connected`: sent once when the WebSocket is established; carries the
      # connection's `request_id`.
      - &ref_5
        id: sttTurnsConnected
        payload:
          - name: Connected
            description: |
              Fires once when the WebSocket connection is established.

              You do not need to wait for this event before sending audio.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - connected
                required: true
              - name: request_id
                type: string
                description: Unique identifier for this connection.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - request_id
          properties:
            type:
              type: string
              enum:
                - connected
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-96>
            request_id:
              type: string
              description: Unique identifier for this connection.
              x-parser-schema-id: <anonymous-schema-97>
          x-parser-schema-id: STTTurnsConnected
        title: Connected
        description: |
          Fires once when the WebSocket connection is established.

          You do not need to wait for this event before sending audio.
        example: |-
          {
            "type": "connected",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsConnected
      # `turn.start`: marks the beginning of a user turn; has no `transcript`
      # field.
      - &ref_6
        id: sttTurnsTurnStart
        payload:
          - name: Turn Start
            description: >
              Marks the start of a user turn. Fires quickly after the user
              begins speaking.


              This event can be used to interrupt your agent to avoid talking
              over the user.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - turn.start
                required: true
              - name: request_id
                type: string
                description: >-
                  Unique identifier for this connection. Does not change between
                  turns.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - request_id
          properties:
            type:
              type: string
              enum:
                - turn.start
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-98>
            request_id:
              type: string
              description: >-
                Unique identifier for this connection. Does not change between
                turns.
              x-parser-schema-id: <anonymous-schema-99>
          x-parser-schema-id: STTTurnsTurnStart
        title: Turn Start
        description: >
          Marks the start of a user turn. Fires quickly after the user begins
          speaking.


          This event can be used to interrupt your agent to avoid talking over
          the user.
        example: |-
          {
            "type": "turn.start",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsTurnStart
      # `turn.update`: repeated during a turn; `transcript` is the full text so
      # far for the turn, not a delta.
      - &ref_7
        id: sttTurnsTurnUpdate
        payload:
          - name: Turn Update
            description: |
              Fires repeatedly as the model transcribes the current user turn.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - turn.update
                required: true
              - name: transcript
                type: string
                description: >-
                  Cumulative text for the current turn, i.e. the full text
                  transcribed so far in this turn, not a delta.
                required: true
              - name: request_id
                type: string
                description: >-
                  Unique identifier for this connection. Does not change between
                  turns.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - transcript
            - request_id
          properties:
            type:
              type: string
              enum:
                - turn.update
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-100>
            transcript:
              type: string
              description: >-
                Cumulative text for the current turn, i.e. the full text
                transcribed so far in this turn, not a delta.
              x-parser-schema-id: <anonymous-schema-101>
            request_id:
              type: string
              description: >-
                Unique identifier for this connection. Does not change between
                turns.
              x-parser-schema-id: <anonymous-schema-102>
          x-parser-schema-id: STTTurnsTurnUpdate
        title: Turn Update
        description: |
          Fires repeatedly as the model transcribes the current user turn.
        example: |-
          {
            "type": "turn.update",
            "transcript": "Hey can you help",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsTurnUpdate
      # `turn.eager_end` (titled PREVIEW): the model predicts the user might be
      # done speaking; may be followed by `turn.resume`.
      - &ref_8
        id: sttTurnsTurnEagerEnd
        payload:
          - name: Turn Eager End [PREVIEW]
            description: >
              Fires when the model predicts that the user might be done
              speaking.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - turn.eager_end
                required: true
              - name: transcript
                type: string
                description: Cumulative text for the current turn.
                required: true
              - name: request_id
                type: string
                description: >-
                  Unique identifier for this connection. Does not change between
                  turns.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - transcript
            - request_id
          properties:
            type:
              type: string
              enum:
                - turn.eager_end
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-103>
            transcript:
              type: string
              description: Cumulative text for the current turn.
              x-parser-schema-id: <anonymous-schema-104>
            request_id:
              type: string
              description: >-
                Unique identifier for this connection. Does not change between
                turns.
              x-parser-schema-id: <anonymous-schema-105>
          x-parser-schema-id: STTTurnsTurnEagerEnd
        title: Turn Eager End [PREVIEW]
        description: |
          Fires when the model predicts that the user might be done speaking.
        example: |-
          {
            "type": "turn.eager_end",
            "transcript": "Hey can you help me with something?",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsTurnEagerEnd
      # `turn.resume` (titled PREVIEW): follows `turn.eager_end` when the turn
      # has not actually ended; has no `transcript` field.
      - &ref_9
        id: sttTurnsTurnResume
        payload:
          - name: Turn Resume [PREVIEW]
            description: >
              Fires after `turn.eager_end` if the user turn has not actually
              ended.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - turn.resume
                required: true
              - name: request_id
                type: string
                description: >-
                  Unique identifier for this connection. Does not change between
                  turns.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - request_id
          properties:
            type:
              type: string
              enum:
                - turn.resume
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-106>
            request_id:
              type: string
              description: >-
                Unique identifier for this connection. Does not change between
                turns.
              x-parser-schema-id: <anonymous-schema-107>
          x-parser-schema-id: STTTurnsTurnResume
        title: Turn Resume [PREVIEW]
        description: |
          Fires after `turn.eager_end` if the user turn has not actually ended.
        example: |-
          {
            "type": "turn.resume",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsTurnResume
      # `turn.end`: marks the end of a user turn and carries the definitive
      # transcript for it.
      - &ref_10
        id: sttTurnsTurnEnd
        payload:
          - name: Turn End
            description: |
              Marks the end of a user turn.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - turn.end
                required: true
              - name: transcript
                type: string
                description: Definitive transcript for the completed turn.
                required: true
              - name: request_id
                type: string
                description: >-
                  Unique identifier for this connection. Does not change between
                  turns.
                required: true
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - transcript
            - request_id
          properties:
            type:
              type: string
              enum:
                - turn.end
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-108>
            transcript:
              type: string
              description: Definitive transcript for the completed turn.
              x-parser-schema-id: <anonymous-schema-109>
            request_id:
              type: string
              description: >-
                Unique identifier for this connection. Does not change between
                turns.
              x-parser-schema-id: <anonymous-schema-110>
          x-parser-schema-id: STTTurnsTurnEnd
        title: Turn End
        description: |
          Marks the end of a user turn.
        example: |-
          {
            "type": "turn.end",
            "transcript": "Hey can you help me with something?",
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttTurnsTurnEnd
      # `error`: `type`, `status_code`, `title` and `message` are required;
      # `error_code`, `doc_url` and `request_id` are optional.
      # NOTE(review): `status_code` is `type: number` in both forms, with
      # `format: integer` only in the JSON Schema — confirm whether
      # `type: integer` was intended.
      - &ref_11
        id: sttErrorResponse
        payload:
          - name: Error Response
            description: Error information for STT WebSocket connections.
            type: object
            properties:
              - name: type
                type: string
                description: Event type identifier.
                enumValues:
                  - error
                required: true
              - name: error_code
                type: string
                description: Machine-readable error code.
                required: false
              - name: status_code
                type: number
                description: An HTTP response status code.
                required: true
              - name: title
                type: string
                description: Human-readable error title.
                required: true
              - name: message
                type: string
                description: Human-readable error message.
                required: true
              - name: doc_url
                type: string
                description: URL to relevant documentation.
                required: false
              - name: request_id
                type: string
                description: Unique identifier for this WebSocket connection.
                required: false
        headers: []
        jsonPayloadSchema:
          type: object
          required:
            - type
            - status_code
            - title
            - message
          properties:
            type:
              type: string
              enum:
                - error
              description: Event type identifier.
              x-parser-schema-id: <anonymous-schema-83>
            error_code:
              type: string
              description: Machine-readable error code.
              x-parser-schema-id: <anonymous-schema-84>
            status_code:
              type: number
              format: integer
              description: An HTTP response status code.
              x-parser-schema-id: <anonymous-schema-85>
            title:
              type: string
              description: Human-readable error title.
              x-parser-schema-id: <anonymous-schema-86>
            message:
              type: string
              description: Human-readable error message.
              x-parser-schema-id: <anonymous-schema-87>
            doc_url:
              type: string
              description: URL to relevant documentation.
              x-parser-schema-id: <anonymous-schema-88>
            request_id:
              type: string
              description: Unique identifier for this WebSocket connection.
              x-parser-schema-id: <anonymous-schema-89>
          x-parser-schema-id: STTErrorResponse
        title: Error Response
        description: Error information for STT WebSocket connections.
        example: |-
          {
            "type": "error",
            "title": "Invalid model",
            "message": "The model is not valid, make sure it is a valid model ID.",
            "error_code": "model_not_found",
            "doc_url": "https://docs.cartesia.ai/build-with-cartesia/stt-models/latest",
            "status_code": 400,
            "request_id": "2ff8af53-4d38-479d-8287-58940f01c701"
          }
        bindings: []
        extensions:
          - id: x-parser-unique-object-id
            value: sttErrorResponse
    bindings: []
    # Alias of the extensions list anchored on the first operation.
    extensions: *ref_0
# Index lists over the operations and messages defined above, written as YAML
# aliases so each object is defined only once.
# NOTE(review): `sendOperations` aliases the operation declared with
# `type: receive`, and `receiveOperations` the one declared with `type: send`;
# presumably these lists are named from the client's perspective — confirm.
sendOperations:
  - *ref_1
receiveOperations:
  - *ref_2
# Messages the client sends: audio data (ref_3) and the close command (ref_4).
sendMessages:
  - *ref_3
  - *ref_4
# Messages the server sends: connected, turn.start, turn.update,
# turn.eager_end, turn.resume, turn.end and error (ref_5 through ref_11).
receiveMessages:
  - *ref_5
  - *ref_6
  - *ref_7
  - *ref_8
  - *ref_9
  - *ref_10
  - *ref_11
# Same unique-object-id entry that both operations carry via the ref_0 anchor.
extensions:
  - id: x-parser-unique-object-id
    value: /stt/turns/websocket
# Two ways to authenticate: an API key in a header, or a short-lived access
# token in the query string (for clients, such as browsers, whose WebSockets
# cannot set headers).
securitySchemes:
  # API key sent in the `X-API-Key` header.
  - id: apiKey
    name: X-API-Key
    type: httpApiKey
    description: API key passed in a header.
    in: header
    extensions: []
  # Access token sent as the `access_token` query parameter.
  - id: accessTokenQuery
    name: access_token
    type: httpApiKey
    description: >
      A short-lived access token passed in a query param to make API requests
      from a client.

      This is particularly useful in the browser, where WebSockets do not
      support headers.

      See [Authenticate client
      apps](/get-started/authenticate-your-client-applications) to generate an
      access token.
    in: query
    extensions: []

````