> ## Documentation Index
> Fetch the complete documentation index at: https://docs.cartesia.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Infill (Bytes)

> Generate audio that smoothly connects two existing audio segments


## OpenAPI

````yaml latest.yml POST /infill/bytes
# OpenAPI 3.0.1 description of the Cartesia API. The `paths` section of this
# document defines one operation: POST /infill/bytes.
openapi: 3.0.1
info:
  title: Cartesia API
  version: 0.0.1
servers:
  - url: https://api.cartesia.ai
    description: Production
# Empty list: no document-wide security requirement is declared here. The
# operation under `paths` carries its own `security` entry (APIKeyAuth).
security: []
paths:
  # POST /infill/bytes: accepts a multipart/form-data body (audio clips plus
  # generation settings) and answers 200 with binary audio.
  /infill/bytes:
    post:
      tags:
        - Infill
      summary: Infill (Bytes)
      description: Generate audio that smoothly connects two existing audio segments
      operationId: infill_bytes
      parameters:
        # Shared header definition; it is marked `required: true` in components.
        - $ref: '#/components/parameters/CartesiaVersionHeader'
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # NOTE(review): no `required` list is declared, so the schema
              # itself marks every field optional. The descriptions below state
              # conditional requirements in prose only; confirm which fields the
              # server actually enforces.
              properties:
                left_audio:
                  type: string
                  format: binary
                  description: >-
                    Audio clip that comes before the infill transcript:

                    `left_audio` -> `transcript` -> `right_audio`


                    For best results, target natural pauses in the audio and
                    clip tightly.

                    At least one of `left_audio` or `right_audio` must be
                    provided.


                    Supported audio formats: `flac`, `mp3`, `mpeg`, `mpga`,
                    `oga`, `ogg`, `wav`, `webm`
                right_audio:
                  type: string
                  format: binary
                  description: >-
                    Audio clip that comes after the infill transcript:

                    `left_audio` -> `transcript` -> `right_audio`


                    For best results, target natural pauses in the audio and
                    clip tightly.

                    At least one of `left_audio` or `right_audio` must be
                    provided.


                    Supported audio formats: `flac`, `mp3`, `mpeg`, `mpga`,
                    `oga`, `ogg`, `wav`, `webm`
                model_id:
                  description: The ID of the model to use for generating audio
                  type: string
                  enum:
                    - sonic-3
                    - sonic-3-2026-01-12
                    - sonic-3-2025-10-27
                # OpenAPI 3.0 ignores any key placed beside `$ref`, so the
                # reference is wrapped in `allOf` to let this field-specific
                # description take effect.
                language:
                  description: The language of the transcript
                  allOf:
                    - $ref: '#/components/schemas/SupportedLanguage'
                transcript:
                  description: >-
                    The infill text to generate.

                    For best results, use longer transcripts to give the model
                    more flexibility to adapt to the rest of the audio.
                  type: string
                voice_id:
                  description: The ID of the voice to use for generating audio
                  type: string
                # The `output_format[...]` keys are flat form-field names, one
                # per output setting.
                # `$ref` wrapped in `allOf` so the sibling description is kept.
                output_format[container]:
                  description: The format of the output audio
                  allOf:
                    - $ref: '#/components/schemas/OutputFormatContainer'
                output_format[sample_rate]:
                  description: The sample rate of the output audio
                  type: integer
                  enum:
                    - 8000
                    - 16000
                    - 22050
                    - 24000
                    - 44100
                    - 48000
                # `$ref` wrapped in `allOf` so `description` and `nullable` are
                # no longer discarded as `$ref` siblings.
                # NOTE(review): strict 3.0.3 validators honour `nullable` only
                # next to an explicit `type`; confirm with your tooling.
                output_format[encoding]:
                  description: Required for `raw` and `wav` containers.
                  nullable: true
                  allOf:
                    - $ref: '#/components/schemas/RawEncoding'
                output_format[bit_rate]:
                  description: Required for `mp3` containers.
                  type: integer
                  nullable: true
      responses:
        '200':
          description: Audio bytes
          content:
            # Wildcard media type; the body is declared as a binary string.
            audio/*:
              schema:
                type: string
                format: binary
      # Operation-level requirement; the document-level `security` list is empty.
      security:
        - APIKeyAuth: []
components:
  parameters:
    # Required `Cartesia-Version` request header, referenced by the
    # /infill/bytes operation. The enum permits exactly one value.
    CartesiaVersionHeader:
      name: Cartesia-Version
      in: header
      description: API version header.
      required: true
      schema:
        type: string
        format: date
        # Quoted so YAML keeps the value a string instead of parsing a date.
        example: '2026-03-01'
        enum:
          - '2026-03-01'
  schemas:
    # String enum of two-letter language codes; referenced by the `language`
    # form field of POST /infill/bytes.
    SupportedLanguage:
      title: SupportedLanguage
      type: string
      enum:
        - en
        - fr
        - de
        - es
        - pt
        - zh
        - ja
        - hi
        - it
        - ko
        - nl
        - pl
        - ru
        - sv
        - tr
        - tl
        - bg
        - ro
        - ar
        - cs
        - el
        - fi
        - hr
        - ms
        - sk
        - da
        - ta
        - uk
        - hu
        # Must stay quoted: a bare `no` is read as boolean false by YAML 1.1
        # parsers.
        - 'no'
        - vi
        - bn
        - th
        - he
        - ka
        - id
        - te
        - gu
        - kn
        - ml
        - mr
        - pa
      description: >-
        The language that the given voice should speak the transcript in. This
        may depend on the model you're using. See
        [Models](/build-with-cartesia/tts-models/latest) for details.
    # Allowed values for the `output_format[container]` form field. Per the
    # field descriptions in the request body, `raw` and `wav` also need
    # `output_format[encoding]`, and `mp3` needs `output_format[bit_rate]`.
    OutputFormatContainer:
      title: OutputFormatContainer
      type: string
      enum:
        - raw
        - wav
        - mp3
    # Allowed values for the `output_format[encoding]` form field, which the
    # request body describes as required for `raw` and `wav` containers.
    RawEncoding:
      title: RawEncoding
      type: string
      description: >-
        The encoding format for output audio. See [TTS Output Audio
        Format](/build-with-cartesia/capability-guides/tts-output-audio-format)
        if you're unsure what to use.
      enum:
        - pcm_f32le
        - pcm_s16le
        - pcm_mulaw
        - pcm_alaw
  securitySchemes:
    # HTTP bearer authentication scheme; the /infill/bytes operation names it
    # in its `security` list.
    APIKeyAuth:
      type: http
      scheme: bearer
      # `bearerFormat` is a documentation hint about the token's shape only.
      bearerFormat: API Key
      description: >-
        Cartesia API key (`sk_car_...`). Get one at
        [play.cartesia.ai/keys](https://play.cartesia.ai/keys).

````