Loading...
Loading...
Convert text to speech using ElevenLabs voice AI. Use when generating audio from text, creating voiceovers, building voice apps, or synthesizing speech in 70+ languages.
npx skill4agent add elevenlabs/skills text-to-speech
Setup: See Installation Guide. For JavaScript, use `@elevenlabs/*` packages only.
from elevenlabs.client import ElevenLabs
client = ElevenLabs()
audio = client.text_to_speech.convert(
text="Hello, welcome to ElevenLabs!",
voice_id="JBFqnCBsd6RMkjVDRZzb", # George
model_id="eleven_multilingual_v2"
)
with open("output.mp3", "wb") as f:
    for chunk in audio:
        f.write(chunk)
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { createWriteStream } from "fs";
const client = new ElevenLabsClient();
const audio = await client.textToSpeech.convert("JBFqnCBsd6RMkjVDRZzb", {
text: "Hello, welcome to ElevenLabs!",
modelId: "eleven_multilingual_v2",
});
audio.pipe(createWriteStream("output.mp3"));
curl -X POST "https://api.elevenlabs.io/v1/text-to-speech/JBFqnCBsd6RMkjVDRZzb" \
-H "xi-api-key: $ELEVENLABS_API_KEY" -H "Content-Type: application/json" \
-d '{"text": "Hello!", "model_id": "eleven_multilingual_v2"}' --output output.mp3
| Model ID | Languages | Latency | Best For |
|---|---|---|---|
| `eleven_v3` | 74 | Standard | Highest quality, emotional range |
| `eleven_multilingual_v2` | 29 | Standard | High quality, most use cases |
| `eleven_flash_v2_5` | 32 | ~75ms | Ultra-low latency, real-time |
| `eleven_flash_v2` | English | ~75ms | English-only, fastest |
| `eleven_turbo_v2_5` | 32 | Low | Balanced quality/speed |
- `JBFqnCBsd6RMkjVDRZzb`
- `EXAVITQu4vr4xnSDxMaL`
- `onwK4e9ZLuTAKqWW03F9`
- `XB0fDUnXU5powFXDhCwa`
voices = client.voices.get_all()
for voice in voices.voices:
    print(f"{voice.voice_id}: {voice.name}")
from elevenlabs import VoiceSettings
audio = client.text_to_speech.convert(
text="Customize my voice settings.",
voice_id="JBFqnCBsd6RMkjVDRZzb",
voice_settings=VoiceSettings(
stability=0.5,
similarity_boost=0.75,
style=0.5,
use_speaker_boost=True
)
)
audio = client.text_to_speech.convert(
text="Bonjour, comment allez-vous?",
voice_id="JBFqnCBsd6RMkjVDRZzb",
model_id="eleven_multilingual_v2",
language_code="fr" # ISO 639-1 code
)
apply_text_normalization options: `"auto"`, `"on"`, `"off"`
audio = client.text_to_speech.convert(
text="Call 1-800-555-0123 on 01/15/2026",
voice_id="JBFqnCBsd6RMkjVDRZzb",
apply_text_normalization="on"
)
# First request
audio1 = client.text_to_speech.convert(
text="This is the first part.",
voice_id="JBFqnCBsd6RMkjVDRZzb",
next_text="And this continues the story."
)
# Second request using previous context
audio2 = client.text_to_speech.convert(
text="And this continues the story.",
voice_id="JBFqnCBsd6RMkjVDRZzb",
previous_text="This is the first part."
)
| Format | Description |
|---|---|
| `mp3_44100_128` | MP3 44.1kHz 128kbps (default) - compressed, good for web/apps |
| `mp3_44100_192` | MP3 44.1kHz 192kbps (Creator+) - higher quality compressed |
| `pcm_16000` | Raw uncompressed audio at 16kHz - use for real-time processing |
| `pcm_22050` | Raw uncompressed audio at 22.05kHz |
| `pcm_24000` | Raw uncompressed audio at 24kHz - good balance for streaming |
| `pcm_44100` | Raw uncompressed audio at 44.1kHz (Pro+) - CD quality |
| `ulaw_8000` | μ-law compressed 8kHz - standard for phone systems (Twilio, telephony) |
audio_stream = client.text_to_speech.convert(
text="This text will be streamed as audio.",
voice_id="JBFqnCBsd6RMkjVDRZzb",
model_id="eleven_flash_v2_5" # Ultra-low latency
)
for chunk in audio_stream:
    play_audio(chunk)
try:
    audio = client.text_to_speech.convert(
        text="Generate speech",
        voice_id="invalid-voice-id"
    )
except Exception as e:
    print(f"API error: {e}")
Response headers: `x-character-count`, `request-id`
response = client.text_to_speech.convert.with_raw_response(
text="Hello!", voice_id="JBFqnCBsd6RMkjVDRZzb", model_id="eleven_multilingual_v2"
)
audio = response.parse()
print(f"Characters used: {response.headers.get('x-character-count')}")