Loading...
Loading...
Transform the voice in an audio recording into a different target voice while preserving emotion, timing, and delivery using the ElevenLabs Voice Changer (speech-to-speech) API. Use when converting one voice to another, changing the speaker/narrator of an existing recording, dubbing a voice-over in a different voice, creating character voices from a scratch performance, anonymizing a speaker, or any "voice conversion / voice transfer / speech-to-speech" task. Make sure to use this skill whenever the user mentions voice changing, voice conversion, speech-to-speech, swapping a voice in audio, re-voicing a clip, or applying a different voice to an existing recording — even if they don't explicitly say "voice changer".
npx skill4agent add elevenlabs/skills voice-changer

SDK namespaces: `speech_to_speech` (Python), `speechToSpeech` (JavaScript).

Setup: See Installation Guide. For JavaScript, use `@elevenlabs/*` packages only.
Models: `eleven_multilingual_sts_v2`, `eleven_english_sts_v2`

from elevenlabs import ElevenLabs

client = ElevenLabs()

with open("source.mp3", "rb") as audio_file:
    audio_stream = client.speech_to_speech.convert(
        voice_id="JBFqnCBsd6RMkjVDRZzb",  # George
        audio=audio_file,
        model_id="eleven_multilingual_sts_v2",
        output_format="mp3_44100_128",
    )

    # Keep the source file open while the converted chunks are written out.
    with open("converted.mp3", "wb") as f:
        for chunk in audio_stream:
            f.write(chunk)

import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import { createReadStream, createWriteStream } from "fs";
const client = new ElevenLabsClient();
const audioStream = await client.speechToSpeech.convert("JBFqnCBsd6RMkjVDRZzb", {
  audio: createReadStream("source.mp3"),
  modelId: "eleven_multilingual_sts_v2",
  outputFormat: "mp3_44100_128",
});
audioStream.pipe(createWriteStream("converted.mp3"));

curl -X POST "https://api.elevenlabs.io/v1/speech-to-speech/JBFqnCBsd6RMkjVDRZzb?output_format=mp3_44100_128" \
-H "xi-api-key: $ELEVENLABS_API_KEY" \
-F "audio=@source.mp3" \
-F "model_id=eleven_multilingual_sts_v2" \
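--output converted.mp3

| Parameter | Type | Default | Description |
|---|---|---|---|
| `voice_id` | string (required) | — | Target voice to speak in. Use a pre-made voice ID, a cloned voice, or a voice from the library |
| `audio` | file (required) | — | Source audio whose performance (emotion, timing, delivery) will be preserved |
| `model_id` | string | `eleven_english_sts_v2` | `eleven_multilingual_sts_v2` or `eleven_english_sts_v2` |
| `output_format` | string | `mp3_44100_128` | See output formats table below |
| `voice_settings` | JSON string | — | Override stored voice settings for this request only |
| `seed` | integer | — | Best-effort deterministic sampling (0 – 4294967295) |
| `remove_background_noise` | boolean | `false` | Run the isolation model on the input before conversion |
| `file_format` | string | `other` | `other` or `pcm_s16le_16` |
| `optimize_streaming_latency` | int (query) | — | 0–4. Trade quality for latency. |
| `enable_logging` | boolean (query) | `true` | Set to `false` for zero-retention mode |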
| Model ID | Languages | Best For |
|---|---|---|
| `eleven_multilingual_sts_v2` | 29 | Recommended for everything — often outperforms the English model even on English audio |
| `eleven_english_sts_v2` | English | API default — English-only fallback |
`can_do_voice_conversion` · `pcm_s16le_16` · `opus_*` · `mp3_*` · `optimize_streaming_latency` · `eleven_multilingual_sts_v2`

Voice IDs: `JBFqnCBsd6RMkjVDRZzb`, `EXAVITQu4vr4xnSDxMaL`, `onwK4e9ZLuTAKqWW03F9`, `XB0fDUnXU5powFXDhCwa`

voices = client.voices.get_all()
for voice in voices.voices:
    print(f"{voice.voice_id}: {voice.name}")

import requests
from io import BytesIO
from elevenlabs import ElevenLabs
client = ElevenLabs()
audio_url = "https://storage.googleapis.com/eleven-public-cdn/audio/marketing/nicole.mp3"
response = requests.get(audio_url)
audio_data = BytesIO(response.content)
audio_stream = client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=audio_data,
    model_id="eleven_multilingual_sts_v2",
    output_format="mp3_44100_128",
)

with open("converted.mp3", "wb") as f:
    for chunk in audio_stream:
        f.write(chunk)

from elevenlabs import VoiceSettings
audio_stream = client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=audio_file,
    model_id="eleven_multilingual_sts_v2",
    voice_settings=VoiceSettings(
        stability=0.5,
        similarity_boost=0.75,
        style=0.0,
        use_speaker_boost=True,
    ),
)

`remove_background_noise=True`

audio_stream = client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=audio_file,
    model_id="eleven_multilingual_sts_v2",
    remove_background_noise=True,
)

`file_format="pcm_s16le_16"`

audio_stream = client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=pcm_bytes,
    model_id="eleven_multilingual_sts_v2",
    file_format="pcm_s16le_16",
)

`optimize_streaming_latency`

| Format | Description |
|---|---|
| `mp3_44100_128` | MP3 44.1kHz 128kbps (default) — good for web/apps |
| `mp3_44100_192` | MP3 44.1kHz 192kbps (Creator+) — higher quality |
| `mp3_44100_64` | MP3 44.1kHz 64kbps — smaller files |
| `mp3_22050_32` | MP3 22.05kHz 32kbps — smallest MP3 |
| `pcm_16000` | Raw PCM 16kHz — real-time pipelines |
| `pcm_24000` | Raw PCM 24kHz — good streaming balance |
| `pcm_44100` | Raw PCM 44.1kHz (Pro+) — CD quality |
| `pcm_48000` | Raw PCM 48kHz (Pro+) — highest quality |
| `ulaw_8000` | μ-law 8kHz — Twilio / telephony |
| `alaw_8000` | A-law 8kHz — telephony |
| `opus_48000_64` | Opus 48kHz 64kbps — efficient streaming |
`seed`

audio_stream = client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=audio_file,
    model_id="eleven_multilingual_sts_v2",
    seed=12345,
)

`remove_background_noise=True` · `eleven_multilingual_sts_v2` · `remove_background_noise=True` · `voice_id`

try:
    audio_stream = client.speech_to_speech.convert(
        voice_id="JBFqnCBsd6RMkjVDRZzb",
        audio=audio_file,
        model_id="eleven_multilingual_sts_v2",
    )
except Exception as e:
    print(f"Voice changer failed: {e}")

`voice_id` · `model_id` · `file_format`