Loading...
Loading...
Transcribe speech to text using OpenRouter's speech-to-text API. Use when the user asks to transcribe audio, convert speech to text, extract a transcript from a recording or meeting, caption a video's audio, or mentions STT, speech-to-text, ASR, or transcription.
npx skill4agent add openrouterteam/skills openrouter-stt
`POST /api/v1/audio/transcriptions` · `curl` · `OPENROUTER_API_KEY` · `input_audio: { data, format }` · `multipart/form-data` · `file` · `/v1/audio/transcriptions` · `curl` · `fetch` · `requests` · `text` · `usage` · `cost` · `seconds` · `total_tokens` · `input_tokens` · `output_tokens` · `google/chirp-3`
{
"text": "I used to rule the world.",
"usage": {
"seconds": 20,
"cost": 0.005333
}
}{
"text": "Hello, this is a test of speech-to-text transcription.",
"usage": {
"total_tokens": 113,
"input_tokens": 83,
"output_tokens": 30,
"cost": 0.000508
}
}#!/usr/bin/env bash
set -euo pipefail

# Transcribe a local audio file via OpenRouter's speech-to-text endpoint and
# print the transcript text on stdout. Requires curl, jq, base64, and the
# OPENROUTER_API_KEY environment variable (unset -> abort, because of `set -u`).
MODEL="google/chirp-3"
FORMAT="wav" # wav, mp3, flac, m4a, ogg, webm, aac
AUDIO="audio.wav"

# Scratch files: BODY receives the HTTP response, PAYLOAD holds the JSON request.
BODY=$(mktemp)
PAYLOAD=$(mktemp)
# Clean up on every exit path. Under `set -e` a failing base64/jq/curl aborts
# the script immediately, so an explicit `rm` further down would never run and
# the (potentially large) base64 payload would be left behind in the temp dir.
trap 'rm -f "$BODY" "$PAYLOAD"' EXIT

# Strip newlines so the base64 text is a single JSON string value.
audio_b64=$(base64 < "$AUDIO" | tr -d '\n')
jq -n --arg model "$MODEL" --arg data "$audio_b64" --arg fmt "$FORMAT" \
  '{model: $model, input_audio: {data: $data, format: $fmt}}' > "$PAYLOAD"

# --data-binary @file keeps the base64 payload off argv (avoids E2BIG / ARG_MAX).
# The response body goes to $BODY; only the status code is captured on stdout.
http_code=$(curl -sS -X POST https://openrouter.ai/api/v1/audio/transcriptions \
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
  -H "Content-Type: application/json" \
  --output "$BODY" \
  -w '%{http_code}' \
  --data-binary @"$PAYLOAD")

if [[ "$http_code" != "200" ]]; then
  # Surface the server's error payload on stderr; the EXIT trap removes the temp files.
  echo "STT failed (HTTP $http_code):" >&2
  cat "$BODY" >&2
  exit 1
fi

jq -r '.text' "$BODY"
rm -f "$BODY" "$PAYLOAD"curl -sS "https://openrouter.ai/api/v1/models?output_modalities=transcription" \
| jq '.data[] | {id, name, pricing}'google/chirp-3openai/whisper-1openai/whisper-large-v3| Field | Required | Notes |
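|---|---|---|
| `model` | yes | Full model slug from `/api/v1/models?output_modalities=transcription` (e.g. `google/chirp-3`). |
| `input_audio.data` | yes | Base64-encoded raw audio bytes. Not a data URI — just the base64 payload, no `data:audio/wav;base64,` prefix. |
| `input_audio.format` | yes | Format of the encoded audio, e.g. `wav`, `mp3`, `flac`, `m4a`, `ogg`, `webm`, `aac`. |
| `language` | no | ISO-639-1 code (e.g. `en`). |
| `temperature` | no | 0–1. Lower is more deterministic. |
| `provider` | no | Provider passthrough — see below. |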
wavflacmp3m4aaacwebmoggMediaRecorderformat.wavffprobe <file>provider.options.<slug>prompt{
"model": "openai/whisper-large-v3",
"input_audio": { "data": "UklGRiQA...", "format": "wav" },
"provider": {
"options": {
"groq": {
"prompt": "Expected vocabulary: OpenRouter, API, transcription"
}
}
}
}import fs from "fs";
// Load the audio file and build the JSON request: the raw bytes travel as a
// bare base64 string under input_audio.data, alongside the format label.
const audioBytes = await fs.promises.readFile("audio.wav");
const requestBody = {
  model: "google/chirp-3",
  input_audio: { data: audioBytes.toString("base64"), format: "wav" },
};

const response = await fetch("https://openrouter.ai/api/v1/audio/transcriptions", {
  method: "POST",
  headers: {
    Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`,
    "Content-Type": "application/json",
  },
  body: JSON.stringify(requestBody),
});

// Any non-2xx status is fatal; include the response body in the error for context.
if (!response.ok) {
  const errorText = await response.text();
  throw new Error(`STT failed (HTTP ${response.status}): ${errorText}`);
}

// Parsed response object; the transcript is read from its `text` field.
const result = await response.json();
console.log(result.text);import base64
import os
import requests
# Read the audio file and base64-encode its raw bytes; the API expects the bare
# base64 string under input_audio.data.
with open("audio.wav", "rb") as f:
    data = base64.b64encode(f.read()).decode("utf-8")

res = requests.post(
    "https://openrouter.ai/api/v1/audio/transcriptions",
    headers={
        "Authorization": f"Bearer {os.environ['OPENROUTER_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "google/chirp-3",
        "input_audio": {"data": data, "format": "wav"},
    },
    # requests applies no timeout by default, so a stalled connection would
    # block forever. (connect, read) in seconds; the read limit is generous
    # because transcription may take a while — tune for your audio lengths.
    timeout=(10, 300),
)

# Fail loudly on any non-2xx status, including the response body for context.
if not res.ok:
    raise RuntimeError(f"STT failed (HTTP {res.status_code}): {res.text}")
print(res.json()["text"])textformatffprobe audio.wav"Invalid base64"datadata:audio/wav;base64,...FileReaderZodError{"success":false,"error":{"name":"ZodError","message":"[...]"}}messageinput_audio.datainput_audio.format/api/v1/models?output_modalities=transcriptiongoogle/chirp-3chirp-3