Loading...
Loading...
Generate and transcribe speech using Google's Gemini-TTS and Chirp 3 models. Supports Text-to-Speech (Single/Multi-speaker), Instant Custom Voice, and Speech-to-Text (Transcription/Diarization).
npx skill4agent add cnemri/google-genai-skills speech-build
google-genai
google-cloud-speech
from google import genai
from google.genai import types
# For STT: from google.cloud import speech_v2
client = genai.Client()
Aoede
Puck
response = client.models.generate_content(
model="gemini-2.5-flash-preview-tts",
contents="Hello, world!",
config=types.GenerateContentConfig(
response_modalities=["AUDIO"],
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name='Kore')
)
)
)
)
# Requires google-cloud-speech
from google.cloud import speech_v2
# ... (See stt.md for full setup)
response = speech_client.recognize(...)