Gemini 3 Pro API/SDK integration for text generation, reasoning, and chat. Covers setup, authentication, thinking levels, streaming, and production deployment. Use when working with the Gemini 3 Pro API, the Python or Node.js SDKs, text generation, chat applications, or advanced reasoning tasks.
Install the skill:

```bash
npx skill4agent add adaptationio/skrillz gemini-3-pro-api
```

Model: `gemini-3-pro-preview`

Python quick start:

```python
# Install SDK
pip install google-generativeai

# Basic usage
import google.generativeai as genai
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-3-pro-preview")
response = model.generate_content("Explain quantum computing")
print(response.text)
```

Node.js quick start:

```javascript
// Install SDK
npm install @google/generative-ai
// Basic usage
import { GoogleGenerativeAI } from "@google/generative-ai";
const genAI = new GoogleGenerativeAI("YOUR_API_KEY");
const model = genAI.getGenerativeModel({ model: "gemini-3-pro-preview" });
const result = await model.generateContent("Explain quantum computing");
console.log(result.response.text());
```

Installation:

```bash
# Python
pip install google-generativeai
# Node.js
npm install @google/generative-ai
```

Authentication:

```python
# Python - using environment variable (recommended)
import os
import google.generativeai as genai
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
```

```javascript
// Node.js - using environment variable (recommended)
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
```

Basic text generation:

```python
# Python
model = genai.GenerativeModel("gemini-3-pro-preview")
response = model.generate_content("Write a haiku about coding")
print(response.text)
```

Generation with a thinking level configured:

```python
# Python
model = genai.GenerativeModel(
    "gemini-3-pro-preview",
    generation_config={
        "thinking_level": "high",  # Dynamic reasoning
        "temperature": 1.0,        # Keep at 1.0 for best results
        "max_output_tokens": 8192
    }
)
```

Multi-turn chat with streaming:

```python
chat = model.start_chat(history=[])

response = chat.send_message(
    "Explain how neural networks learn",
    stream=True
)
# Stream tokens in real-time
for chunk in response:
    print(chunk.text, end="", flush=True)
```

Chat history:

```python
# History is automatically maintained
# Access it anytime
print(f"Conversation turns: {len(chat.history)}")
# Continue conversation
response = chat.send_message("Can you give an example?")references/thought-signatures.mdimport time
To handle transient failures, wrap message sends in a retry:

```python
from google.api_core import retry, exceptions

@retry.Retry(predicate=retry.if_exception_type(
    exceptions.ResourceExhausted,
    exceptions.ServiceUnavailable
))
def send_with_retry(chat, message):
    return chat.send_message(message)

try:
    response = send_with_retry(chat, user_input)
except exceptions.GoogleAPIError as e:
    print(f"API error: {e}")
```

API key security:

```python
# Use environment variables (never hardcode keys)
import os

# Option 1: Environment variable
api_key = os.getenv("GEMINI_API_KEY")
# Option 2: Secrets manager (recommended for production)
# Use Google Secret Manager, AWS Secrets Manager, etc.
```
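As a sketch of Option 2, assuming Google Secret Manager and the `google-cloud-secret-manager` package; the project and secret IDs below are placeholders, not values from this skill:

```python
from google.cloud import secretmanager  # pip install google-cloud-secret-manager

def load_gemini_key(project_id: str, secret_id: str = "gemini-api-key") -> str:
    # Read the latest version of the secret and decode its payload
    client = secretmanager.SecretManagerServiceClient()
    name = f"projects/{project_id}/secrets/{secret_id}/versions/latest"
    response = client.access_secret_version(request={"name": name})
    return response.payload.data.decode("UTF-8")

genai.configure(api_key=load_gemini_key("your-project-id"))  # placeholder project ID
```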
"gemini-3-pro-preview",
generation_config={
"thinking_level": "high", # or "low" for simple tasks
"temperature": 1.0, # CRITICAL: Keep at 1.0
"max_output_tokens": 4096,
"top_p": 0.95,
"top_k": 40
},
safety_settings={
# Configure content filtering as needed
}
)from google.api_core import exceptions, retry
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def generate_with_fallback(prompt):
    @retry.Retry(
        predicate=retry.if_exception_type(
            exceptions.ResourceExhausted,
            exceptions.ServiceUnavailable,
            exceptions.DeadlineExceeded
        ),
        initial=1.0,
        maximum=10.0,
        multiplier=2.0,
        deadline=60.0
    )
    def _generate():
        return model.generate_content(prompt)

    try:
        return _generate()
    except exceptions.InvalidArgument as e:
        logger.error(f"Invalid argument: {e}")
        raise
    except exceptions.PermissionDenied as e:
        logger.error(f"Permission denied: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        # Fallback to simpler model or cached response
        return None
```

Token and cost tracking:

```python
def log_usage(response):
    usage = response.usage_metadata
    logger.info(f"Tokens - Input: {usage.prompt_token_count}, "
                f"Output: {usage.candidates_token_count}, "
                f"Total: {usage.total_token_count}")

    # Estimate cost (for prompts ≤ 200k tokens)
    input_cost = (usage.prompt_token_count / 1_000_000) * 2.00
    output_cost = (usage.candidates_token_count / 1_000_000) * 12.00
    total_cost = input_cost + output_cost
    logger.info(f"Estimated cost: ${total_cost:.6f}")

response = model.generate_content(prompt)
log_usage(response)
```

Client-side rate limiting:

```python
import time
from collections import deque
class RateLimiter:
    def __init__(self, max_requests_per_minute=60):
        self.max_rpm = max_requests_per_minute
        self.requests = deque()

    def wait_if_needed(self):
        now = time.time()
        # Remove requests older than 1 minute
        while self.requests and self.requests[0] < now - 60:
            self.requests.popleft()
        # Check if at limit
        if len(self.requests) >= self.max_rpm:
            sleep_time = 60 - (now - self.requests[0])
            if sleep_time > 0:
                time.sleep(sleep_time)
        self.requests.append(now)

limiter = RateLimiter(max_requests_per_minute=60)

def generate_with_rate_limit(prompt):
    limiter.wait_if_needed()
    return model.generate_content(prompt)
```

Logging and monitoring:

```python
import logging
from datetime import datetime
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('gemini_api.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

def monitored_generate(prompt):
    start_time = datetime.now()
    try:
        response = model.generate_content(prompt)
        duration = (datetime.now() - start_time).total_seconds()
        logger.info(f"Success - Duration: {duration}s, "
                    f"Tokens: {response.usage_metadata.total_token_count}")
        return response
    except Exception as e:
        duration = (datetime.now() - start_time).total_seconds()
        logger.error(f"Failed - Duration: {duration}s, Error: {e}")
        raise
```

The `thinking_level` parameter controls reasoning depth: use `thinking_level: "high"` for complex reasoning and `thinking_level: "low"` for simple, latency-sensitive tasks.

```python
# Python
model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={
"thinking_level": "high" # or "low"
}
)// Node.js
const model = genAI.getGenerativeModel({
model: "gemini-3-pro-preview",
generationConfig: {
thinking_level: "high" // or "low"
}
});thinking_levelthinking_budgetreferences/thinking-levels.mdresponse = model.generate_content(
"Write a long article about AI",
stream=True
)
for chunk in response:
print(chunk.text, end="", flush=True)const result = await model.generateContentStream("Write a long article about AI");
for await (const chunk of result.stream) {
    process.stdout.write(chunk.text());
}
```

See `references/streaming.md` for more streaming patterns.

Pricing for `gemini-3-pro-preview`:

| Context Size | Input | Output |
|---|---|---|
| ≤ 200k tokens | $2/1M | $12/1M |
| > 200k tokens | $4/1M | $18/1M |
thinking_level: "low"gemini-3-advancedreferences/best-practices.md| Model | Context | Output | Input Price | Best For |
|---|---|---|---|---|
| gemini-3-pro-preview | 1M | 64k | $2-4/1M | Complex reasoning, coding |
| gemini-1.5-pro | 1M | 8k | $7-14/1M | General use, multimodal |
| gemini-1.5-flash | 1M | 8k | $0.35-0.70/1M | Simple tasks, cost-sensitive |
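One way to apply the comparison is a small routing helper; this sketch is illustrative and uses only the model names listed above:

```python
# Route each task to the cheapest model that fits it (names from the table)
def pick_model(task: str) -> str:
    if task in ("complex_reasoning", "coding"):
        return "gemini-3-pro-preview"
    if task == "multimodal":
        return "gemini-1.5-pro"
    return "gemini-1.5-flash"  # simple, cost-sensitive tasks

model = genai.GenerativeModel(pick_model("coding"))
```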
Common errors:

| Error | Cause | Solution |
|---|---|---|
| `ResourceExhausted` (429) | Rate limit exceeded | Implement retry with backoff |
| `InvalidArgument` (400) | Invalid parameters | Validate input, check docs |
| `PermissionDenied` (403) | Invalid API key | Check authentication |
| `DeadlineExceeded` (504) | Request timeout | Reduce context, retry |
Retry with exponential backoff for these errors:

```python
from google.api_core import exceptions, retry

@retry.Retry(
    predicate=retry.if_exception_type(
        exceptions.ResourceExhausted,
        exceptions.ServiceUnavailable
    ),
    initial=1.0,
    maximum=60.0,
    multiplier=2.0
)
def safe_generate(prompt):
    try:
        return model.generate_content(prompt)
    except exceptions.InvalidArgument as e:
        logger.error(f"Invalid argument: {e}")
        raise
    except exceptions.PermissionDenied as e:
        logger.error(f"Permission denied - check API key: {e}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise
```

See `references/error-handling.md` for more patterns.

Related skills: `gemini-3-multimodal`, `gemini-3-image-generation`, `gemini-3-advanced`.