# Portkey AI Gateway — Python SDK Reference
Complete reference for the Portkey AI Gateway Python SDK with unified API access to 200+ LLMs, automatic fallbacks, caching, and full observability. Use when building Python applications that need LLM integration with production-grade reliability.
npx skill4agent add portkey-ai/skills portkey-python-sdk

pip install portkey-ai
# Or with poetry/uv
poetry add portkey-ai
uv add portkey-ai

import os
from portkey_ai import Portkey

# Create a gateway client: api_key authenticates against Portkey,
# virtual_key selects the stored provider credential to route to.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    virtual_key="your-openai-virtual-key"
)

# Minimal chat completion through the gateway (OpenAI-compatible shape).
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
import os
from portkey_ai import Portkey

# Option 1: route via a single provider virtual key.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],  # From app.portkey.ai
    virtual_key="openai-virtual-key-xxx"  # From app.portkey.ai/virtual-keys
)

# Option 2: reference a saved gateway config by its dashboard ID.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config="pc-config-xxx"  # Config ID from dashboard
)
# Chat completion with a system prompt; response mirrors the OpenAI schema,
# so token accounting is available on response.usage.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain quantum computing briefly."}
    ]
)
print(response.choices[0].message.content)
print(f"Tokens used: {response.usage.total_tokens}")
# Stream tokens as they are generated; chunks follow the OpenAI delta format.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Write a short story"}],
    stream=True
)
for chunk in stream:
    # delta.content may be None on role/finish chunks — guard before printing.
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
import asyncio
import os

from portkey_ai import AsyncPortkey


async def main():
    """Issue a single chat completion using the async client."""
    client = AsyncPortkey(
        api_key=os.environ["PORTKEY_API_KEY"],
        virtual_key="openai-key"
    )
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)

asyncio.run(main())
async def stream_response():
    """Stream a completion with the async client, printing deltas as they arrive."""
    client = AsyncPortkey(
        api_key=os.environ["PORTKEY_API_KEY"],
        virtual_key="openai-key"
    )
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a poem"}],
        stream=True
    )
    async for chunk in stream:
        # Skip chunks that carry no content delta (role/finish markers).
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
# Fallback routing: targets are tried in order until one succeeds.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config={
        "strategy": {"mode": "fallback"},
        "targets": [
            {
                "virtual_key": "openai-key",
                "override_params": {"model": "gpt-4o"}
            },
            {
                "virtual_key": "anthropic-key",
                "override_params": {"model": "claude-3-5-sonnet-20241022"}
            }
        ]
    }
)

# If OpenAI fails, automatically tries Anthropic.
# No model is passed here: each target supplies its own via override_params.
response = client.chat.completions.create(
    messages=[{"role": "user", "content": "Hello!"}]
)
# Load balancing: traffic is split across targets by weight (70/30 here).
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config={
        "strategy": {"mode": "loadbalance"},
        "targets": [
            {"virtual_key": "openai-key-1", "weight": 0.7},
            {"virtual_key": "openai-key-2", "weight": 0.3}
        ]
    }
)
# Automatic retries on transient failures (rate limits and 5xx errors).
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config={
        "retry": {
            "attempts": 3,
            "on_status_codes": [429, 500, 502, 503, 504]
        },
        "virtual_key": "openai-key"
    }
)
# Gateway-side caching: "semantic" matches similar prompts, "simple" exact ones.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    config={
        "cache": {
            "mode": "semantic",  # or "simple" for exact match
            "max_age": 3600  # TTL in seconds
        },
        "virtual_key": "openai-key"
    }
)

# Similar queries return cached responses
response1 = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is the capital of France?"}]
)
response2 = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Tell me France's capital"}]
)  # Returns cached response
# Fail fast: abort any request that exceeds request_timeout seconds.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    virtual_key="openai-key",
    request_timeout=30  # 30 seconds
)
import uuid

# Attach a trace ID so every request from this client can be correlated
# in the Portkey observability dashboard.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    virtual_key="openai-key",
    trace_id=str(uuid.uuid4())
)
# Arbitrary metadata is logged with every request for filtering/analytics.
client = Portkey(
    api_key=os.environ["PORTKEY_API_KEY"],
    virtual_key="openai-key",
    metadata={
        "user_id": "user-123",
        "session_id": "session-456",
        "environment": "production"
    }
)
# Per-request overrides: with_options() applies these values to just this
# call without mutating the base client.
response = client.with_options(
    trace_id="unique-trace-id",
    metadata={"request_type": "summarization"}
).chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Summarize this..."}]
)
# Multi-turn conversation: prior assistant turns are replayed as context.
messages = [
    {"role": "system", "content": "You are a helpful coding assistant."},
    {"role": "user", "content": "What is Python?"},
    {"role": "assistant", "content": "Python is a high-level programming language..."},
    {"role": "user", "content": "Show me a hello world example."}
]
response = client.chat.completions.create(model="gpt-4o", messages=messages)
# JSON mode: response_format forces the model to emit a valid JSON object.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "Extract as JSON with name and age fields."},
        {"role": "user", "content": "John is 30 years old."}
    ],
    response_format={"type": "json_object"}
)
# Returns: {"name": "John", "age": 30}
def create_production_client():
    """Build a production-ready Portkey client.

    Combines OpenAI -> Anthropic fallback, per-target retries, and
    semantic caching in a single gateway config.

    Returns:
        A configured ``Portkey`` client instance.
    """
    return Portkey(
        api_key=os.environ["PORTKEY_API_KEY"],
        config={
            "strategy": {"mode": "fallback"},
            "targets": [
                {
                    "virtual_key": os.environ["OPENAI_VIRTUAL_KEY"],
                    "override_params": {"model": "gpt-4o"},
                    "retry": {"attempts": 2, "on_status_codes": [429, 500]}
                },
                {
                    "virtual_key": os.environ["ANTHROPIC_VIRTUAL_KEY"],
                    "override_params": {"model": "claude-3-5-sonnet-20241022"}
                }
            ],
            "cache": {"mode": "semantic", "max_age": 3600}
        },
        # NOTE(review): a static trace_id groups every request under one trace;
        # consider str(uuid.uuid4()) per session if distinct traces are wanted.
        trace_id="production-session",
        metadata={"environment": "production"}
    )