# Opik
Opik observability for LLM agents — Agent Configuration, Local Runner (opik connect), Evaluation Suites, threads, integrations. Use for "configure my agent", "connect my agent", "evaluate my agent" or "integrate with Opik".
npx skill4agent add comet-ml/opik-skills opik

Decorate the agent entrypoint with `@opik.track(entrypoint=True)` and manage settings via `AgentConfig`.

Credentials: load `.env` / `.env.local` (e.g. via python-dotenv / dotenv) for `OPIK_API_KEY` and `OPIK_WORKSPACE`; commit only `.env.example` / `.env.sample`, never `.env`. If no `.env` / `.env.local` exists, `~/.opik.config` is used; `OPIK_API_KEY` in `.env` takes precedence over `~/.opik.config`. `project_name` defaults to `"default"`.

`~/.opik.config`:
[opik]
api_key=your-api-key
url_override=https://www.comet.com/opik/api
workspace=your-workspace

`.env`:
# Opik
OPIK_API_KEY=your-api-key
OPIK_URL_OVERRIDE=https://www.comet.com/opik/api
OPIK_WORKSPACE=your-workspace

Python reads `OPIK_WORKSPACE`; in TypeScript pass `workspaceName` to `new Opik({...})`. Cloud: `https://www.comet.com/opik/api` with `api_key` + `workspace`; local OSS: `http://localhost:5173/api` (workspace `default`).

opik configure
opik configure --use_local
npx opik-ts configure
npx opik-ts configure --use-local

@opik.track(project_name="my-project")
def run():
    ...

const client = new Opik({ projectName: "my-project" });

import opik
# Minimal traced agent: decorate the entrypoint and each step so each call
# is recorded as a span (indentation restored from the mangled extraction).
@opik.track(entrypoint=True, name="my-agent")
def agent(query: str) -> str:
    # entrypoint=True marks this function as the trace entry point.
    context = retrieve(query)
    return generate(query, context)

@opik.track(type="tool")
def retrieve(query: str) -> list:
    # Span typed "tool" for the retrieval step.
    return search_db(query)

@opik.track(type="llm")
def generate(query: str, context: list) -> str:
    # Span typed "llm" for the model call.
    return llm_call(query, context)

result = agent("What is ML?")
opik.flush_tracker()  # required in scripts

Span types: general, llm, tool, guardrail.

from opik.integrations.openai import track_openai  # OpenAI
from opik.integrations.anthropic import track_anthropic # Anthropic
from opik.integrations.langchain import OpikTracer # LangChain
from opik.integrations.crewai import track_crewai # CrewAI
from opik.integrations.dspy import OpikCallback # DSPy
from opik.integrations.adk import track_adk_agent_recursive  # Google ADK

LiteLLM: register `OpikLogger` as a callback; inside `@opik.track` functions, pass `current_span_data` in the metadata so `litellm.completion()` / `litellm.acompletion()` calls attach to the current span. Combine `OpikLogger` with `@opik.track` as below.

from opik import track
from opik.opik_context import get_current_span_data
from litellm.integrations.opik.opik import OpikLogger
import litellm
litellm.callbacks = [OpikLogger()]
@track
def call_llm(messages, model="gpt-4o"):
    # Pass the current Opik span in LiteLLM's metadata under the "opik" key
    # so the LiteLLM-side logging links this call to the enclosing trace.
    return litellm.completion(
        model=model,
        messages=messages,
        metadata={
            "opik": {
                "current_span_data": get_current_span_data(),
                "tags": ["litellm"],
            },
        },
    )
@track(entrypoint=True)
def agent(query: str) -> str:
    return call_llm([{"role": "user", "content": query}])

Works with `litellm.completion` and `litellm.acompletion` inside `@opik.track` functions.

import { Opik } from "opik";
// Manual TypeScript tracing: create a trace, open typed child spans,
// and end each span/trace with its output payload.
const client = new Opik({ projectName: "my-project" });
const trace = client.trace({
  name: "my-agent",
  input: { query: "What is ML?" },
});
// Tool span for the retrieval step.
const toolSpan = trace.span({
  name: "retrieve-context",
  type: "tool",
  input: { query: "What is ML?" },
});
// retrieval logic
toolSpan.end({ output: { documents: [] } });
// LLM span for the model call.
const llmSpan = trace.span({
  name: "generate-response",
  type: "llm",
  input: { prompt: "What is ML?" },
});
// model call
llmSpan.end({ output: { response: "Machine learning is..." } });
trace.end({ output: { response: "Machine learning is..." } });
await client.flush();

Set `projectName` on the client (see references/tracing-typescript.md) and always `await client.flush()` before exit. Span types: general, llm, tool, guardrail. For multi-turn conversations, set a `thread_id` so turns group into one thread:

@opik.track(entrypoint=True)
def handle_message(session_id: str, message: str) -> str:
opik.update_current_trace(thread_id=session_id)
    return generate_response(session_id, message)

from opik.evaluation import evaluate_threads
from opik.evaluation.metrics.conversation import (
SessionCompletenessMetric, UserFrustrationMetric, ConversationalCoherenceMetric,
)
results = evaluate_threads(project_name="chat-agent", metrics=[
SessionCompletenessMetric(), UserFrustrationMetric(), ConversationalCoherenceMetric(),
])

Use the same `thread_id` for every turn of a conversation. Convert existing config classes (`Config`, `Settings`, `AgentSettings`, `ModelConfig`, plain `@dataclass`) holding `model`, `temperature`, `system_prompt`, `max_tokens` into `opik.AgentConfig` — not `@dataclass` or `BaseModel`. Fields are `Annotated` (`str` or `opik.Prompt`); publish versions with `client.create_agent_config_version()` and read with `client.get_agent_config()`.

from typing import Annotated
import opik
class AgentConfig(opik.AgentConfig):
    """Managed agent configuration; fields are Annotated and carry NO defaults."""

    model: Annotated[str, "LLM model"]  # NO defaults
    temperature: Annotated[float, "Sampling temperature"]
    system_prompt: Annotated[opik.Prompt, "Managed system prompt"]

# Fallback used when no config version can be fetched from Opik.
DEFAULT_AGENT_CONFIG = AgentConfig(
    model="gpt-4o",
    temperature=0.7,
    system_prompt=opik.Prompt(
        name="agent-system-prompt",
        prompt="You are a helpful assistant for {{product}}.",
    ),
)

client = opik.Opik()

# Register this configuration as a version under the project.
client.create_agent_config_version(
    AgentConfig(
        model="gpt-4o",
        temperature=0.7,
        system_prompt=opik.Prompt(
            name="agent-system-prompt",
            prompt="You are a helpful assistant for {{product}}.",
        ),
    ),
    project_name="my-agent",
)
# Identical values → same version (dedup). Different values → new version.
@opik.track(entrypoint=True, project_name="my-agent")
def run_agent(question: str) -> str:
cfg = client.get_agent_config(
fallback=DEFAULT_AGENT_CONFIG,
project_name="my-agent",
# optional: latest=True | env="staging" | version="v1" (default: prod)
)
return llm_call(
model=cfg.model,
temperature=cfg.temperature,
system_prompt=cfg.system_prompt.format(product="Opik"),
question=question,
    )

Call `get_agent_config()` inside the `@opik.track` entrypoint. Promote a version with `cfg.deploy_to("prod")`. Text prompts (`Prompt`): `opik.api_objects.prompt.text.prompt`; chat prompts (`ChatPrompt`): `opik.api_objects.prompt.chat.chat_prompt`.

opik connect --pair <CODE> python3 app.py    # Python
opik connect --pair <CODE> npx tsx app.ts    # TypeScript

`opik connect` wraps your run command (`python3 app.py` / `npx tsx app.ts`); mark the entrypoint with `@track(entrypoint=True)` (Python) or `track({ entrypoint: true, params: [{name, type}] }, fn)` (TypeScript).

| Issue | Fix |
|---|---|
| No entrypoint found | Add `@track(entrypoint=True)` |
| Invalid pair code | Codes expire — get a new one |
| Connection refused | Check Opik server (OSS) or API key (Cloud) |
| Anti-Pattern | Fix |
|---|---|
| Existing config class left unconverted (e.g., `@dataclass Config`) | Convert to `opik.AgentConfig` |
| Hardcoded config | Use `client.get_agent_config()` with a fallback |
| Missing entrypoint | Add `@opik.track(entrypoint=True)` |
| No thread_id on conversational agent | Wire `thread_id` via `opik.update_current_trace` |
| `get_current_span_data()` at module level | Must be inside decorated function |
| TS missing entrypoint | Add explicit `track({ entrypoint: true, params: [...] }, fn)` |
| Missing flush | Call `opik.flush_tracker()` / `await client.flush()` before exit |
| Topic | File |
|---|---|
| Python SDK (decorators, async, distributed, config, entrypoint) | references/tracing-python.md |
| TypeScript SDK (client, decorators, entrypoint, params) | references/tracing-typescript.md |
| REST API | references/rest-api.md |
| All integrations | references/integrations.md |
| Core concepts (traces, spans, threads, metadata) | references/concepts.md |
| Evaluation (suites, 41 built-in metrics, trajectory) | references/evaluation.md |