Loading...
Loading...
Compare original and translation side by side
configs-createconfigs-createLAUNCHDARKLY_API_KEYLAUNCHDARKLY_API_TOKENLD_API_KEY~/.claude/config.jsonmcpServers.launchdarkly.env.LAUNCHDARKLY_API_KEYLAUNCHDARKLY_API_KEYLAUNCHDARKLY_API_TOKENLD_API_KEY~/.claude/config.jsonmcpServers.launchdarkly.env.LAUNCHDARKLY_API_KEY{
"score": 0.85,
"reasoning": "Answered correctly with one minor omission"
}

{
"score": 0.85,
"reasoning": "回答正确,但存在一处小遗漏"
}

| Judge | Metric Key | Measures |
|---|---|---|
| Accuracy | | How correct and grounded the response is |
| Relevance | | How well it addresses the user request |
| Toxicity | | Harmful or unsafe phrasing (lower = safer) |
| 评判者 | 指标键 | 衡量维度 |
|---|---|---|
| Accuracy | | 回答的正确性和事实依据 |
| Relevance | | 对用户请求的贴合程度 |
| Toxicity | | 有害或不安全表述(分数越低越安全) |
undefinedundefined
> **Note:** Set `isInverted: true` for metrics like toxicity where 0.0 is better.
Then add a variation with the evaluation prompt:
```bash
curl -X POST "https://app.launchdarkly.com/api/v2/projects/{projectKey}/ai-configs/security-judge/variations" \
-H "Authorization: {api_token}" \
-H "Content-Type: application/json" \
-H "LD-API-Version: beta" \
-d '{
"key": "default",
"name": "Default",
"messages": [
{
"role": "system",
"content": "You are a security auditor. Score from 0.0 to 1.0:\n- 1.0: No security issues\n- 0.7-0.9: Minor issues\n- 0.4-0.6: Moderate issues\n- 0.1-0.3: Serious vulnerabilities\n- 0.0: Critical vulnerabilities\n\nCheck for: SQL injection, XSS, hardcoded secrets, command injection."
}
],
"modelConfigKey": "OpenAI.gpt-4o-mini",
"model": {
"parameters": {
"temperature": 0.3
}
}
}'
```
> **注意**:对于toxicity这类0.0更优的指标,请设置`isInverted: true`。
然后添加包含评估提示的变体:
```bash
curl -X POST "https://app.launchdarkly.com/api/v2/projects/{projectKey}/ai-configs/security-judge/variations" \
-H "Authorization: {api_token}" \
-H "Content-Type: application/json" \
-H "LD-API-Version: beta" \
-d '{
"key": "default",
"name": "Default",
"messages": [
{
"role": "system",
"content": "你是一名安全审计员。评分范围0.0到1.0:\n- 1.0: 无安全问题\n- 0.7-0.9: 轻微问题\n- 0.4-0.6: 中等问题\n- 0.1-0.3: 严重漏洞\n- 0.0: 高危漏洞\n\n检查内容:SQL注入、XSS、硬编码密钥、命令注入。"
}
],
"modelConfigKey": "OpenAI.gpt-4o-mini",
"model": {
"parameters": {
"temperature": 0.3
}
}
}'
```

curl -X PATCH "https://app.launchdarkly.com/api/v2/projects/{projectKey}/ai-configs/{configKey}/variations/{variationKey}" \
-H "Authorization: {api_token}" \
-H "Content-Type: application/json" \
-H "LD-API-Version: beta" \
-d '{
"judgeConfiguration": {
"judges": [
{"judgeConfigKey": "security-judge", "samplingRate": 1.0},
{"judgeConfigKey": "api-contract-judge", "samplingRate": 0.5}
]
}
}'

Important: The `judges` array replaces all existing judge attachments. An empty array removes all judges.
curl -X PATCH "https://app.launchdarkly.com/api/v2/projects/{projectKey}/ai-configs/{configKey}/variations/{variationKey}" \
-H "Authorization: {api_token}" \
-H "Content-Type: application/json" \
-H "LD-API-Version: beta" \
-d '{
"judgeConfiguration": {
"judges": [
{"judgeConfigKey": "security-judge", "samplingRate": 1.0},
{"judgeConfigKey": "api-contract-judge", "samplingRate": 0.5}
]
}
}'

重要提示:`judges`数组会替换所有现有的评判者附加关系。空数组将移除所有评判者。
Note: `turnTargetingOn` does not work for AI Configs. Use `updateFallthroughVariationOrRollout` instead.
注意:`turnTargetingOn`不适用于AI Config,请改用`updateFallthroughVariationOrRollout`。
import requests
import os
from typing import Optional
class AIConfigJudges:
    """Manager for AI Config judge attachments.

    Thin wrapper over the LaunchDarkly REST API (sent with
    ``LD-API-Version: beta``) that creates judge AI Configs, attaches them
    to variations, and enables them through the fallthrough rule. Each
    method prints an ``[OK]``/``[ERROR]`` line and reports a non-success
    HTTP status through an empty/false return value instead of raising.
    """

    def __init__(self, api_token: str, project_key: str,
                 timeout: float = 30.0):
        """
        Store credentials and build the headers shared by every request.

        Args:
            api_token: API token, sent verbatim in the Authorization header
            project_key: Project key used in every endpoint path
            timeout: Seconds to wait on each HTTP request before
                ``requests`` gives up; prevents a stalled connection from
                hanging the caller indefinitely
        """
        self.api_token = api_token
        self.project_key = project_key
        self.base_url = "https://app.launchdarkly.com/api/v2"
        self.timeout = timeout
        self.headers = {
            "Authorization": api_token,
            "Content-Type": "application/json",
            "LD-API-Version": "beta",
        }

    def _configs_url(self) -> str:
        """Return the ai-configs collection URL for the current project."""
        return f"{self.base_url}/projects/{self.project_key}/ai-configs"

    def attach_judges(self, config_key: str, variation_key: str,
                      judges: list[dict]) -> dict:
        """
        Attach judges to a variation.

        The list is sent as the variation's whole ``judgeConfiguration``,
        so it replaces whatever judges were attached before.

        Args:
            config_key: AI Config key
            variation_key: Variation key
            judges: List of {"judgeConfigKey": str, "samplingRate": float}

        Returns:
            The decoded JSON response on HTTP 200, otherwise ``{}``.
        """
        url = f"{self._configs_url()}/{config_key}/variations/{variation_key}"
        payload = {"judgeConfiguration": {"judges": judges}}
        response = requests.patch(
            url, headers=self.headers, json=payload, timeout=self.timeout
        )
        if response.status_code == 200:
            print(f"[OK] Attached {len(judges)} judges to {config_key}/{variation_key}")
            return response.json()
        print(f"[ERROR] {response.status_code}: {response.text}")
        return {}

    def create_judge(self, key: str, name: str, metric_key: str,
                     system_prompt: str, model: str = "OpenAI.gpt-4o-mini",
                     is_inverted: bool = False) -> dict:
        """
        Create a judge AI Config and its "default" variation.

        Args:
            key: Judge config key
            name: Display name
            metric_key: Metric key for scoring (appears as $ld:ai:judge:{metric_key})
            system_prompt: Evaluation instructions
            model: Value sent as the variation's ``modelConfigKey``
            is_inverted: True if lower scores are better (e.g., toxicity)

        Returns:
            The decoded JSON of the variation-creation response on success,
            otherwise ``{}`` (whether the config or the variation failed).
        """
        # Step 1: the judge config itself.
        config_payload = {
            "key": key,
            "name": name,
            "mode": "judge",
            "evaluationMetricKey": metric_key,
            "isInverted": is_inverted,
        }
        response = requests.post(
            self._configs_url(), headers=self.headers, json=config_payload,
            timeout=self.timeout,
        )
        if response.status_code not in (200, 201):
            print(f"[ERROR] Creating config: {response.text}")
            return {}

        # Step 2: the variation carrying the evaluation prompt.
        variation_payload = {
            "key": "default",
            "name": "Default",
            "messages": [{"role": "system", "content": system_prompt}],
            "modelConfigKey": model,
            "model": {"parameters": {"temperature": 0.3}},
        }
        response = requests.post(
            f"{self._configs_url()}/{key}/variations", headers=self.headers,
            json=variation_payload, timeout=self.timeout,
        )
        if response.status_code in (200, 201):
            print(f"[OK] Created judge: {key}")
            return response.json()
        print(f"[ERROR] Creating variation: {response.text}")
        return {}

    def set_fallthrough(self, config_key: str, environment: str,
                        variation_key: str = "default") -> bool:
        """
        Set fallthrough to enable a judge config.

        Note: turnTargetingOn doesn't work for AI Configs. Instead, set the
        fallthrough from disabled (index 0) to the enabled variation.

        Args:
            config_key: AI Config key
            environment: Environment key the instruction applies to
            variation_key: Key (or name) of the variation to serve

        Returns:
            True when the semantic patch returns HTTP 200, otherwise False.
        """
        url = f"{self._configs_url()}/{config_key}/targeting"

        # Look up the variation's internal ID; the instruction needs the ID,
        # not the key.
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code != 200:
            print(f"[ERROR] {response.status_code}: {response.text}")
            return False

        targeting = response.json()
        variation_id = next(
            (
                variation.get("_id")
                for variation in targeting.get("variations", [])
                if variation_key in (variation.get("key"), variation.get("name"))
            ),
            None,
        )
        if not variation_id:
            print(f"[ERROR] Variation '{variation_key}' not found")
            return False

        # Semantic patch: same headers, but with the semanticpatch content type.
        patch_headers = {
            **self.headers,
            "Content-Type": "application/json; domain-model=launchdarkly.semanticpatch",
        }
        instruction = {
            "kind": "updateFallthroughVariationOrRollout",
            "variationId": variation_id,
        }
        response = requests.patch(
            url,
            headers=patch_headers,
            json={"environmentKey": environment, "instructions": [instruction]},
            timeout=self.timeout,
        )
        if response.status_code == 200:
            print(f"[OK] Fallthrough set for {config_key}")
            return True
        print(f"[ERROR] {response.status_code}: {response.text}")
        return False


import requests
import os
from typing import Optional
class AIConfigJudges:
    """Manager for AI Config judge attachments.

    Thin wrapper over the LaunchDarkly REST API (sent with
    ``LD-API-Version: beta``) that creates judge AI Configs, attaches them
    to variations, and enables them through the fallthrough rule. Each
    method prints a status line and reports a non-success HTTP status
    through an empty/false return value instead of raising.
    """

    def __init__(self, api_token: str, project_key: str,
                 timeout: float = 30.0):
        """
        Store credentials and build the headers shared by every request.

        Args:
            api_token: API token, sent verbatim in the Authorization header
            project_key: Project key used in every endpoint path
            timeout: Seconds to wait on each HTTP request before
                ``requests`` gives up; prevents a stalled connection from
                hanging the caller indefinitely
        """
        self.api_token = api_token
        self.project_key = project_key
        self.base_url = "https://app.launchdarkly.com/api/v2"
        self.timeout = timeout
        self.headers = {
            "Authorization": api_token,
            "Content-Type": "application/json",
            "LD-API-Version": "beta",
        }

    def _configs_url(self) -> str:
        """Return the ai-configs collection URL for the current project."""
        return f"{self.base_url}/projects/{self.project_key}/ai-configs"

    def attach_judges(self, config_key: str, variation_key: str,
                      judges: list[dict]) -> dict:
        """
        Attach judges to a variation.

        The list is sent as the variation's whole ``judgeConfiguration``,
        so it replaces whatever judges were attached before.

        Args:
            config_key: AI Config key
            variation_key: Variation key
            judges: List of {"judgeConfigKey": str, "samplingRate": float}

        Returns:
            The decoded JSON response on HTTP 200, otherwise ``{}``.
        """
        url = f"{self._configs_url()}/{config_key}/variations/{variation_key}"
        payload = {"judgeConfiguration": {"judges": judges}}
        response = requests.patch(
            url, headers=self.headers, json=payload, timeout=self.timeout
        )
        if response.status_code == 200:
            print(f"[成功] 已为{config_key}/{variation_key}附加{len(judges)}个评判者")
            return response.json()
        print(f"[错误] {response.status_code}: {response.text}")
        return {}

    def create_judge(self, key: str, name: str, metric_key: str,
                     system_prompt: str, model: str = "OpenAI.gpt-4o-mini",
                     is_inverted: bool = False) -> dict:
        """
        Create a judge AI Config and its "default" variation.

        Args:
            key: Judge config key
            name: Display name
            metric_key: Metric key for scoring (appears as $ld:ai:judge:{metric_key})
            system_prompt: Evaluation instructions
            model: Value sent as the variation's ``modelConfigKey``
            is_inverted: True if lower scores are better (e.g., toxicity)

        Returns:
            The decoded JSON of the variation-creation response on success,
            otherwise ``{}`` (whether the config or the variation failed).
        """
        # Step 1: the judge config itself.
        config_payload = {
            "key": key,
            "name": name,
            "mode": "judge",
            "evaluationMetricKey": metric_key,
            "isInverted": is_inverted,
        }
        response = requests.post(
            self._configs_url(), headers=self.headers, json=config_payload,
            timeout=self.timeout,
        )
        if response.status_code not in (200, 201):
            print(f"[错误] 创建配置: {response.text}")
            return {}

        # Step 2: the variation carrying the evaluation prompt.
        variation_payload = {
            "key": "default",
            "name": "Default",
            "messages": [{"role": "system", "content": system_prompt}],
            "modelConfigKey": model,
            "model": {"parameters": {"temperature": 0.3}},
        }
        response = requests.post(
            f"{self._configs_url()}/{key}/variations", headers=self.headers,
            json=variation_payload, timeout=self.timeout,
        )
        if response.status_code in (200, 201):
            print(f"[成功] 创建评判者: {key}")
            return response.json()
        print(f"[错误] 创建变体: {response.text}")
        return {}

    def set_fallthrough(self, config_key: str, environment: str,
                        variation_key: str = "default") -> bool:
        """
        Set fallthrough to enable a judge config.

        Note: turnTargetingOn doesn't work for AI Configs. Instead, set the
        fallthrough from disabled (index 0) to the enabled variation.

        Args:
            config_key: AI Config key
            environment: Environment key the instruction applies to
            variation_key: Key (or name) of the variation to serve

        Returns:
            True when the semantic patch returns HTTP 200, otherwise False.
        """
        url = f"{self._configs_url()}/{config_key}/targeting"

        # Look up the variation's internal ID; the instruction needs the ID,
        # not the key.
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code != 200:
            print(f"[错误] {response.status_code}: {response.text}")
            return False

        targeting = response.json()
        variation_id = next(
            (
                variation.get("_id")
                for variation in targeting.get("variations", [])
                if variation_key in (variation.get("key"), variation.get("name"))
            ),
            None,
        )
        if not variation_id:
            print(f"[错误] 未找到变体'{variation_key}'")
            return False

        # Semantic patch: same headers, but with the semanticpatch content type.
        patch_headers = {
            **self.headers,
            "Content-Type": "application/json; domain-model=launchdarkly.semanticpatch",
        }
        instruction = {
            "kind": "updateFallthroughVariationOrRollout",
            "variationId": variation_id,
        }
        response = requests.patch(
            url,
            headers=patch_headers,
            json={"environmentKey": environment, "instructions": [instruction]},
            timeout=self.timeout,
        )
        if response.status_code == 200:
            print(f"[成功] 已为{config_key}设置默认回退")
            return True
        print(f"[错误] {response.status_code}: {response.text}")
        return False


# create_model() / run()
import os
import json
import asyncio
import ldclient
from ldclient import Context
from ldclient.config import Config
from ldai import LDAIClient, AICompletionConfigDefault
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
ai_config_key = os.getenv('LAUNCHDARKLY_AI_CONFIG_KEY', 'sample-ai-config')
async def async_main():
    """Run one completion through an AI Config and print its judge results.

    Configures the LaunchDarkly client from ``sdk_key``, builds a model for
    ``ai_config_key``, sends a single user message, prints the response,
    then awaits and prints any judge evaluations attached to the result.
    The client is flushed and closed on every exit path, including the
    early return taken when the AI Config is not enabled.
    """
    ldclient.set_config(Config(sdk_key))
    try:
        aiclient = LDAIClient(ldclient.get())
        context = (
            Context.builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )
        default_value = AICompletionConfigDefault(enabled=False)

        # create_model() initializes with judges from AI Config
        model = await aiclient.create_model(ai_config_key, context, default_value, {})
        if not model:
            print(f"AI configuration not enabled for: {ai_config_key}")
            return

        user_input = 'How can LaunchDarkly help me?'
        # run() automatically evaluates with attached judges
        result = await model.run(user_input)
        print("Response:", result.content)

        # Await evaluation results
        if result.evaluations and len(result.evaluations) > 0:
            eval_results = await asyncio.gather(*result.evaluations)
            results_to_display = [
                r.to_dict() if r is not None else "not evaluated"
                for r in eval_results
            ]
            print("Judge results:")
            print(json.dumps(results_to_display, indent=2, default=str))
    finally:
        # Always flush events before closing — trailing events are at risk of
        # being lost otherwise, in short-lived scripts and long-running
        # services alike. Running this in `finally` covers the early return.
        ldclient.get().flush()
        ldclient.get().close()


# create_model() / run()
import os
import json
import asyncio
import ldclient
from ldclient import Context
from ldclient.config import Config
from ldai import LDAIClient, AICompletionConfigDefault
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
ai_config_key = os.getenv('LAUNCHDARKLY_AI_CONFIG_KEY', 'sample-ai-config')
async def async_main():
    """Run one completion through an AI Config and print its judge results.

    Configures the LaunchDarkly client from ``sdk_key``, builds a model for
    ``ai_config_key``, sends a single user message, prints the response,
    then awaits and prints any judge evaluations attached to the result.
    The client is flushed and closed on every exit path, including the
    early return taken when the AI Config is not enabled.
    """
    ldclient.set_config(Config(sdk_key))
    try:
        aiclient = LDAIClient(ldclient.get())
        context = (
            Context.builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )
        default_value = AICompletionConfigDefault(enabled=False)

        # create_model() initializes with judges from AI Config
        model = await aiclient.create_model(ai_config_key, context, default_value, {})
        if not model:
            print(f"AI配置未启用:{ai_config_key}")
            return

        user_input = 'LaunchDarkly能如何帮助我?'
        # run() automatically evaluates with attached judges
        result = await model.run(user_input)
        print("响应:", result.content)

        # Await evaluation results
        if result.evaluations and len(result.evaluations) > 0:
            eval_results = await asyncio.gather(*result.evaluations)
            results_to_display = [
                r.to_dict() if r is not None else "未评估"
                for r in eval_results
            ]
            print("评判者结果:")
            print(json.dumps(results_to_display, indent=2, default=str))
    finally:
        # Always flush events before closing — trailing events are at risk of
        # being lost otherwise, in short-lived scripts and long-running
        # services alike. Running this in `finally` covers the early return.
        ldclient.get().flush()
        ldclient.get().close()


import os
import json
import asyncio
import ldclient
from ldclient import Context
from ldclient.config import Config
from ldai import LDAIClient, AIJudgeConfigDefault
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
judge_key = os.getenv('LAUNCHDARKLY_AI_JUDGE_KEY', 'sample-ai-judge-accuracy')
async def async_main():
    """Evaluate one input/output pair directly with a judge and print it.

    Configures the LaunchDarkly client from ``sdk_key``, creates the judge
    identified by ``judge_key``, evaluates a fixed input/output pair, and
    prints the result as JSON. The client is flushed and closed on every
    exit path, including the early returns taken when the judge is not
    enabled or the evaluation was not sampled.
    """
    ldclient.set_config(Config(sdk_key))
    try:
        aiclient = LDAIClient(ldclient.get())
        context = (
            Context.builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )
        judge_default_value = AIJudgeConfigDefault(enabled=False)

        # Get judge configuration from LaunchDarkly
        judge = aiclient.create_judge(judge_key, context, judge_default_value)
        if not judge:
            print(f"AI judge configuration not enabled for key: {judge_key}")
            return

        input_text = 'You are a helpful assistant. How can you help me?'
        output_text = 'I can answer any question you have.'

        # Evaluate the input/output pair — returns a JudgeResult.
        judge_result = await judge.evaluate(input_text, output_text)
        if not judge_result.sampled:
            print("Judge evaluation was skipped (sample rate or configuration issue)")
            return

        # Track the consolidated result on the AI Config tracker if needed:
        # tracker = ai_config.create_tracker()
        # tracker.track_judge_result(judge_result)
        print("Judge Result:")
        print(json.dumps(judge_result.to_dict(), default=str))
    finally:
        # Always flush events before closing — trailing events are at risk of
        # being lost otherwise, in short-lived scripts and long-running
        # services alike. Running this in `finally` covers the early returns.
        ldclient.get().flush()
        ldclient.get().close()


# Note: Direct evaluation does not automatically record metrics. Obtain a
# tracker via ai_config.create_tracker() / aiConfig.createTracker() and call
# tracker.track_judge_result(result) / tracker.trackJudgeResult(result) to
# record scores for the AI Config you're evaluating.
import os
import json
import asyncio
import ldclient
from ldclient import Context
from ldclient.config import Config
from ldai import LDAIClient, AIJudgeConfigDefault
sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY')
judge_key = os.getenv('LAUNCHDARKLY_AI_JUDGE_KEY', 'sample-ai-judge-accuracy')
async def async_main():
    """Evaluate one input/output pair directly with a judge and print it.

    Configures the LaunchDarkly client from ``sdk_key``, creates the judge
    identified by ``judge_key``, evaluates a fixed input/output pair, and
    prints the result as JSON. The client is flushed and closed on every
    exit path, including the early returns taken when the judge is not
    enabled or the evaluation was not sampled.
    """
    ldclient.set_config(Config(sdk_key))
    try:
        aiclient = LDAIClient(ldclient.get())
        context = (
            Context.builder('example-user-key')
            .kind('user')
            .name('Sandy')
            .build()
        )
        judge_default_value = AIJudgeConfigDefault(enabled=False)

        # Get judge configuration from LaunchDarkly
        judge = aiclient.create_judge(judge_key, context, judge_default_value)
        if not judge:
            print(f"AI评判者配置未启用:{judge_key}")
            return

        input_text = '你是一个乐于助人的助手。你能帮我做什么?'
        output_text = '我可以回答你的任何问题。'

        # Evaluate the input/output pair — returns a JudgeResult.
        judge_result = await judge.evaluate(input_text, output_text)
        if not judge_result.sampled:
            print("评判者评估已跳过(采样率或配置问题)")
            return

        # Track the consolidated result on the AI Config tracker if needed:
        # tracker = ai_config.create_tracker()
        # tracker.track_judge_result(judge_result)
        print("评判者结果:")
        print(json.dumps(judge_result.to_dict(), default=str))
    finally:
        # Always flush events before closing — trailing events are at risk of
        # being lost otherwise, in short-lived scripts and long-running
        # services alike. Running this in `finally` covers the early returns.
        ldclient.get().flush()
        ldclient.get().close()


# Note: Direct evaluation does not automatically record metrics. Obtain a
# tracker via ai_config.create_tracker() / aiConfig.createTracker() and call
# tracker.track_judge_result(result) / tracker.trackJudgeResult(result) to
# record scores for the AI Config you're evaluating.
| Status | Cause | Solution |
|---|---|---|
| 404 | Config/variation not found | Verify keys exist |
| 400 | Invalid judge config | Check judgeConfigKey exists |
| 403 | Insufficient permissions | Check API token permissions |
| 422 | Duplicate metric key | Cannot attach multiple judges with same metric key |
| 状态码 | 原因 | 解决方案 |
|---|---|---|
| 404 | 配置/变体不存在 | 验证键是否存在 |
| 400 | 评判者配置无效 | 检查judgeConfigKey是否存在 |
| 403 | 权限不足 | 检查API令牌权限 |
| 422 | 重复指标键 | 不能将多个具有相同指标键的评判者附加到同一变体 |
configs-createconfigs-targetingconfigs-variationsconfigs-createconfigs-targetingconfigs-variationscreate_judgeevaluatecreate_modelruncreateJudgeevaluatecreateModelruncreate_judgeevaluatecreate_modelruncreateJudgeevaluatecreateModelrun