**Quick Start**

```jsonc
// 1. Add AI binding to wrangler.jsonc
{ "ai": { "binding": "AI" } }
```

```ts
// 2. Run model with streaming (recommended)
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    const stream = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
      messages: [{ role: 'user', content: 'Tell me a story' }],
      stream: true, // Always stream for text generation!
    });
    return new Response(stream, {
      headers: { 'content-type': 'text/event-stream' },
    });
  },
};
```
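The response body is a server-sent-event stream. A minimal client-side consumption sketch (the Worker URL is a placeholder):

```ts
// Read the SSE stream chunk by chunk (browser or another Worker)
const res = await fetch('https://your-worker.example.com/'); // hypothetical deployment URL
const reader = res.body!.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // Chunks arrive as lines like: data: {"response":"..."}
  console.log(decoder.decode(value, { stream: true }));
}
```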
{ "ai": { "binding": "AI" } }
// 2. 以流式传输方式运行模型(推荐)
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const stream = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
messages: [{ role: 'user', content: '给我讲个故事' }],
stream: true, // 文本生成建议始终启用流式传输!
});
return new Response(stream, {
headers: { 'content-type': 'text/event-stream' },
});
},
};"Exceeded character limit"import { encode } from 'gpt-tokenizer'; // or model-specific tokenizer
const tokens = encode(prompt);
const contextWindow = 32768; // Model's max tokens (check docs)
const maxResponseTokens = 2048;
if (tokens.length + maxResponseTokens > contextWindow) {
throw new Error(`Prompt exceeds context window: ${tokens.length} tokens`);
}
const response = await env.AI.run('@cf/mistral/mistral-7b-instruct-v0.2', {
messages: [{ role: 'user', content: prompt }],
max_tokens: maxResponseTokens,
});import { encode } from 'gpt-tokenizer'; // 或模型专属的令牌计算器
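When the input is chat history rather than a single string, trimming beats failing. A sketch reusing the same tokenizer; the drop-oldest policy is illustrative, not prescribed:

```ts
import { encode } from 'gpt-tokenizer';

type Msg = { role: string; content: string };

// Drop oldest non-system messages until the conversation fits the budget
function fitToWindow(messages: Msg[], budget: number): Msg[] {
  const size = (m: Msg) => encode(m.content).length;
  const trimmed = [...messages];
  while (trimmed.reduce((n, m) => n + size(m), 0) > budget) {
    const i = trimmed.findIndex((m) => m.role !== 'system');
    if (i === -1) break; // only system messages left
    trimmed.splice(i, 1);
  }
  return trimmed;
}
```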
**Billing discrepancies (neurons vs. tokens)**

Route requests through AI Gateway to get per-request logs you can audit:

```ts
// Use AI Gateway for detailed request logging
const response = await env.AI.run(
  '@cf/meta/llama-3.1-8b-instruct',
  { messages: [{ role: 'user', content: query }] },
  { gateway: { id: 'my-gateway' } }
);

// Monitor dashboard at: https://dash.cloudflare.com → AI → Workers AI
// Compare neuron usage with token counts
// File support ticket with details if discrepancy persists
```
**Error: "MiniflareCoreError: wrapped binding module can't be resolved (internal modules only)"**

Miniflare can't emulate the AI wrapped binding locally (e.g. when loading config via `unstable_getMiniflareWorkerOptions`). Use the remote binding during local dev, or stub the binding in tests (see the sketch below):

```jsonc
// wrangler.jsonc - Option 1: Use remote AI binding in local dev
{
  "ai": { "binding": "AI" },
  "dev": {
    "remote": true // Use production AI binding locally
  }
}
```
**Error: "AiError: Input prompt contains NSFW content (code 3030)"**

`@cf/black-forest-labs/flux-1-schnell` can flag terse, context-free prompts as false positives:

```ts
// ❌ May trigger error 3030
const response = await env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
  prompt: 'hamburger', // Single word triggers filter
});

// ✅ Add context to avoid false positives
const response2 = await env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
  prompt: 'A photo of a delicious large hamburger on a plate with lettuce and tomato',
  num_steps: 4,
});
```
});"Error: unexpected type 'int32' with value 'undefined' (code 1000)"num_stepsnum_steps: 4// ✅ Always include num_steps for image generation
const image = await env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
prompt: 'A beautiful sunset over mountains',
num_steps: 4, // Required - typically 4 for Flux Schnell
});
// Note: FLUX.2 [klein] 4B has fixed steps=4 (cannot be adjusted)"Error: unexpected type 'int32' with value 'undefined' (code 1000)"num_stepsnum_steps: 4// ✅ 图像生成始终包含num_steps参数
**API Signature**

```ts
env.AI.run(
  model: string,
  inputs: ModelInputs,
  options?: { gateway?: { id: string; skipCache?: boolean } }
): Promise<ModelOutput | ReadableStream>
```

**Text Generation Models**

| Model | Best For | Rate Limit | Size | Notes |
|---|---|---|---|---|
| **2025 Models** | | | | |
| Llama 4 Scout 17B | Latest Llama, general purpose | 300/min | 17B | NEW 2025 |
| GPT-OSS 120B | Largest open-source GPT | 300/min | 120B | NEW 2025 |
| GPT-OSS 20B | Smaller open-source GPT | 300/min | 20B | NEW 2025 |
| Gemma 3 12B | 128K context, 140+ languages | 300/min | 12B | NEW 2025, vision |
| Mistral 3.1 24B | Vision + tool calling | 300/min | 24B | NEW 2025 |
| Qwen QwQ 32B | Reasoning, complex tasks | 300/min | 32B | NEW 2025 |
| Qwen Coder 32B | Coding specialist | 300/min | 32B | NEW 2025 |
| | Fast quantized | 300/min | 30B | NEW 2025 |
| IBM Granite Micro | Small, efficient | 300/min | Micro | NEW 2025 |
| **Performance (2025)** | | | | |
| Llama 3.3 70B Fast | 2-4x faster (2025 update) | 300/min | 70B | Speculative decoding |
| Llama 3.1 8B FP8 Fast | Fast 8B variant | 300/min | 8B | - |
| **Standard Models** | | | | |
| Llama 3.1 8B | General purpose | 300/min | 8B | - |
| Llama 3.2 1B | Ultra-fast, simple tasks | 300/min | 1B | - |
| Deepseek R1 32B | Coding, technical | 300/min | 32B | - |
**Embedding Models**

| Model | Dimensions | Best For | Rate Limit | Notes |
|---|---|---|---|---|
| EmbeddingGemma 300M | 768 | Best-in-class RAG | 3000/min | NEW 2025 |
| BGE-base-en-v1.5 | 768 | General RAG (2x faster) | 3000/min | pooling: "cls" recommended |
| BGE-large-en-v1.5 | 1024 | High accuracy (2x faster) | 1500/min | pooling: "cls" recommended |
| BGE-small-en-v1.5 | 384 | Fast, low storage (2x faster) | 3000/min | pooling: "cls" recommended |
| Qwen3 Embedding 0.6B | 768 | Qwen embeddings | 3000/min | NEW 2025 |
pooling: "cls"pooling: "mean"| 模型 | 维度 | 适用场景 | 速率限制 | 说明 |
|---|---|---|---|---|
| 768 | 顶级RAG场景 | 3000次/分钟 | 2025年新增 |
| 768 | 通用RAG(速度提升2倍) | 3000次/分钟 | 推荐使用pooling: "cls" |
| 1024 | 高精度(速度提升2倍) | 1500次/分钟 | 推荐使用pooling: "cls" |
| 384 | 快速、低存储(速度提升2倍) | 3000次/分钟 | 推荐使用pooling: "cls" |
| 768 | Qwen嵌入模型 | 3000次/分钟 | 2025年新增 |
pooling: "cls"pooling: "mean"| Model | Best For | Rate Limit | Notes |
|---|---|---|---|
| High quality, photorealistic | 720/min | ⚠️ See warnings below |
| Leonardo AI style | 720/min | NEW 2025, requires num_steps |
| Leonardo AI variant | 720/min | NEW 2025, requires num_steps |
| General purpose | 720/min | Requires num_steps |
num_steps: 4// ✅ Correct pattern for image generation
const image = await env.AI.run('@cf/black-forest-labs/flux-1-schnell', {
prompt: 'A photo of a delicious hamburger on a plate with fresh vegetables',
num_steps: 4, // Required to avoid error 1000
});
// Descriptive context helps avoid NSFW false positives (error 3030)| 模型 | 适用场景 | 速率限制 | 说明 |
**Vision Models**

| Model | Best For | Rate Limit | Notes |
|---|---|---|---|
| | Image understanding | 720/min | - |
| Gemma 3 12B | Vision + text (128K context) | 300/min | NEW 2025 |
**Audio Models**

| Model | Type | Rate Limit | Notes |
|---|---|---|---|
| Deepgram Aura 2 | Text-to-speech (English) | 720/min | NEW 2025 |
| Deepgram Aura 2 | Text-to-speech (Spanish) | 720/min | NEW 2025 |
| Deepgram Nova 3 | Speech-to-text (+ WebSocket) | 720/min | NEW 2025 |
| Whisper v3 Turbo | Speech-to-text (faster) | 720/min | NEW 2025 |
**RAG Pattern**

```ts
// 1. Generate embeddings
const embeddings = await env.AI.run('@cf/baai/bge-base-en-v1.5', { text: [userQuery] });

// 2. Search Vectorize
const matches = await env.VECTORIZE.query(embeddings.data[0], { topK: 3 });
const context = matches.matches.map((m) => m.metadata.text).join('\n\n');

// 3. Generate with context
const response = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
  messages: [
    { role: 'system', content: `Answer using this context:\n${context}` },
    { role: 'user', content: userQuery },
  ],
  stream: true,
});
```
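Step 2 assumes the index is already populated. A minimal ingestion sketch, assuming a `VECTORIZE` binding backed by a 768-dimension index (the `chunks` data is illustrative):

```ts
// Embed document chunks and upsert them with their text as metadata
const chunks = [
  { id: 'doc-1', text: "Workers AI runs models on Cloudflare's network." },
  { id: 'doc-2', text: 'Vectorize stores and queries embeddings.' },
];
const { data } = await env.AI.run('@cf/baai/bge-base-en-v1.5', {
  text: chunks.map((c) => c.text),
});
await env.VECTORIZE.upsert(
  chunks.map((c, i) => ({
    id: c.id,
    values: data[i], // 768-dim vector from bge-base
    metadata: { text: c.text }, // read back later via match.metadata.text
  }))
);
```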
**Structured Output (Zod)**

```ts
import { z } from 'zod';

const Schema = z.object({ name: z.string(), items: z.array(z.string()) });

const response = await env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
  messages: [{
    role: 'user',
    content: `Generate JSON matching: ${JSON.stringify(Schema.shape)}`
  }],
});
const validated = Schema.parse(JSON.parse(response.response));
```
**AI Gateway Integration**

```ts
const response = await env.AI.run(
  '@cf/meta/llama-3.1-8b-instruct',
  { prompt: 'Hello' },
  { gateway: { id: 'my-gateway', skipCache: false } }
);

// Access logs and send feedback
const gateway = env.AI.gateway('my-gateway');
await gateway.patchLog(env.AI.aiGatewayLogId, {
  feedback: { rating: 1, comment: 'Great response' },
});
```
**AI Gateway Caching**

```ts
// Custom cache TTL (1 hour for expensive queries)
const response = await fetch(
  `https://gateway.ai.cloudflare.com/v1/${accountId}/${gatewayId}/workers-ai/@cf/meta/llama-3.1-8b-instruct`,
  {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${env.CLOUDFLARE_API_KEY}`,
      'Content-Type': 'application/json',
      'cf-aig-cache-ttl': '3600', // 1 hour in seconds (min: 60, max: 2592000)
    },
    body: JSON.stringify({
      messages: [{ role: 'user', content: prompt }],
    }),
  }
);

// Skip cache for real-time data
const realtimeResponse = await fetch(gatewayUrl, {
  headers: {
    'cf-aig-skip-cache': 'true', // Bypass cache entirely
  },
  // ...
});

// Check if response was cached
const cacheStatus = response.headers.get('cf-aig-cache-status'); // "HIT" or "MISS"
```

Key headers: `cf-aig-cache-ttl` (seconds), `cf-aig-skip-cache: 'true'` (bypass), `cf-aig-cache-key` (custom cache key), and the `cf-aig-cache-status` response header (`"HIT"` or `"MISS"`).
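To coalesce requests that differ only in irrelevant body details (timestamps, request IDs), set a custom cache key. A sketch against the same gateway URL; the key scheme is hypothetical:

```ts
// Cache by a custom key instead of the hashed request body
const cached = await fetch(gatewayUrl, {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'cf-aig-cache-key': `faq:${questionId}`, // hypothetical key scheme
  },
  body: JSON.stringify({ messages: [{ role: 'user', content: prompt }] }),
});
```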
const cacheStatus = response.headers.get('cf-aig-cache-status'); // "HIT" 或 "MISS"cf-aig-cache-ttlcf-aig-skip-cache'true'cf-aig-cache-keycf-aig-cache-status| Task Type | Default Limit | Notes |
|---|---|---|
| Text Generation | 300/min | Some fast models: 400-1500/min |
| Text Embeddings | 3000/min | BGE-large: 1500/min |
| Image Generation | 720/min | All image models |
| Vision Models | 720/min | Image understanding |
| Audio (TTS/STT) | 720/min | Deepgram, Whisper |
| Translation | 720/min | M2M100, Opus MT |
| Classification | 2000/min | Text classification |
**Pricing** (token model prices per 1M tokens; tile/character/minute rates as noted)

| Model | Input | Output | Notes |
|---|---|---|---|
| **2025 Models** | | | |
| Llama 4 Scout 17B | $0.270 | $0.850 | NEW 2025 |
| GPT-OSS 120B | $0.350 | $0.750 | NEW 2025 |
| GPT-OSS 20B | $0.200 | $0.300 | NEW 2025 |
| Gemma 3 12B | $0.345 | $0.556 | NEW 2025 |
| Mistral 3.1 24B | $0.351 | $0.555 | NEW 2025 |
| Qwen QwQ 32B | $0.660 | $1.000 | NEW 2025 |
| Qwen Coder 32B | $0.660 | $1.000 | NEW 2025 |
| IBM Granite Micro | $0.017 | $0.112 | NEW 2025 |
| EmbeddingGemma 300M | $0.012 | N/A | NEW 2025 |
| Qwen3 Embedding 0.6B | $0.012 | N/A | NEW 2025 |
| **Performance (2025)** | | | |
| Llama 3.3 70B Fast | $0.293 | $2.253 | 2-4x faster |
| Llama 3.1 8B FP8 Fast | $0.045 | $0.384 | Fast variant |
| **Standard Models** | | | |
| Llama 3.2 1B | $0.027 | $0.201 | - |
| Llama 3.1 8B | $0.282 | $0.827 | - |
| Deepseek R1 32B | $0.497 | $4.881 | - |
| BGE-base (2x faster) | $0.067 | N/A | 2025 speedup |
| BGE-large (2x faster) | $0.204 | N/A | 2025 speedup |
| **Image Models (2025)** | | | |
| Flux 1 Schnell | $0.0000528 per 512x512 tile | - | - |
| Leonardo Lucid | $0.006996 per 512x512 tile | - | NEW 2025 |
| Leonardo Phoenix | $0.005830 per 512x512 tile | - | NEW 2025 |
| **Audio Models (2025)** | | | |
| Deepgram Aura 2 | $0.030 per 1k chars | - | NEW 2025 |
| Deepgram Nova 3 | $0.0052 per audio min | - | NEW 2025 |
| Whisper v3 Turbo | $0.0005 per audio min | - | NEW 2025 |
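Worked example, assuming per-million-token pricing: a Llama 3.1 8B call with 2,000 input and 500 output tokens costs about (2,000 / 1M) × $0.282 + (500 / 1M) × $0.827 ≈ $0.001.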
**Retry with Exponential Backoff**

```ts
async function runAIWithRetry(
  env: Env,
  model: string,
  inputs: any,
  maxRetries = 3
): Promise<any> {
  let lastError: Error;
  for (let i = 0; i < maxRetries; i++) {
    try {
      return await env.AI.run(model, inputs);
    } catch (error) {
      lastError = error as Error;
      // Rate limit - retry with exponential backoff
      if (lastError.message.toLowerCase().includes('rate limit')) {
        await new Promise((resolve) => setTimeout(resolve, Math.pow(2, i) * 1000));
        continue;
      }
      throw error; // Other errors - fail immediately
    }
  }
  throw lastError!;
}
```
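Usage mirrors `env.AI.run`:

```ts
const result = await runAIWithRetry(env, '@cf/meta/llama-3.1-8b-instruct', {
  messages: [{ role: 'user', content: 'Hello' }],
});
```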
**OpenAI-Compatible API**

```ts
import OpenAI from 'openai';

const openai = new OpenAI({
  apiKey: env.CLOUDFLARE_API_KEY,
  baseURL: `https://api.cloudflare.com/client/v4/accounts/${env.ACCOUNT_ID}/ai/v1`,
});

// Chat completions
await openai.chat.completions.create({
  model: '@cf/meta/llama-3.1-8b-instruct',
  messages: [{ role: 'user', content: 'Hello!' }],
});
```

Supported endpoints: `/v1/chat/completions` and `/v1/embeddings`.
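The `/v1/embeddings` endpoint works through the same client; a sketch (the model choice is an example):

```ts
// Embeddings via the OpenAI-compatible endpoint
const emb = await openai.embeddings.create({
  model: '@cf/baai/bge-base-en-v1.5',
  input: 'Where do Workers run?',
});
console.log(emb.data[0].embedding.length); // 768 for bge-base
```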
**Vercel AI SDK**

```ts
import { createWorkersAI } from 'workers-ai-provider'; // v3.0.2 with AI SDK v5
import { generateText, streamText } from 'ai';

const workersai = createWorkersAI({ binding: env.AI });

// Generate or stream
await generateText({
  model: workersai('@cf/meta/llama-3.1-8b-instruct'),
  prompt: 'Write a poem',
});
```
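`streamText` is imported above but unused; the streaming variant, assuming AI SDK v5's response helper:

```ts
// Streaming variant with the same provider
const result = streamText({
  model: workersai('@cf/meta/llama-3.1-8b-instruct'),
  prompt: 'Write a poem',
});
return result.toTextStreamResponse(); // streams plain text chunks to the client
```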
Note: These tips come from community discussions and production experience.
**Hono Integration**

```ts
import { Hono } from 'hono';

type Bindings = { AI: Ai };
const app = new Hono<{ Bindings: Bindings }>();

app.post('/chat', async (c) => {
  const { prompt } = await c.req.json();
  const stream = await c.env.AI.run('@cf/meta/llama-3.1-8b-instruct', {
    messages: [{ role: 'user', content: prompt }],
    stream: true,
  });
  // Return stream directly (not c.stream())
  return new Response(stream, {
    headers: {
      'content-type': 'text/event-stream',
      'cache-control': 'no-cache',
      'connection': 'keep-alive',
    },
  });
});

export default app; // the Hono app is the Worker's default export
```
**Note**: Most "version incompatibility" issues turn out to be network configuration problems.
---