Loading...
Loading...
Compare original and translation side by side
Filelist[File]strFilelist[File]Filelist[File]strFilelist[File]FileField(description=...)FileField(description=...)networkxtree-sitterbeautifulsoup4py3-none-anyallowed_domainspdfspreadsheetdocxnetworkxtree-sitterbeautifulsoup4py3-none-anyallowed_domainspdfspreadsheetdocxmax_iterationsallowed_domainssub_lmmax_iterationsallowed_domainssub_lmschema.pyschema.pysignature.pySkill(...)Stage1(documents) --[ExtractedData]--> Stage2(extracted) --[Report]--> Stage3(report)schema.pyschema.pysignature.pySkill(...)Stage1(documents) --[ExtractedData]--> Stage2(extracted) --[Report]--> Stage3(report)my_rlm/
├── __init__.py # Public exports (service class, schema, signature)
├── schema.py # Pydantic models for inputs AND outputs
├── signature.py # DSPy Signature (inputs/outputs + strategy docstring)
├── service.py # DSPy Module wiring signature + PredictRLM + skills
└── skills.py # (optional) Custom skill definitions beyond built-in skillsschema.pysignature.pyservice.py__init__.pyskills.pymy_rlm/
├── __init__.py # 公共导出(服务类、模式、签名)
├── schema.py # 输入和输出的Pydantic模型
├── signature.py # DSPy Signature(输入/输出 + 策略文档字符串)
├── service.py # DSPy Module:连接签名 + PredictRLM + 技能
└── skills.py # (可选)除内置技能外的自定义技能定义schema.pysignature.pyservice.py__init__.pyskills.pyField(description=...)from pydantic import BaseModel, Field
class KeyDate(BaseModel):
    """A key date extracted from a document."""

    # The field descriptions are runtime strings and are reproduced verbatim.

    # Label for the date (required).
    name: str = Field(
        description="e.g. 'Submission Deadline', 'Effective Date'",
    )
    # The date itself, carried as a string (required).
    date: str = Field(
        description="ISO format date (YYYY-MM-DD)",
    )
    # Optional time of day; None when not supplied.
    time: str | None = Field(
        default=None,
        description="24-hour format (HH:MM), e.g. '14:00', '09:30'",
    )
    # Optional timezone code; None when not supplied.
    timezone: str | None = Field(
        default=None,
        description="Timezone code, e.g. 'EST', 'EDT', 'PST', 'UTC'",
    )
class DocumentAnalysis(BaseModel):
    """Structured analysis of a document set."""

    # The field descriptions are runtime strings and are reproduced verbatim.

    # The analysis text (required).
    report: str = Field(
        description="Full analysis as a well-formatted markdown report",
    )
    # Extracted dates; a new empty list is created when none are supplied.
    key_dates: list[KeyDate] = Field(
        description="Important dates found in the documents",
        default_factory=list,
    )
Field(description=...)
from pydantic import BaseModel, Field
class KeyDate(BaseModel):
    """从文档中提取的关键日期。"""

    # Translated variant of the KeyDate model. The docstring and the field
    # descriptions are runtime strings of the model, so they are reproduced
    # verbatim rather than translated.

    # Label for the date (required).
    name: str = Field(
        description="例如:'提交截止日期'、'生效日期'",
    )
    # The date itself, carried as a string (required).
    date: str = Field(
        description="ISO格式日期(YYYY-MM-DD)",
    )
    # Optional time of day; None when not supplied.
    time: str | None = Field(
        default=None,
        description="24小时制格式(HH:MM),例如:'14:00'、'09:30'",
    )
    # Optional timezone code; None when not supplied.
    timezone: str | None = Field(
        default=None,
        description="时区代码,例如:'EST'、'EDT'、'PST'、'UTC'",
    )
class DocumentAnalysis(BaseModel):
    """文档集的结构化分析结果。"""

    # Translated variant of the DocumentAnalysis model. The docstring and the
    # field descriptions are runtime strings of the model, so they are
    # reproduced verbatim rather than translated.

    # The analysis text (required).
    report: str = Field(
        description="格式规范的markdown格式完整分析报告",
    )
    # Extracted dates; a new empty list is created when none are supplied.
    key_dates: list[KeyDate] = Field(
        description="文档中发现的重要日期",
        default_factory=list,
    )
import dspy
from predict_rlm import File
from .schema import DocumentAnalysis
class AnalyzeDocuments(dspy.Signature):
    """Analyze documents and produce a structured report.
    1. **Read the report criteria** (appended below) to understand what
    information to extract and in what format.
    2. **Survey the documents** to understand what you're working with:
    file names, page counts, document types.
    3. **Gather information** systematically by rendering pages as images
    and using predict() to extract content.
    4. **Produce the report** following the format specified in the criteria.
    Use tables for structured data, prose for analysis and context.
    """

    # The docstring above is read at runtime as ``AnalyzeDocuments.instructions``
    # (the service appends the caller's criteria to it), so its wording is
    # program text and is left untouched.

    # Input: the files the analysis runs over.
    documents: list[File] = dspy.InputField(
        desc="PDF documents to analyze"
    )
    # Output: a DocumentAnalysis. The description lists only what that schema
    # defines (report + key_dates); it previously also promised "key
    # entities", which DocumentAnalysis has no field for.
    analysis: DocumentAnalysis = dspy.OutputField(
        desc="Structured analysis with markdown report and key dates"
    )
import dspy
from predict_rlm import File
from .schema import DocumentAnalysis
class AnalyzeDocuments(dspy.Signature):
    """分析文档并生成结构化报告。
    1. **阅读报告标准**(附在下方),了解需要提取的信息及其格式。
    2. **调查文档**,了解工作对象:文件名、页数、文档类型。
    3. **系统收集信息**,将页面渲染为图片并使用predict()提取内容。
    4. **生成报告**,遵循标准中指定的格式。使用表格呈现结构化数据,使用散文体进行分析和说明上下文。
    """

    # Translated variant of the signature. The docstring above is read at
    # runtime as ``AnalyzeDocuments.instructions`` (the service appends the
    # caller's criteria to it), so it is program text and is left untouched.

    # Input: the files the analysis runs over.
    documents: list[File] = dspy.InputField(
        desc="要分析的PDF文档"
    )
    # Output: a DocumentAnalysis. The description lists only what that schema
    # defines (report + key_dates); it previously also promised key entities,
    # which DocumentAnalysis has no field for.
    analysis: DocumentAnalysis = dspy.OutputField(
        desc="包含markdown报告和关键日期的结构化分析结果"
    )
import dspy
from predict_rlm import File, PredictRLM
from predict_rlm.skills import pdf as pdf_skill
from .schema import DocumentAnalysis
from .signature import AnalyzeDocuments
class DocumentAnalyzer(dspy.Module):
    """DSPy module that runs ``AnalyzeDocuments`` through ``PredictRLM`` with the PDF skill.

    Args:
        sub_lm: Forwarded to ``PredictRLM`` as ``sub_lm``.
        max_iterations: Forwarded to ``PredictRLM`` as ``max_iterations``.
        verbose: Forwarded to ``PredictRLM`` as ``verbose``.
        debug: Forwarded to ``PredictRLM`` as ``debug``.
    """

    def __init__(
        self,
        sub_lm: dspy.LM | str | None = None,
        max_iterations: int = 30,
        verbose: bool = False,
        debug: bool = False,
    ):
        # Run the dspy.Module initializer first so any state the base class
        # sets up exists before this subclass adds its own attributes.
        super().__init__()
        self.sub_lm = sub_lm
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.debug = debug

    async def aforward(
        self, documents: list[File], criteria: str
    ) -> DocumentAnalysis:
        """Analyze ``documents`` against ``criteria`` and return the analysis.

        Args:
            documents: Files passed to the predictor as its ``documents`` input.
            criteria: Task-specific report criteria; surrounding whitespace is
                stripped before it is appended to the signature instructions.

        Returns:
            The ``analysis`` attribute of the predictor's result.
        """
        # Build a per-call signature: base instructions plus the caller's
        # criteria under a "# Task" heading.
        signature = AnalyzeDocuments.with_instructions(
            AnalyzeDocuments.instructions + "\n\n# Task\n\n" + criteria.strip()
        )
        predictor = PredictRLM(
            signature,
            sub_lm=self.sub_lm,
            skills=[pdf_skill],
            max_iterations=self.max_iterations,
            verbose=self.verbose,
            debug=self.debug,
        )
        result = await predictor.acall(documents=documents)
        return result.analysis
from predict_rlm.skills import pdf as pdf_skill
from predict_rlm.skills import spreadsheet as spreadsheet_skill
async def aforward(self, documents: list[File]) -> MyOutput:
predictor = PredictRLM(
MySignature,
sub_lm=self.sub_lm,
skills=[pdf_skill, spreadsheet_skill],
tools={"fetch_exchange_rate": fetch_exchange_rate},
...
)import dspy
from predict_rlm import File, PredictRLM
from predict_rlm.skills import pdf as pdf_skill
from .schema import DocumentAnalysis
from .signature import AnalyzeDocuments
class DocumentAnalyzer(dspy.Module):
    """DSPy module that runs ``AnalyzeDocuments`` through ``PredictRLM`` with the PDF skill.

    Translated variant: the heading appended to the instructions is the
    Chinese "# 任务" string, which is runtime text and is kept as-is.

    Args:
        sub_lm: Forwarded to ``PredictRLM`` as ``sub_lm``.
        max_iterations: Forwarded to ``PredictRLM`` as ``max_iterations``.
        verbose: Forwarded to ``PredictRLM`` as ``verbose``.
        debug: Forwarded to ``PredictRLM`` as ``debug``.
    """

    def __init__(
        self,
        sub_lm: dspy.LM | str | None = None,
        max_iterations: int = 30,
        verbose: bool = False,
        debug: bool = False,
    ):
        # Run the dspy.Module initializer first so any state the base class
        # sets up exists before this subclass adds its own attributes.
        super().__init__()
        self.sub_lm = sub_lm
        self.max_iterations = max_iterations
        self.verbose = verbose
        self.debug = debug

    async def aforward(
        self, documents: list[File], criteria: str
    ) -> DocumentAnalysis:
        """Analyze ``documents`` against ``criteria`` and return the analysis.

        Args:
            documents: Files passed to the predictor as its ``documents`` input.
            criteria: Task-specific report criteria; surrounding whitespace is
                stripped before it is appended to the signature instructions.

        Returns:
            The ``analysis`` attribute of the predictor's result.
        """
        # Build a per-call signature: base instructions plus the caller's
        # criteria under the task heading.
        signature = AnalyzeDocuments.with_instructions(
            AnalyzeDocuments.instructions + "\n\n# 任务\n\n" + criteria.strip()
        )
        predictor = PredictRLM(
            signature,
            sub_lm=self.sub_lm,
            skills=[pdf_skill],
            max_iterations=self.max_iterations,
            verbose=self.verbose,
            debug=self.debug,
        )
        result = await predictor.acall(documents=documents)
        return result.analysis
from predict_rlm.skills import pdf as pdf_skill
from predict_rlm.skills import spreadsheet as spreadsheet_skill
async def aforward(self, documents: list[File]) -> MyOutput:
predictor = PredictRLM(
MySignature,
sub_lm=self.sub_lm,
skills=[pdf_skill, spreadsheet_skill],
tools={"fetch_exchange_rate": fetch_exchange_rate},
...
)async def aforward(self, documents: list[File]):
# Stage 1: Extract
extractor = PredictRLM(ExtractSignature, sub_lm=self.sub_lm, skills=[pdf_skill])
extracted = await extractor.acall(documents=documents)
# Stage 2: Analyze (uses output from stage 1)
analyzer = PredictRLM(AnalyzeSignature, sub_lm=self.sub_lm, skills=[analysis_skill])
result = await analyzer.acall(data=extracted.data)
return resultasync def aforward(self, documents: list[File]):
# 阶段1:提取
extractor = PredictRLM(ExtractSignature, sub_lm=self.sub_lm, skills=[pdf_skill])
extracted = await extractor.acall(documents=documents)
# 阶段2:分析(使用阶段1的输出)
analyzer = PredictRLM(AnalyzeSignature, sub_lm=self.sub_lm, skills=[analysis_skill])
result = await analyzer.acall(data=extracted.data)
return resultfrom predict_rlm import Skill
from predict_rlm.skills import pdf as pdf_skill
redaction_skill = Skill(
name="redaction",
instructions="""How to redact content from PDFs using pymupdf.from predict_rlm import Skill
from predict_rlm.skills import pdf as pdf_skill
redaction_skill = Skill(
name="redaction",
instructions="""如何使用pymupdf对PDF内容进行编辑。
---
---predict()predict()predict()predict()predict()predict()File/sandbox/input/{field_name}//sandbox/output/{field_name}/from predict_rlm import FileFile/sandbox/input/{field_name}//sandbox/output/{field_name}/from predict_rlm import FileundefinedundefinedPredictRLM(
signature: type[Signature] | str, # DSPy signature class
lm: dspy.LM | str | None = None, # Main LM (code generation)
sub_lm: dspy.LM | str | None = None, # Sub-LM for predict() calls
max_iterations: int = 30,
max_llm_calls: int = 50,
verbose: bool = False,
tools: dict[str, Callable] | list[Callable] | None = None,
allowed_domains: list[str] | None = None,
skills: list[Skill] | None = None,
debug: bool = False,
output_dir: str | Path | None = None,
)lmsub_lm"openai/gpt-5.4"dspy.LMlmdspy.context(lm=...)PredictRLM(
signature: type[Signature] | str, # DSPy签名类
lm: dspy.LM | str | None = None, # 主LM(代码生成)
sub_lm: dspy.LM | str | None = None, # 用于predict()调用的子LM
max_iterations: int = 30,
max_llm_calls: int = 50,
verbose: bool = False,
tools: dict[str, Callable] | list[Callable] | None = None,
allowed_domains: list[str] | None = None,
skills: list[Skill] | None = None,
debug: bool = False,
output_dir: str | Path | None = None,
)lmsub_lm"openai/gpt-5.4"dspy.LMlmdspy.context(lm=...)from predict_rlm import Skill
Skill(
name="my-skill", # Short identifier
instructions="How to approach...", # Prose injected into the RLM prompt
packages=["pandas", "openpyxl"], # PyPI packages installed in the sandbox
modules={"helper": "/path/to/helper.py"}, # Python files mounted as importable modules
tools={"fetch": fetch_fn}, # Host-side callable functions exposed to the RLM
)tools=from predict_rlm import Skill
Skill(
name="my-skill", # 简短标识符
instructions="How to approach...", # 注入RLM提示的指导性文本
packages=["pandas", "openpyxl"], # 在沙箱中安装的PyPI包
modules={"helper": "/path/to/helper.py"}, # 挂载为可导入模块的Python文件
tools={"fetch": fetch_fn}, # 暴露给RLM的主机端可调用函数
)tools=from predict_rlm.skills import pdf as pdf_skill # pymupdf
from predict_rlm.skills import spreadsheet as spreadsheet_skill # openpyxl, pandas, formulas
from predict_rlm.skills import docx as docx_skill # python-docx

| Skill | Packages | Modules | What it teaches the RLM |
|---|---|---|---|
| pdf | pymupdf | — | Read, render, modify, and redact PDFs |
| spreadsheet | openpyxl, pandas, formulas | | Build and modify Excel workbooks with formulas and formatting |
| docx | python-docx | | Read, write, and modify Word documents with tables, formatting, and styles |
from predict_rlm.skills import pdf as pdf_skill # pymupdf
from predict_rlm.skills import spreadsheet as spreadsheet_skill # openpyxl, pandas, formulas
from predict_rlm.skills import docx as docx_skill # python-docx

| 技能 | 包 | 模块 | 教授RLM的内容 |
|---|---|---|---|
| pdf | pymupdf | — | 读取、渲染、修改和编辑PDF |
| spreadsheet | openpyxl, pandas, formulas | | 使用公式和格式构建及修改Excel工作簿 |
| docx | python-docx | | 读取、写入和修改带有表格、格式和样式的Word文档 |
async def fetch_exchange_rate(currency: str, date: str) -> str:
"""Fetch the exchange rate for a currency on a given date.
Args:
currency: ISO currency code (e.g. "EUR", "GBP")
date: Date in YYYY-MM-DD format
Returns:
JSON string with the exchange rate data
"""
async with httpx.AsyncClient() as client:
resp = await client.get(f"https://api.example.com/rates/{currency}/{date}")
return resp.texttools={"name": fn}tools=async def fetch_exchange_rate(currency: str, date: str) -> str:
"""获取指定日期的货币汇率。
参数:
currency: ISO货币代码(例如"EUR"、"GBP")
date: YYYY-MM-DD格式的日期
返回:
包含汇率数据的JSON字符串
"""
async with httpx.AsyncClient() as client:
resp = await client.get(f"https://api.example.com/rates/{currency}/{date}")
return resp.texttools={"name": fn}tools=
| Use a Skill when... | Use `tools=` when... |
|---|---|
| The RLM needs a package installed in the sandbox | The function must run on the host (API calls, DB queries, filesystem) |
| You need to teach the RLM how to use something | The tool's docstring is self-explanatory |
| The knowledge is reusable across RLMs | It's a single specific function for one RLM |
| 使用Skill的场景... | 使用`tools=`的场景... |
|---|---|
| RLM需要在沙箱中安装包 | 函数必须在主机上运行(API调用、数据库查询、文件系统操作) |
| 需要教授RLM如何使用某功能 | 工具的文档字符串本身已清晰说明 |
| 知识可在多个RLM间复用 | 仅为单个RLM提供的特定函数 |
predict()result = await predict(
"image: dspy.Image -> items: list[Item]",
instructions="Extract all line items from this invoice page",
image=page_image,
)dspy.Imagepredict()result = await predict(
"image: dspy.Image -> items: list[Item]",
instructions="从发票页面提取所有行项目",
image=page_image,
)dspy.Imagefrom predict_rlm import PredictRLM, Skill, File
from predict_rlm.skills import pdf, spreadsheet, docxfrom predict_rlm import PredictRLM, Skill, File
from predict_rlm.skills import pdf, spreadsheet, docxallowed_domainsallowed_domains