Loading...
Loading...
Pull structured data from messy text using AI. Use when parsing invoices, extracting fields from emails, scraping entities from articles, converting unstructured text to JSON, extracting contact info, parsing resumes, reading forms, or any task where messy text goes in and clean structured data comes out. Powered by DSPy extraction.
npx skill4agent add lebsral/dspy-programming-not-prompting-lms-skills ai-parsing-data
import dspy
# Signature describing a contact-extraction task: one free-text input and
# three string outputs (name, email, phone).
# NOTE: the class docstring and the `desc` strings are runtime text handed to
# DSPy, so they are kept verbatim.
class ParseContact(dspy.Signature):
    """Extract contact information from the text."""

    text: str = dspy.InputField(desc="Text containing contact information")
    name: str = dspy.OutputField(desc="Person's full name")
    email: str = dspy.OutputField(desc="Email address")
    phone: str = dspy.OutputField(desc="Phone number")


# Predictor built from the signature above.
parser = dspy.ChainOfThought(ParseContact)

# Start of the next snippet: pydantic models used for nested/typed outputs.
from pydantic import BaseModel, Field
# Nested model holding the address fields of a person; every field is a
# required string.
class Address(BaseModel):
    street: str
    city: str
    state: str
    zip_code: str
# Structured record for one person: scalar fields, a nested Address model and
# a list of skill names.
class Person(BaseModel):
    name: str
    age: int
    address: Address
    skills: list[str]
# Signature whose single output field is typed as the Person model, so the
# prediction exposes a structured `person` attribute.
# NOTE: the docstring is runtime text handed to DSPy and is kept verbatim.
class ParsePerson(dspy.Signature):
    """Extract person details from the text."""

    text: str = dspy.InputField()
    person: Person = dspy.OutputField()
# Example run of the ParsePerson signature on a one-sentence description.
parser = dspy.ChainOfThought(ParsePerson)
result = parser(
    text="John Doe, 32, lives at 123 Main St, Springfield IL 62701. Expert in Python and SQL."
)
print(result.person)  # Person(name='John Doe', age=32, ...)


# One named entity: its surface name plus a free-text category. The allowed
# categories are only described in the field description, not enforced by the
# type.
class Entity(BaseModel):
    name: str
    type: str = Field(description="Type: person, organization, location, or date")
# Signature that returns a list of Entity models for the given text.
# NOTE: the docstring and `desc` string are runtime text handed to DSPy and
# are kept verbatim.
class ParseEntities(dspy.Signature):
    """Extract all named entities from the text."""

    text: str = dspy.InputField()
    entities: list[Entity] = dspy.OutputField(desc="All entities found in the text")
# Predictor for the entity-extraction signature.
parser = dspy.ChainOfThought(ParseEntities)


class ValidatedParser(dspy.Module):
    """Contact parser that attaches two format checks to its prediction.

    Wraps a ``ChainOfThought`` predictor over ``ParseContact`` and passes the
    predicted email and phone through ``dspy.Suggest`` checks.

    NOTE(review): ``dspy.Suggest`` is not available in every DSPy release
    (newer versions offer ``dspy.Refine`` / ``dspy.BestOfN`` instead) --
    confirm against the pinned DSPy version.
    """

    def __init__(self):
        # Let dspy.Module set up its own state before attaching sub-modules.
        super().__init__()
        self.parse = dspy.ChainOfThought(ParseContact)

    def forward(self, text):
        """Parse ``text`` and check the email and phone fields.

        Args:
            text: Raw text expected to contain contact information.

        Returns:
            The prediction produced by the wrapped ``ParseContact`` predictor.
        """
        result = self.parse(text=text)

        # A missing (None) field should fail the check, not raise TypeError.
        email = result.email or ""
        phone = result.phone or ""

        dspy.Suggest(
            "@" in email,
            "Email should contain @",
        )
        # Count digits rather than characters so separators such as spaces,
        # dashes and parentheses do not satisfy the "10 digits" requirement.
        dspy.Suggest(
            sum(ch.isdigit() for ch in phone) >= 10,
            "Phone number should have at least 10 digits",
        )
        return result


def _normalize_field(value):
    """Return ``value`` as a stripped, lower-cased string (``None`` -> ``""``)."""
    if value is None:
        return ""
    return str(value).strip().lower()


def parsing_metric(example, prediction, trace=None):
    """Score a prediction by field-level accuracy on name, email and phone.

    Only fields that are present (not ``None``) on ``example`` are counted.
    A field is correct when the expected and predicted values are equal after
    stripping surrounding whitespace and lower-casing. Values are converted
    with ``str`` first, so non-string expected values do not raise, and an
    expected empty string matched by an empty or missing prediction counts as
    correct.

    Args:
        example: Object carrying the expected ``name``/``email``/``phone``.
        prediction: Object carrying the predicted fields.
        trace: Accepted for compatibility with the metric call signature;
            unused.
            NOTE(review): DSPy metrics often return a strict bool when a
            trace is supplied -- confirm whether partial scores are intended
            to count while bootstrapping.

    Returns:
        Fraction of counted fields that match, or ``0.0`` when no field on
        ``example`` is set.
    """
    total = 0
    correct = 0
    for field in ("name", "email", "phone"):
        expected = getattr(example, field, None)
        if expected is None:
            continue
        total += 1
        predicted = getattr(prediction, field, None)
        if _normalize_field(expected) == _normalize_field(predicted):
            correct += 1
    return correct / total if total > 0 else 0.0


# Few-shot optimizer driven by the field-accuracy metric above.
optimizer = dspy.BootstrapFewShot(metric=parsing_metric, max_bootstrapped_demos=4)
optimized = optimizer.compile(parser, trainset=trainset)
list[Model]
ChainOfThought
dspy.Suggest
dspy.Assert
/ai-summarizing
/ai-decomposing-tasks
/ai-improving-accuracy