Loading...
Loading...
Use when implementing on-device AI with Apple's Foundation Models framework — prevents context overflow, blocking UI, wrong model use cases, and manual JSON parsing when @Generable should be used. iOS 26+, macOS 26+, iPadOS 26+, visionOS 26+
npx skill4agent add charleswiltgen/axiom axiom-foundation-models axiom-foundation-models-diag axiom-foundation-models-ref// ❌ BAD - Asking for world knowledge
let session = LanguageModelSession()
let response = try await session.respond(to: "What's the capital of France?")session.respond()async// ❌ BAD - Blocking main thread
Button("Generate") {
let response = try await session.respond(to: prompt) // UI frozen!
}// ✅ GOOD - Async on background
Button("Generate") {
Task {
let response = try await session.respond(to: prompt)
// Update UI with response
}
}// ❌ BAD - Manual JSON parsing
let prompt = "Generate a person with name and age as JSON"
let response = try await session.respond(to: prompt)
let data = response.content.data(using: .utf8)!
let person = try JSONDecoder().decode(Person.self, from: data) // CRASHES!{firstName: "John"}{name: "John"}// ✅ GOOD - @Generable guarantees structure
@Generable
struct Person {
let name: String
let age: Int
}
let response = try await session.respond(
to: "Generate a person",
generating: Person.self
)
// response.content is type-safe Person instance// ❌ BAD - No availability check
let session = LanguageModelSession() // Might fail!// ✅ GOOD - Check first
switch SystemLanguageModel.default.availability {
case .available:
let session = LanguageModelSession()
// proceed
case .unavailable(let reason):
// Show graceful UI: "AI features require Apple Intelligence"
}// ❌ BAD - Everything in one prompt
let prompt = """
Generate a 7-day itinerary for Tokyo including hotels, restaurants,
activities for each day, transportation details, budget breakdown...
"""
// Exceeds context, poor quality// ✅ GOOD - Handle overflow
do {
let response = try await session.respond(to: prompt)
} catch LanguageModelSession.GenerationError.exceededContextWindowSize {
// Condense transcript and create new session
session = condensedSession(from: session)
}// ✅ GOOD - Handle guardrails
do {
let response = try await session.respond(to: userInput)
} catch LanguageModelSession.GenerationError.guardrailViolation {
// Show message: "I can't help with that request"
}// ✅ GOOD - Check supported languages
let supported = SystemLanguageModel.default.supportedLanguages
guard supported.contains(Locale.current.language) else {
// Show disclaimer
return
}switch SystemLanguageModel.default.availability {
case .available:
// Proceed with implementation
print("✅ Foundation Models available")
case .unavailable(let reason):
// Handle gracefully - show UI message
print("❌ Unavailable: \(reason)")
}| Use Case | Foundation Models? | Alternative |
|---|---|---|
| Summarization | ✅ YES | |
| Extraction (key info from text) | ✅ YES | |
| Classification (categorize content) | ✅ YES | |
| Content tagging | ✅ YES (built-in adapter!) | |
| World knowledge | ❌ NO | ChatGPT, Claude, Gemini |
| Complex reasoning | ❌ NO | Server LLMs |
| Mathematical computation | ❌ NO | Calculator, symbolic math |
@Generable
struct SearchSuggestions {
@Guide(description: "Suggested search terms", .count(4))
var searchTerms: [String]
}let stream = session.streamResponse(
to: prompt,
generating: Itinerary.self
)
for try await partial in stream {
// Update UI incrementally
self.itinerary = partial
}Need on-device AI?
│
├─ World knowledge/reasoning?
│ └─ ❌ NOT Foundation Models
│ → Use ChatGPT, Claude, Gemini, etc.
│ → Reason: 3B parameter model, not trained for encyclopedic knowledge
│
├─ Summarization?
│ └─ ✅ YES → Pattern 1 (Basic Session)
│ → Example: Summarize article, condense email
│ → Time: 10-15 minutes
│
├─ Structured extraction?
│ └─ ✅ YES → Pattern 2 (@Generable)
│ → Example: Extract name, date, amount from invoice
│ → Time: 15-20 minutes
│
├─ Content tagging?
│ └─ ✅ YES → Pattern 3 (contentTagging use case)
│ → Example: Tag article topics, extract entities
│ → Time: 10 minutes
│
├─ Need external data?
│ └─ ✅ YES → Pattern 4 (Tool calling)
│ → Example: Fetch weather, query contacts, get locations
│ → Time: 20-30 minutes
│
├─ Long generation?
│ └─ ✅ YES → Pattern 5 (Streaming)
│ → Example: Generate itinerary, create story
│ → Time: 15-20 minutes
│
└─ Dynamic schemas (runtime-defined structure)?
└─ ✅ YES → Pattern 6 (DynamicGenerationSchema)
→ Example: Level creator, user-defined forms
→ Time: 30-40 minutesimport FoundationModels
/// Responds in-character to player input using the on-device language model.
/// - Parameter userInput: The player's question, sent as a single prompt.
/// - Returns: The model's plain-text reply.
/// NOTE(review): a new session is created on every call, so no conversation
/// history carries over between turns — confirm that is intended.
func respond(userInput: String) async throws -> String {
let session = LanguageModelSession(instructions: """
You are a friendly barista in a pixel art coffee shop.
Respond to the player's question concisely.
"""
)
// Single-turn request; response.content is the generated text.
let response = try await session.respond(to: userInput)
return response.content
}let session = LanguageModelSession()
// First turn
let first = try await session.respond(to: "Write a haiku about fishing")
print(first.content)
// "Silent waters gleam,
// Casting lines in morning mist—
// Hope in every cast."
// Second turn - model remembers context
let second = try await session.respond(to: "Do another one about golf")
print(second.content)
// "Silent morning dew,
// Caddies guide with gentle words—
// Paths of patience tread."
// Inspect full transcript
print(session.transcript)let transcript = session.transcript
// Use for:
// - Debugging generation issues
// - Showing conversation history in UI
// - Exporting chat logsdo {
let response = try await session.respond(to: prompt)
} catch LanguageModelSession.GenerationError.guardrailViolation {
// Content policy triggered
print("Cannot generate that content")
} catch LanguageModelSession.GenerationError.unsupportedLanguageOrLocale {
// Language not supported
print("Please use English or another supported language")
}// ❌ BAD - Unreliable
let prompt = "Generate a person with name and age as JSON"
let response = try await session.respond(to: prompt)
// Might get: {"firstName": "John"} when you expect {"name": "John"}
// Might get invalid JSON entirely
// Must parse manually, prone to crashes@Generable
struct Person {
let name: String
let age: Int
}
let session = LanguageModelSession()
let response = try await session.respond(
to: "Generate a person",
generating: Person.self
)
let person = response.content // Type-safe Person instance!@GenerableStringIntFloatDoubleBool@Generable
struct SearchSuggestions {
var searchTerms: [String]
}@Generable
struct Itinerary {
var destination: String
var days: [DayPlan] // Composed type
}
@Generable
struct DayPlan {
var activities: [String]
}@Generable
struct NPC {
let name: String
let encounter: Encounter
@Generable
enum Encounter {
case orderCoffee(String)
case wantToTalkToManager(complaint: String)
}
}@Generable
struct Itinerary {
var destination: String
var relatedItineraries: [Itinerary] // Recursive!
}@Generable
struct NPC {
@Guide(description: "A full name with first and last")
let name: String
}@Generable
struct Character {
@Guide(.range(1...10))
let level: Int
}@Generable
struct Suggestions {
@Guide(description: "Suggested search terms", .count(4))
var searchTerms: [String]
}@Generable
struct Result {
@Guide(.maximumCount(3))
let topics: [String]
}@Generable
struct NPC {
@Guide(Regex {
Capture {
ChoiceOf {
"Mr"
"Mrs"
}
}
". "
OneOrMore(.word)
})
let name: String
}
// Output: {name: "Mrs. Brewster"}@Generable
struct Itinerary {
var destination: String // Generated first
var days: [DayPlan] // Generated second
var summary: String // Generated last
}// User waits 3-5 seconds seeing nothing
let response = try await session.respond(to: prompt, generating: Itinerary.self)
// Then entire result appears at once@Generable
struct Itinerary {
var name: String
var days: [DayPlan]
}
let stream = session.streamResponse(
to: "Generate a 3-day itinerary to Mt. Fuji",
generating: Itinerary.self
)
for try await partial in stream {
print(partial) // Incrementally updated
}@GenerablePartiallyGenerated// Compiler generates:
extension Itinerary {
struct PartiallyGenerated {
var name: String? // All properties optional!
var days: [DayPlan]?
}
}struct ItineraryView: View {
let session: LanguageModelSession
@State private var itinerary: Itinerary.PartiallyGenerated?
var body: some View {
VStack {
if let name = itinerary?.name {
Text(name)
.font(.title)
}
if let days = itinerary?.days {
ForEach(days, id: \.self) { day in
DayView(day: day)
}
}
Button("Generate") {
Task {
let stream = session.streamResponse(
to: "Generate 3-day itinerary to Tokyo",
generating: Itinerary.self
)
for try await partial in stream {
self.itinerary = partial
}
}
}
}
}
}if let name = itinerary?.name {
Text(name)
.transition(.opacity)
}
if let days = itinerary?.days {
ForEach(days, id: \.self) { day in
DayView(day: day)
.transition(.slide)
}
}// ✅ GOOD - Stable identity
ForEach(days, id: \.id) { day in
DayView(day: day)
}
// ❌ BAD - Identity changes, animations break
ForEach(days.indices, id: \.self) { index in
DayView(day: days[index])
}// ✅ GOOD - Title appears first, summary last
@Generable
struct Itinerary {
var name: String // Shows first
var days: [DayPlan] // Shows second
var summary: String // Shows last (can reference days)
}
// ❌ BAD - Summary before content
@Generable
struct Itinerary {
var summary: String // Doesn't make sense before days!
var days: [DayPlan]
}// ❌ BAD - Model will hallucinate
let response = try await session.respond(
to: "What's the temperature in Cupertino?"
)
// Output: "It's about 72°F" (completely made up!)import FoundationModels
import WeatherKit
import CoreLocation
struct GetWeatherTool: Tool {
let name = "getWeather"
let description = "Retrieve latest weather for a city"
@Generable
struct Arguments {
@Guide(description: "The city to fetch weather for")
var city: String
}
/// Resolves the requested city to coordinates and returns its current temperature.
/// - Parameter arguments: Model-generated arguments containing the city name.
/// - Returns: A `ToolOutput` describing the temperature, or a "not found" message.
/// - Throws: Geocoding or WeatherKit errors from the underlying async calls.
func call(arguments: Arguments) async throws -> ToolOutput {
let places = try await CLGeocoder().geocodeAddressString(arguments.city)
// Geocoding can return no matches, or a placemark without a location —
// avoid force-unwrapping and report a graceful result the model can relay.
guard let location = places.first?.location else {
return ToolOutput("Could not find a location named \(arguments.city).")
}
let weather = try await WeatherService.shared.weather(for: location)
let temp = weather.currentWeather.temperature.value
return ToolOutput("\(arguments.city)'s temperature is \(temp) degrees.")
}
}let session = LanguageModelSession(
tools: [GetWeatherTool()],
instructions: "Help user with weather forecasts."
)
let response = try await session.respond(
to: "What's the temperature in Cupertino?"
)
print(response.content)
// "It's 71°F in Cupertino!"GetWeatherToolprotocol Tool {
var name: String { get }
var description: String { get }
associatedtype Arguments: Generable
func call(arguments: Arguments) async throws -> ToolOutput
}getWeatherfindContact@Generablereturn ToolOutput("Temperature is 71°F")let content = GeneratedContent(properties: ["temperature": 71])
return ToolOutput(content)let session = LanguageModelSession(
tools: [
GetWeatherTool(),
FindRestaurantTool(),
FindHotelTool()
],
instructions: "Plan travel itineraries."
)
let response = try await session.respond(
to: "Create a 2-day plan for Tokyo"
)
// Model autonomously decides:
// - Calls FindRestaurantTool for dining
// - Calls FindHotelTool for accommodation
// - Calls GetWeatherTool to suggest activitiesclass FindContactTool: Tool {
let name = "findContact"
let description = "Find contact from age generation"
var pickedContacts = Set<String>() // State!
@Generable
struct Arguments {
let generation: Generation
@Generable
enum Generation {
case babyBoomers
case genX
case millennial
case genZ
}
}
/// Picks a random contact from the requested generation that has not been
/// returned before. Mutates `pickedContacts` so repeated calls never repeat.
func call(arguments: Arguments) async throws -> ToolOutput {
// Use Contacts API — fetchContacts is defined elsewhere; presumably wraps
// CNContactStore and filters by birth year (TODO confirm).
var contacts = fetchContacts(for: arguments.generation)
// Remove already picked so each call yields a fresh contact
contacts.removeAll(where: { pickedContacts.contains($0.name) })
guard let picked = contacts.randomElement() else {
// All contacts for this generation were already used.
return ToolOutput("No more contacts")
}
pickedContacts.insert(picked.name) // Update state: exclude this name next time
return ToolOutput(picked.name)
}
}call1. Session initialized with tools
2. User prompt: "What's Tokyo's weather?"
3. Model analyzes: "Need weather data"
4. Model generates tool call: getWeather(city: "Tokyo")
5. Framework calls your tool's call() method
6. Your tool fetches real data from API
7. Tool output inserted into transcript
8. Model generates final response using tool outputstruct FindPointsOfInterestTool: Tool {
let name = "findPointsOfInterest"
let description = "Find restaurants, museums, parks near a landmark"
let landmark: String
@Generable
struct Arguments {
let category: Category
@Generable
enum Category {
case restaurant
case museum
case park
case marina
}
}
/// Searches MapKit for up to five places of the requested category near
/// `landmark` and returns their names as a comma-separated string.
func call(arguments: Arguments) async throws -> ToolOutput {
// Use MapKit local search with a natural-language query; the enum case
// interpolates as its case name (e.g. "restaurant near Mt. Fuji").
let request = MKLocalSearch.Request()
request.naturalLanguageQuery = "\(arguments.category) near \(landmark)"
let search = MKLocalSearch(request: request)
let response = try await search.start()
// Cap at five results; items with no name become empty strings.
let names = response.mapItems.prefix(5).map { $0.name ?? "" }
return ToolOutput(names.joined(separator: ", "))
}
}// Long conversation...
for i in 1...100 {
let response = try await session.respond(to: "Question \(i)")
// Eventually...
// Error: exceededContextWindowSize
}var session = LanguageModelSession()
do {
let response = try await session.respond(to: prompt)
print(response.content)
} catch LanguageModelSession.GenerationError.exceededContextWindowSize {
// New session, no history
session = LanguageModelSession()
}var session = LanguageModelSession()
do {
let response = try await session.respond(to: prompt)
} catch LanguageModelSession.GenerationError.exceededContextWindowSize {
// New session with condensed history
session = condensedSession(from: session)
}
/// Builds a new session whose transcript keeps only the first entry
/// (instructions) and the last entry (most recent context), discarding the
/// middle of the conversation to recover from context-window overflow.
func condensedSession(from previous: LanguageModelSession) -> LanguageModelSession {
let allEntries = previous.transcript.entries
var condensedEntries = [Transcript.Entry]()
// Always include first entry (instructions)
if let first = allEntries.first {
condensedEntries.append(first)
// Include last entry (most recent context) — only when distinct from the first
if allEntries.count > 1, let last = allEntries.last {
condensedEntries.append(last)
}
}
// Seed a fresh session with the reduced transcript.
let condensedTranscript = Transcript(entries: condensedEntries)
return LanguageModelSession(transcript: condensedTranscript)
}
/// Builds a condensed session that keeps the instructions and the most recent
/// entry, replacing everything in between with a model-written summary.
/// Marked `async throws` because it calls the model to produce the summary
/// (the original example used `try await` inside a sync, non-throwing function,
/// which does not compile).
func condensedSession(from previous: LanguageModelSession) async throws -> LanguageModelSession {
let entries = previous.transcript.entries
guard entries.count > 3 else {
// Too short to be worth condensing — reuse the transcript as-is.
return LanguageModelSession(transcript: previous.transcript)
}
// Keep first (instructions) and last (recent); `first!` is safe after the count guard.
var condensedEntries = [entries.first!]
// Summarize middle entries so the new context stays small.
let middleEntries = Array(entries[1..<entries.count-1])
let summaryPrompt = """
Summarize this conversation in 2-3 sentences:
\(middleEntries.map { $0.content }.joined(separator: "\n"))
"""
// Use Foundation Models itself to summarize!
let summarySession = LanguageModelSession()
let summary = try await summarySession.respond(to: summaryPrompt)
// NOTE(review): assumes Transcript.Entry exposes `content` and an
// `init(content:)` — verify against the framework's actual API.
condensedEntries.append(Transcript.Entry(content: summary.content))
condensedEntries.append(entries.last!)
return LanguageModelSession(transcript: Transcript(entries: condensedEntries))
}// ❌ BAD
let prompt = """
I want you to generate a comprehensive detailed analysis of this article
with multiple sections including summary, key points, sentiment analysis,
main arguments, counter arguments, logical fallacies, and conclusions...
"""
// ✅ GOOD
let prompt = "Summarize this article's key points"// ❌ BAD - One massive generation
let response = try await session.respond(
to: "Create 7-day itinerary with hotels, restaurants, activities..."
)
// ✅ GOOD - Multiple smaller generations
let overview = try await session.respond(to: "Create high-level 7-day plan")
for day in 1...7 {
let details = try await session.respond(to: "Detail activities for day \(day)")
}let response = try await session.respond(
to: prompt,
options: GenerationOptions(sampling: .greedy)
)let response = try await session.respond(
to: prompt,
options: GenerationOptions(temperature: 0.5)
)let response = try await session.respond(
to: prompt,
options: GenerationOptions(temperature: 2.0)
)0.1-0.51.01.5-2.0"I understand ChatGPT delivers great results for certain tasks. However,
for this feature, Foundation Models is the right choice for three critical reasons:
1. **Privacy**: This feature processes [medical notes/financial data/personal content].
Users expect this data stays on-device. Sending to external API violates that trust
and may have compliance issues.
2. **Cost**: At scale, ChatGPT API calls cost $X per 1000 requests. Foundation Models
is free. For Y million users, that's $Z annually we can avoid.
3. **Offline capability**: Foundation Models works without internet. Users in airplane
mode or with poor signal still get full functionality.
**When to use ChatGPT**: If this feature required world knowledge or complex reasoning,
ChatGPT would be the right choice. But this is [summarization/extraction/classification],
which is exactly what Foundation Models is optimized for.
**Time estimate**: Foundation Models implementation: 15-20 minutes.
Privacy compliance review for ChatGPT: 2-4 weeks."{firstName: "John"}{name: "John"}keyNotFoundHere's the person: {name: "John", age: 30}// ❌ BAD - Will fail
let prompt = "Generate a person with name and age as JSON"
let response = try await session.respond(to: prompt)
// Model outputs: {"firstName": "John Smith", "years": 30}
// Your code expects: {"name": ..., "age": ...}
// CRASH: keyNotFound(name)// ✅ GOOD - 15 minutes, guaranteed to work
@Generable
struct Person {
let name: String
let age: Int
}
let response = try await session.respond(
to: "Generate a person",
generating: Person.self
)
// response.content is type-safe Person, always valid"I understand JSON parsing feels familiar, but for LLM output, @Generable is objectively
better for three technical reasons:
1. **Constrained decoding guarantees structure**: Model can ONLY generate valid Person
instances. Impossible to get wrong keys, invalid JSON, or missing fields.
2. **No parsing code needed**: Framework handles parsing automatically. Zero chance of
parsing bugs.
3. **Compile-time safety**: If we change Person struct, compiler catches all issues.
Manual JSON parsing = runtime crashes.
**Real cost**: Manual JSON approach will hit edge cases. Debugging 'keyNotFound' crashes
takes 2-4 hours. @Generable implementation takes 15 minutes and has zero parsing bugs.
**Analogy**: This is like choosing Swift over Objective-C for new code. Both work, but
Swift's type safety prevents entire categories of bugs."// ❌ BAD - One massive prompt
let prompt = """
Extract from this invoice:
- Vendor name
- Invoice date
- Total amount
- Line items (description, quantity, price each)
- Payment terms
- Due date
- Tax amount
...
"""
// 4 seconds, poor quality, might exceed context
// ✅ GOOD - Structured extraction with focused prompts
@Generable
struct InvoiceBasics {
let vendor: String
let date: String
let amount: Double
}
let basics = try await session.respond(
to: "Extract vendor, date, and amount",
generating: InvoiceBasics.self
) // 0.5 seconds, high quality
@Generable
struct LineItem {
let description: String
let quantity: Int
let price: Double
}
let items = try await session.respond(
to: "Extract line items",
generating: [LineItem].self
) // 1 second, high quality
// Total: 1.5 seconds, better quality, graceful partial failures"I understand the appeal of one simple API call. However, this specific task requires
a different approach:
1. **Context limits**: Invoice + complex extraction prompt will likely exceed 4096 token
limit. Multiple focused prompts stay well under limit.
2. **Better quality**: Model performs better with focused tasks. 'Extract vendor name'
gets 95%+ accuracy. 'Extract everything' gets 60-70%.
3. **Faster perceived performance**: Multiple prompts with streaming show progressive
results. Users see vendor name in 0.5s, not waiting 5s for everything.
4. **Graceful degradation**: If line items fail, we still have basics. All-or-nothing
approach means total failure.
**Implementation**: Breaking into 3-4 focused extractions takes 30 minutes. One big
prompt takes 2-3 hours debugging why it hits context limit and produces poor results."class ViewModel: ObservableObject {
private var session: LanguageModelSession?
init() {
// Prewarm on init, not when user taps button
Task {
self.session = LanguageModelSession(instructions: "...")
}
}
/// Generates a response using the session prewarmed in `init`.
/// The prewarm runs in a detached `Task`, so `session` may still be nil when
/// the user taps Generate — the original `session!` force-unwrap could crash.
/// Fall back to creating a session on demand instead.
func generate(prompt: String) async throws -> String {
if session == nil {
// Prewarm hasn't finished (or failed); create the session now.
session = LanguageModelSession(instructions: "...")
}
guard let session else {
// Unreachable: session was assigned just above; satisfies the compiler
// without a force-unwrap.
throw CancellationError()
}
let response = try await session.respond(to: prompt)
return response.content
}
}let firstResponse = try await session.respond(
to: "Generate first person",
generating: Person.self
// Schema inserted automatically
)
// Subsequent requests with SAME schema
let secondResponse = try await session.respond(
to: "Generate another person",
generating: Person.self,
options: GenerationOptions(includeSchemaInPrompt: false)
)// ✅ GOOD - Title shows immediately
@Generable
struct Article {
var title: String // Shows in 0.2s
var summary: String // Shows in 0.8s
var fullText: String // Shows in 2.5s
}
// ❌ BAD - Wait for everything
@Generable
struct Article {
var fullText: String // User waits 2.5s
var title: String
var summary: String
}exceededContextWindowSizeguardrailViolationunsupportedLanguageOrLocaleTask {}