Loading...
Loading...
Control web interfaces with natural language using Page Agent, a JavaScript in-page GUI agent for browser automation
npx skill4agent add aradotso/ai-agent-skills page-agent-web-automation
Skill by ara.so — AI Agent Skills collection.
npm install page-agent
<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.2/dist/iife/page-agent.demo.js" crossorigin="true"></script>
Append ?autoInit=false to the script URL and construct the agent yourself:
<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.2/dist/iife/page-agent.demo.js?autoInit=false" crossorigin="true"></script>
<script>
const agent = new window.PageAgent({...});
</script>import { PageAgent } from 'page-agent'
const agent = new PageAgent({
model: 'qwen3.5-plus',
baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
apiKey: process.env.DASHSCOPE_API_KEY,
language: 'en-US',
})// Simple command execution
await agent.execute('Click the login button')
// Form filling
await agent.execute('Fill in the email field with user@example.com')
// Multi-step workflow
await agent.execute('Search for "page agent" and click the first result')
// Navigation
await agent.execute('Go to the settings page')const agent = new PageAgent({
// LLM Configuration
model: 'gpt-4',
baseURL: 'https://api.openai.com/v1',
apiKey: process.env.OPENAI_API_KEY,
// Language settings
language: 'en-US', // or 'zh-CN'
// Optional: Custom system prompt
systemPrompt: 'You are a helpful assistant...',
})const agent = new PageAgent({
model: 'claude-3-5-sonnet-20241022',
baseURL: 'https://api.anthropic.com/v1',
apiKey: process.env.ANTHROPIC_API_KEY,
language: 'en-US',
// Execution options
maxSteps: 20, // Maximum execution steps
timeout: 30000, // Timeout in milliseconds
// Custom element selector strategy
elementSelector: {
includeInvisible: false,
maxElements: 100,
},
// Debug mode
debug: true,
})const agent = new PageAgent({
model: 'gpt-4',
baseURL: 'https://api.openai.com/v1',
apiKey: process.env.OPENAI_API_KEY,
})const agent = new PageAgent({
model: 'claude-3-5-sonnet-20241022',
baseURL: 'https://api.anthropic.com/v1',
apiKey: process.env.ANTHROPIC_API_KEY,
})const agent = new PageAgent({
model: 'qwen3.5-plus',
baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
apiKey: process.env.DASHSCOPE_API_KEY,
})const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.AZURE_OPENAI_ENDPOINT,
apiKey: process.env.AZURE_OPENAI_API_KEY,
})import { PageAgent } from 'page-agent'
/**
 * Copilot-style wrapper around a `PageAgent`: runs single natural-language
 * commands and fills forms one field at a time.
 */
class SaaSCopilot {
  private readonly agent: PageAgent

  /**
   * Builds the underlying agent using the `LLM_BASE_URL` and `LLM_API_KEY`
   * environment variables.
   */
  constructor() {
    this.agent = new PageAgent({
      model: 'gpt-4',
      baseURL: process.env.LLM_BASE_URL,
      apiKey: process.env.LLM_API_KEY,
      language: 'en-US',
    })
  }

  /**
   * Executes one command and reports the outcome as a result object instead
   * of throwing.
   *
   * @param command - Instruction passed verbatim to `agent.execute`.
   * @returns `{ success: true, result }` when the agent resolves, otherwise
   *   `{ success: false, error }` where `error` is a message string.
   */
  async handleUserCommand(command: string) {
    try {
      const result = await this.agent.execute(command)
      return { success: true, result }
    } catch (error: unknown) {
      console.error('Copilot error:', error)
      // Anything can be thrown, so narrow before reading `.message`; a
      // non-Error value is stringified rather than reported as `undefined`.
      const message = error instanceof Error ? error.message : String(error)
      return { success: false, error: message }
    }
  }

  /**
   * Fills a form by issuing one `Fill <field> with <value>` command per entry.
   *
   * Commands run sequentially in the iteration order of `formData`; if one
   * rejects, the rejection propagates and later fields are not attempted.
   *
   * @param formData - Map of field label to the value to enter.
   */
  async autoFillForm(formData: Record<string, string>) {
    for (const [field, value] of Object.entries(formData)) {
      await this.agent.execute(`Fill ${field} with ${value}`)
    }
  }
}
// Usage
const copilot = new SaaSCopilot()
await copilot.handleUserCommand('Create a new project named "Website Redesign"')import { PageAgent } from 'page-agent'
/**
 * Hands the agent one multi-line instruction that fills in the registration
 * form, ticks the terms checkbox and clicks submit.
 */
async function automateFormFilling() {
  const registrationAgent = new PageAgent({
    model: 'qwen3.5-plus',
    baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
    apiKey: process.env.DASHSCOPE_API_KEY,
  })

  // The empty first and last entries reproduce the newline that opens and
  // closes the prompt once the lines are joined.
  const instructionLines = [
    '',
    'Fill out the registration form:',
    '- First name: John',
    '- Last name: Doe',
    '- Email: john.doe@example.com',
    '- Password: Use a strong password',
    '- Check the terms and conditions checkbox',
    '- Click submit',
    '',
  ]

  await registrationAgent.execute(instructionLines.join('\n'))
}
import { PageAgent } from 'page-agent'
/**
 * Forwards voice transcripts to a `PageAgent` and asks the same agent to
 * describe the current page.
 */
class AccessibilityAgent {
  // Configured from the OPENAI_BASE_URL / OPENAI_API_KEY environment variables.
  private agent: PageAgent = new PageAgent({
    model: 'gpt-4',
    baseURL: process.env.OPENAI_BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
    language: 'en-US',
  })

  /**
   * Runs a voice transcript as an agent command, unchanged.
   *
   * @param voiceTranscript - Transcribed speech passed verbatim to `agent.execute`.
   */
  async handleVoiceCommand(voiceTranscript: string) {
    await this.agent.execute(voiceTranscript)
  }

  /**
   * Asks the agent to describe what is visible on the page (intended for
   * screen-reader use).
   *
   * @returns Whatever `agent.execute` resolves to for the description prompt.
   */
  async describeCurrentPage() {
    return await this.agent.execute('Describe what is visible on this page')
  }
}
import { PageAgent } from 'page-agent'
/**
 * Sends the agent a single numbered, five-step shopping instruction: browse
 * products, filter, sort, add items to the cart and go to checkout.
 */
async function complexWorkflow() {
  const workflowAgent = new PageAgent({
    model: 'claude-3-5-sonnet-20241022',
    baseURL: 'https://api.anthropic.com/v1',
    apiKey: process.env.ANTHROPIC_API_KEY,
  })

  // The empty first and last entries reproduce the newline that opens and
  // closes the prompt once the steps are joined.
  const steps = [
    '',
    '1. Navigate to the products page',
    '2. Filter by category "Electronics"',
    '3. Sort by price (low to high)',
    '4. Add the first three items to cart',
    '5. Go to checkout',
    '',
  ]

  await workflowAgent.execute(steps.join('\n'))
}
import { PageAgent } from 'page-agent'
/**
 * Runs a natural-language command through a freshly created `PageAgent`,
 * retrying when the agent rejects.
 *
 * After a failed attempt that is not the last one, the function waits
 * `1000 * attempt` milliseconds before trying again, so the pause grows
 * linearly with the attempt number.
 *
 * @param command - Instruction passed verbatim to `agent.execute`.
 * @param maxRetries - Maximum number of attempts (default 3).
 * @returns `{ success: true, result }` from the first attempt that resolves,
 *   or `{ success: false, error }` with a message string once every attempt
 *   has failed or when `maxRetries` allows no attempt at all.
 */
async function executeWithRetry(command: string, maxRetries = 3) {
  const agent = new PageAgent({
    model: 'gpt-4',
    baseURL: process.env.OPENAI_BASE_URL,
    apiKey: process.env.OPENAI_API_KEY,
  })

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const result = await agent.execute(command)
      return { success: true, result }
    } catch (error: unknown) {
      console.error(`Attempt ${attempt} failed:`, error)

      // "Last attempt" is decided by whether another iteration would run, so
      // a fractional maxRetries still ends with a failure result instead of
      // an extra wait followed by an implicit `undefined`.
      const isLastAttempt = attempt + 1 > maxRetries
      if (isLastAttempt) {
        // Anything can be thrown, so narrow before reading `.message`.
        const message = error instanceof Error ? error.message : String(error)
        return { success: false, error: message }
      }

      // Wait before retry
      await new Promise(resolve => setTimeout(resolve, 1000 * attempt))
    }
  }

  // Only reachable when the loop body never ran (maxRetries < 1 or NaN).
  return {
    success: false,
    error: `No attempt was made: maxRetries must be at least 1 (received ${maxRetries})`,
  }
}
// In your extension background script
import { PageAgent } from 'page-agent'
const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
})
// Execute commands across multiple tabs
// Runs EXECUTE_COMMAND messages through the agent and replies asynchronously
// with either { success: true, result } or { success: false, error }.
chrome.runtime.onMessage.addListener((message, sender, sendResponse) => {
  // Optional chaining keeps a null/undefined message from throwing here.
  if (message?.type === 'EXECUTE_COMMAND') {
    agent.execute(message.command)
      .then(result => sendResponse({ success: true, result }))
      .catch((error: unknown) =>
        sendResponse({
          success: false,
          // A rejection reason is not guaranteed to be an Error instance;
          // stringify anything else so the reply always carries a message.
          error: error instanceof Error ? error.message : String(error),
        })
      )
    return true // Keep channel open for async response
  }
})
# Start MCP server
npx page-agent-mcp{
"mcpServers": {
"page-agent": {
"command": "npx",
"args": ["page-agent-mcp"]
}
}
}const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
})
// Listen to execution events
agent.on('step', (stepData) => {
console.log('Agent step:', stepData)
})
agent.on('complete', (result) => {
console.log('Execution complete:', result)
})
agent.on('error', (error) => {
console.error('Agent error:', error)
})import { PageAgent } from 'page-agent'
const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
})
// Register custom action
agent.registerAction('sendEmail', async (params) => {
// Custom email sending logic
await sendEmail(params.to, params.subject, params.body)
return { success: true }
})
// Use custom action
await agent.execute('Send an email to team@example.com with subject "Update"')
display: none
visibility: hidden
maxElements
const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
elementSelector: {
includeInvisible: false,
maxElements: 200, // Increase if needed
},
})console.log(process.env.OPENAI_API_KEY)// Debug API configuration
const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
debug: true, // Enable debug logging
})const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
timeout: 60000, // 60 seconds
maxSteps: 30,
})<meta http-equiv="Content-Security-Policy"
content="script-src 'self' https://cdn.jsdelivr.net;">
const agent = new PageAgent({
model: 'gpt-4',
baseURL: process.env.OPENAI_BASE_URL,
apiKey: process.env.OPENAI_API_KEY,
language: 'en-US', // or 'zh-CN'
})