Loading...
Loading...
Scrape webpage content, extract metadata, download images, and prepare for import/migration to AEM Edge Delivery Services. Returns analysis JSON with paths, metadata, cleaned HTML, and local images.
npx skill4agent add adobe/skills scrape-webpage
npm install playwright
npx playwright install chromium
cd .claude/skills/scrape-webpage/scripts && npm install
node .claude/skills/scrape-webpage/scripts/analyze-webpage.js "https://example.com/page" --output ./import-work
resources/web-page-analysis.md
./import-work/metadata.json
./import-work/screenshot.png
./import-work/cleaned.html
./import-work/images/
ls -lh ./import-work/metadata.json ./import-work/screenshot.png ./import-work/cleaned.html
ls -lh ./import-work/images/ | head -5
{
"url": "https://example.com/page",
"timestamp": "2025-01-12T10:30:00.000Z",
"paths": {
"documentPath": "/us/en/about",
"htmlFilePath": "us/en/about.plain.html",
"mdFilePath": "us/en/about.md",
"dirPath": "us/en",
"filename": "about"
},
"screenshot": "./import-work/screenshot.png",
"html": {
"filePath": "./import-work/cleaned.html",
"size": 45230
},
"metadata": {
"title": "Page Title",
"description": "Page description",
"og:image": "https://example.com/image.jpg",
"canonical": "https://example.com/page"
},
"images": {
"count": 15,
"mapping": {
"https://example.com/hero.jpg": "./images/a1b2c3d4e5f6.jpg",
"https://example.com/logo.webp": "./images/f6e5d4c3b2a1.png"
},
"stats": {
"total": 15,
"converted": 3,
"skipped": 12,
"failed": 0
}
}
}
paths.documentPath
paths.htmlFilePath
images.mapping
metadata
npx playwright install chromium
cd .claude/skills/scrape-webpage/scripts && npm install