Loading...
Loading...
Scrape authenticated websites from WSL2 using Edge CDP. Launches headed Edge for user auth, then headless scraping via Chrome DevTools Protocol. Use when mirroring internal wikis, docs sites, or any site requiring 2FA/SSO login.
npx skill4agent add rysweet/amplihack authenticated-web-scraper
WSL2 Windows
┌─────────────────┐ ┌──────────────────────┐
│ Claude Code │ │ Edge Browser │
│ │ kill │ (user's profile) │
│ 1. Kill Edge ───┼──────────>│ │
│ │ launch │ │
│ 2. Launch Edge ─┼──────────>│ --remote-debug:9222 │
│ │ │ --debug-addr:0.0.0.0 │
│ [User auths │ │ │
│ in browser] │ │ CDP WebSocket on :9222│
│ │ cmd.exe │ │
│ 3. Run scraper ─┼──────────>│ node scraper.mjs │
│ │ │ connects localhost:9222│
│ 4. Read output <┼───────────│ writes to C:\Temp\... │
└─────────────────┘ └──────────────────────┘
localhost:9222
cmd.exe /c "node script.mjs"
cmd.exe
/mnt/c/Temp/...
cmd.exe /c "where node"
ws
cmd.exe /c "cd C:\Temp && npm install ws"
/mnt/c/Program Files (x86)/Microsoft/Edge/Application/msedge.exe
import { execSync, spawn } from "child_process";
// CRITICAL: Kill ALL Edge processes first, otherwise debug flags are ignored
execSync('cmd.exe /c "taskkill /F /IM msedge.exe /T"');
await sleep(3000);
const EDGE = "/mnt/c/Program Files (x86)/Microsoft/Edge/Application/msedge.exe";
spawn(
EDGE,
[
"--remote-debugging-port=9222",
"--remote-debugging-address=0.0.0.0",
"--remote-allow-origins=*",
targetUrl,
],
{ detached: true, stdio: "ignore" }
).unref();
# Verify CDP is running (must query from Windows side)
powershell.exe -Command "Invoke-RestMethod -Uri http://localhost:9222/json/version"
http://localhost:9222/json/list
ws
Runtime.evaluate
Page.navigate
Page.enable
.txt
.html
_links.json
cp script.mjs /mnt/c/Temp/scraper.mjs
cmd.exe /c "cd C:\Temp && node scraper.mjs C:\Temp\output" 2>&1
// Navigate to a page
await cdpSend(ws, "Page.navigate", { url });
// Extract text content
await cdpSend(ws, "Runtime.evaluate", {
expression: 'document.querySelector("main").innerText',
returnByValue: true,
});
// Extract links as JSON
await cdpSend(ws, "Runtime.evaluate", {
expression:
'JSON.stringify([...document.querySelectorAll("a[href]")].map(a => ({href: a.href, text: a.textContent.trim()})))',
returnByValue: true,
});
// Get full HTML
await cdpSend(ws, "Runtime.evaluate", {
expression: "document.documentElement.outerHTML",
returnByValue: true,
});
--remote-debugging-port
127.0.0.1
cmd.exe
C:\Temp\...
/mnt/c/Temp/...
investigation-workflow
knowledge-builder