Loading...
Loading...
Google search MCP server with academic PDF extraction, no API key required, CAPTCHA recovery, and parallel search capabilities
npx skill4agent add aradotso/mcp-skills google-surf-mcp-search
Skill by ara.so — MCP Skills collection.
~/.claude.json
{
"mcpServers": {
"google-surf": {
"command": "npx",
"args": ["-y", "google-surf-mcp"]
}
}
}
git clone https://github.com/HarimxChoi/google-surf-mcp
cd google-surf-mcp
npm install
npm run build
{
"mcpServers": {
"google-surf": {
"command": "node",
"args": ["/absolute/path/to/google-surf-mcp/build/index.js"]
}
}
}
npm run bootstrap
CHROME_PATH=/usr/bin/google-chrome SURF_TZ=America/New_York npm run bootstrap
search
query
limit
results[]
{ title, url, snippet }
dropped
dropped_reasons[]
cache_hit
// Via MCP tool call
{
"query": "typescript async patterns",
"limit": 5
}
{
"results": [
{
"title": "Async/Await in TypeScript",
"url": "https://example.com/typescript-async",
"snippet": "Learn how to use async/await patterns..."
}
],
"dropped": 2,
"dropped_reasons": ["sponsored", "knowledge_panel"],
"cache_hit": false
}
search_parallel
queries
limit
search
{
"queries": [
"mcp server best practices",
"playwright stealth techniques",
"typescript pdf extraction",
"google search scraping 2026"
],
"limit": 3
}
extract
url
max_chars
mode
"full"
"abstract"
"metadata"
full
abstract
metadata
content
title
excerpt
length
is_pdf
page_count
extraction_quality
"high"
"medium"
"low"
// Extract full academic paper
{
"url": "https://arxiv.org/pdf/2301.12345.pdf",
"mode": "full"
}
// Quick abstract for triage
{
"url": "https://nature.com/articles/s41586-023-12345-6",
"mode": "abstract",
"max_chars": 2000
}
{
"content": "# Paper Title\n\nAbstract: This paper presents...",
"title": "Novel Approach to AI Safety",
"excerpt": "This paper presents a novel approach...",
"length": 45678,
"is_pdf": true,
"page_count": 12,
"extraction_quality": "high"
}
search_extract
query
limit
max_chars
mode
"abstract"
"full"
mode="abstract"
mode="full"
results[]
extracted_content
// Triage mode (default, token-efficient)
{
"query": "claude mcp server tutorials",
"limit": 5,
"mode": "abstract"
}
// Full extraction (when you need complete content)
{
"query": "machine learning interpretability survey",
"limit": 3,
"mode": "full",
"max_chars": 50000
}
{
"results": [
{
"title": "Building MCP Servers",
"url": "https://example.com/mcp-tutorial",
"snippet": "Complete guide to MCP servers...",
"extracted_content": {
"content": "# Building MCP Servers\n\nMCP (Model Context Protocol)...",
"title": "Building MCP Servers",
"length": 1523,
"is_pdf": false,
"extraction_quality": "high"
}
}
]
}
health
status
"healthy"
"degraded"
cascade_mode
rate_limiter
cache_stats
config
// No parameters
{}
# Chrome binary path (auto-detected if not set)
CHROME_PATH=/usr/bin/google-chrome
# Profile storage (default: ~/.google-surf-mcp)
SURF_PROFILE_ROOT=/custom/path/profiles
# Browser locale and timezone
SURF_LOCALE=en-US
SURF_TZ=America/New_York
# Run Chrome visibly (for demos/debugging)
SURF_HEADLESS=false
# Remote debugging mode (headless servers)
SURF_REMOTE_DEBUG=true
# Cloud/serverless mode (fail-fast on CAPTCHA)
SURF_CLOUD_MODE=true
# Idle close timeout (ms), 0 disables
SURF_IDLE_CLOSE_MS=30000
# Rate limit (requests per minute)
SURF_RATE_LIMIT_PER_MIN=10
# Search cache TTL (ms), 0 disables
SURF_CACHE_TTL_SEARCH_MS=86400000
# Cache LRU size
SURF_CACHE_MAX_ENTRIES=1000
# Allow private IPs in extract (default: false)
SURF_ALLOW_PRIVATE=true
# Ignore TLS errors (auto-on in cloud mode)
SURF_INSECURE_TLS=false
# Disable sandbox (auto-on in cloud mode)
SURF_NO_SANDBOX=false
# Disable cascade fallback (pin single mode)
SURF_CASCADE_DISABLED=true
SURF_USE_STEALTH=true
# Humanlike browsing (off | background | inline)
SURF_HUMANLIKE_MODE=background
// Step 1: Search and triage with abstracts
const triage = await use_mcp_tool("google-surf", "search_extract", {
query: "transformer architecture improvements 2026",
limit: 10,
mode: "abstract"
});
// Step 2: Extract full text for promising papers
const topPapers = triage.results.slice(0, 3);
const fullTexts = await Promise.all(
topPapers.map(paper =>
use_mcp_tool("google-surf", "extract", {
url: paper.url,
mode: "full",
max_chars: 100000
})
)
);
const relatedTopics = await use_mcp_tool("google-surf", "search_parallel", {
queries: [
"MCP server authentication patterns",
"MCP server error handling",
"MCP server rate limiting",
"MCP server caching strategies"
],
limit: 5
});
// Process results by topic
relatedTopics.forEach((topicResults, index) => {
console.log(`Topic ${index + 1}:`, topicResults.results.length, "results");
});
// 1. Find relevant sources
const sources = await use_mcp_tool("google-surf", "search", {
query: "typescript best practices 2026",
limit: 20
});
// 2. Extract abstracts to filter quality
const abstracts = await Promise.all(
sources.results.map(result =>
use_mcp_tool("google-surf", "extract", {
url: result.url,
mode: "abstract"
})
)
);
// 3. Full extraction for high-quality sources
const highQuality = abstracts
.filter(a => a.extraction_quality === "high")
.slice(0, 5);
const fullContent = await Promise.all(
highQuality.map(a =>
use_mcp_tool("google-surf", "extract", {
url: a.url,
mode: "full"
})
)
);
// Check server health before batch operations
const health = await use_mcp_tool("google-surf", "health", {});
if (health.status !== "healthy") {
console.warn("Server degraded, reducing concurrency");
}
const rateLimit = health.rate_limiter.requests_per_minute;
if (rateLimit > 8) {
// Wait before starting batch
await sleep(60000);
}
# No config needed - default behavior
SURF_HEADLESS=false
SURF_HEADLESS=true
SURF_REMOTE_DEBUG=true
chrome://inspect
ssh -L 9222:localhost:9222 your-server
SURF_CLOUD_MODE=true
CAPTCHA_REQUIRED
Chrome binary not found
# Find your Chrome installation
which google-chrome
which chromium
# Set explicitly
CHROME_PATH=/usr/bin/google-chrome npm run bootstrap
npm run bootstrap
SURF_RATE_LIMIT_PER_MIN=5 npx google-surf-mcp
const health = await use_mcp_tool("google-surf", "health", {});
console.log(health.cascade_mode); // Should cycle: none → stealth → humanlike
const health = await use_mcp_tool("google-surf", "health", {});
// Check rate_limiter.requests_per_minute
// Check cache_stats for anomalies
SURF_CACHE_TTL_SEARCH_MS=0 npx google-surf-mcp
const results = await use_mcp_tool("google-surf", "search", {
query: "test query"
});
console.log(results.dropped_reasons);
// If all results dropped as "sponsored", selector may be stale
// Try metadata mode first
const meta = await use_mcp_tool("google-surf", "extract", {
url: "https://example.com/paper.pdf",
mode: "metadata"
});
console.log(meta.page_count); // If 0, PDF is inaccessible
# Allow private IPs (only if you control the URLs)
SURF_ALLOW_PRIVATE=true npx google-surf-mcp
const result = await use_mcp_tool("google-surf", "extract", {
url: "https://example.com/article"
});
if (result.extraction_quality === "low") {
// HTML was poorly structured or blocked
// Try fetching directly via other means
}
# Keep contexts warm longer
SURF_IDLE_CLOSE_MS=120000 npx google-surf-mcp
search_parallel
const queries = [...100queries];
const batches = chunk(queries, 10);
for (const batch of batches) {
const results = await use_mcp_tool("google-surf", "search_parallel", {
queries: batch
});
// Process batch
await sleep(5000); // Respect rate limits
}
# Disable search caching
SURF_CACHE_TTL_SEARCH_MS=0
# Increase cache size
SURF_CACHE_MAX_ENTRIES=5000
# Custom cache location
SURF_CACHE_ROOT=/tmp/google-surf-cache
search
extract
# Default: 10 requests/minute
SURF_RATE_LIMIT_PER_MIN=10
# Conservative for shared IPs
SURF_RATE_LIMIT_PER_MIN=5
# Aggressive (may trigger CAPTCHAs)
SURF_RATE_LIMIT_PER_MIN=20
const health = await use_mcp_tool("google-surf", "health", {});
console.log(health.rate_limiter);
// { requests_per_minute: 7, limit: 10, window_start: "2026-05-17T..." }
search_extract
mode="abstract"
mode="full"
search_parallel
search
extraction_quality
{ error }
SURF_CLOUD_MODE=false