Loading...
Loading...
Give AI agents eyes to see the internet — scrape Twitter, Reddit, YouTube, GitHub, Bilibili, XiaoHongShu with zero API fees
npx skill4agent add aradotso/ai-agent-skills agent-reach-internet-access
Skill by ara.so — AI Agent Skills collection.
# Basic installation
pip install agent-reach
# The tool will auto-detect and install:
# - Node.js (for some MCP servers)
# - gh CLI (for GitHub)
# - mcporter (for MCP integrations)
# - twitter-cli (for Twitter/X)
# - rdt-cli (for Reddit)
# - yt-dlp (for YouTube/Bilibili)
agent-reach doctor
# Read a web page
curl https://r.jina.ai/https://example.com
# Get JSON format
curl https://r.jina.ai/https://example.com \
-H "Accept: application/json"
# With images
curl https://r.jina.ai/https://example.com \
-H "X-With-Images-Summary: true"
import requests
url = "https://example.com"
response = requests.get(f"https://r.jina.ai/{url}")
markdown_content = response.text
# With options
headers = {
"X-With-Links-Summary": "true",
"X-With-Images-Summary": "true"
}
response = requests.get(f"https://r.jina.ai/{url}", headers=headers)
# Get video metadata + subtitles
yt-dlp --dump-json --write-auto-subs --skip-download \
"https://www.youtube.com/watch?v=VIDEO_ID"
# Search YouTube
yt-dlp "ytsearch5:AI agents tutorial" --dump-json
# Get specific subtitle language
yt-dlp --write-subs --sub-lang en --skip-download URL
# Bilibili videos (works same way)
yt-dlp --dump-json "https://www.bilibili.com/video/BV..."
import subprocess
import json
def get_video_info(url):
    """Return yt-dlp's JSON metadata for a single video URL.

    Runs ``yt-dlp --dump-json --write-auto-subs --skip-download`` on *url*
    and parses the captured standard output as one JSON document.
    """
    command = [
        "yt-dlp",
        "--dump-json",
        "--write-auto-subs",
        "--skip-download",
        url,
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
# Search videos
def search_youtube(query, max_results=5):
    """Search YouTube through yt-dlp and return one parsed JSON object per hit.

    Args:
        query: Free-text search terms.
        max_results: Number of results requested via the ``ytsearchN:`` prefix.

    Returns:
        A list with one parsed JSON document per non-blank line of yt-dlp's
        standard output; an empty list when yt-dlp printed nothing.
    """
    result = subprocess.run(
        ["yt-dlp", f"ytsearch{max_results}:{query}", "--dump-json"],
        capture_output=True, text=True
    )
    # Each result is printed as one JSON object per line. Blank lines are
    # skipped so that empty output yields [] instead of json.loads("")
    # raising JSONDecodeError.
    return [
        json.loads(line)
        for line in result.stdout.split('\n')
        if line.strip()
    ]
# Configure (paste exported cookies when prompted)
twitter configure
# Read a tweet
twitter tweet https://twitter.com/user/status/123456789
# Search tweets
twitter search "AI agents" --limit 20
# Get user timeline
twitter timeline @username --limit 50
# Get tweet thread
twitter thread https://twitter.com/user/status/123456789
~/.twitter-cli/config.json
# Login with cookies
rdt login
# Search posts
rdt search "machine learning" --limit 20
# Read post with comments
rdt post https://reddit.com/r/programming/comments/...
# Get subreddit posts
rdt subreddit r/python --limit 30
# Login (opens browser OAuth flow)
gh auth login
# View repository
gh repo view owner/repo
# Search repositories
gh search repos "LLM framework" --limit 20
# Search issues
gh search issues "bug" --repo owner/repo
# View issue
gh issue view 123 --repo owner/repo
# Create issue
gh issue create --repo owner/repo \
--title "Bug report" --body "Description"
import subprocess
import json
def search_repos(query, limit=20):
    """Search GitHub repositories with the gh CLI.

    Runs ``gh search repos`` for *query*, capped at *limit* results, and
    returns the parsed JSON output restricted to the ``name``,
    ``description`` and ``url`` fields.
    """
    fields = "name,description,url"
    command = [
        "gh", "search", "repos", query,
        "--limit", str(limit),
        "--json", fields,
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
def get_repo_info(owner_repo):
    """Return selected details of one repository via ``gh repo view``.

    *owner_repo* is passed straight to the gh CLI (the examples in this
    document use the ``owner/repo`` form). The parsed JSON output carries
    the ``description``, ``stargazerCount``, ``forkCount`` and ``url``
    fields.
    """
    fields = "description,stargazerCount,forkCount,url"
    command = ["gh", "repo", "view", owner_repo, "--json", fields]
    completed = subprocess.run(command, capture_output=True, text=True)
    return json.loads(completed.stdout)
# Configure (sets up MCP server)
mcporter add xiaohongshu
# The MCP server provides these tools:
# - search_notes: Search XHS posts
# - get_note_detail: Get post content
# - post_note: Create new post
# - comment_note: Add comment
# - like_note: Like a post
~/.mcporter/xiaohongshu/config.json
# Get hot videos
bili hot --limit 20
# Search videos
bili search "Python tutorial" --limit 30
# Get video info
bili video BV1xx411c7mD
# Get user dynamics
bili user-dynamic 123456
# Add Exa MCP server (no API key needed for basic use)
mcporter add exa
# The MCP server provides:
# - search: AI-powered semantic search
# - find_similar: Find similar pages
# - get_contents: Extract page contents
export EXA_API_KEY=your_key_here
import feedparser
# Parse RSS feed
feed = feedparser.parse("https://example.com/feed.xml")
for item in feed.entries:
    # Print one labelled line per field, then a separator between entries.
    print(f"Title: {item.title}")
    print(f"Link: {item.link}")
    print(f"Published: {item.published}")
    print(f"Summary: {item.summary}")
    print("---")
# Use Exa search to find WeChat articles
# Articles are auto-extracted when URLs contain mp.weixin.qq.com
# Search content
agent-reach weibo search "AI" --type content
# Get hot search
agent-reach weibo hot
# Get user posts
agent-reach weibo user USER_ID
# Get comments
agent-reach weibo comments POST_ID
# Get hot topics
agent-reach v2ex hot
# Get node topics
agent-reach v2ex node python
# Get topic details
agent-reach v2ex topic 123456
~/.twitter-cli/config.json
~/.rdt-cli/cookies.json
~/.mcporter/xiaohongshu/config.json
# Set proxy environment variables
export HTTP_PROXY=http://proxy-server:port
export HTTPS_PROXY=http://proxy-server:port
# Or configure per-tool
yt-dlp --proxy http://proxy-server:port URL
# OAuth login (recommended)
gh auth login
# Or use token
export GITHUB_TOKEN=ghp_your_token_here
gh auth login --with-token <<< $GITHUB_TOKEN
import subprocess
import json
def get_twitter_thread(url):
    """Return the text that ``twitter thread <url>`` prints to standard output."""
    command = ["twitter", "thread", url]
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout
thread_content = get_twitter_thread(
"https://twitter.com/user/status/123456789"
)
import subprocess
import json
def get_video_transcript(url):
    """Return title, description, duration and captions for a video URL.

    Invokes ``yt-dlp --dump-json --write-auto-subs --skip-download`` and
    parses its standard output as JSON. The returned dict has the keys
    ``title``, ``description``, ``duration`` and ``subtitles``.
    """
    # Get metadata + subtitles
    command = [
        "yt-dlp",
        "--dump-json",
        "--write-auto-subs",
        "--skip-download",
        url,
    ]
    completed = subprocess.run(command, capture_output=True, text=True)
    info = json.loads(completed.stdout)

    # Subtitles are in info['subtitles'] or info['automatic_captions'];
    # only the automatic captions are returned here, defaulting to {}.
    summary = {key: info.get(key) for key in ('title', 'description', 'duration')}
    summary['subtitles'] = info.get('automatic_captions', {})
    return summary
import subprocess
import json
def search_github_issues(query, repo=None):
    """Search GitHub issues with the gh CLI and return the parsed JSON output.

    Args:
        query: Search terms passed to ``gh search issues``.
        repo: Optional repository (the examples in this document use the
            ``owner/repo`` form) to restrict the search to; when falsy the
            search is not restricted.

    Returns:
        The parsed JSON output, limited to 20 results, with the fields
        ``title``, ``url``, ``state``, ``body`` and ``commentsCount``.
    """
    cmd = ["gh", "search", "issues", query,
           "--limit", "20", "--json",
           # `gh search issues` reports the number of comments as
           # `commentsCount`; `comments` is not an accepted JSON field and
           # makes the command fail before any JSON is printed.
           "title,url,state,body,commentsCount"]
    if repo:
        cmd.extend(["--repo", repo])
    result = subprocess.run(cmd, capture_output=True, text=True)
    return json.loads(result.stdout)
# Search across all repos
issues = search_github_issues("memory leak in agents")
# Search specific repo
issues = search_github_issues("bug", repo="openai/gpt-4")
# Search and save results
rdt search "your_product_name" --limit 50 > mentions.txt
# Get specific subreddit
rdt subreddit r/artificial --limit 100
import requests
def get_clean_content(url):
    """Fetch *url* through the r.jina.ai reader endpoint and return the body text.

    The request sends the ``X-With-Links-Summary`` and ``X-No-Cache``
    headers, both set to ``"true"``.
    """
    reader_url = f"https://r.jina.ai/{url}"
    options = {
        "X-With-Links-Summary": "true",
        "X-No-Cache": "true",
    }
    reply = requests.get(reader_url, headers=options)
    return reply.text
content = get_clean_content("https://news.ycombinator.com")
agent-reach doctor
twitter configure
rdt login
HTTP_PROXY
HTTPS_PROXY
gh auth login
pip install -U yt-dlp
# Check mcporter status
mcporter list
# Restart a server
mcporter restart xiaohongshu
# View server logs
mcporter logs exa
# Test proxy connection
curl -x http://proxy:port https://api.bilibili.com
# Set for specific command
export HTTPS_PROXY=http://proxy:port
yt-dlp URL
# Proxy (for server deployments)
export HTTP_PROXY=http://proxy:port
export HTTPS_PROXY=http://proxy:port
# GitHub
export GITHUB_TOKEN=ghp_xxxxx
# Exa (optional, for advanced features)
export EXA_API_KEY=your_key_here
# Custom config paths (optional)
export AGENT_REACH_CONFIG_DIR=~/.config/agent-reach
~/.twitter-cli/
~/.rdt-cli/
--safe
# Update agent-reach
pip install -U agent-reach
# Update individual tools
pip install -U yt-dlp
gh extension upgrade --all
npm update -g mcporter

| Platform | Out of Box | After Config | Notes |
|---|---|---|---|
| Web | ✅ | — | Jina Reader, no limits |
| YouTube | ✅ | — | yt-dlp, 1800+ sites |
| RSS | ✅ | — | feedparser |
| GitHub | ✅ | 🔧 Auth for private | gh CLI |
| Twitter/X | 🔧 Cookie | 🔧 Cookie | twitter-cli |
| Reddit | 🔧 Cookie | 🔧 Cookie | rdt-cli |
| Bilibili | ✅ Local | 🔧 Proxy (server) | yt-dlp |
| XiaoHongShu | 🔧 Cookie | 🔧 Cookie | xhs-cli via MCP |
| Search | 🔧 MCP | 🔧 API key (optional) | Exa |
| WeChat | ✅ | — | Via Exa search |
| Weibo | ✅ | — | Direct API |
| V2EX | ✅ | — | Direct API |