Loading...
Loading...
Discover article URLs from https://www.eceee.org/all-news/ and extract/persist full article text into SQLite with retry-safe incremental sync. Use when building or maintaining an eceee news fulltext corpus for downstream search, indexing, or summarization.
npx skill4agent add tiangong-ai/skills eceee-news-fulltext-fetch
https://www.eceee.org/all-news/
entry_content
export ECEEE_NEWS_DB_PATH="/absolute/path/to/eceee_news.db"
python3 scripts/fulltext_fetch.py init-db --db "$ECEEE_NEWS_DB_PATH"
python3 scripts/fulltext_fetch.py sync \
--db "$ECEEE_NEWS_DB_PATH" \
--index-url "https://www.eceee.org/all-news/" \
--limit 50 \
--min-chars 180
python3 scripts/fulltext_fetch.py sync \
--db "$ECEEE_NEWS_DB_PATH" \
--discover-only
python3 scripts/fulltext_fetch.py fetch-entry \
--db "$ECEEE_NEWS_DB_PATH" \
--entry-id 123
python3 scripts/fulltext_fetch.py fetch-entry \
--db "$ECEEE_NEWS_DB_PATH" \
--url "https://www.eceee.org/all-news/news/example-slug/"
python3 scripts/fulltext_fetch.py list-entries --db "$ECEEE_NEWS_DB_PATH" --limit 100
python3 scripts/fulltext_fetch.py list-content --db "$ECEEE_NEWS_DB_PATH" --status ready --limit 100
entries, url, title, published_at, discovered_at, last_seen_at
entry_content, entry_id, source_url, final_url, http_status, extractor, trafilatura, html-parser, none, content_text, content_hash, content_length, status, ready, failed
https://www.eceee.org/all-news/, newslink, /all-news/news/, mainContentColumn, trafilatura
entry_id, ready, ready, failed, next_retry_at
--db, ECEEE_NEWS_DB_PATH, --index-url, --discover-only, --limit, --force, --only-failed, --since-date, --refetch-days, --oldest-first, --timeout, --max-bytes, --min-chars, --max-retries, --retry-backoff-minutes, --user-agent, --disable-trafilatura, --fail-on-errors
< --min-chars, max_retries
references/schema.md, references/fetch-rules.md, assets/config.example.json, scripts/fulltext_fetch.py