Loading...
Loading...
Use this skill to implement hybrid search combining BM25 keyword search with semantic vector search using Reciprocal Rank Fusion (RRF).

**Trigger when user asks to:**
- Combine keyword and semantic search
- Implement hybrid search or multi-modal retrieval
- Use BM25/pg_textsearch with pgvector together
- Implement RRF (Reciprocal Rank Fusion) for search
- Build search that handles both exact terms and meaning

**Keywords:** hybrid search, BM25, pg_textsearch, RRF, reciprocal rank fusion, keyword search, full-text search, reranking, cross-encoder

Covers: pg_textsearch BM25 index setup, parallel query patterns, client-side RRF fusion (Python/TypeScript), weighting strategies, and optional ML reranking.
Install: `npx skill4agent add timescale/pg-aiguide postgres-hybrid-text-search`
-- Enable extensions
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_textsearch;

-- Table with both a text column (BM25 keyword search)
-- and an embedding column (semantic search).
CREATE TABLE documents (
    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    content TEXT NOT NULL,
    embedding halfvec(1536) NOT NULL
);

-- BM25 index for keyword search. The <@> operator ranks rows by BM25 relevance.
-- text_config selects the language configuration ('english', 'french', 'german', ...)
-- and must match the language of the stored content.
CREATE INDEX ON documents USING bm25 (content) WITH (text_config = 'english');

-- Optional tuning: k1 controls term-frequency saturation, b controls
-- document-length normalization.
-- CREATE INDEX ON documents USING bm25 (content) WITH (text_config = 'english', k1 = 1.5, b = 0.8);

-- HNSW index for semantic (cosine-distance) search over halfvec embeddings.
-- Results from both indexes are later fused with RRF: each hit contributes
-- 1 / (k + rank), where k is a smoothing constant.
CREATE INDEX ON documents USING hnsw (embedding halfvec_cosine_ops);
-- Query 1: Keyword search (BM25). <@> orders rows by BM25 relevance.
-- $1: search text
SELECT id, content FROM documents ORDER BY content <@> $1 LIMIT 50;

-- Query 2: Semantic search (separate query; run in parallel with Query 1).
-- $1: embedding of your search text as halfvec(1536)
SELECT id, content FROM documents ORDER BY embedding <=> $1::halfvec(1536) LIMIT 50;
# Client-side RRF fusion (Python)
def rrf_fusion(keyword_results, semantic_results, k=60, limit=10):
    """Merge two ranked result lists with Reciprocal Rank Fusion.

    Each row is a mapping with 'id' and 'content' keys, ordered best-first.
    A document gains 1 / (k + rank) from each list it appears in, so a
    document ranked by both searches outscores one found by only one.

    Args:
        keyword_results: rows from the BM25 keyword query, best first.
        semantic_results: rows from the vector similarity query, best first.
        k: RRF smoothing constant; higher values flatten rank differences
           (60 is the standard choice).
        limit: number of fused results to return.

    Returns:
        List of {'id', 'content', 'score'} dicts, highest fused score first.
    """
    scores = {}
    content_map = {}
    # Ranks are 1-based: the top result of a list contributes 1 / (k + 1).
    for result_list in (keyword_results, semantic_results):
        for rank, row in enumerate(result_list, start=1):
            doc_id = row['id']
            scores[doc_id] = scores.get(doc_id, 0) + 1 / (k + rank)
            content_map[doc_id] = row['content']
    top_ids = sorted(scores, key=scores.get, reverse=True)[:limit]
    return [{'id': doc_id, 'content': content_map[doc_id], 'score': scores[doc_id]}
            for doc_id in top_ids]
type Row = { id: number; content: string };
type Result = Row & { score: number };
function rrfFusion(keywordResults: Row[], semanticResults: Row[], k = 60, limit = 10): Result[] {
const scores = new Map<number, number>();
const contentMap = new Map<number, string>();
keywordResults.forEach((row, i) => {
scores.set(row.id, (scores.get(row.id) ?? 0) + 1 / (k + i + 1));
contentMap.set(row.id, row.content);
});
semanticResults.forEach((row, i) => {
scores.set(row.id, (scores.get(row.id) ?? 0) + 1 / (k + i + 1));
contentMap.set(row.id, row.content);
});
return [...scores.entries()]
.sort((a, b) => b[1] - a[1])
.slice(0, limit)
.map(([id, score]) => ({ id, content: contentMap.get(id)!, score }));
}| Parameter | Default | Description |
| Parameter | Default | Description |
|---|---|---|
| k (RRF constant) | 60 | Smoothing constant. Higher values reduce rank differences; 60 is standard |
| Candidates per search | 50 | Higher = better recall, more work |
| Final limit | 10 | Results returned after fusion |
# Weight semantic search 2x higher than keyword search.
# Drop-in replacement for the accumulation loops inside rrf_fusion:
# each list's 1 / (k + rank) contribution is scaled by its weight.
keyword_weight = 1.0
semantic_weight = 2.0
for rank, row in enumerate(keyword_results, start=1):
    scores[row['id']] = scores.get(row['id'], 0) + keyword_weight / (k + rank)
for rank, row in enumerate(semantic_results, start=1):
    scores[row['id']] = scores.get(row['id'], 0) + semantic_weight / (k + rank)
const keywordWeight = 1.0;
const semanticWeight = 2.0;
keywordResults.forEach((row, i) => {
scores.set(row.id, (scores.get(row.id) ?? 0) + keywordWeight / (k + i + 1));
});
semanticResults.forEach((row, i) => {
scores.set(row.id, (scores.get(row.id) ?? 0) + semanticWeight / (k + i + 1));
});cross-encoder/ms-marco-MiniLM-L-6-v2# 1. Fuse results with RRF (more candidates for reranking)
# Optional ML reranking with a local cross-encoder
# (model: cross-encoder/ms-marco-MiniLM-L-6-v2).

# 1. Fuse results with RRF, keeping a larger candidate pool for the reranker.
candidates = rrf_fusion(keyword_results, semantic_results, limit=100)

# 2. Rerank with a cross-encoder: it scores each (query, document) pair
#    jointly, which is more accurate than comparing precomputed embeddings.
from sentence_transformers import CrossEncoder
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
pairs = [(query_text, doc['content']) for doc in candidates]
scores = reranker.predict(pairs)

# 3. Return the top 10 candidates by reranker score.
reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)[:10]
// 1. Fuse results with RRF (more candidates for reranking)
const candidates = rrfFusion(keywordResults, semanticResults, 60, 100);
// 2. Rerank via API (example uses Cohere SDK; Jina, Voyage, and others work similarly)
const cohere = new CohereClientV2({ token: COHERE_API_KEY });
const reranked = await cohere.rerank({
model: 'rerank-v3.5',
query: queryText,
documents: candidates.map(c => c.content),
topN: 10
});
// 3. Map back to original documents
const results = reranked.results.map(r => candidates[r.index]);smallint[]-- Enable pgvectorscale (in addition to pgvector)
-- Label-filtered semantic search with pgvectorscale.
-- Enable pgvectorscale (in addition to pgvector).
CREATE EXTENSION IF NOT EXISTS vectorscale;

-- Table with a smallint[] label column for filtering.
CREATE TABLE documents (
    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
    content TEXT NOT NULL,
    embedding halfvec(1536) NOT NULL,
    labels smallint[] NOT NULL -- e.g., category IDs, tenant IDs
);

-- StreamingDiskANN index with label filtering.
-- NOTE(review): the column is halfvec but the opclass is vector_cosine_ops;
-- verify against the installed pgvectorscale version that diskann accepts
-- this combination, or use a vector(1536) column / a matching halfvec opclass.
CREATE INDEX ON documents USING diskann (embedding vector_cosine_ops, labels);

-- BM25 index for keyword search.
CREATE INDEX ON documents USING bm25 (content) WITH (text_config = 'english');

-- Filtered semantic search using && (array overlap): keeps rows whose labels
-- share at least one element with the filter array.
SELECT id, content FROM documents
WHERE labels && ARRAY[1, 3]::smallint[]
ORDER BY embedding <=> $1::halfvec(1536) LIMIT 50;
-- Verify index usage. The planner may prefer a seqscan on small tables,
-- so temporarily force index scans while checking plans with EXPLAIN.
SET enable_seqscan = off;

-- Verify the BM25 index is used by the <@> keyword query.
EXPLAIN SELECT id, content FROM documents ORDER BY content <@> 'search text' LIMIT 10;
-- Look for: Index Scan using ... (bm25)

-- Verify the HNSW index is used by the <=> semantic query.
EXPLAIN SELECT id, content FROM documents ORDER BY embedding <=> '[0.1, 0.2, ...]'::halfvec(1536) LIMIT 10;
-- Look for: Index Scan using ... (hnsw)

SET enable_seqscan = on; -- Re-enable for normal operation

-- Check index sizes.
SELECT indexname, pg_size_pretty(pg_relation_size(indexname::regclass)) AS size
FROM pg_indexes WHERE tablename = 'documents';
| Symptom | Likely Cause | Fix |
|---|---|---|
| Missing exact matches | Keyword search not returning them | Check BM25 index exists; verify text_config matches content language |
| Poor semantic results | Embedding model mismatch | Ensure query embedding uses same model as stored embeddings |
| Slow queries | Large candidate pools or missing indexes | Reduce inner LIMIT; verify both indexes exist and are used (EXPLAIN) |
| Skewed results | One method dominating | Adjust RRF weights; verify both searches return reasonable candidates |