# neo4j-graphrag-skill
Build GraphRAG retrieval pipelines on Neo4j using the neo4j-graphrag Python package (formerly neo4j-genai). Covers retriever selection (VectorRetriever, HybridRetriever, VectorCypherRetriever, HybridCypherRetriever, Text2CypherRetriever), retrieval_query Cypher fragments, query_params, pipeline wiring (GraphRAG + LLM), embedder setup, index creation, and LangChain/LlamaIndex integration. Does NOT handle KG construction from documents — use neo4j-document-import-skill. Does NOT handle plain vector search — use neo4j-vector-index-skill. Does NOT handle GDS analytics — use neo4j-gds-skill. Does NOT handle agent memory — use neo4j-agent-memory-skill.
npx skill4agent add neo4j-contrib/neo4j-skills neo4j-graphrag-skill

Keywords: `neo4j-graphrag`, `retrieval_query`, `GraphRAG`, `langchain-neo4j`
Related skills: neo4j-document-import-skill, neo4j-vector-index-skill, neo4j-gds-skill, neo4j-agent-memory-skill, neo4j-cypher-skill

pip install neo4j-graphrag
# LLM/embedder extras (choose one or more):
pip install neo4j-graphrag[openai] # OpenAI + AzureOpenAI
pip install neo4j-graphrag[google] # VertexAI
pip install neo4j-graphrag[anthropic] # Anthropic
pip install neo4j-graphrag[ollama] # Ollama (local)
pip install neo4j-graphrag[cohere] # Cohere
pip install neo4j-graphrag[sentence-transformers] # local embeddings
# BREAKING: old package `neo4j-genai` is deprecated — imports also changed:
pip uninstall neo4j-genai
# neo4j_genai.retrievers → neo4j_graphrag.retrievers
# neo4j_genai.generation → neo4j_graphrag.generation

Has fulltext index? YES → Hybrid variants (better recall)
NO → Vector variants (baseline)
Needs graph context after vector lookup? YES → Cypher variants
NO → plain variants
For natural-language-to-Cypher? → Text2CypherRetriever (no embedder needed)
For multi-tool LLM routing? → ToolsRetriever
Using external vector DB? → WeaviateNeo4jRetriever / PineconeNeo4jRetriever / QdrantNeo4jRetriever

| Retriever | Vector | Fulltext | Graph | When to use |
|---|---|---|---|---|
| VectorRetriever | ✓ | — | — | Baseline; quick start |
| HybridRetriever | ✓ | ✓ | — | Better recall; no graph context |
| VectorCypherRetriever | ✓ | — | ✓ | GraphRAG without fulltext |
| HybridCypherRetriever | ✓ | ✓ | ✓ | Production GraphRAG — default choice |
| Text2CypherRetriever | — | — | ✓ | LLM generates Cypher; no embedder |
| ToolsRetriever | varies | varies | varies | Multi-retriever LLM routing |
// Vector index (all retrievers need this)
CREATE VECTOR INDEX chunk_embedding IF NOT EXISTS
FOR (c:Chunk) ON (c.embedding)
OPTIONS { indexConfig: {
`vector.dimensions`: 1536,
`vector.similarity_function`: 'cosine'
} };
// Fulltext index (Hybrid retrievers only)
CREATE FULLTEXT INDEX chunk_fulltext IF NOT EXISTS
FOR (c:Chunk) ON EACH [c.text];
// Confirm ONLINE before ingesting:
SHOW INDEXES YIELD name, state
WHERE name IN ['chunk_embedding', 'chunk_fulltext']
RETURN name, state;
// Both must show state = 'ONLINE'

from neo4j import GraphDatabase
from neo4j_graphrag.retrievers import HybridCypherRetriever
from neo4j_graphrag.embeddings import OpenAIEmbeddings
from neo4j_graphrag.generation import GraphRAG
from neo4j_graphrag.llm import OpenAILLM
driver = GraphDatabase.driver("neo4j+s://<host>:7687", auth=("neo4j", "<password>"))
embedder = OpenAIEmbeddings(model="text-embedding-3-small") # 1536 dims — match index
# retrieval_query: Cypher fragment executed after vector lookup.
# `node` = matched node from vector index (AUTO-INJECTED — do NOT declare)
# `score` = similarity float (AUTO-INJECTED — do NOT declare)
# MUST include RETURN clause. MUST return `score` column.
retrieval_query = """
MATCH (node)<-[:HAS_CHUNK]-(article:Article)
OPTIONAL MATCH (article)-[:MENTIONS]->(org:Organization)
RETURN node.text AS chunk_text,
article.title AS article_title,
collect(DISTINCT org.name) AS mentioned_organizations,
score
"""
retriever = HybridCypherRetriever(
driver=driver,
vector_index_name="chunk_embedding",
fulltext_index_name="chunk_fulltext",
retrieval_query=retrieval_query,
embedder=embedder,
)
llm = OpenAILLM(model_name="gpt-4o", model_params={"temperature": 0})
rag = GraphRAG(retriever=retriever, llm=llm)
response = rag.search(query_text="Who does Alice work for?", retriever_config={"top_k": 5})
print(response.answer)

# Parameterized retrieval_query: pass query_params via retriever_config
retrieval_query = """
MATCH (node)<-[:HAS_CHUNK]-(article:Article)-[:MENTIONS]->(org:Organization)
WHERE org.name = $entity_name
RETURN node.text AS chunk_text, article.title AS title, score
"""
retriever = VectorCypherRetriever(
driver=driver,
index_name="chunk_embedding",
retrieval_query=retrieval_query,
embedder=embedder,
)
# Pass query_params inside retriever_config on each search:
response = rag.search(
query_text="What happened at Apple?",
retriever_config={"top_k": 10, "query_params": {"entity_name": "Apple"}},
)
# Direct retriever call (without GraphRAG wrapper):
results = retriever.search(
query_text="What happened at Apple?",
top_k=10,
query_params={"entity_name": "Apple"},
)

# Filter reduces candidate pool BEFORE vector similarity ranking
results = retriever.search(
query_text="quarterly results",
top_k=5,
filters={"date": {"$gte": "2024-01-01"}},
)
# Supported operators: $eq $ne $lt $lte $gt $gte $between $in $like $ilike

from neo4j_graphrag.retrievers import VectorRetriever
retriever = VectorRetriever(
driver=driver,
index_name="chunk_embedding",
embedder=embedder,
return_properties=["text", "source", "page_number"], # subset of node props
)
# No retrieval_query needed — returns node properties directly

from neo4j_graphrag.retrievers import Text2CypherRetriever
# LLM generates Cypher from natural language; no vector index needed
retriever = Text2CypherRetriever(
driver=driver,
llm=OpenAILLM(model_name="gpt-4o"),
neo4j_schema=None, # auto-fetched from db; or pass string
examples=["Q: Who works at Neo4j? A: MATCH (p:Person)-[:WORKS_AT]->(c:Company {name:'Neo4j'}) RETURN p.name"],
)
results = retriever.search(query_text="Which people work at Neo4j?")
# neo4j_schema=None → schema is auto-fetched from the database

from neo4j_graphrag.generation.prompts import RagTemplate
custom_template = RagTemplate(
template="""Answer the question using ONLY the context below.
Context: {context}
Question: {query_text}
Answer:""",
expected_inputs=["context", "query_text"],
)
rag = GraphRAG(retriever=retriever, llm=llm, prompt_template=custom_template)

| Error | Cause | Fix |
|---|---|---|
| `ModuleNotFoundError: neo4j_genai` imports fail | Old package installed | `pip uninstall neo4j-genai` then `pip install neo4j-graphrag` |
| Retrieval fails without a `score` column | Missing `score` in the RETURN clause | Add `score` to the `retrieval_query` RETURN |
| `retrieval_query` yields no rows | Missing RETURN clause | Add a RETURN clause to `retrieval_query` |
| Cypher error: variable already declared | Declared `node` / `score` inside `retrieval_query` | Remove it — `node` and `score` are auto-injected |
| Cypher error: variable not defined | Wrong variable name in retrieval_query | Use exactly `node` and `score` |
| Embedding dimension mismatch | Index created with different dims | Drop index, recreate with correct `vector.dimensions` |
| No results returned | Index name typo or index not ONLINE | Check `SHOW INDEXES YIELD name, state` |
| Low recall on hybrid search | Fulltext index not on right property | Fulltext index must cover the same property the chunks store their text in (e.g. `c.text`) |
| Slow retrieval | Large corpus with many entities | Set a lower `top_k`; add `LIMIT` inside `retrieval_query` |
| `top_k` rejected as keyword argument | Calling `rag.search()` with bare `top_k` | Wrap in `retriever_config={"top_k": N}` |
| Empty KG after pipeline run | (cell lost in extraction) | Temporarily set the relevant pipeline option — see neo4j-document-import-skill |

NOTE(review): several cells in this table were reconstructed from a garbled extraction — verify the exact error strings and fixes against the neo4j-graphrag documentation.
from neo4j_graphrag.embeddings import (
OpenAIEmbeddings, # OpenAI text-embedding-3-*
AzureOpenAIEmbeddings, # Azure-hosted OpenAI
VertexAIEmbeddings, # Google Vertex AI
MistralAIEmbeddings, # Mistral
CohereEmbeddings, # Cohere embed-v3
OllamaEmbeddings, # Local via Ollama
SentenceTransformerEmbeddings, # Local HuggingFace
)
# Dimension mapping (must match vector index):
# text-embedding-3-small → 1536
# text-embedding-3-large → 3072
# text-embedding-ada-002 → 1536
# all-MiniLM-L6-v2 → 384

from neo4j_graphrag.llm import (
OpenAILLM,
AzureOpenAILLM,
AnthropicLLM,
VertexAILLM,
MistralAILLM,
CohereLLM,
OllamaLLM,
)
# Any LangChain chat model also accepted by GraphRAG

response = rag.search(
query_text="...",
retriever_config={
"top_k": 5, # candidates per search (default 5)
"query_params": {...}, # passed to retrieval_query Cypher
"filters": {...}, # pre-filter before vector search
},
return_context=False, # True: include retrieved chunks in response
response_fallback="No context found.", # returned when retriever yields nothing
)
# response.answer → str
# response.retriever_result → RawSearchResult (if return_context=True)