Loading...
Loading...
Expert in deploying and customizing a modular RAG system with MCP protocol for AI assistants
npx skill4agent add aradotso/mcp-skills modular-rag-mcp-server
Skill by ara.so — MCP Skills collection.
query_knowledge_hub, list_collections, get_document_summary
# Clone the repository
git clone https://github.com/jerry-ai-dev/MODULAR-RAG-MCP-SERVER.git
cd MODULAR-RAG-MCP-SERVER
# In VS Code with Copilot/Claude, type in chat:
setup
# Create virtual environment
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
# Install dependencies
pip install -r requirements.txt
# Copy and configure environment variables
cp .env.example .env
# Edit .env with your API keys
src/core/config.py
from src.core.config import get_config
config = get_config()
# Access configuration
llm_provider = config.llm.provider # "openai", "anthropic", etc.
embedding_provider = config.embedding.provider
vector_store_type = config.vector_store.type # "qdrant", "chroma", etc.
.env
# LLM Provider
OPENAI_API_KEY=your_openai_key_here
ANTHROPIC_API_KEY=your_anthropic_key_here
# Embedding Provider
COHERE_API_KEY=your_cohere_key_here
# Reranker (optional)
JINA_API_KEY=your_jina_key_here
# Vector Store (if using cloud)
QDRANT_URL=your_qdrant_url
QDRANT_API_KEY=your_qdrant_key
src/core/config.py
class LLMConfig:
provider: str = "openai" # or "anthropic", "cohere"
model: str = "gpt-4"
temperature: float = 0.7
max_tokens: int = 2048
class EmbeddingConfig:
    """Embedding settings with the default values used in this example."""

    provider: str = "openai" # or "cohere", "huggingface"
    model: str = "text-embedding-3-small"
    # NOTE(review): presumably must match the output size of the selected
    # model (the Cohere example in this document uses 1024) — confirm.
    dimension: int = 1536
class RerankerConfig:
enabled: bool = True
provider: str = "cohere" # or "jina", "cross-encoder"
model: str = "rerank-english-v3.0"
top_k: int = 5
from src.ingestion.pipeline import IngestionPipeline
from src.core.config import get_config
config = get_config()
pipeline = IngestionPipeline(config)
# Ingest a PDF document
result = pipeline.ingest_document(
file_path="path/to/document.pdf",
collection_name="my_collection",
metadata={"source": "internal_docs", "version": "1.0"}
)
print(f"Ingested {result['chunks_created']} chunks")
print(f"Ingestion ID: {result['ingestion_id']}")
from src.retrieval.hybrid_search import HybridSearchRetriever
from src.core.config import get_config
config = get_config()
retriever = HybridSearchRetriever(config)
# Perform hybrid search
results = retriever.retrieve(
query="How does the authentication system work?",
collection_name="my_collection",
top_k=10, # Initial retrieval
rerank_top_k=5 # After reranking
)
for idx, result in enumerate(results):
print(f"{idx+1}. Score: {result.score:.4f}")
print(f" Text: {result.text[:100]}...")
print(f" Metadata: {result.metadata}")
# Run MCP server (usually configured in Claude Desktop config)
python src/mcp/server.py
claude_desktop_config.json
{
"mcpServers": {
"rag-knowledge-hub": {
"command": "python",
"args": ["/path/to/project/src/mcp/server.py"],
"env": {
"PYTHONPATH": "/path/to/project"
}
}
}
}
# When Claude calls this tool:
{
"query": "What are the deployment requirements?",
"collection_name": "my_collection",
"top_k": 5
}
# Returns: ["collection1", "collection2", ...]
{
"document_id": "doc_123",
"collection_name": "my_collection"
}
streamlit run src/dashboard/app.py
from src.evaluation.evaluator import RAGEvaluator
from src.core.config import get_config
config = get_config()
evaluator = RAGEvaluator(config)
# Prepare test dataset
test_cases = [
{
"query": "What is the API rate limit?",
"expected_answer": "The API rate limit is 1000 requests per hour.",
"ground_truth_context": ["Rate limits are set to 1000 req/hour..."]
},
# ... more test cases
]
# Run evaluation
results = evaluator.evaluate(
test_cases=test_cases,
collection_name="my_collection",
metrics=["faithfulness", "answer_relevancy", "context_precision"]
)
print(f"Average Faithfulness: {results['faithfulness']:.3f}")
print(f"Average Answer Relevancy: {results['answer_relevancy']:.3f}")
# In src/core/config.py
class EmbeddingConfig:
provider: str = "cohere" # Changed from "openai"
model: str = "embed-english-v3.0"
dimension: int = 1024 # Cohere dimension
from src.core.config import get_config
config = get_config()
config.embedding.provider = "cohere"
config.embedding.model = "embed-english-v3.0"
config.embedding.dimension = 1024
from src.ingestion.splitters.base import BaseSplitter
from typing import List
class CustomSplitter(BaseSplitter):
    """Fixed-size character splitter with overlapping windows.

    Each chunk holds at most ``chunk_size`` characters, and consecutive
    chunks start ``chunk_size - overlap`` characters apart.
    """

    def __init__(self, chunk_size: int = 500, overlap: int = 50):
        """Store the window size and the overlap between windows.

        Args:
            chunk_size: Maximum number of characters per chunk; must be > 0.
            overlap: Characters shared by consecutive chunks; must satisfy
                ``0 <= overlap < chunk_size``.

        Raises:
            ValueError: If the parameters would stop the window from
                advancing or would leave gaps between chunks.
        """
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        if not 0 <= overlap < chunk_size:
            raise ValueError(
                f"overlap must satisfy 0 <= overlap < chunk_size, "
                f"got overlap={overlap}, chunk_size={chunk_size}"
            )
        self.chunk_size = chunk_size
        self.overlap = overlap

    def split(self, text: str, metadata: dict = None) -> List[dict]:
        """Split ``text`` into overlapping chunks.

        Args:
            text: The text to split.
            metadata: Optional metadata copied into every chunk's metadata.

        Returns:
            A list of dicts with a ``"text"`` key and a ``"metadata"`` key;
            the metadata carries the caller's entries plus ``"chunk_index"``
            and ``"start_char"``. Empty text yields an empty list.

        Raises:
            ValueError: If the attributes were changed after construction so
                that the window can no longer advance.
        """
        # Re-check here because chunk_size/overlap are plain attributes that
        # may be reassigned after __init__; a non-positive step would
        # otherwise never terminate.
        step = self.chunk_size - self.overlap
        if step <= 0:
            raise ValueError(
                f"chunk_size ({self.chunk_size}) must be greater than "
                f"overlap ({self.overlap})"
            )

        base_metadata = metadata or {}
        return [
            {
                "text": text[start:start + self.chunk_size],
                "metadata": {
                    **base_metadata,
                    "chunk_index": index,
                    "start_char": start,
                },
            }
            for index, start in enumerate(range(0, len(text), step))
        ]
# Register and use
from src.ingestion.pipeline import IngestionPipeline
pipeline = IngestionPipeline(config)
pipeline.splitter = CustomSplitter(chunk_size=300, overlap=30)
from src.retrieval.rerankers.base import BaseReranker
from typing import List
class CustomReranker(BaseReranker):
    """Example reranker that orders documents by keyword overlap with the query."""

    def rerank(self, query: str, documents: List[dict], top_k: int = 5) -> List[dict]:
        """Return the ``top_k`` documents with the highest keyword score.

        The score is the number of whitespace-separated query words
        (lower-cased) that occur as substrings of the document's lower-cased
        ``'text'``. Each returned dict is a copy of the input document with an
        added ``'rerank_score'`` key; ties keep their input order.
        """

        def keyword_hits(document: dict) -> int:
            # Simple keyword-matching score: one point per matching query word.
            return sum(
                term in document['text'].lower()
                for term in query.lower().split()
            )

        ranked = sorted(
            (
                {**document, 'rerank_score': keyword_hits(document)}
                for document in documents
            ),
            key=lambda entry: entry['rerank_score'],
            reverse=True,
        )
        return ranked[:top_k]
# Use in retriever
from src.retrieval.hybrid_search import HybridSearchRetriever
retriever = HybridSearchRetriever(config)
retriever.reranker = CustomReranker()
from src.ingestion.pipeline import IngestionPipeline
pipeline = IngestionPipeline(config)
# Enable image captioning
result = pipeline.ingest_document(
file_path="document_with_images.pdf",
collection_name="multimodal_docs",
enable_image_captioning=True, # Vision LLM generates descriptions
metadata={"type": "technical_manual"}
)
# Images are converted to text descriptions and embedded with surrounding text
import os
from pathlib import Path
pipeline = IngestionPipeline(config)
docs_dir = Path("./documents")
results = []
for pdf_file in docs_dir.glob("*.pdf"):
try:
result = pipeline.ingest_document(
file_path=str(pdf_file),
collection_name="batch_collection",
metadata={"filename": pdf_file.name}
)
results.append(result)
print(f"✓ Ingested {pdf_file.name}")
except Exception as e:
print(f"✗ Failed {pdf_file.name}: {e}")
print(f"Total successful: {len(results)}")
~/Library/Application Support/Claude/claude_desktop_config.json
{
"mcpServers": {
"rag-knowledge-hub": {
"command": "/usr/bin/python3",
"args": ["/absolute/path/to/project/src/mcp/server.py"],
"env": {
"PYTHONPATH": "/absolute/path/to/project",
"OPENAI_API_KEY": "sk-..."
}
}
}
}
config.ingestion.chunk_size = 300 # Reduce for precision
config.ingestion.chunk_overlap = 50
config.reranker.enabled = True
config.reranker.provider = "cohere"
config.reranker.top_k = 5
from src.retrieval.hybrid_search import HybridSearchRetriever
retriever = HybridSearchRetriever(config)
retriever.dense_weight = 0.7 # Semantic search
retriever.sparse_weight = 0.3 # BM25 exact match
# Create golden test set
evaluator = RAGEvaluator(config)
results = evaluator.evaluate(test_cases, collection_name="my_collection")
# Adjust parameters based on metrics
# .env
QDRANT_URL=https://your-cluster.qdrant.io
QDRANT_API_KEY=your_api_key
# Start Qdrant with Docker
docker run -p 6333:6333 qdrant/qdrant
# In config
QDRANT_URL=http://localhost:6333
# config.py
class VectorStoreConfig:
type: str = "chroma"
persist_directory: str = "./chroma_db"
# Increase chunk size, reduce batch size
config.ingestion.chunk_size = 800
config.ingestion.batch_size = 10 # Embed 10 chunks at a time
pipeline = IngestionPipeline(config)
pipeline.process_streaming(
file_path="large_document.pdf",
collection_name="large_docs"
)
config.llm.max_retries = 5
config.llm.retry_delay = 2.0 # seconds
# OpenAI allows batching up to 2048 texts
config.embedding.batch_size = 100
from src.core.config import get_config
from src.retrieval.hybrid_search import HybridSearchRetriever
from src.generation.generator import Generator
from src.evaluation.evaluator import RAGEvaluator
config = get_config()
# Custom retriever configuration
retriever = HybridSearchRetriever(config)
retriever.dense_weight = 0.6
retriever.sparse_weight = 0.4
# Custom generator
generator = Generator(config)
generator.system_prompt = "You are a helpful technical assistant..."
# Run custom RAG
def custom_rag_query(query: str, collection: str):
    """Run retrieve -> generate -> evaluate for one query.

    Uses the module-level ``retriever``, ``generator`` and ``config``.

    Returns:
        A dict with ``"response"`` (the generator output), ``"contexts"``
        (the retrieved results) and ``"metrics"`` (the single-query
        evaluation result).
    """
    # Retrieve
    hits = retriever.retrieve(query, collection, top_k=5)

    # Generate
    answer = generator.generate(
        query=query,
        contexts=[hit.text for hit in hits],
        metadata=[hit.metadata for hit in hits],
    )

    # Evaluate (optional)
    scores = RAGEvaluator(config).evaluate_single(
        query=query,
        response=answer,
        contexts=[hit.text for hit in hits],
    )

    return dict(response=answer, contexts=hits, metrics=scores)
result = custom_rag_query("What are the system requirements?", "docs")
print(result["response"])
from src.rag_system import RAGSystem
from src.core.config import get_config
# Initialize
config = get_config()
rag = RAGSystem(config)
# In your FastAPI/Flask app
@app.post("/ask")
async def ask_question(query: str, collection: str = "default"):
result = rag.query(
query=query,
collection_name=collection,
top_k=5
)
return {
"answer": result["response"],
"sources": result["contexts"],
"confidence": result["metrics"]["answer_relevancy"]
}
main dev clean-start main dev clean-start