Loading...
Loading...
MongoDB document modeling, aggregation pipeline optimization, sharding strategies, replica set configuration, connection pool management, and indexing patterns. Use this skill for MongoDB-specific issues, NoSQL performance optimization, and schema design.
npx skill4agent add cin12211/orca-q mongodb-expert// Analyze document sizes and structure
db.collection.stats();
db.collection.findOne(); // Inspect document structure
db.collection.aggregate([{ $project: { size: { $bsonSize: "$$ROOT" } } }]);
// Check for large arrays
db.collection.find({}, { arrayField: { $slice: 1 } }).forEach(doc => {
print(doc.arrayField.length);
});// ANTI-PATTERN: Unbounded array growth
const AuthorSchema = {
name: String,
posts: [ObjectId] // Can grow unbounded
};
// BETTER: Reference from the 'many' side
const PostSchema = {
title: String,
author: ObjectId,
content: String
};// Analyze aggregation performance
db.collection.aggregate([
{ $match: { category: "electronics" } },
{ $group: { _id: "$brand", total: { $sum: "$price" } } }
]).explain("executionStats");
// Check for index usage in aggregation
db.collection.aggregate([{ $indexStats: {} }]);// OPTIMAL: Early filtering with $match
db.collection.aggregate([
{ $match: { date: { $gte: new Date("2024-01-01") } } }, // Use index early
{ $project: { _id: 1, amount: 1, category: 1 } }, // Reduce document size
{ $group: { _id: "$category", total: { $sum: "$amount" } } }
]);// GOOD: Group by shard key for pushdown optimization
db.collection.aggregate([
{ $group: { _id: "$shardKeyField", count: { $sum: 1 } } }
]);
// OPTIMAL: Compound shard key grouping
db.collection.aggregate([
{ $group: {
_id: {
region: "$region", // Part of shard key
category: "$category" // Part of shard key
},
total: { $sum: "$amount" }
}}
]);// Analyze index usage
db.collection.find({ category: "electronics", price: { $lt: 100 } }).explain("executionStats");
// Check index statistics
db.collection.aggregate([{ $indexStats: {} }]);
// Find unused indexes
db.collection.getIndexes().forEach(index => {
const stats = db.collection.aggregate([{ $indexStats: {} }]).toArray()
.find(stat => stat.name === index.name);
if (stats.accesses.ops === 0) {
print("Unused index: " + index.name);
}
});// Query: { status: "active", createdAt: { $gte: date } }, sort: { priority: -1 }
// OPTIMAL index order following ESR rule:
db.collection.createIndex({
status: 1, // Equality
priority: -1, // Sort
createdAt: 1 // Range
});// Multi-condition query optimization
db.collection.createIndex({ "category": 1, "price": -1, "rating": 1 });
// Partial index for conditional data
db.collection.createIndex(
{ "email": 1 },
{
partialFilterExpression: {
"email": { $exists: true, $ne: null }
}
}
);
// Text index for search functionality
db.collection.createIndex({
"title": "text",
"description": "text"
}, {
weights: { "title": 10, "description": 1 }
});// Monitor connection pool in Node.js
const client = new MongoClient(uri, {
maxPoolSize: 10,
monitorCommands: true
});
// Connection pool monitoring
client.on('connectionPoolCreated', (event) => {
console.log('Pool created:', event.address);
});
client.on('connectionCheckedOut', (event) => {
console.log('Connection checked out:', event.connectionId);
});
client.on('connectionPoolCleared', (event) => {
console.log('Pool cleared:', event.address);
});const client = new MongoClient(uri, {
maxPoolSize: 10, // Max concurrent connections
minPoolSize: 5, // Maintain minimum connections
maxIdleTimeMS: 30000, // Close idle connections after 30s
maxConnecting: 2, // Limit concurrent connection attempts
connectTimeoutMS: 10000,
socketTimeoutMS: 10000,
serverSelectionTimeoutMS: 5000
});// Pool size formula: (peak concurrent operations * 1.2) + buffer
// For 50 concurrent operations: maxPoolSize = (50 * 1.2) + 10 = 70
// Consider: replica set members, read preferences, write concerns// Performance profiling
db.setProfilingLevel(1, { slowms: 100 });
db.system.profile.find().sort({ ts: -1 }).limit(5);
// Query execution analysis
db.collection.find({
category: "electronics",
price: { $gte: 100, $lte: 500 }
}).hint({ category: 1, price: 1 }).explain("executionStats");
// Index effectiveness measurement
const stats = db.collection.find(query).explain("executionStats");
const ratio = stats.executionStats.totalDocsExamined / stats.executionStats.totalDocsReturned;
// Aim for ratio close to 1.0// Only return necessary fields
db.collection.find(
{ category: "electronics" },
{ name: 1, price: 1, _id: 0 } // Reduce network overhead
);
// Use covered queries when possible
db.collection.createIndex({ category: 1, name: 1, price: 1 });
db.collection.find(
{ category: "electronics" },
{ name: 1, price: 1, _id: 0 }
); // Entirely satisfied by index// Cursor-based pagination (better than skip/limit)
let lastId = null;
const pageSize = 20;
function getNextPage(lastId) {
const query = lastId ? { _id: { $gt: lastId } } : {};
return db.collection.find(query).sort({ _id: 1 }).limit(pageSize);
}// Analyze shard distribution
sh.status();
db.stats();
// Check chunk distribution
db.chunks.find().forEach(chunk => {
print("Shard: " + chunk.shard + ", Range: " + tojson(chunk.min) + " to " + tojson(chunk.max));
});
// Monitor balancer activity
sh.getBalancerState();
sh.getBalancerHost();// GOOD: User ID with timestamp (high cardinality, even distribution)
{ "userId": 1, "timestamp": 1 }
// POOR: Status field (low cardinality, uneven distribution)
{ "status": 1 } // Only a few possible values
// OPTIMAL: Compound shard key for better distribution
{ "region": 1, "customerId": 1, "date": 1 }// Target single shard with shard key in query
db.collection.find({ userId: "user123", date: { $gte: startDate } });
// Avoid scatter-gather queries
db.collection.find({ email: "user@example.com" }); // Scans all shards if email not in shard key// Replica set health monitoring
rs.status();
rs.conf();
rs.printReplicationInfo();
// Monitor oplog
db.oplog.rs.find().sort({ $natural: -1 }).limit(1);
// Check replica lag
rs.status().members.forEach(member => {
if (member.state === 2) { // Secondary
const lag = (rs.status().date - member.optimeDate) / 1000;
print("Member " + member.name + " lag: " + lag + " seconds");
}
});// Read preference strategies
const readPrefs = {
primary: "primary", // Strong consistency
primaryPreferred: "primaryPreferred", // Fallback to secondary
secondary: "secondary", // Load distribution
secondaryPreferred: "secondaryPreferred", // Prefer secondary
nearest: "nearest" // Lowest latency
};
// Tag-based read preferences for geographic routing
db.collection.find().readPref("secondary", [{ "datacenter": "west" }]);// Comprehensive replica set connection
const uri = "mongodb://user:pass@host1:27017,host2:27017,host3:27017/database?" +
"replicaSet=rs0&" +
"readPreference=secondaryPreferred&" +
"readPreferenceTags=datacenter:west&" +
"w=majority&" +
"wtimeout=5000";// Monitor transaction metrics
db.serverStatus().transactions;
// Check current operations
db.currentOp({ "active": true, "secs_running": { "$gt": 5 } });
// Analyze transaction conflicts
db.adminCommand("serverStatus").transactions.retriedCommandsCount;const session = client.startSession();
try {
await session.withTransaction(async () => {
const accounts = session.client.db("bank").collection("accounts");
// Keep transaction scope minimal
await accounts.updateOne(
{ _id: fromAccountId },
{ $inc: { balance: -amount } },
{ session }
);
await accounts.updateOne(
{ _id: toAccountId },
{ $inc: { balance: amount } },
{ session }
);
}, {
readConcern: { level: "majority" },
writeConcern: { w: "majority" }
});
} finally {
await session.endSession();
}async function withTransactionRetry(session, operation) {
while (true) {
try {
await session.withTransaction(operation);
break;
} catch (error) {
if (error.hasErrorLabel('TransientTransactionError')) {
console.log('Retrying transaction...');
continue;
}
throw error;
}
}
}// Instead of sparse schema with many null fields
const productSchema = {
name: String,
attributes: [
{ key: "color", value: "red" },
{ key: "size", value: "large" },
{ key: "material", value: "cotton" }
]
};// Group time-series data into buckets
const sensorDataBucket = {
sensor_id: ObjectId("..."),
date: ISODate("2024-01-01"),
readings: [
{ timestamp: ISODate("2024-01-01T00:00:00Z"), temperature: 20.1 },
{ timestamp: ISODate("2024-01-01T00:05:00Z"), temperature: 20.3 }
// ... up to 1000 readings per bucket
]
};const orderSchema = {
items: [
{ product: "laptop", price: 999.99, quantity: 2 },
{ product: "mouse", price: 29.99, quantity: 1 }
],
// Pre-computed totals
subtotal: 2029.97,
tax: 162.40,
total: 2192.37
};const movieSchema = {
title: "The Matrix",
year: 1999,
// Subset of most important cast members
mainCast: ["Keanu Reeves", "Laurence Fishburne"],
// Reference to complete cast collection
fullCastRef: ObjectId("...")
};// Create index that covers the entire query
db.products.createIndex({ category: 1, name: 1, price: 1 });
// Query is entirely satisfied by index
db.products.find(
{ category: "electronics" },
{ name: 1, price: 1, _id: 0 }
);// Index only documents that match filter
db.users.createIndex(
{ email: 1 },
{
partialFilterExpression: {
email: { $exists: true, $type: "string" }
}
}
);db.collection.aggregate([{ $project: { size: { $bsonSize: "$$ROOT" } } }])$match$projectallowDiskUseexplain("executionStats")// 1. Aggregation Pipeline Optimization
db.collection.aggregate([
{ $match: { date: { $gte: startDate } } }, // Early filtering
{ $project: { _id: 1, amount: 1, type: 1 } }, // Reduce document size
{ $group: { _id: "$type", total: { $sum: "$amount" } } }
]);
// 2. Compound Index Strategy
db.collection.createIndex({
status: 1, // Equality
priority: -1, // Sort
createdAt: 1 // Range
});
// 3. Connection Pool Monitoring
const client = new MongoClient(uri, {
maxPoolSize: 10,
minPoolSize: 5,
maxIdleTimeMS: 30000
});
// 4. Read Preference Optimization
db.collection.find().readPref("secondaryPreferred", [{ region: "us-west" }]);db.dropDatabase()db.collection.drop()allowDiskUse: true