Loading...
Loading...
Context layer for data agents - builds semantic layer, wiki, and warehouse metadata to enable accurate AI-powered analytics queries
npx skill4agent add aradotso/ai-agent-skills ktx-ai-data-agents-context-layerSkill by ara.so — AI Agent Skills collection.
npm install -g @kaelio/ktxnpm install --save-dev @kaelio/ktxnpxnpx @kaelio/ktx setup# Initialize or resume a ktx project
ktx setup
# Check project readiness
ktx statusktx setupktx project: /home/user/analytics
Project ready: yes
LLM ready: yes (claude-sonnet-4-6)
Embeddings ready: yes (text-embedding-3-small)
Databases configured: yes (warehouse)
Context sources configured: yes (dbt_main)
ktx context built: yes
Agent integration ready: yes (codex:project)my-project/
├── ktx.yaml # Project configuration
├── semantic-layer/<connection-id>/ # YAML semantic sources (commit)
├── wiki/global/ # Shared business context (commit)
├── wiki/user/<user-id>/ # User-scoped notes (commit)
├── raw-sources/<connection-id>/ # Ingest artifacts and reports (commit)
└── .ktx/ # Local state and secrets (git-ignore)ktx.yamlsemantic-layer/wiki/raw-sources/.ktx/project:
name: my-analytics-project
version: 1.0.0
llm:
provider: anthropic # or google-vertex, ai-gateway, claude-code
model: claude-sonnet-4-6
# API key stored in .ktx/secrets.json
embeddings:
provider: openai
model: text-embedding-3-small
# API key stored in .ktx/secrets.json
databases:
warehouse:
type: postgres # or snowflake, bigquery, clickhouse, mysql, mssql, sqlite
host: db.example.com
port: 5432
database: analytics
schema: public
# credentials stored in .ktx/secrets.json
context_sources:
dbt_main:
type: dbt
manifest_path: ./target/manifest.json
looker_main:
type: looker
base_url: https://company.looker.com
# API credentials in .ktx/secrets.json
notion_wiki:
type: notion
# API token in .ktx/secrets.jsonexport KTX_PROJECT_DIR=/path/to/project
export ANTHROPIC_API_KEY=your-key-here
export OPENAI_API_KEY=your-key-here
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json# Ingest all configured sources
ktx ingest
# Ingest specific connection
ktx ingest --connection warehouse
# Ingest specific context source
ktx ingest --source dbt_main
# Force re-ingestion (skip cache)
ktx ingest --force# Search semantic layer (metrics, dimensions, entities)
ktx sl "revenue"
ktx sl "customer lifetime value"
# Search wiki pages
ktx wiki "refund policy"
ktx wiki "how to calculate churn"
# List all semantic sources
ktx sl list
# Show specific metric details
ktx sl show "monthly_recurring_revenue"# Start MCP server (required for agent integration)
ktx mcp start
# Start with specific project
ktx mcp start --project-dir /path/to/project
# Check MCP status
ktx mcp status# Validate project configuration
ktx validate
# Show detailed status
ktx status --verbose
# Clear all context and rebuild
ktx clear && ktx ingestktx setupktx mcp startQuery the warehouse using ktx to find monthly recurring revenue for the last 6 monthsSearch ktx wiki for our customer segmentation strategyUse ktx to explore available metrics related to user engagementsemantic-layer/warehouse/metrics.yamlmetrics:
- name: monthly_recurring_revenue
description: Sum of all active subscription revenue for the month
type: simple
sql: "SUM(subscription_amount)"
base_entity: subscription
filters:
- field: status
operator: equals
value: active
dimensions:
- plan_type
- customer_segment
- name: customer_ltv
description: Customer lifetime value - total revenue per customer
type: derived
sql: |
SUM(order_total) / COUNT(DISTINCT customer_id)
base_entity: order
dimensions:
- acquisition_channel
- cohort_monthwiki/global/analytics-definitions.md# Analytics Definitions
## Revenue Recognition
We recognize revenue on a cash basis when payment is received, not when
the invoice is sent.
## Customer Churn
A customer is considered churned if they have not had an active subscription
for 90 consecutive days. Paused subscriptions do not count as churn.
## Cohort Analysis
Customer cohorts are defined by their first purchase month. Use the
`first_order_date` field to group customers into cohorts.ktx ingest --source wikiimport { execSync } from 'child_process';
// Search semantic layer
const metrics = JSON.parse(
execSync('ktx sl "revenue" --json', { encoding: 'utf-8' })
);
console.log('Available revenue metrics:', metrics);
// Search wiki
const wikiResults = JSON.parse(
execSync('ktx wiki "churn" --json', { encoding: 'utf-8' })
);
console.log('Wiki pages about churn:', wikiResults);import { KtxProject } from '@kaelio/ktx';
async function buildContext() {
const project = await KtxProject.load('/path/to/project');
// Ingest all sources
await project.ingest({ force: false });
// Query semantic layer
const revenueMetrics = await project.semanticLayer.search('revenue');
console.log('Found metrics:', revenueMetrics.map(m => m.name));
}
buildContext();manifest.jsoncd /path/to/dbt-project
dbt compilektx.yamlcontext_sources:
dbt_main:
type: dbt
manifest_path: ../dbt-project/target/manifest.json
include_models: true
include_sources: true
include_metrics: truektx ingest --source dbt_mainref()databases:
production:
type: snowflake
account: xy12345.us-east-1
database: PROD_DB
warehouse: COMPUTE_WH
staging:
type: postgres
host: staging-db.internal
database: staging_analyticsktx ingest --connection production
ktx ingest --connection stagingsemantic-layer/warehouse/entities.yamlentities:
- name: customer
description: Customer dimension table
table: public.customers
primary_key: customer_id
columns:
- name: customer_id
type: integer
- name: email
type: string
- name: plan_type
type: string
- name: created_at
type: timestamp
- name: subscription
description: Active and historical subscriptions
table: public.subscriptions
primary_key: subscription_id
foreign_keys:
- column: customer_id
references_entity: customer
references_column: customer_id# Search only metrics (not dimensions or entities)
ktx sl "conversion" --type metric
# Search with semantic similarity threshold
ktx sl "customer value" --threshold 0.7
# Full-text search in wiki
ktx wiki "refund AND policy"ktx mcp start# Check project status first
ktx status
# Ensure LLM and embeddings are configured
ktx setup
# Verify project directory
ktx mcp start --project-dir /path/to/project
# Check for port conflicts (default 3000)
lsof -i :3000
# Start on different port
ktx mcp start --port 3001ktx ingest --connection warehouse# Verify credentials in .ktx/secrets.json
cat .ktx/secrets.json
# Test connection manually
psql -h db.example.com -U your_user -d analytics
# Check firewall / network access
telnet db.example.com 5432
# Use read-only user for safety
# ktx never writes, but explicit read-only is best practicektx sl "revenue"# Ensure context is built
ktx status
# Re-ingest if needed
ktx ingest --force
# Check semantic-layer directory
ls semantic-layer/warehouse/
# Verify embeddings are configured
ktx status --verbosektx ingest --source dbt_main# Compile dbt project first
cd /path/to/dbt-project
dbt compile
# Verify manifest path in ktx.yaml
cat ktx.yaml | grep manifest_path
# Use absolute path if needed
context_sources:
dbt_main:
type: dbt
manifest_path: /absolute/path/to/target/manifest.json# Review flagged contradictions
cat raw-sources/warehouse/contradictions.json
# Common causes:
# - dbt docs out of sync with warehouse
# - Recent schema changes not yet in dbt
# - Typos in column names or descriptions# Ensure MCP server is running
ktx mcp start
# Check agent integration status
ktx status | grep "Agent integration"
# Re-run setup to refresh integration
ktx setup
# Verify agent is configured to use MCP
# (check agent settings for MCP server configuration)llm:
provider: ai-gateway
endpoint: https://gateway.example.com/v1
model: custom-model-v2
max_tokens: 4096
temperature: 0.1#!/bin/bash
# Daily context refresh script
export KTX_PROJECT_DIR=/path/to/project
# Pull latest dbt changes
cd /path/to/dbt-project && git pull && dbt compile
# Rebuild ktx context
cd $KTX_PROJECT_DIR
ktx ingest --force
# Restart MCP server
pkill -f "ktx mcp" || true
ktx mcp start --daemondatabases:
warehouse:
type: postgres
# ... connection details ...
include_schemas:
- public
- analytics
exclude_tables:
- tmp_*
- staging_*
sample_size: 1000 # rows to sample per table.ktx/secrets.jsonANTHROPIC_API_KEYOPENAI_API_KEY