Loading...
Loading...
Monitoring, logging, and tracing implementation using OpenTelemetry as the unified standard. Use when building production systems requiring visibility into performance, errors, and behavior. Covers OpenTelemetry (metrics, logs, traces), Prometheus, Grafana, Loki, Jaeger, Tempo, structured logging (structlog, tracing, slog, pino), and alerting.
npx skill4agent add ancoleman/ai-design-components implementing-observability
┌────────────────────────────────────────────────────────┐
│ OpenTelemetry: The Unified Standard │
├────────────────────────────────────────────────────────┤
│ │
│ ONE SDK for ALL signals: │
│ ├── Metrics (Prometheus-compatible) │
│ ├── Logs (structured, correlated) │
│ ├── Traces (distributed, standardized) │
│ └── Context (propagates across services) │
│ │
│ Language SDKs: │
│ ├── Python: opentelemetry-api, opentelemetry-sdk │
│ ├── Rust: opentelemetry, tracing-opentelemetry │
│ ├── Go: go.opentelemetry.io/otel │
│ └── TypeScript: @opentelemetry/api │
│ │
│ Export to ANY backend: │
│ ├── LGTM Stack (Loki, Grafana, Tempo, Mimir) │
│ ├── Prometheus + Jaeger │
│ ├── Datadog, New Relic, Honeycomb (SaaS) │
│ └── Custom backends via OTLP protocol │
│ │
└────────────────────────────────────────────────────────┘
/websites/opentelemetry_io
from opentelemetry import metrics
meter = metrics.get_meter(__name__)
http_requests = meter.create_counter("http.server.requests")
http_requests.add(1, {"method": "GET", "status": 200})
import structlog
from opentelemetry import trace
logger = structlog.get_logger()
span = trace.get_current_span()
ctx = span.get_span_context()
logger.info(
    "processing_request",
    trace_id=format(ctx.trace_id, '032x'),
    span_id=format(ctx.span_id, '016x'),
    user_id=user_id
)
references/structured-logging.md
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
app = FastAPI()
FastAPIInstrumentor.instrument_app(app) # Auto-traces all HTTP requests
references/opentelemetry-setup.md
┌────────────────────────────────────────────────────────┐
│ LGTM Architecture │
├────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────────────────────────────────────┐ │
│ │ Grafana Dashboard (Port 3000) │ │
│ │ Unified UI for Logs, Metrics, Traces │ │
│ └──────┬──────────────┬─────────────┬─────────┘ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ Loki │ │ Tempo │ │ Mimir │ │
│ │ (Logs) │ │ (Traces) │ │(Metrics) │ │
│ │Port 3100 │ │Port 3200 │ │Port 9009 │ │
│ └────▲─────┘ └────▲─────┘ └────▲─────┘ │
│ │ │ │ │
│ └──────────────┴─────────────┘ │
│ │ │
│ ┌───────▼────────┐ │
│ │ Grafana Alloy │ │
│ │ (Collector) │ │
│ │ Port 4317/8 │ ← OTLP gRPC/HTTP │
│ └───────▲────────┘ │
│ │ │
│ OpenTelemetry Instrumented Apps │
│ │
└────────────────────────────────────────────────────────┘
examples/lgtm-docker-compose/docker-compose.yml
references/lgtm-stack.md
trace_id
span_id
import structlog
from opentelemetry import trace
logger = structlog.get_logger()
span = trace.get_current_span()
ctx = span.get_span_context()
logger.info(
    "request_processed",
    trace_id=format(ctx.trace_id, '032x'), # 32-char hex
    span_id=format(ctx.span_id, '016x'), # 16-char hex
    user_id=user_id
)
use tracing::{info, instrument};
#[instrument(fields(user_id = %user_id))]
async fn process_request(user_id: u64) -> Result<Response> {
    // trace_id/span_id automatically included
    info!(user_id = user_id, "processing request");
    Ok(result)
}
references/trace-context.md
{job="api-service"} |= "trace_id=4bf92f3577b34da6a3ce929d0e0e4736"
python scripts/setup_otel.py --language python --framework fastapi
pip install opentelemetry-api opentelemetry-sdk \
opentelemetry-instrumentation-fastapi \
opentelemetry-exporter-otlp
references/opentelemetry-setup.md
cd examples/lgtm-docker-compose
docker-compose up -d
# Grafana: http://localhost:3000 (admin/admin)
# OTLP: localhost:4317 (gRPC), localhost:4318 (HTTP)
references/lgtm-stack.md
references/structured-logging.md
references/alerting-rules.md
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
app = FastAPI()
FastAPIInstrumentor.instrument_app(app) # Auto-trace all HTTP requests
references/opentelemetry-setup.md
from opentelemetry import trace
tracer = trace.get_tracer(__name__)
with tracer.start_as_current_span("fetch_user_details") as span:
    span.set_attribute("user_id", user_id)
    user = await db.fetch_user(user_id)
    span.set_attribute("user_found", user is not None)
from opentelemetry.trace import Status, StatusCode
with tracer.start_as_current_span("process_payment") as span:
    try:
        result = process_payment(amount, card_token)
        span.set_status(Status(StatusCode.OK))
    except PaymentError as e:
        span.set_status(Status(StatusCode.ERROR, str(e)))
        span.record_exception(e)
        raise
references/trace-context.md
# Test log-trace correlation
# 1. Make request to your app
# 2. Copy trace_id from logs
# 3. Query in Grafana: {job="myapp"} |= "trace_id=<TRACE_ID>"
# Validate metrics
python scripts/validate_metrics.py
examples/fastapi-otel/
references/opentelemetry-setup.md
references/structured-logging.md
references/lgtm-stack.md
references/trace-context.md
references/alerting-rules.md
examples/fastapi-otel/
examples/axum-tracing/
examples/lgtm-docker-compose/
scripts/setup_otel.py
scripts/generate_dashboards.py
scripts/validate_metrics.py