log-aggregation
Compare original and translation side by side
🇺🇸
Original
English
🇨🇳
Translation
Chinese
Log Aggregation
日志聚合
Overview
概述
Build comprehensive log aggregation systems to collect, parse, and analyze logs from multiple sources, enabling centralized monitoring, debugging, and compliance auditing.
构建全面的日志聚合系统,从多个来源收集、解析和分析日志,实现集中式监控、调试与合规审计。
When to Use
适用场景
- Centralized log collection
- Distributed system debugging
- Compliance and audit logging
- Security event monitoring
- Application performance analysis
- Error tracking and alerting
- Historical log retention
- Real-time log searching
- 集中式日志收集
- 分布式系统调试
- 合规与审计日志
- 安全事件监控
- 应用性能分析
- 错误追踪与告警
- 历史日志留存
- 实时日志搜索
Implementation Examples
实现示例
1. ELK Stack Configuration
1. ELK Stack配置
yaml
yaml
docker-compose.yml - ELK Stack setup
docker-compose.yml - ELK Stack setup
# ELK stack for a single host: Elasticsearch, Logstash, Kibana and Filebeat.
# X-Pack security is disabled below, so keep these ports off public networks.
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.5.0
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    healthcheck:
      # -f makes curl fail on HTTP error responses, not only on connection errors.
      test: curl -fs http://localhost:9200 >/dev/null || exit 1
      interval: 10s
      timeout: 5s
      retries: 5

  logstash:
    image: docker.elastic.co/logstash/logstash:8.5.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf:ro
    ports:
      - "5000:5000"   # TCP log input defined in logstash.conf
      - "9600:9600"   # Logstash monitoring API
    depends_on:
      # Wait for the healthcheck above instead of mere container start.
      elasticsearch:
        condition: service_healthy
    environment:
      - "LS_JAVA_OPTS=-Xmx256m -Xms256m"

  kibana:
    image: docker.elastic.co/kibana/kibana:8.5.0
    ports:
      - "5601:5601"
    environment:
      # Kibana 7+ reads ELASTICSEARCH_HOSTS; the older ELASTICSEARCH_URL is ignored.
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      elasticsearch:
        condition: service_healthy

  filebeat:
    image: docker.elastic.co/beats/filebeat:8.5.0
    # Root is required to read the Docker socket and container log files.
    user: root
    volumes:
      - ./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    # strict.perms=false: the bind-mounted config is usually not owned by root.
    command: filebeat -e -strict.perms=false
    depends_on:
      elasticsearch:
        condition: service_healthy

volumes:
  elasticsearch_data:
# ELK stack for a single host: Elasticsearch, Logstash, Kibana and Filebeat.
# X-Pack security is disabled below, so keep these ports off public networks.
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.5.0
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    healthcheck:
      # -f makes curl fail on HTTP error responses, not only on connection errors.
      test: curl -fs http://localhost:9200 >/dev/null || exit 1
      interval: 10s
      timeout: 5s
      retries: 5

  logstash:
    image: docker.elastic.co/logstash/logstash:8.5.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf:ro
    ports:
      - "5000:5000"   # TCP log input defined in logstash.conf
      - "9600:9600"   # Logstash monitoring API
    depends_on:
      # Wait for the healthcheck above instead of mere container start.
      elasticsearch:
        condition: service_healthy
    environment:
      - "LS_JAVA_OPTS=-Xmx256m -Xms256m"

  kibana:
    image: docker.elastic.co/kibana/kibana:8.5.0
    ports:
      - "5601:5601"
    environment:
      # Kibana 7+ reads ELASTICSEARCH_HOSTS; the older ELASTICSEARCH_URL is ignored.
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      elasticsearch:
        condition: service_healthy

  filebeat:
    image: docker.elastic.co/beats/filebeat:8.5.0
    # Root is required to read the Docker socket and container log files.
    user: root
    volumes:
      - ./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    # strict.perms=false: the bind-mounted config is usually not owned by root.
    command: filebeat -e -strict.perms=false
    depends_on:
      elasticsearch:
        condition: service_healthy

volumes:
  elasticsearch_data:
2. Logstash Pipeline Configuration
2. Logstash管道配置
conf
conf
logstash.conf
logstash.conf
# Logstash pipeline: receive logs over TCP and from files, normalise them,
# and index them into Elasticsearch.
input {
  # Receive JSON events over TCP (port published by docker-compose)
  tcp {
    port => 5000
    codec => json
  }

  # Read from files; lines that do not start with a timestamp (e.g. stack
  # traces) are folded into the preceding event.
  file {
    path => "/var/log/app/*.log"
    start_position => "beginning"
    codec => multiline {
      pattern => "^%{TIMESTAMP_ISO8601}"
      negate => true
      what => "previous"
    }
  }

  # Read from Kubernetes
  # NOTE(review): Logstash does not bundle a `kubernetes` input plugin, and the
  # pipeline fails to start on an unknown plugin. Ship pod logs with Filebeat
  # (add_kubernetes_metadata) instead, or confirm a third-party plugin is
  # installed before re-enabling this block.
  # kubernetes {
  #   kubernetes_url => "https://kubernetes.default"
  #   ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
  # }
}

filter {
  # Parse JSON logs; plain-text lines are skipped rather than tagged as failures
  json {
    source => "message"
    target => "parsed"
    skip_on_invalid_json => true
  }

  # Extract fields from "<timestamp> [LEVEL] text"
  grok {
    match => {
      # Brackets are escaped: a bare [ ... ] is a regex character class.
      "message" => "%{TIMESTAMP_ISO8601:timestamp} \[%{LOGLEVEL:level}\] %{GREEDYDATA:message}"
    }
    # Replace the original message instead of turning it into an array.
    overwrite => ["message"]
  }

  # Use the timestamp from the log line as the event time
  date {
    match => ["timestamp", "ISO8601"]
    target => "@timestamp"
  }

  # Add metadata
  mutate {
    add_field => {
      "environment" => "production"
      "datacenter" => "us-east-1"
    }
    remove_field => ["host"]
  }

  # Drop debug logs in production
  if [level] == "DEBUG" {
    drop { }
  }

  # Tag errors
  if [level] =~ /ERROR|FATAL/ {
    mutate {
      add_tag => ["error"]
    }
  }
}

output {
  # Send to Elasticsearch, one index per day.
  # `document_type` is omitted: mapping types no longer exist in Elasticsearch 8.
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "logs-%{+YYYY.MM.dd}"
  }

  # Also output errors to console
  if "error" in [tags] {
    stdout {
      codec => rubydebug
    }
  }
}
# Logstash pipeline: receive logs over TCP and from files, normalise them,
# and index them into Elasticsearch.
input {
  # Receive JSON events over TCP (port published by docker-compose)
  tcp {
    port => 5000
    codec => json
  }

  # Read from files; lines that do not start with a timestamp (e.g. stack
  # traces) are folded into the preceding event.
  file {
    path => "/var/log/app/*.log"
    start_position => "beginning"
    codec => multiline {
      pattern => "^%{TIMESTAMP_ISO8601}"
      negate => true
      what => "previous"
    }
  }

  # Read from Kubernetes
  # NOTE(review): Logstash does not bundle a `kubernetes` input plugin, and the
  # pipeline fails to start on an unknown plugin. Ship pod logs with Filebeat
  # (add_kubernetes_metadata) instead, or confirm a third-party plugin is
  # installed before re-enabling this block.
  # kubernetes {
  #   kubernetes_url => "https://kubernetes.default"
  #   ca_file => "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
  # }
}

filter {
  # Parse JSON logs; plain-text lines are skipped rather than tagged as failures
  json {
    source => "message"
    target => "parsed"
    skip_on_invalid_json => true
  }

  # Extract fields from "<timestamp> [LEVEL] text"
  grok {
    match => {
      # Brackets are escaped: a bare [ ... ] is a regex character class.
      "message" => "%{TIMESTAMP_ISO8601:timestamp} \[%{LOGLEVEL:level}\] %{GREEDYDATA:message}"
    }
    # Replace the original message instead of turning it into an array.
    overwrite => ["message"]
  }

  # Use the timestamp from the log line as the event time
  date {
    match => ["timestamp", "ISO8601"]
    target => "@timestamp"
  }

  # Add metadata
  mutate {
    add_field => {
      "environment" => "production"
      "datacenter" => "us-east-1"
    }
    remove_field => ["host"]
  }

  # Drop debug logs in production
  if [level] == "DEBUG" {
    drop { }
  }

  # Tag errors
  if [level] =~ /ERROR|FATAL/ {
    mutate {
      add_tag => ["error"]
    }
  }
}

output {
  # Send to Elasticsearch, one index per day.
  # `document_type` is omitted: mapping types no longer exist in Elasticsearch 8.
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "logs-%{+YYYY.MM.dd}"
  }

  # Also output errors to console
  if "error" in [tags] {
    stdout {
      codec => rubydebug
    }
  }
}
3. Filebeat Configuration
3. Filebeat配置
yaml
yaml
filebeat.yml
filebeat.yml
# Filebeat: ship application, system and Docker container logs to Elasticsearch.
filebeat.inputs:
  # Application logs. Lines that do not start with "[" are appended to the
  # previous event (multi-line stack traces).
  - type: log
    enabled: true
    paths:
      - /var/log/app/*.log
    fields:
      app: myapp
      environment: production
    # The bracket is escaped: a bare '^[' is an unterminated character class.
    multiline.pattern: '^\['
    multiline.negate: true
    multiline.match: after

  # Host system logs.
  - type: log
    enabled: true
    paths:
      - /var/log/syslog
      - /var/log/auth.log
    fields:
      service: system
      environment: production

# Container logs. Hints and ${data.docker.container.id} are autodiscover
# features, so they belong on a provider rather than on a static input.
filebeat.autodiscover:
  providers:
    - type: docker
      hints.enabled: true
      hints.default_config:
        enabled: true
        type: container
        paths:
          - /var/lib/docker/containers/${data.docker.container.id}/*.log

processors:
  - add_docker_metadata:
      host: "unix:///var/run/docker.sock"
  - add_kubernetes_metadata:
      in_cluster: true
  - add_host_metadata: ~
  # Adds `environment` at the event root (target: '').
  - add_fields:
      target: ''
      fields:
        environment: production

output.elasticsearch:
  hosts: ["elasticsearch:9200"]
  index: "filebeat-%{+yyyy.MM.dd}"

# Required whenever output.elasticsearch.index is customised.
setup.template.name: "filebeat"
setup.template.pattern: "filebeat-*"

logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  # Octal file mode, written unquoted as in the Filebeat reference config.
  permissions: 0640
# Filebeat: ship application, system and Docker container logs to Elasticsearch.
filebeat.inputs:
  # Application logs. Lines that do not start with "[" are appended to the
  # previous event (multi-line stack traces).
  - type: log
    enabled: true
    paths:
      - /var/log/app/*.log
    fields:
      app: myapp
      environment: production
    # The bracket is escaped: a bare '^[' is an unterminated character class.
    multiline.pattern: '^\['
    multiline.negate: true
    multiline.match: after

  # Host system logs.
  - type: log
    enabled: true
    paths:
      - /var/log/syslog
      - /var/log/auth.log
    fields:
      service: system
      environment: production

# Container logs. Hints and ${data.docker.container.id} are autodiscover
# features, so they belong on a provider rather than on a static input.
filebeat.autodiscover:
  providers:
    - type: docker
      hints.enabled: true
      hints.default_config:
        enabled: true
        type: container
        paths:
          - /var/lib/docker/containers/${data.docker.container.id}/*.log

processors:
  - add_docker_metadata:
      host: "unix:///var/run/docker.sock"
  - add_kubernetes_metadata:
      in_cluster: true
  - add_host_metadata: ~
  # Adds `environment` at the event root (target: '').
  - add_fields:
      target: ''
      fields:
        environment: production

output.elasticsearch:
  hosts: ["elasticsearch:9200"]
  index: "filebeat-%{+yyyy.MM.dd}"

# Required whenever output.elasticsearch.index is customised.
setup.template.name: "filebeat"
setup.template.pattern: "filebeat-*"

logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  # Octal file mode, written unquoted as in the Filebeat reference config.
  permissions: 0640
4. Kibana Dashboard and Alerts
4. Kibana仪表板与告警
json
{
"dashboard": {
"title": "Application Logs Overview",
"panels": [
{
"title": "Error Rate by Service",
"query": "level: ERROR",
"visualization": "bar_chart",
"groupBy": ["service"],
"timeRange": "1h"
},
{
"title": "Top 10 Error Messages",
"query": "level: ERROR",
"visualization": "table",
"fields": ["message", "count"],
"sort": [{"count": "desc"}],
"size": 10
},
{
"title": "Request Latency Distribution",
"query": "duration: *",
"visualization": "histogram"
},
{
"title": "Errors Over Time",
"query": "level: ERROR",
"visualization": "line_chart",
"dateHistogram": "1m"
}
]
},
"alerts": [
{
"name": "High Error Rate",
"query": "level: ERROR",
"threshold": 100,
"window": "5m",
"action": "slack"
},
{
"name": "Critical Exceptions",
"query": "level: FATAL",
"threshold": 1,
"window": "1m",
"action": "email"
}
]
}
json
{
"dashboard": {
"title": "Application Logs Overview",
"panels": [
{
"title": "Error Rate by Service",
"query": "level: ERROR",
"visualization": "bar_chart",
"groupBy": ["service"],
"timeRange": "1h"
},
{
"title": "Top 10 Error Messages",
"query": "level: ERROR",
"visualization": "table",
"fields": ["message", "count"],
"sort": [{"count": "desc"}],
"size": 10
},
{
"title": "Request Latency Distribution",
"query": "duration: *",
"visualization": "histogram"
},
{
"title": "Errors Over Time",
"query": "level: ERROR",
"visualization": "line_chart",
"dateHistogram": "1m"
}
]
},
"alerts": [
{
"name": "High Error Rate",
"query": "level: ERROR",
"threshold": 100,
"window": "5m",
"action": "slack"
},
{
"name": "Critical Exceptions",
"query": "level: FATAL",
"threshold": 1,
"window": "1m",
"action": "email"
}
]
}
5. Loki Configuration (Kubernetes)
5. Loki配置(Kubernetes)
yaml
yaml
loki-config.yaml
loki-config.yaml
# Single-replica Loki for Kubernetes: ConfigMap, Deployment and Service.
# The three resources are separate YAML documents and must be split by `---`.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: logging
data:
  # Mounted at /etc/loki/loki-config.yaml by the Deployment below.
  loki-config.yaml: |
    auth_enabled: false

    ingester:
      chunk_idle_period: 3m
      chunk_retain_period: 1m
      max_chunk_age: 1h
      chunk_encoding: snappy
      chunk_target_size: 1048576

    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h

    schema_config:
      configs:
        - from: 2020-05-15
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h

    server:
      http_listen_port: 3100

    storage_config:
      boltdb_shipper:
        active_index_directory: /loki/index
        cache_location: /loki/cache
        shared_store: filesystem
      filesystem:
        directory: /loki/chunks
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: loki
  namespace: logging
spec:
  replicas: 1
  selector:
    matchLabels:
      app: loki
  template:
    metadata:
      labels:
        app: loki
    spec:
      containers:
        - name: loki
          image: grafana/loki:2.8.0
          ports:
            - containerPort: 3100
          volumeMounts:
            - name: loki-config
              mountPath: /etc/loki
            - name: loki-storage
              mountPath: /loki
          args:
            - -config.file=/etc/loki/loki-config.yaml
      volumes:
        - name: loki-config
          configMap:
            name: loki-config
        # emptyDir is wiped when the pod is rescheduled; use a
        # PersistentVolumeClaim if the logs must survive restarts.
        - name: loki-storage
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: logging
spec:
  selector:
    app: loki
  ports:
    - port: 3100
      targetPort: 3100
# Single-replica Loki for Kubernetes: ConfigMap, Deployment and Service.
# The three resources are separate YAML documents and must be split by `---`.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: logging
data:
  # Mounted at /etc/loki/loki-config.yaml by the Deployment below.
  loki-config.yaml: |
    auth_enabled: false

    ingester:
      chunk_idle_period: 3m
      chunk_retain_period: 1m
      max_chunk_age: 1h
      chunk_encoding: snappy
      chunk_target_size: 1048576

    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h

    schema_config:
      configs:
        - from: 2020-05-15
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h

    server:
      http_listen_port: 3100

    storage_config:
      boltdb_shipper:
        active_index_directory: /loki/index
        cache_location: /loki/cache
        shared_store: filesystem
      filesystem:
        directory: /loki/chunks
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: loki
  namespace: logging
spec:
  replicas: 1
  selector:
    matchLabels:
      app: loki
  template:
    metadata:
      labels:
        app: loki
    spec:
      containers:
        - name: loki
          image: grafana/loki:2.8.0
          ports:
            - containerPort: 3100
          volumeMounts:
            - name: loki-config
              mountPath: /etc/loki
            - name: loki-storage
              mountPath: /loki
          args:
            - -config.file=/etc/loki/loki-config.yaml
      volumes:
        - name: loki-config
          configMap:
            name: loki-config
        # emptyDir is wiped when the pod is rescheduled; use a
        # PersistentVolumeClaim if the logs must survive restarts.
        - name: loki-storage
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: logging
spec:
  selector:
    app: loki
  ports:
    - port: 3100
      targetPort: 3100
6. Log Aggregation Deployment Script
6. 日志聚合部署脚本
bash
#!/bin/bash
# deploy-logging.sh - Deploy logging infrastructure
#
# Usage: deploy-logging.sh [environment]
#   environment  Label used in the progress message only (default: production).
#
# Expects these manifests in the current directory:
#   elasticsearch-deployment.yaml, logstash-deployment.yaml,
#   kibana-deployment.yaml, filebeat-daemonset.yaml

set -euo pipefail

NAMESPACE="logging"
ENV="${1:-production}"

# Apply <name>-deployment.yaml and block until the rollout has finished.
#   $1 - display name used in the progress message
#   $2 - deployment name (also the manifest file prefix)
deploy_and_wait() {
  local label="$1" name="$2"
  echo "Deploying ${label}..."
  kubectl apply -f "${name}-deployment.yaml" -n "$NAMESPACE"
  kubectl rollout status "deployment/${name}" -n "$NAMESPACE" --timeout=5m
}

echo "Deploying logging stack to $ENV..."

# Create namespace (dry-run + apply keeps this idempotent on re-runs)
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Deploy Elasticsearch, Logstash and Kibana in dependency order
deploy_and_wait "Elasticsearch" elasticsearch
deploy_and_wait "Logstash" logstash
deploy_and_wait "Kibana" kibana

# Deploy Filebeat as DaemonSet
echo "Deploying Filebeat..."
kubectl apply -f filebeat-daemonset.yaml -n "$NAMESPACE"

# Wait for all pods
echo "Waiting for all logging services..."
kubectl wait --for=condition=ready pod -l app=elasticsearch -n "$NAMESPACE" --timeout=300s

# Create default index pattern
# - no -it: a script has no TTY, and `kubectl exec -t` fails without one
# - curl -f: make HTTP errors fail the script under `set -e`
# - overwrite=true: re-running the script does not fail with 409 Conflict
echo "Setting up Kibana index pattern..."
kubectl exec -n "$NAMESPACE" svc/kibana -- curl -fsS -X POST \
  'http://localhost:5601/api/saved_objects/index-pattern/logs?overwrite=true' \
  -H 'kbn-xsrf: true' \
  -H 'Content-Type: application/json' \
  -d '{"attributes":{"title":"logs-*","timeFieldName":"@timestamp"}}'

echo "Logging stack deployed successfully!"
# Reachable locally once forwarded, e.g.:
#   kubectl port-forward -n logging svc/kibana 5601:5601
echo "Kibana: http://localhost:5601"
Best Practices
最佳实践
✅ DO
✅ 要做的
- Parse and structure log data
- Use appropriate log levels
- Add contextual information
- Implement log retention policies
- Set up log-based alerting
- Index important fields
- Use consistent timestamp formats
- Implement access controls
- 解析并结构化日志数据
- 使用合适的日志级别
- 添加上下文信息
- 实施日志保留策略
- 设置基于日志的告警
- 为重要字段建立索引
- 使用统一的时间戳格式
- 实施访问控制
❌ DON'T
❌ 不要做的
- Store sensitive data in logs
- Log at DEBUG level in production
- Send raw unstructured logs
- Ignore storage costs
- Skip log parsing
- Neglect monitoring of the logging system itself
- Store logs forever
- Log PII without encryption
- 在日志中存储敏感数据
- 在生产环境中使用DEBUG级别日志
- 发送原始非结构化日志
- 忽略存储成本
- 跳过日志解析
- 不对日志系统进行监控
- 永久存储日志
- 未加密就存储个人可识别信息(PII)