Loading...
Loading...
Kubernetes clusters, pods, nodes, workloads, storage, networking, and resource relationships. Query K8s inventory, diagnose degraded deployments and pod failures, investigate rollouts, audit ingress and network policies.
npx skill4agent add dynatrace/dynatrace-for-ai dt-obs-kubernetes

| File | Contents |
|---|---|
| Clusters, namespaces, resource distribution |
| Labels, annotations, k8s.object parsing patterns |
| Node selectors, affinity, taints, HA scheduling |
| Exit codes, pod conditions, init containers, image pull errors, logs, service→pod drill-down |
| Degraded deployments, stuck rollouts, node conditions, CPU throttling, HPA, StatefulSet ordering |
| PVC/PV lifecycle, phase reference, orphaned volumes, StorageClass |
| Routing rule parsing, TLS audit |
| Policy listing, namespace isolation audit |
`K8S_DEPLOYMENT`, `K8S_STATEFULSET`, `K8S_DAEMONSET`, `K8S_JOB`, `K8S_CRONJOB`, `K8S_HORIZONTALPODAUTOSCALER`, `K8S_CLUSTER`, `K8S_NAMESPACE`, `K8S_NODE`, `K8S_POD`, `K8S_SERVICE`, `K8S_CONFIGMAP`, `K8S_SECRET`, `K8S_PERSISTENTVOLUMECLAIM`, `K8S_PERSISTENTVOLUME`, `K8S_INGRESS`, `K8S_NETWORKPOLICY`

smartscapeNodes K8S_POD
| filter k8s.namespace.name == "production"
| fields k8s.cluster.name, k8s.pod.name

timeseries cpu = sum(dt.kubernetes.container.cpu_usage),
by: {k8s.pod.name, k8s.namespace.name}
| fieldsAdd avg_cpu = arrayAvg(cpu)

fetch logs
| filter k8s.namespace.name == "production" and loglevel == "ERROR"

`k8s.cluster.name`, `k8s.namespace.name`, `k8s.pod.name`, `k8s.node.name`, `k8s.workload.name`, `k8s.workload.kind`, `k8s.container.name`, `k8s.object`, `tags[label]`

`dt.kubernetes.container.cpu_usage`, `cpu_throttled`, `limits_cpu`, `requests_cpu`
`dt.kubernetes.container.memory_working_set`, `limits_memory`, `requests_memory`
`dt.kubernetes.container.restarts`, `oom_kills`
`dt.kubernetes.node.pods_allocatable`, `cpu_allocatable`, `memory_allocatable`
`dt.kubernetes.pods`

`K8S_POD`, `CONTAINER`, `K8S_POD`, `k8s.object`, `CONTAINER`, `dt-obs-hosts`

`CONTAINER --(is_part_of)--> K8S_POD`

smartscapeNodes K8S_POD
| filter k8s.namespace.name == "<namespace>"
| traverse edgeTypes: {is_part_of}, targetTypes: {CONTAINER}, direction: backward, fieldsKeep: {id}
| fields k8s.cluster.name, k8s.namespace.name, k8s.pod.name, container.id=id

`SERVICE`, `K8S_POD`, `k8s.workload.name`, `references/pod-debugging.md`

smartscapeNodes K8S_CLUSTER
| fields k8s.cluster.name, k8s.cluster.version, k8s.cluster.distribution

timeseries {
current_pods = avg(dt.kubernetes.pods),
max_pods = avg(dt.kubernetes.node.pods_allocatable)
}, by: {k8s.node.name, k8s.cluster.name}
| fieldsAdd pod_capacity_pct = (arrayAvg(current_pods) / arrayAvg(max_pods)) * 100
| filter pod_capacity_pct > 80

smartscapeNodes K8S_POD
| parse k8s.object, "JSON:config"
| fieldsAdd phase = config[status][phase]
| filter phase != "Running"
| fields k8s.cluster.name, k8s.namespace.name, k8s.pod.name, phase

timeseries {
cpu_usage = sum(dt.kubernetes.container.cpu_usage),
cpu_requests = avg(dt.kubernetes.container.requests_cpu)
}, by: {k8s.pod.name, k8s.namespace.name, k8s.cluster.name}
| fieldsAdd usage_pct = (arrayAvg(cpu_usage) / arrayAvg(cpu_requests)) * 100
| filter usage_pct < 30 and arrayAvg(cpu_requests) > 0

smartscapeNodes K8S_POD
| parse k8s.object, "JSON:config"
| expand container = config[spec][containers]
| fieldsAdd
container_name = container[name],
cpu_limit = container[resources][limits][cpu],
memory_limit = container[resources][limits][memory]
| filter isNull(cpu_limit) or isNull(memory_limit)

timeseries oom_kills = sum(dt.kubernetes.container.oom_kills),
by: {k8s.pod.name, k8s.namespace.name, k8s.cluster.name}
| filter arraySum(oom_kills) > 0
| fieldsAdd total_oom_kills = arraySum(oom_kills)
| sort total_oom_kills desc

timeseries restarts = sum(dt.kubernetes.container.restarts),
by: {k8s.pod.name, k8s.namespace.name, k8s.cluster.name}
| fieldsAdd total_restarts = arraySum(restarts)
| filter total_restarts > 5

smartscapeNodes K8S_POD
| parse k8s.object, "JSON:config"
| expand container = config[spec][containers]
| fieldsAdd
container_name = container[name],
privileged = container[securityContext][privileged]
| filter privileged == true

smartscapeNodes K8S_POD
| parse k8s.object, "JSON:config"
| expand container = config[spec][containers]
| fieldsAdd
container_name = container[name],
run_as_user = container[securityContext][runAsUser],
run_as_non_root = container[securityContext][runAsNonRoot]
| filter (isNull(run_as_user) or run_as_user == 0) and run_as_non_root != true

smartscapeNodes K8S_POD
| filter k8s.workload.kind == "deployment"
| summarize pod_count = count(),
node_count = countDistinct(k8s.node.name),
by: {k8s.cluster.name, k8s.namespace.name, k8s.workload.name}
| fieldsAdd ha_compliant = node_count > 1
| filter pod_count >= 2 and not ha_compliant

fetch dt.davis.problems, from:now() - 2h
| filter not(dt.davis.is_duplicate) and event.status == "ACTIVE"
| filter matchesPhrase(smartscape.affected_entity.types, "K8S_")
| fields display_id, event.name, event.category, smartscape.affected_entity.ids

`smartscape.affected_entity.ids`, `limit`, `k8s.object`