# NIC / AF_XDP Diagnostics — driver detection, queue config, offloads, FDIR, pinning, monitoring
Use this skill when diagnosing, configuring, or monitoring NICs for AF_XDP / XDP workloads. Covers driver detection, hardware queue configuration, offload control (GSO/GRO/TSO/LRO), VLAN offloads, Flow Director (FDIR) rules, CPU core pinning and NUMA awareness, hardware queue and drop monitoring, BPF program inspection with bpftool, kernel tracing via ftrace, perf profiling and flamegraphs, IRQ-to-queue-to-core mapping, and a quick diagnostic checklist.
# Install: npx skill4agent add harsh4786/nic-xdp-diagnostics nic-xdp-diagnostics
# PCI device listing — find your NIC's bus address
lspci
# Driver name, version, firmware, bus-info
ethtool -i <iface>
# Link state, speed, duplex negotiation
ethtool <iface>
ethtool <iface> | egrep -i 'link|speed|duplex'
# Confirm interface is up and check for attached XDP program
ip link show dev <iface>
ip link show dev <iface> | grep xdp   # "xdp" / XDP_DRV indicates native driver mode
# Show current and max queue count
ethtool -l <iface>
# Set combined queues (must match or exceed XDP queue IDs you bind to)
ethtool -L <iface> combined <N>
# List all queues exposed by the NIC
ls -1 /sys/class/net/<iface>/queues
# Show current and max ring buffer depths (rx/tx)
ethtool -g <iface>
# Show all offload feature states
ethtool -k <iface>
ethtool -k <iface> | grep -E 'generic-receive|large-receive|scatter-gather|tcp-segmentation'
# XDP requires offloads disabled — aggregated/segmented frames break XDP processing
ethtool -K <iface> gro off lro off tso off gso off
# Check VLAN offload states
ethtool -k <iface> | grep -i vlan
# Disable VLAN tag stripping (keep tags in packet data for XDP inspection)
ethtool -K <iface> rxvlan off
ethtool -K <iface> txvlan off
# Or via the longer form
ethtool -K <iface> rx-vlan-offload off
ethtool -K <iface> rx-vlan-filter off
# Re-enable if needed
ethtool -K <iface> rxvlan on
ethtool -K <iface> rx-vlan-filter on
# Check ntuple (Flow Director) support and list existing steering rules
ethtool -k <iface> | grep -i ntuple
ethtool -n <iface>
ethtool -u <iface>
# Steer UDP traffic matching a 5-tuple → queue 3
sudo ethtool -U <iface> flow-type udp4 \
src-ip <src> dst-ip <dst> dst-port <port> action 3
# Steer TCP traffic to a specific dst-port → queue 0
sudo ethtool -U <iface> flow-type tcp4 \
src-ip 0.0.0.0 dst-ip <dst> dst-port <port> action 0
# Show driver-specific private flags
ethtool --show-priv-flags <iface>
# Enable hardware loopback (useful for testing without a second machine)
sudo ethtool --set-priv-flags <iface> loopback on
sudo ethtool -s <iface> loopback on
# Check loopback support
sudo ethtool --show-features <iface> | grep loopback
# Full CPU topology: CPU ID, physical core, socket, NUMA node
lscpu -e=CPU,CORE,SOCKET,NODE,ONLINE
# Critical: pin XDP threads to cores on the same NUMA node as the NIC
cat /sys/class/net/<iface>/device/numa_node
# Per-CPU utilization — look for cores near 0% usage
mpstat -P ALL 1 5
# Check what's pinned to each core already
ps -eo pid,comm,psr --sort=psr | awk '{count[$3]++; procs[$3]=procs[$3] " " $2} END {for (c in count) print "CPU " c ": " count[c] " procs:" procs[c]}'
# Check IRQ affinity — which cores handle which NIC interrupts
cat /proc/interrupts | grep <iface>
awk '/<iface>-TxRx/{print $1,$NF}' /proc/interrupts | sed 's/://'
grep . /proc/irq/*/smp_affinity_list
# Stop irqbalance so manual IRQ pinning is not overwritten
sudo systemctl stop irqbalance
systemctl status irqbalance
# Check current IRQ CPU affinity
cat /proc/irq/<irq_num>/smp_affinity_list
# Pin an IRQ to a specific core
echo <core_id> | sudo tee /proc/irq/<irq_num>/smp_affinity_list
# List all MSI-X vectors for the NIC's PCI device
ls /sys/devices/pci<domain>/<bus>/<device>/msi_irqs
# Full stats dump
ethtool -S <iface>
# XDP/XSK specific counters
ethtool -S <iface> | grep -i xdp
# Filter for drops, errors, misses
ethtool -S <iface> | egrep -i 'rx|drop|err|xdp|xsk' | head -n 50
# Per-queue packet counts
ethtool -S <iface> | grep -E "rx_queue"
ethtool -S <iface> | grep "rx_queue_<N>_packets:"
# Watch queue counters in real time
watch -n 1 'ethtool -S <iface> | grep -E "rx_queue"'
# Watch drops and errors
watch -n1 "ethtool -S <iface> | grep -E 'rx_packets|rx_dropped|rx_queue'"
# Combined NIC + XDP socket status
watch -n 1 "echo '=== NIC ===' && ethtool -S <iface> | grep -iE 'drop|miss|err|full' && echo '=== XDP ===' && cat /proc/net/xdp 2>/dev/null"
# Full drop monitoring loop
# Set IFACE to your NIC and QUEUE to the queue your AF_XDP socket is bound to.
IFACE=<iface>; QUEUE=<N>
# Loop forever (Ctrl+C to stop): NIC drop counters, per-queue stats,
# AF_XDP socket state, and interface totals every 5 seconds.
while true; do
  printf -- '--- %s ---\n' "$(date)"
  printf 'NIC Drops:\n'
  # 2>/dev/null: some drivers expose no stats; keep the loop running anyway
  ethtool -S "$IFACE" 2>/dev/null | grep -E "drop|miss|error|discard" | head -n 10
  printf '\nQueue %s:\n' "$QUEUE"
  ethtool -S "$IFACE" 2>/dev/null | grep -i "queue_${QUEUE}"
  printf '\nXDP Sockets:\n'
  cat /proc/net/xdp 2>/dev/null || echo "No XDP sockets found"
  printf '\nInterface Totals:\n'
  # /proc/net/dev fields: $2 = RX packets, $5 = RX drops; pass iface via -v, quoted
  awk -v iface="$IFACE" '$1 ~ iface {print "RX pkts:", $2, "RX drop:", $5}' /proc/net/dev
  sleep 5
done
# Kernel's view of active XDP sockets
cat /proc/net/xdp
cat /proc/net/xdp 2>/dev/null || ss -ax | grep -i xdp
# List all loaded BPF programs
bpftool prog show
# Details on a specific program
bpftool prog show id <prog_id>
# Profile a BPF program (cycles, instructions over 5 seconds)
bpftool prog profile id <prog_id> duration 5 cycles instructions
# List all BPF maps
bpftool map show
bpftool map show | grep -i xsk
# Dump map contents (debug maps, XSK maps)
bpftool map dump name <map_name>
bpftool map dump pinned /sys/fs/bpf/xsks_map
bpftool map dump id <map_id>
# Stream ftrace output (Ctrl+C to stop)
cat /sys/kernel/debug/tracing/trace_pipe
# Background capture to a file
cat /sys/kernel/debug/tracing/trace_pipe > /tmp/xdp_trace.log &
# Read the log
cat /tmp/xdp_trace.log
# Tail live
tail -f /sys/kernel/debug/tracing/trace_pipe
# Stop background trace capture
pkill -f trace_pipe
fuser -k /sys/kernel/debug/tracing/trace_pipe
# Watch kernel log for XDP messages
watch -n1 "dmesg | grep xdp"
# Note: bpf_trace_printk() output from your XDP program appears in trace_pipe
# Core hardware counters for your XDP process
sudo perf stat -e cycles,instructions,cache-misses,LLC-load-misses,branches,branch-misses \
-p $(pgrep <process>) -- sleep 10
# Extended counters (-d -d -d = most detail)
sudo perf stat -d -d -d -p $(pgrep <process>)
# Record with DWARF call graphs (most accurate stacks)
sudo perf record --call-graph dwarf -e cycles \
-p $(pgrep <process>) -- sleep 10
# Record on a specific CPU core
sudo perf record -F 997 -g --call-graph dwarf -C <core> -o perf.data -- sleep 60
# Record multiple event types
sudo perf record -e cycles,stalled-cycles-frontend,stalled-cycles-backend,cache-misses,branch-misses \
-g -p $(pgrep <process>)
# Interactive report
sudo perf report
# Generate flamegraph (requires inferno)
sudo perf script -i perf.data | inferno-collapse-perf | inferno-flamegraph > flamegraph.svg
# Live top-like view
sudo perf top -p $(pgrep <process>) -g
# perf stat = aggregate counters; perf record = sampled profile for later analysis
# 1. Find which IRQs belong to your NIC
cat /proc/interrupts | grep <iface>
awk '/<iface>-TxRx/{print $1,$NF}' /proc/interrupts | sed 's/://'
# 2. Check current CPU affinity for each IRQ
cat /proc/irq/<irq_num>/smp_affinity_list
# 3. Pin queue IRQs to specific cores (avoid your XDP poll cores)
echo <core_id> | sudo tee /proc/irq/<irq_num>/smp_affinity_list

# Quick diagnostic checklist
| Step | Command | Looking For |
|---|---|---|
| 1 | `ethtool -i <iface>` | Driver supports XDP (ice, i40e, mlx5) |
| 2 | `ethtool -l <iface>` | Enough combined queues |
| 3 | `ethtool -g <iface>` | Ring buffer depth adequate |
| 4 | `ethtool -k <iface> \| grep -E 'gro\|lro\|tso\|gso'` | All OFF |
| 5 | `ethtool -k <iface> \| grep -i ntuple` | ntuple ON for FDIR |
| 6 | `ethtool -n <iface>` | FDIR rules steering to correct queues |
| 7 | `cat /sys/class/net/<iface>/device/numa_node` | NUMA node for core selection |
| 8 | `lscpu -e=CPU,CORE,SOCKET,NODE,ONLINE` | Available cores on correct NUMA |
| 9 | `systemctl status irqbalance` | Should be STOPPED |
| 10 | `cat /proc/irq/<irq_num>/smp_affinity_list` | IRQs pinned away from XDP cores |
| 11 | `ip link show dev <iface> \| grep xdp` | XDP program loaded and attached |
| 12 | `cat /proc/net/xdp` | AF_XDP sockets active |
| 13 | `ethtool -S <iface> \| grep -iE 'drop\|err'` | Zero or stable drop counters |