Loading...
Loading...
Linux perf profiler skill for CPU performance analysis. Use when collecting sampling profiles with perf record, generating perf report, measuring hardware counters (cache misses, branch mispredicts, IPC), identifying hot functions, or feeding perf data into flamegraph tools. Activates on queries about perf, Linux performance counters, PMU events, off-CPU profiling, perf stat, perf annotate, or sampling-based profiling on Linux.
npx skill4agent add mohitmishra786/low-level-dev-skills linux-perf
# Terms referenced: perf, [unknown], [kernel]
# Install
sudo apt install linux-perf # Debian/Ubuntu (version-matched)
sudo dnf install perf # Fedora/RHEL
# Check permissions
# By default perf requires root or paranoid level ≤ 1
cat /proc/sys/kernel/perf_event_paranoid
# 2 = user-space profiling only (no kernel), 1 = user+kernel profiling, 0 = also system-wide CPU events (no raw tracepoints), -1 = no restrictions
# Temporarily lower (session only)
sudo sysctl -w kernel.perf_event_paranoid=1
# Persistent
echo 'kernel.perf_event_paranoid=1' | sudo tee /etc/sysctl.d/99-perf.conf
sudo sysctl -p /etc/sysctl.d/99-perf.conf
# Compile with debug info and frame pointers
gcc -g -O2 -fno-omit-frame-pointer -o prog main.c
# -fno-omit-frame-pointer: essential for frame-pointer-based unwinding
# Alternative: compile with DWARF CFI and use --call-graph=dwarf
# Basic hardware counters
perf stat ./prog
# With specific events
perf stat -e cache-misses,cache-references,instructions,cycles,branch-misses ./prog
# Wall-clock comparison: N runs
perf stat -r 5 ./prog
# Attach to existing process
perf stat -p 12345 sleep 10
# (see also: perf stat)
# Default: sample at 1000 Hz (cycles event)
perf record -g ./prog
# Specify frequency
perf record -F 999 -g ./prog
# Specific event
perf record -e cache-misses -g ./prog
# Attach to running process
perf record -F 999 -g -p 12345 sleep 30
# Off-CPU profiling (time spent waiting)
perf record -e sched:sched_switch -ag sleep 10
# DWARF call graphs (better for binaries without frame pointers)
perf record -F 999 --call-graph=dwarf ./prog
# Save to named file
perf record -o myapp.perf.data -g ./prog
perf report # reads perf.data
perf report -i myapp.perf.data
perf report --no-children # self time only (not cumulative)
perf report --sort comm,dso,sym # sort by fields
perf report --stdio # non-interactive text output
# Interactive TUI keys: Enter, a, s, d, t, ?
# Show assembly with hit percentages
perf annotate sym_name
# From report: press 'a' on a symbol
# Or directly:
perf annotate -i perf.data --symbol=hot_function --stdio
# Instructions referenced: mov, vmovdqa
# Live top, like 'top' but for functions
sudo perf top -g
# Filter by process
sudo perf top -p 12345
# Generate perf script output
perf script > out.perf
# Use Brendan Gregg's FlameGraph tools
git clone https://github.com/brendangregg/FlameGraph
./FlameGraph/stackcollapse-perf.pl out.perf > out.folded
./FlameGraph/flamegraph.pl out.folded > flamegraph.svg
# Open flamegraph.svg in browser
# See also: skills/profilers/flamegraphs

| Problem | Cause | Fix |
|---|---|---|
| Permission denied | `perf_event_paranoid` too restrictive | Lower paranoid level or run with `sudo` |
| `[unknown]` frames in call stacks | Missing frame pointers or debug info | Recompile with `-fno-omit-frame-pointer` or record with `--call-graph=dwarf` |
| Kernel symbols not visible | Use |
| Kernel addresses not resolved to names | Kernel symbols unavailable | `echo 0 \| sudo tee /proc/sys/kernel/kptr_restrict` |
| Empty report for short program | Program exits too fast | Use |
| DWARF unwinding slow | Large DWARF stack | Limit with `--call-graph dwarf,<stack-size-bytes>` |
# List all available events
perf list
# Common hardware events
cycles
instructions
cache-references
cache-misses
branch-instructions
branch-misses
stalled-cycles-frontend
stalled-cycles-backend
# Software events
context-switches
cpu-migrations
page-faults
# Tracepoints (requires root)
sched:sched_switch
syscalls:sys_enter_read
# Related skills: skills/profilers/flamegraphs, skills/profilers/valgrind, skills/compilers/gcc, skills/compilers/clang