Loading...
Loading...
Goal-driven fitness-scored improvement loop. Measures goals, picks worst gap, runs /rpi, compounds via knowledge flywheel.
npx skill4agent add boshu2/agentops evolve

Purpose: Measure what's wrong. Fix the worst thing. Measure again. Compound.
/rpi
/evolve # Run forever until kill switch or stagnation
/evolve --max-cycles=5 # Cap at 5 improvement cycles
/evolve --dry-run # Measure fitness, show what would be worked on, don't execute
mkdir -p .agents/evolve
ao inject 2>/dev/null || true
--max-cycles=N
--dry-run
--skip-baseline
SESSION_START_SHA=$(git rev-parse HEAD)
evolve_state = {
cycle: 0,
max_cycles: <from flag, or Infinity if not set>,
dry_run: <from flag, default false>,
test_first: <from flag, default false>,
session_start_sha: $SESSION_START_SHA,
idle_streak: 0, # consecutive cycles with nothing to do
max_idle_streak: 3, # stop after this many consecutive idle cycles
history: []
}
if [ "$SKIP_BASELINE" = "true" ]; then
log "Skipping baseline sweep (--skip-baseline flag set)"
exit 0
fi
if ! [ -f .agents/evolve/fitness-0-baseline.json ]; then
baseline = MEASURE_FITNESS() # run every GOALS.yaml goal
baseline.cycle = 0
write ".agents/evolve/fitness-0-baseline.json" baseline
# Baseline report
failing = [g for g in baseline.goals if g.result == "fail"]
failing.sort(by=weight, descending)
cat > .agents/evolve/cycle-0-report.md << EOF
# Cycle-0 Baseline
**Total goals:** ${len(baseline.goals)}
**Passing:** ${len(baseline.goals) - len(failing)}
**Failing:** ${len(failing)}
$(for g in failing: "- [weight ${g.weight}] ${g.id}: ${g.result}")
EOF
log "Baseline captured: ${len(failing)}/${len(baseline.goals)} goals failing"
fi
# External kill (outside repo — can't be accidentally deleted by agents)
if [ -f ~/.config/evolve/KILL ]; then
echo "KILL SWITCH ACTIVE: $(cat ~/.config/evolve/KILL)"
# Write acknowledgment
echo "{\"killed_at\": \"$(date -Iseconds)\", \"cycle\": $CYCLE}" > .agents/evolve/KILLED.json
exit 0
fi
# Local convenience stop
if [ -f .agents/evolve/STOP ]; then
echo "STOP file detected: $(cat .agents/evolve/STOP 2>/dev/null)"
exit 0
fi
GOALS.yaml
# Run the check command
if eval "$goal_check" > /dev/null 2>&1; then
# Exit code 0 = PASS
result = "pass"
else
# Non-zero = FAIL
result = "fail"
fi
# Write fitness snapshot
cat > .agents/evolve/fitness-${CYCLE}.json << EOF
{
"cycle": $CYCLE,
"timestamp": "$(date -Iseconds)",
"cycle_start_sha": "$(git rev-parse HEAD)",
"goals": [
{"id": "$goal_id", "result": "$result", "weight": $weight, "value": $metric_value, "threshold": $threshold},
...
]
}
EOF
go-coverage-floor
go test -cover
"value": 85.7, "threshold": 80
doc-coverage
"value": 20, "threshold": 16
shellcheck-clean
"value": 0, "threshold": 0
"value": null
if ! jq empty ".agents/evolve/fitness-${CYCLE}.json" 2>/dev/null; then
echo "ERROR: Fitness snapshot write failed or invalid JSON. Refusing to proceed."
exit 1
fi
"result": "skip"
failing_goals = [g for g in goals if g.result == "fail"]
if not failing_goals:
# Cycle-0 Comprehensive Sweep (optional full-repo scan)
# Before consuming harvested work, optionally discover items the harvest missed.
# Rationale: manual sweeps have found issues that automated harvests did not catch.
if ! [ -f .agents/evolve/last-sweep-date ] || \
[ $(date +%s) -gt $(( $(stat -f %m .agents/evolve/last-sweep-date) + 604800 )) ]; then
# Sweep is stale (> 7 days) or missing — run lightweight scan
log "Running cycle-0 comprehensive sweep (stale/missing: .agents/evolve/last-sweep-date)"
# Lightweight sweep: shellcheck, go vet, known anti-patterns
shellcheck hooks/*.sh 2>&1 | grep -v "^$" | while read line; do
add_to_next_work("shellcheck finding: $line", severity="medium", type="bug")
done
go vet ./cli/... 2>&1 | grep -v "^$" | while read line; do
add_to_next_work("go vet finding: $line", severity="medium", type="bug")
done
# grep for known anti-patterns (e.g., hardcoded secrets, TODO markers)
grep -r "TODO|FIXME|XXX" --include="*.go" --include="*.sh" . 2>/dev/null | while read line; do
add_to_next_work("code marker: $line", severity="low", type="tech-debt")
done
# Mark sweep complete
touch .agents/evolve/last-sweep-date
log "Cycle-0 sweep complete. New findings added to next-work.jsonl"
fi
# All goals pass — check harvested work from prior /rpi cycles
if [ -f .agents/rpi/next-work.jsonl ]; then
# Detect current repo for filtering
CURRENT_REPO=$(bd config --get prefix 2>/dev/null \
|| basename "$(git remote get-url origin 2>/dev/null)" .git 2>/dev/null \
|| basename "$(pwd)")
all_items = read_unconsumed(next-work.jsonl) # entries with consumed: false
# Filter by target_repo: include items where target_repo matches
# CURRENT_REPO, target_repo is "*" (cross-repo), or field is absent (backward compat).
# Skip items whose target_repo names a different repo.
items = [i for i in all_items
if i.target_repo in (CURRENT_REPO, "*", None)]
if items:
evolve_state.idle_streak = 0 # reset — we found work
selected_item = max(items, by=severity) # highest severity first
log "All goals met. Picking harvested work: {selected_item.title}"
# Execute as an /rpi cycle (Step 4), then mark consumed
/rpi "{selected_item.title}" --auto --max-cycles=1 --test-first # if --test-first set
/rpi "{selected_item.title}" --auto --max-cycles=1 # otherwise
mark_consumed(selected_item) # set consumed: true, consumed_by, consumed_at
# Skip Steps 4-5 (already executed above), go to Step 6 (log cycle)
log_cycle(cycle, goal_id="next-work:{selected_item.title}", result="harvested")
continue loop # → Step 1 (kill switch check)
# Nothing to do THIS cycle — but don't quit yet
evolve_state.idle_streak += 1
log "All goals met, no harvested work. Idle streak: {idle_streak}/{max_idle_streak}"
if evolve_state.idle_streak >= evolve_state.max_idle_streak:
log "Stagnation: {max_idle_streak} consecutive idle cycles. Nothing left to improve."
STOP → go to Teardown
# NOT stagnant yet — re-measure next cycle (external changes, new harvested work)
log "Re-measuring next cycle in case conditions changed..."
continue loop # → Step 1 (kill switch check)
# We have failing goals — reset idle streak
evolve_state.idle_streak = 0
# Sort by weight (highest priority first)
failing_goals.sort(by=weight, descending)
# Simple strike check: skip goals whose last 3 consecutive cycles all ended in "regressed"
for goal in failing_goals:
recent = last_3_cycles_for(goal.id)
if all(r.result == "regressed" for r in recent):
log "Skipping {goal.id}: regressed 3 consecutive cycles. Needs human attention."
continue
selected = goal
break
if no goal selected:
log "All failing goals have regressed 3+ times. Human intervention needed."
STOP → go to Teardown
--dry-run
log "Dry run: would work on '{selected.id}' (weight: {selected.weight})"
log "Description: {selected.description}"
log "Check command: {selected.check}"
# Also show queued harvested work (filtered to current repo)
if [ -f .agents/rpi/next-work.jsonl ]; then
all_items = read_unconsumed(next-work.jsonl)
items = [i for i in all_items
if i.target_repo in (CURRENT_REPO, "*", None)]
if items:
log "Harvested work queue ({len(items)} items):"
for item in items:
log " - [{item.severity}] {item.title} ({item.type})"
STOP → go to Teardown
/rpi "Improve {selected.id}: {selected.description}" --auto --max-cycles=1 --test-first # if --test-first set
/rpi "Improve {selected.id}: {selected.description}" --auto --max-cycles=1 # otherwise
/research
/plan
/pre-mortem
/crank
/vibe
/post-mortem
ao forge
fitness-{CYCLE}-post.json
fitness-{CYCLE}.json
fitness-{CYCLE}-post.json
# Load pre-cycle results
pre_results = load("fitness-{CYCLE}.json")
# Re-measure ALL goals (writes fitness-{CYCLE}-post.json)
post_results = MEASURE_FITNESS()
# Check the target goal
if selected_goal.post_result == "pass":
outcome = "improved"
else:
outcome = "unchanged"
# FULL REGRESSION CHECK: compare ALL goals, not just the target
newly_failing = []
for goal in post_results.goals:
pre = pre_results.find(goal.id)
if pre.result == "pass" and goal.result == "fail":
newly_failing.append(goal.id)
if newly_failing:
outcome = "regressed"
log "REGRESSION: {newly_failing} started failing after fixing {selected.id}"
# Multi-commit revert using cycle start SHA
cycle_start_sha = pre_results.cycle_start_sha
commit_count = $(git rev-list --count ${cycle_start_sha}..HEAD)
if commit_count == 0:
log "No commits to revert"
elif commit_count == 1:
git revert HEAD --no-edit
else:
git revert --no-commit ${cycle_start_sha}..HEAD
git commit -m "revert: evolve cycle ${CYCLE} regression in {newly_failing}"
log "Reverted ${commit_count} commits. Moving to next goal."
fitness-{CYCLE}-post.json
.agents/evolve/cycle-history.jsonl
{"cycle": 1, "goal_id": "test-pass-rate", "result": "improved", "commit_sha": "abc1234", "timestamp": "2026-02-11T21:00:00Z"}
{"cycle": 2, "goal_id": "doc-coverage", "result": "regressed", "commit_sha": "def5678", "reverted_to": "abc1234", "timestamp": "2026-02-11T21:30:00Z"}
evolve_state.cycle += 1
# Only stop for max-cycles if the user explicitly set one
if evolve_state.max_cycles != Infinity and evolve_state.cycle >= evolve_state.max_cycles:
log "Max cycles ({max_cycles}) reached."
STOP → go to Teardown
# Otherwise: loop back to Step 1 (kill switch check) — run forever
/post-mortem "evolve session: $CYCLE cycles, goals improved: X, harvested: Y"
next-work.jsonl
/evolve
# Check if both baseline and final snapshot exist
if [ -f .agents/evolve/fitness-0-baseline.json ] && [ -f .agents/evolve/fitness-${CYCLE}.json ]; then
baseline = load(".agents/evolve/fitness-0-baseline.json")
final = load(".agents/evolve/fitness-${CYCLE}.json")
# Compute delta — goals that flipped between baseline and final
improved_count = 0
regressed_count = 0
unchanged_count = 0
delta_rows = []
for final_goal in final.goals:
baseline_goal = baseline.goals.find(g => g.id == final_goal.id)
baseline_result = baseline_goal ? baseline_goal.result : "unknown"
final_result = final_goal.result
if baseline_result == "fail" and final_result == "pass":
delta = "improved"
improved_count += 1
elif baseline_result == "pass" and final_result == "fail":
delta = "regressed"
regressed_count += 1
else:
delta = "unchanged"
unchanged_count += 1
delta_rows.append({goal_id: final_goal.id, baseline_result, final_result, delta})
# Write session-fitness-delta.md with trajectory table
cat > .agents/evolve/session-fitness-delta.md << EOF
# Session Fitness Trajectory
| goal_id | baseline_result | final_result | delta |
|---------|-----------------|--------------|-------|
$(for row in delta_rows: "| ${row.goal_id} | ${row.baseline_result} | ${row.final_result} | ${row.delta} |")
**Summary:** ${improved_count} improved, ${regressed_count} regressed, ${unchanged_count} unchanged
EOF
# Include delta summary in user-facing teardown report
log "Fitness trajectory: ${improved_count} improved, ${regressed_count} regressed, ${unchanged_count} unchanged"
fi
cat > .agents/evolve/session-summary.md << EOF
# /evolve Session Summary
**Date:** $(date -Iseconds)
**Cycles:** $CYCLE of $MAX_CYCLES
**Goals measured:** $(wc -l < GOALS.yaml goals)
## Cycle History
$(cat .agents/evolve/cycle-history.jsonl)
## Final Fitness
$(cat .agents/evolve/fitness-${CYCLE}.json)
## Post-Mortem
<path to post-mortem report from above>
## Next Steps
- Run \`/evolve\` again to continue improving
- Run \`/evolve --dry-run\` to check current fitness without executing
- Create \`~/.config/evolve/KILL\` to prevent future runs
- Create \`.agents/evolve/STOP\` for a one-time local stop
EOF
## /evolve Complete
Cycles: N of M
Goals improved: X
Goals regressed: Y (reverted)
Goals unchanged: Z
Post-mortem: <verdict> (see <report-path>)
Run `/evolve` again to continue improving.

references/compounding.md

| File | Purpose | Who Creates It |
|---|---|---|
| `~/.config/evolve/KILL` | Permanent stop (outside repo) | Human |
| `.agents/evolve/STOP` | One-time local stop | Human or automation |
echo "Taking a break" > ~/.config/evolve/KILL # Permanent
echo "done for today" > .agents/evolve/STOP # Local, one-time
rm ~/.config/evolve/KILL
rm .agents/evolve/STOP

| Flag | Default | Description |
|---|---|---|
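| `--max-cycles=N` | unlimited | Optional hard cap. Without this, loop runs forever. |
| `--test-first` | off | Pass `--test-first` through to each `/rpi` cycle |
| `--dry-run` | off | Measure fitness and show plan, don't execute |
| `--skip-baseline` | off | Skip cycle-0 baseline sweep |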
references/goals-schema.md
references/artifacts.md
references/examples.md

| Problem | Cause | Solution |
|---|---|---|
| `/evolve` exits immediately | Kill switch file exists | Remove `~/.config/evolve/KILL` or `.agents/evolve/STOP` |
| "No goals to measure" error | GOALS.yaml missing or empty | Create GOALS.yaml in repo root with fitness goals (see references/goals-schema.md) |
| Cycle completes but fitness unchanged | Goal check command is always passing or always failing | Verify check command logic in GOALS.yaml produces exit code 0 (pass) or non-zero (fail) |
| Regression revert fails | Multiple commits in cycle or uncommitted changes | Check cycle-start SHA in fitness snapshot, commit or stash changes before retrying |
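| Harvested work never consumed | All goals passing but `.agents/rpi/next-work.jsonl` is missing or has no unconsumed items for this repo | Check the file exists and has entries with `consumed: false` |
| Loop stops after N cycles | `--max-cycles` was set | Omit `--max-cycles` to run until kill switch or stagnation |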
skills/rpi/SKILL.md
skills/vibe/SKILL.md
skills/council/SKILL.md
GOALS.yaml