Loading...
Loading...
This skill should be used for multi-session autonomous agent work requiring progress checkpointing, failure recovery, and task dependency management. Triggers on '/harness' command, or when a task involves many subtasks needing progress persistence, sleep/resume cycles across context windows, recovery from mid-task failures with partial state, or distributed work across multiple agent sessions. Synthesized from Anthropic and OpenAI engineering practices for long-running agents.
npx skill4agent add stellarlinkco/myclaude harness
/harness init <project-path> # Initialize harness files in project
/harness run # Start/resume the infinite loop
/harness status # Show current progress and stats
/harness add "task description" # Add a task to the list
`.harness-active` `harness-tasks.json` `hooks/hooks.json` `/harness init` `/harness run` `touch <project-path>/.harness-active` `rm <project-path>/.harness-active`
[2025-07-01T10:00:00Z] [SESSION-1] INIT Harness initialized for project /path/to/project
[2025-07-01T10:00:05Z] [SESSION-1] INIT Environment health check: PASS
[2025-07-01T10:00:10Z] [SESSION-1] LOCK acquired (pid=12345)
[2025-07-01T10:00:11Z] [SESSION-1] Starting [task-001] Implement user authentication (base=def5678)
[2025-07-01T10:05:00Z] [SESSION-1] CHECKPOINT [task-001] step=2/4 "auth routes created, tests pending"
[2025-07-01T10:15:30Z] [SESSION-1] Completed [task-001] (commit abc1234)
[2025-07-01T10:15:31Z] [SESSION-1] Starting [task-002] Add rate limiting (base=abc1234)
[2025-07-01T10:20:00Z] [SESSION-1] ERROR [task-002] [TASK_EXEC] Redis connection refused
[2025-07-01T10:20:01Z] [SESSION-1] ROLLBACK [task-002] git reset --hard abc1234
[2025-07-01T10:20:02Z] [SESSION-1] STATS tasks_total=5 completed=1 failed=1 pending=3 blocked=0 attempts_total=2 checkpoints=1
{
"version": 2,
"created": "2025-07-01T10:00:00Z",
"session_config": {
"concurrency_mode": "exclusive",
"max_tasks_per_session": 20,
"max_sessions": 50
},
"tasks": [
{
"id": "task-001",
"title": "Implement user authentication",
"status": "completed",
"priority": "P0",
"depends_on": [],
"attempts": 1,
"max_attempts": 3,
"started_at_commit": "def5678",
"validation": {
"command": "npm test -- --testPathPattern=auth",
"timeout_seconds": 300
},
"on_failure": {
"cleanup": null
},
"error_log": [],
"checkpoints": [],
"completed_at": "2025-07-01T10:15:30Z"
},
{
"id": "task-002",
"title": "Add rate limiting",
"status": "failed",
"priority": "P1",
"depends_on": [],
"attempts": 1,
"max_attempts": 3,
"started_at_commit": "abc1234",
"validation": {
"command": "npm test -- --testPathPattern=rate-limit",
"timeout_seconds": 120
},
"on_failure": {
"cleanup": "docker compose down redis"
},
"error_log": ["[TASK_EXEC] Redis connection refused"],
"checkpoints": [],
"completed_at": null
},
{
"id": "task-003",
"title": "Add OAuth providers",
"status": "pending",
"priority": "P1",
"depends_on": ["task-001"],
"attempts": 0,
"max_attempts": 3,
"started_at_commit": null,
"validation": {
"command": "npm test -- --testPathPattern=oauth",
"timeout_seconds": 180
},
"on_failure": {
"cleanup": null
},
"error_log": [],
"checkpoints": [],
"completed_at": null
}
],
"session_count": 1,
"last_session": "2025-07-01T10:20:02Z"
}
`pending` `in_progress` `completed` `failed` `in_progress` `claimed_by` `lease_expires_at` `SESSION-N` `session_count` `harness-tasks.json` `mkdir`
# Acquire lock (fail fast if another agent is running)
# Lock key must be stable even if invoked from a subdirectory: walk up from
# $PWD to the nearest directory that contains harness-tasks.json and key the
# lock on that directory; if none is found, $PWD itself is used.
ROOT="$PWD"
SEARCH="$PWD"
while [ "$SEARCH" != "/" ] && [ ! -f "$SEARCH/harness-tasks.json" ]; do
  SEARCH="$(dirname "$SEARCH")"
done
if [ -f "$SEARCH/harness-tasks.json" ]; then
  ROOT="$SEARCH"
fi

# Lock name = first 16 hex chars of SHA-256(ROOT), computed with whichever of
# shasum / sha256sum is installed. If neither produces output the name
# degrades to "unknown".
PWD_HASH="$(
  printf '%s' "$ROOT" |
    (shasum -a 256 2>/dev/null || sha256sum 2>/dev/null) |
    awk '{print $1}' |
    cut -c1-16
)"
LOCKDIR="/tmp/harness-${PWD_HASH:-unknown}.lock"

# mkdir is the atomic test-and-set: it fails if the directory already exists.
if ! mkdir "$LOCKDIR" 2>/dev/null; then
  # Check if lock holder is still alive
  LOCK_PID=$(cat "$LOCKDIR/pid" 2>/dev/null)
  # Only a plain positive decimal is a usable PID. "0" or a negative value
  # makes kill(1) address a process group, so `kill -0` would always succeed
  # and a corrupt lock could never be reclaimed. Anything else is handled
  # like a missing pid file (stale lock).
  case "$LOCK_PID" in
    '' | *[!0-9]* | 0*) LOCK_PID="" ;;
  esac
  if [ -n "$LOCK_PID" ] && kill -0 "$LOCK_PID" 2>/dev/null; then
    echo "ERROR: Another harness session is active (pid=$LOCK_PID)" >&2
    exit 1
  fi
  # Stale lock — atomically reclaim via mv to avoid TOCTOU race: only one
  # contender's rename can succeed, the others fall into the else branch.
  STALE="$LOCKDIR.stale.$$"
  if mv "$LOCKDIR" "$STALE" 2>/dev/null; then
    rm -rf "$STALE"
    mkdir "$LOCKDIR" || { echo "ERROR: Lock contention" >&2; exit 1; }
    echo "WARN: Removed stale lock${LOCK_PID:+ from pid=$LOCK_PID}" >&2
  else
    echo "ERROR: Another agent reclaimed the lock" >&2
    exit 1
  fi
fi

# Record the holder. A lock without a readable pid looks stale to every other
# session, so if the write fails give the lock up instead of running on.
if ! echo "$$" > "$LOCKDIR/pid"; then
  rm -rf "$LOCKDIR"
  echo "ERROR: Cannot write $LOCKDIR/pid" >&2
  exit 1
fi
trap 'rm -rf "$LOCKDIR"' EXIT
[timestamp] [SESSION-N] LOCK acquired (pid=<PID>)
[timestamp] [SESSION-N] LOCK released
trap EXITsession_config.concurrency_mode: "concurrent"harness-tasks.json.bak.tmpharness-progress.txtharness-tasks.jsonHARNESS_STATE_ROOT=/abs/path/to/state-rootstatus="in_progress"claimed_byHARNESS_WORKER_IDlease_expires_atin_progressgit reset --hardgit clean -fdharness-progress.txtharness-tasks.jsongit log --oneline -20git diff --statharness-init.shsession_countsession_countmax_sessionsdepends_onfailed[DEPENDENCY] Circular dependency detected: task-A -> task-B -> task-Adepends_ondepends_onfailedattempts >= max_attemptserror_log[DEPENDENCY]failed[DEPENDENCY] Blocked by failed task-XXXstatus: "pending"depends_oncompletedpriorityidstatus: "failed"attempts < max_attemptsdepends_oncompletedstarted_at_commitin_progressclaimed_bylease_expires_atStarting [<task-id>] <title> (base=<hash>)in_progress[timestamp] [SESSION-N] CHECKPOINT [task-id] step=M/N "description of what was done"checkpoints{ "step": M, "total": N, "description": "...", "timestamp": "ISO" }lease_expires_atvalidation.commandtimeoutgtimeoutvalidation.commandERROR [<task-id>] [CONFIG] Missing validation.commandcommand -v <binary>ENV_SETUPcompletedcompleted_atCompleted [<task-id>] (commit <hash>)attemptserror_logstarted_at_commitgit cat-file -t <hash>git reset --hard <started_at_commit>git clean -fdon_failure.cleanupERROR [<task-id>] [<category>] <message>failedmax_tasks_per_sessioncompletedfailedsession_config.max_tasks_per_sessionsession_config.max_sessionsstatus: "in_progress"claimed_bylease_expires_at
git diff --stat # Uncommitted changes?
git log --oneline -5 # Recent commits since task started?
git stash list # Any stashed work?
`checkpoints`
| Uncommitted? | Recent task commits? | Checkpoints? | Action |
|---|---|---|---|
| No | No | None | Mark |
| No | No | Some | Verify file state matches checkpoint claims. If files reflect checkpoint progress, resume from last step. If not, mark |
| No | Yes | Any | Run |
| Yes | No | Any | Run validation WITH uncommitted changes present. If passes → commit, mark |
| Yes | Yes | Any | Commit uncommitted changes, run |
[timestamp] [SESSION-N] RECOVERY [task-id] action="<action taken>" reason="<reason>"

| Category | Default Recovery | Agent Action |
|---|---|---|
| `ENV_SETUP` | Re-run init, then STOP if still failing | Run |
| `CONFIG` | STOP (requires human fix) | Log the config error precisely (file + field), then STOP. Do not guess or auto-mutate task metadata |
| `TASK_EXEC` | Rollback via | Verify |
| `TEST_FAIL` | Rollback via | Reset to |
| `TIMEOUT` | Kill process, execute cleanup, retry | Wrap validation with |
| `DEPENDENCY` | Skip task, mark blocked | Log which dependency failed, mark task as |
| `SESSION_TIMEOUT` | Use Context Window Recovery Protocol | New session assesses partial progress via Recovery Protocol — may result in completion or failure depending on validation |
`harness-tasks.json` `harness-tasks.json.bak` `ERROR [ENV_SETUP] harness-tasks.json corrupted and unrecoverable` `harness-tasks.json` `harness-tasks.json.bak` `harness-tasks.json.tmp` `mv` `harness-init.sh` `harness-init.sh`
#!/bin/bash
# Abort on any unguarded failure. Every step below is best-effort and guards
# itself, so a missing toolchain downgrades to a warning instead of aborting.
set -e

# Install dependencies with npm, falling back to pip. Errors are silenced and
# tolerated on purpose: the project may use neither package manager.
npm install 2>/dev/null || pip install -r requirements.txt 2>/dev/null || true

# Probe the database port with a plain TCP connect. An HTTP request (curl) is
# rejected by a non-HTTP server on this port, so it would report "not
# reachable" even when the database is up. The subshell keeps fd 3 from
# leaking into the rest of the script.
if ! (exec 3<>/dev/tcp/localhost/5432) 2>/dev/null; then
  echo "WARN: DB not reachable"
fi

# Smoke test: stop at the first failing test; a failure is only a warning.
npm test -- --bail --silent 2>/dev/null || echo "WARN: Smoke test failed"
echo "Environment health check complete"
`[ISO-timestamp] [SESSION-N] <TYPE> [task-id]? [category]? message` `[task-id]` `[category]` `INIT` `LOCK` `STATS` `INIT` `Starting` `Completed` `ERROR` `CHECKPOINT` `ROLLBACK` `RECOVERY` `STATS` `LOCK` `WARN` `ENV_SETUP` `CONFIG` `TASK_EXEC` `TEST_FAIL` `TIMEOUT` `DEPENDENCY` `SESSION_TIMEOUT`
grep "ERROR" harness-progress.txt # All errors
grep "ERROR" harness-progress.txt | grep "TASK_EXEC" # Execution errors only
grep "SESSION-3" harness-progress.txt # All session 3 activity
grep "STATS" harness-progress.txt # All session summaries
grep "CHECKPOINT" harness-progress.txt # All checkpoints
grep "RECOVERY" harness-progress.txt # All recovery actions
`harness-tasks.json` `last_session` `session_count`
[timestamp] [SESSION-N] STATS tasks_total=10 completed=7 failed=1 pending=2 blocked=0 attempts_total=12 checkpoints=23
`blocked` `depends_on` `/harness init` `harness-progress.txt` `harness-tasks.json` `session_config` `harness-init.sh` `.gitignore` `/harness status` `harness-tasks.json` `harness-progress.txt` `blocked` `depends_on` `[status] task-id: title (attempts/max_attempts)` `harness-progress.txt` `/harness add` `harness-tasks.json` `task-NNN` `pending` `max_attempts: 3` `depends_on` `priority` `depends_on` `validation.command` `timeout_seconds` `git worktree`