# examples/builder-adversary — a Builder/Adversary loop pair (the cc-ci pattern, generic).
#
# Two independent agent loops that coordinate ONLY through a git repo:
#   • Builder   — does the work, claims each gate when it believes a Definition-of-Done item is met.
#   • Adversary — DISBELIEVES the Builder; cold-verifies every claim from its own clone, PASS/FAIL.
# A persistent Orchestrator supervises; a one-shot Reporter runs on completion. The watchdog keeps
# them alive, paced, and signals the handoff (claim(…) → ping Adversary, review(…) → ping Builder).
#
# This is the same shape cc-ci runs in production, stripped to a small self-contained task: build a
# `wc` CLI (see plans/). Nothing here is project-orchestrator/fleet aware — it is a plain project.
#
# Run it by hand (status starts nothing):
#   python3 ../../agents.py status --config agents.toml
#   python3 ../../agents.py up     --config agents.toml     # needs `claude` on PATH
#   python3 ../../agents.py down   --config agents.toml
# To exercise the mechanics with no agent CLI, set defaults.backend = "demo" (idles, no real work).

# Global watchdog cadence.
[watchdog]
# s between handoff / stall / limit checks (the light pass).
signal_interval = 30
# s between heal / phase-advance checks (the heavy pass).
heavy_interval = 300
# Flat probe cadence used when a reset time can't be parsed.
limit_probe_fallback = 300
# s to wait past a parsed reset time before probing.
limit_reset_slack = 45
# s of slack past a WAITING-UNTIL marker before a stall reboot.
stall_grace = 180

# Defaults inherited by every agent.
[defaults]
# REQUIRED — tmux namespace (sessions: ba-builder, ba-adv, …).
session_prefix = "ba-"
# REQUIRED — logs + state/, resolved relative to this file.
log_dir = ".ao-state"
# Set to "demo" for a dependency-free, mechanics-only run.
backend = "claude"
model = "claude-sonnet-4-6"
# One of: none | heal | heal+stall.
watch = "heal"

# Backends (declared as data).
[backend.claude]
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
# The full prompt is passed as a CLI argument.
prompt_delivery = "arg"
# Enables backend-mismatch healing.
process_name = "claude"
submit_key = "Enter"
stall_idle = 300
# Patterns are written as literal strings so backslashes need no escaping.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'

# Dependency-free backend: a shell that just idles (no real work).
[backend.demo]
bin = "echo '[demo] {session} up (kickoff: {kickoff})'; exec sleep 1000000"
prompt_delivery = "exec"

# Agents.
# The loop pair is the star. The work repo (handoff.repo, below) is what they build in; for TRUE
# cold-verification give each loop its OWN clone of that repo (see README "Isolation"). In this
# single-host quick start the Builder's dir is ./work and the Adversary's is ./work-adv.
[[agent]]
# tmux session: ba-builder
name = "builder"
# kickoff = prompts/kickoff.md (per phase) + prompts/builder.md
kind = "loop"
role = "builder"
# The Builder's working clone of the work repo.
dir = "./work"
# Restart if dead/wedged AND if idle past stall_idle (respects WAITING-UNTIL).
watch = "heal+stall"

[[agent]]
name = "adversary"
# Abbreviated session name (handy in logs / remote-control).
session = "ba-adv"
kind = "loop"
role = "adversary"
# The Adversary's SEPARATE clone — it verifies from a cold start.
dir = "./work-adv"
watch = "heal+stall"

[[agent]]
# tmux session: ba-orchestrator
name = "orchestrator"
kind = "persistent"
model = "claude-opus-4-8"
# claude --resume
resume = true
# Keep it alive/healed; never stall-reboot a persistent supervisor.
watch = "heal"
# Each line-ending backslash folds the newline, so the prompt is delivered as a single line.
prompt = """
You supervise this Builder/Adversary project. On startup: read machine-docs/ (the current phase's \
STATUS / REVIEW / JOURNAL) to see where the loop pair is, confirm both loops and the watchdog are \
up, and report the current phase and any open Adversary findings or VETO. Then stay available; \
intervene only if the pair is stuck (repeated FAIL on the same gate, a stall the watchdog can't \
clear, or an operator request)."""
# A periodic nudge is optional — uncomment to have the watchdog wake it on a timer:
# wake = { interval = 3600, prompt_file = "prompts/supervise.md" }

[[agent]]
# tmux session: ba-reporter
name = "reporter"
# One-shot: runs to completion, then idles.
kind = "task"
model = "claude-opus-4-8"
watch = "none"
# Not started by a bare `up`; fired by [loop].on_complete below.
enabled = false
prompt = """
The phase sequence is complete. Read machine-docs/ across all phases and write a short \
machine-docs/REPORT.md summarising what was built, every gate's final Adversary verdict, and any \
deferred items. Then go idle."""

# Non-AI helper service (tail + render the loop transcripts). Started by `up`, killed by `down`.
[[service]]
# tmux session: ba-cleanlogs
name = "cleanlogs"
command = "python3 ../../agent-log.py follow-all"
dir = "."

# The phase machine (kind="loop" agents).
[loop]
# Phase-index file; kept in the state/ directory under log_dir.
state_file = "phase-idx"
# Keep the current index across restarts (don't reset to 0).
resume_phase = true
# Advance when the phase's status file shows the done_marker.
auto_advance = true
done_marker = "## DONE"
# Phase preamble; slots {phase_id}/{plan}/{status}/{role}.
kickoff_template = "prompts/kickoff.md"
# Role prompt = prompts/ROLE.md, e.g. prompts/builder.md for role "builder".
roles_dir = "prompts"

# Handoff: the watchdog watches the work repo's origin/main and the two inbox files, and pings the
# other loop on the matching signal. claim(…) commits → ping Adversary; review(…) → ping Builder.
handoff.repo = "./work"
handoff.claim_pings = "adversary"
handoff.review_pings = "builder"
handoff.inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"]
handoff.claim_pattern = "^claim"
handoff.review_pattern = "^review"
handoff.state_subdir = "machine-docs"

# When the last phase completes, fire the one-shot reporter.
# NOTE(review): the original comment's description of where the trigger file lives was lost —
# presumably the state/ directory under log_dir; confirm.
on_complete.trigger_file = ".run-report-on-complete"
on_complete.run = "reporter"

# Phase sequence. Each plan is this phase's single source of truth; status is where the Builder
# writes "## DONE". The second phase shows a per-phase model override (Builder on opus for it).
phases = [
    { id = "wc", plan = "plans/wc.md", status = "STATUS-wc.md" },
    { id = "json", plan = "plans/json.md", status = "STATUS-json.md", models = { builder = "claude-opus-4-8" } },
]