Files
mfowler 7f237a522c docs(examples): add a Builder/Adversary loop-pair example (the cc-ci pattern)
A self-contained examples/builder-adversary/ that distills the cc-ci production
loop pair into a tiny, fully-local task (build a `wc` CLI in two phases):

- agents.toml: builder + adversary loops, persistent orchestrator, on_complete
  reporter, cleanlogs service; phase machine with a per-phase model override
- prompts/: kickoff template + builder/adversary roles carrying the load-bearing
  protocol (claim()/review() handoff, machine-docs file-location rule,
  WHAT+HOW+EXPECTED+WHERE=STATUS / WHY=JOURNAL anti-anchoring, WAITING-UNTIL liveness)
- plans/: two phase plans (wc, json) each with a cold-verifiable Definition of Done
- README: how to run, the work-repo two-clone isolation model, how to adapt

Verified: `agents.py status --config agents.toml` parses and lists all agents.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-14 17:50:42 +00:00

126 lines
7.5 KiB
TOML

# examples/builder-adversary — a Builder/Adversary loop pair (the cc-ci pattern, generic).
#
# Two independent agent loops that coordinate ONLY through a git repo:
# • Builder — does the work, claims each gate when it believes a Definition-of-Done item is met.
# • Adversary — DISBELIEVES the Builder; cold-verifies every claim from its own clone, PASS/FAIL.
# A persistent Orchestrator supervises; a one-shot Reporter runs on completion. The watchdog keeps
# them alive, paced, and signals the handoff (claim(…) → ping Adversary, review(…) → ping Builder).
#
# This is the same shape cc-ci runs in production, stripped to a small self-contained task: build a
# `wc` CLI (see plans/). Nothing here is project-orchestrator/fleet aware — it is a plain project.
#
# Run it by hand (status starts nothing):
# python3 ../../agents.py status --config agents.toml
# python3 ../../agents.py up --config agents.toml # needs `claude` on PATH
# python3 ../../agents.py down --config agents.toml
# To exercise the mechanics with no agent CLI, set defaults.backend = "demo" (idles, no real work).
# ─────────────────────────── global watchdog cadence ───────────────────────────
[watchdog]
# Light pass: seconds between handoff / stall / limit checks.
signal_interval = 30
# Heavy pass: seconds between heal / phase-advance checks.
heavy_interval = 300
# Flat probe cadence used when a reset time can't be parsed
# (unit not stated here — presumably seconds like the neighbouring keys; confirm).
limit_probe_fallback = 300
# Seconds to wait past a parsed reset before probing.
limit_reset_slack = 45
# Seconds of slack past a WAITING-UNTIL marker before a stall reboot.
stall_grace = 180
# ─────────────────────────── defaults inherited by every agent ───────────────────────────
[defaults]
# REQUIRED — tmux namespace; sessions are named ba-builder, ba-adv, …
session_prefix = "ba-"
# REQUIRED — holds logs + state/; resolved relative to this file.
log_dir = ".ao-state"
# Switch to "demo" for a dependency-free, mechanics-only run.
backend = "claude"
model = "claude-sonnet-4-6"
# Accepted values: none | heal | heal+stall
watch = "heal"
# ─────────────────────────── backends (declared as data) ───────────────────────────
[backend.claude]
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
# The full prompt is passed as a CLI argument.
prompt_delivery = "arg"
# Setting the process name enables backend-mismatch healing.
process_name = "claude"
submit_key = "Enter"
# Idle threshold used by watch = "heal+stall" (unit not stated here — presumably seconds; confirm).
stall_idle = 300
# The regexes below are TOML literal strings (single quotes): no escape processing happens, so a
# backslash is written once (`\d`) and reaches the regex engine exactly as typed.
# NOTE(review): presumably matched against the agent's terminal output — confirm in agents.py.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'
# Dependency-free backend: a shell that just idles (no real work).
[backend.demo]
bin = "echo '[demo] {session} up (kickoff: {kickoff})'; exec sleep 1000000"
prompt_delivery = "exec"
# ─────────────────────────── agents ───────────────────────────
# The loop pair is the star. The work repo (handoff.repo, below) is what they build in; for TRUE
# cold-verification each loop needs its OWN clone of that repo (see README "Isolation"). Here the
# Builder works in ./work and the Adversary in its own clone, ./work-adv.
[[agent]]
# tmux session: ba-builder
name = "builder"
# Loop agent: kickoff = prompts/kickoff.md (per phase) + prompts/builder.md
kind = "loop"
role = "builder"
# The Builder's working clone of the work repo.
dir = "./work"
# Restart if dead/wedged AND if idle past stall_idle (respects WAITING-UNTIL).
watch = "heal+stall"
[[agent]]
name = "adversary"
# Abbreviated session name (handy in logs / remote-control).
session = "ba-adv"
kind = "loop"
role = "adversary"
# The Adversary's SEPARATE clone — it verifies from a cold start.
dir = "./work-adv"
watch = "heal+stall"
[[agent]]
# tmux session: ba-orchestrator
name = "orchestrator"
kind = "persistent"
model = "claude-opus-4-8"
# i.e. claude --resume <state/orchestrator.id>
resume = true
# Keep it alive/healed; never stall-reboot a persistent supervisor.
watch = "heal"
# Startup instructions; the line-ending backslashes fold this into a single line.
prompt = """
You supervise this Builder/Adversary project. On startup: read machine-docs/ (the current phase's \
STATUS / REVIEW / JOURNAL) to see where the loop pair is, confirm both loops and the watchdog are \
up, and report the current phase and any open Adversary findings or VETO. Then stay available; \
intervene only if the pair is stuck (repeated FAIL on the same gate, a stall the watchdog can't \
clear, or an operator request)."""
# Optional periodic nudge — uncomment to have the watchdog wake it on a timer:
# wake = { interval = 3600, prompt_file = "prompts/supervise.md" }
[[agent]]
# tmux session: ba-reporter
name = "reporter"
# One-shot: runs to completion, then idles.
kind = "task"
model = "claude-opus-4-8"
watch = "none"
# Not started by a bare `up`; fired by [loop].on_complete.
enabled = false
# Report-writing instructions; the line-ending backslashes fold this into a single line.
prompt = """
The phase sequence is complete. Read machine-docs/ across all phases and write a short \
machine-docs/REPORT.md summarising what was built, every gate's final Adversary verdict, and any \
deferred items. Then go idle."""
# Helper service with no AI behind it: tails and renders the loop transcripts.
# `up` starts it and `down` kills it.
[[service]]
# tmux session: ba-cleanlogs
name = "cleanlogs"
command = "python3 ../../agent-log.py follow-all"
dir = "."
# ─────────────────────────── the phase machine (kind="loop" agents) ───────────────────────────
[loop]
# Stored under <log_dir>/state/.
state_file = "phase-idx"
# Keep the current index across restarts (don't reset to 0).
resume_phase = true
# Advance when the phase's status file shows the done_marker.
auto_advance = true
done_marker = "## DONE"
# Phase preamble; slots {phase_id}/{plan}/{status}/{role}.
kickoff_template = "prompts/kickoff.md"
# Role prompt = prompts/<role>.md
roles_dir = "prompts"
# Handoff: the watchdog watches the work repo's origin/main and the two inbox files, and pings the
# other loop on the matching signal. claim(…) commits → ping Adversary; review(…) → ping Builder.
# Spelled as dotted keys, one per line, because a TOML 1.0 inline table cannot span lines; these
# keys parse to the same `loop.handoff` table an inline `handoff = { … }` would.
handoff.repo = "./work"
handoff.claim_pings = "adversary"
handoff.review_pings = "builder"
handoff.inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"]
handoff.claim_pattern = "^claim"
handoff.review_pattern = "^review"
handoff.state_subdir = "machine-docs"
# When the last phase completes, fire the one-shot reporter (its trigger file under <log_dir>).
on_complete = { trigger_file = ".run-report-on-complete", run = "reporter" }
# Phase sequence. Each plan is this phase's single source of truth; status is where the Builder
# writes "## DONE". The second phase shows a per-phase model override (Builder on opus for it).
phases = [
    { id = "wc", plan = "plans/wc.md", status = "STATUS-wc.md" },
    { id = "json", plan = "plans/json.md", status = "STATUS-json.md", models = { builder = "claude-opus-4-8" } },
]