# A self-contained examples/builder-adversary/ that distills the cc-ci production loop pair into a
# tiny, fully-local task (build a `wc` CLI in two phases):
#   - agents.toml: builder + adversary loops, persistent orchestrator, on_complete reporter,
#     cleanlogs service; phase machine with a per-phase model override
#   - prompts/: kickoff template + builder/adversary roles carrying the load-bearing protocol
#     (claim()/review() handoff, machine-docs file-location rule,
#     WHAT+HOW+EXPECTED+WHERE=STATUS / WHY=JOURNAL anti-anchoring, WAITING-UNTIL liveness)
#   - plans/: two phase plans (wc, json), each with a cold-verifiable Definition of Done
#   - README: how to run, the work-repo two-clone isolation model, how to adapt
#
# Verified: `agents.py status --config agents.toml` parses and lists all agents.
#
# Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
#
# 126 lines · 7.5 KiB · TOML
# examples/builder-adversary — a Builder/Adversary loop pair (the cc-ci pattern, generic).
#
# Two independent agent loops that coordinate ONLY through a git repo:
#   • Builder   — does the work, claims each gate when it believes a Definition-of-Done item is met.
#   • Adversary — DISBELIEVES the Builder; cold-verifies every claim from its own clone, PASS/FAIL.
# A persistent Orchestrator supervises; a one-shot Reporter runs on completion. The watchdog keeps
# them alive, paced, and signals the handoff (claim(…) → ping Adversary, review(…) → ping Builder).
#
# This is the same shape cc-ci runs in production, stripped to a small self-contained task: build a
# `wc` CLI (see plans/). Nothing here is project-orchestrator/fleet aware — it is a plain project.
#
# Run it by hand (status starts nothing):
#   python3 ../../agents.py status --config agents.toml
#   python3 ../../agents.py up --config agents.toml      # needs `claude` on PATH
#   python3 ../../agents.py down --config agents.toml
# To exercise the mechanics with no agent CLI, set defaults.backend = "demo" (idles, no real work).

# ─────────────────────────── global watchdog cadence ───────────────────────────
[watchdog]
# Seconds between the light checks: handoff / stall / limit.
signal_interval = 30
# Seconds between the heavy checks: heal / phase-advance.
heavy_interval = 300
# Flat probe cadence used when a limit reset time can't be parsed
# (presumably seconds, like the other cadences — confirm against agents.py).
limit_probe_fallback = 300
# Seconds to wait past a parsed reset time before probing.
limit_reset_slack = 45
# Seconds of slack past a WAITING-UNTIL marker before a stall reboot.
stall_grace = 180

# ─────────────────────────── defaults inherited by every agent ───────────────────────────
[defaults]
# REQUIRED — tmux namespace (sessions: ba-builder, ba-adv, …).
session_prefix = "ba-"
# REQUIRED — holds logs + state/; resolved relative to this file.
log_dir = ".ao-state"
# Set to "demo" for a dependency-free, mechanics-only run.
backend = "claude"
model = "claude-sonnet-4-6"
# One of: none | heal | heal+stall.
watch = "heal"

# ─────────────────────────── backends (declared as data) ───────────────────────────
[backend.claude]
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
# The full prompt is passed as a CLI argument.
prompt_delivery = "arg"
# Enables backend-mismatch healing.
process_name = "claude"
submit_key = "Enter"
stall_idle = 300
# Regex patterns are written as literal strings so backslashes need no escaping;
# the parsed values are unchanged.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'

# Dependency-free backend: a shell that just idles (no real work).
[backend.demo]
bin = "echo '[demo] {session} up (kickoff: {kickoff})'; exec sleep 1000000"
prompt_delivery = "exec"

# ─────────────────────────── agents ───────────────────────────
# The loop pair is the star. The work repo (handoff.repo, below) is what they build in; for TRUE
# cold-verification each loop needs its OWN clone of that repo (see README "Isolation"). Here the
# Builder works in ./work (the same path as handoff.repo) and the Adversary in ./work-adv.

[[agent]]
# tmux session: ba-builder
name = "builder"
# Kickoff = prompts/kickoff.md (per phase) + prompts/builder.md.
kind = "loop"
role = "builder"
# The Builder's working clone of the work repo.
dir = "./work"
# Restart if dead/wedged AND if idle past stall_idle (respects WAITING-UNTIL).
watch = "heal+stall"

[[agent]]
name = "adversary"
# Abbreviated session name (handy in logs / remote-control).
session = "ba-adv"
kind = "loop"
role = "adversary"
# The Adversary's SEPARATE clone — it verifies from a cold start.
dir = "./work-adv"
watch = "heal+stall"

[[agent]]
# tmux session: ba-orchestrator
name = "orchestrator"
kind = "persistent"
model = "claude-opus-4-8"
# claude --resume <state/orchestrator.id>
resume = true
# Keep it alive/healed; never stall-reboot a persistent supervisor.
watch = "heal"
# Each trailing backslash folds the newline, so the prompt is delivered as one paragraph.
prompt = """
You supervise this Builder/Adversary project. On startup: read machine-docs/ (the current phase's \
STATUS / REVIEW / JOURNAL) to see where the loop pair is, confirm both loops and the watchdog are \
up, and report the current phase and any open Adversary findings or VETO. Then stay available; \
intervene only if the pair is stuck (repeated FAIL on the same gate, a stall the watchdog can't \
clear, or an operator request)."""
# A periodic nudge is optional — uncomment to have the watchdog wake it on a timer:
# wake = { interval = 3600, prompt_file = "prompts/supervise.md" }

[[agent]]
# tmux session: ba-reporter
name = "reporter"
# One-shot: runs to completion, then idles.
kind = "task"
model = "claude-opus-4-8"
watch = "none"
# Not started by a bare `up`; fired by [loop].on_complete.
enabled = false
# Each trailing backslash folds the newline, so the prompt is delivered as one paragraph.
prompt = """
The phase sequence is complete. Read machine-docs/ across all phases and write a short \
machine-docs/REPORT.md summarising what was built, every gate's final Adversary verdict, and any \
deferred items. Then go idle."""

# Non-AI helper service (tail + render the loop transcripts). Started by `up`, killed by `down`.
[[service]]
# tmux session: ba-cleanlogs
name = "cleanlogs"
command = "python3 ../../agent-log.py follow-all"
dir = "."

# ─────────────────────────── the phase machine (kind="loop" agents) ───────────────────────────
[loop]
# Lives under <log_dir>/state/.
state_file = "phase-idx"
# Keep the current index across restarts (don't reset to 0).
resume_phase = true
# Advance when the phase's status file shows the done_marker.
auto_advance = true
done_marker = "## DONE"
# Phase preamble; slots {phase_id}/{plan}/{status}/{role}.
kickoff_template = "prompts/kickoff.md"
# Role prompt = prompts/<role>.md.
roles_dir = "prompts"

# Handoff: the watchdog watches the work repo's origin/main and the two inbox files, and pings the
# other loop on the matching signal. claim(…) commits → ping Adversary; review(…) → ping Builder.
# Written as dotted keys (one per line); this parses to the same `handoff` table as an inline table.
handoff.repo = "./work"
handoff.claim_pings = "adversary"
handoff.review_pings = "builder"
handoff.inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"]
handoff.claim_pattern = '^claim'
handoff.review_pattern = '^review'
handoff.state_subdir = "machine-docs"

# When the last phase completes, fire the one-shot reporter (its trigger file under <log_dir>).
on_complete = { trigger_file = ".run-report-on-complete", run = "reporter" }

# Phase sequence. Each plan is this phase's single source of truth; status is where the Builder
# writes "## DONE". The second phase shows a per-phase model override (Builder on opus for it).
phases = [
    { id = "wc", plan = "plans/wc.md", status = "STATUS-wc.md" },
    { id = "json", plan = "plans/json.md", status = "STATUS-json.md", models = { builder = "claude-opus-4-8" } },
]