# agent-orchestrator/examples/builder-adversary/agents.toml

# examples/builder-adversary — a Builder/Adversary loop pair (the cc-ci pattern, generic).
#
# Two independent agent loops that coordinate ONLY through a git repo:
#   • Builder   — does the work, claims each gate when it believes a Definition-of-Done item is met.
#   • Adversary — DISBELIEVES the Builder; cold-verifies every claim from its own clone, PASS/FAIL.
# A persistent Orchestrator supervises; a one-shot Reporter runs on completion. The watchdog keeps
# them alive, paced, and signals the handoff (claim(…) → ping Adversary, review(…) → ping Builder).
#
# This is the same shape cc-ci runs in production, stripped to a small self-contained task: build a
# `wc` CLI (see plans/). Nothing here is project-orchestrator/fleet aware — it is a plain project.
#
# Run it by hand (status starts nothing):
#   python3 ../../agents.py status --config agents.toml
#   python3 ../../agents.py up     --config agents.toml      # needs `claude` on PATH
#   python3 ../../agents.py down   --config agents.toml
# To exercise the mechanics with no agent CLI, set defaults.backend = "demo" (idles, no real work).

# ─────────────────────────── global watchdog cadence ───────────────────────────
[watchdog]
# Light pass: seconds between handoff / stall / limit checks.
signal_interval = 30
# Heavy pass: seconds between heal / phase-advance checks.
heavy_interval = 300
# Flat probe cadence used when a reset time can't be parsed.
limit_probe_fallback = 300
# Seconds to wait past a parsed reset before probing.
limit_reset_slack = 45
# Seconds of slack past a WAITING-UNTIL marker before a stall reboot.
stall_grace = 180

# ─────────────────────────── defaults inherited by every agent ───────────────────────────
[defaults]
# REQUIRED. tmux namespace; sessions are named ba-builder, ba-adv, …
session_prefix = "ba-"
# REQUIRED. Holds logs + state/; resolved relative to this file.
log_dir = ".ao-state"
# Switch to "demo" for a dependency-free, mechanics-only run.
backend = "claude"
model = "claude-sonnet-4-6"
# Accepted values: none | heal | heal+stall
watch = "heal"

# ─────────────────────────── backends (declared as data) ───────────────────────────
[backend.claude]
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
# "arg": the full prompt is passed as a CLI argument.
prompt_delivery = "arg"
# Setting a process name enables backend-mismatch healing.
process_name = "claude"
submit_key = "Enter"
# Idle threshold referenced by watch = "heal+stall" (presumably seconds; confirm).
stall_idle = 300
# Regexes are written as literal strings so backslashes need no escaping.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
# NOTE(review): "blocks cannot be modified" is already covered by the broader
# "cannot be modified" alternative; kept as-is to preserve the pattern exactly.
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'

# Dependency-free backend: a shell that just idles (no real work).
[backend.demo]
bin = "echo '[demo] {session} up (kickoff: {kickoff})'; exec sleep 1000000"
prompt_delivery = "exec"

# ─────────────────────────── agents ───────────────────────────
# The loop pair is the star. The work repo (handoff.repo, below) is what they build in. For TRUE
# cold-verification each loop needs its OWN clone of that repo (see README "Isolation"): here the
# Builder works in ./work and the Adversary verifies from ./work-adv, for a single-host quick start.

[[agent]]
# tmux session: ba-builder
name = "builder"
# Loop agent: kickoff = prompts/kickoff.md (per phase) + prompts/builder.md
kind = "loop"
role = "builder"
# The Builder's working clone of the work repo.
dir = "./work"
# Restart if dead/wedged AND if idle past stall_idle (respects WAITING-UNTIL).
watch = "heal+stall"

[[agent]]
name = "adversary"
# Abbreviated session name (handy in logs / remote-control).
session = "ba-adv"
kind = "loop"
role = "adversary"
# The Adversary's SEPARATE clone; it verifies from a cold start.
dir = "./work-adv"
watch = "heal+stall"

[[agent]]
# tmux session: ba-orchestrator
name = "orchestrator"
kind = "persistent"
model = "claude-opus-4-8"
# Resumes the prior conversation: claude --resume <state/orchestrator.id>
resume = true
# Keep it alive/healed; never stall-reboot a persistent supervisor.
watch = "heal"
# Startup instructions. Line-ending backslashes fold the text into one paragraph.
prompt = """
You supervise this Builder/Adversary project. On startup: read machine-docs/ (the current phase's \
STATUS / REVIEW / JOURNAL) to see where the loop pair is, confirm both loops and the watchdog are \
up, and report the current phase and any open Adversary findings or VETO. Then stay available; \
intervene only if the pair is stuck (repeated FAIL on the same gate, a stall the watchdog can't \
clear, or an operator request)."""
# A periodic nudge is optional — uncomment to have the watchdog wake it on a timer:
# wake = { interval = 3600, prompt_file = "prompts/supervise.md" }

[[agent]]
# tmux session: ba-reporter
name = "reporter"
# One-shot task: runs to completion, then idles.
kind = "task"
model = "claude-opus-4-8"
watch = "none"
# Not started by a bare `up`; fired by [loop].on_complete below.
enabled = false
# Line-ending backslashes fold the text into one paragraph.
prompt = """
The phase sequence is complete. Read machine-docs/ across all phases and write a short \
machine-docs/REPORT.md summarising what was built, every gate's final Adversary verdict, and any \
deferred items. Then go idle."""

# Helper service with no AI behind it: tails and renders the loop transcripts.
# `up` starts it and `down` kills it.
[[service]]
# tmux session: ba-cleanlogs
name = "cleanlogs"
command = "python3 ../../agent-log.py follow-all"
dir = "."

# ─────────────────────────── the phase machine (kind="loop" agents) ───────────────────────────
[loop]
# Phase-index file, stored under <log_dir>/state/.
state_file = "phase-idx"
# Keep the current index across restarts (don't reset to 0).
resume_phase = true
# Advance when the phase's status file shows the done_marker.
auto_advance = true
done_marker = "## DONE"
# Phase preamble; slots {phase_id}/{plan}/{status}/{role}.
kickoff_template = "prompts/kickoff.md"
# Role prompt = prompts/<role>.md
roles_dir = "prompts"

# Handoff: the watchdog watches the work repo's origin/main and the two inbox files, and pings the
# other loop on the matching signal. claim(…) commits → ping Adversary; review(…) → ping Builder.
# Written as dotted keys (one per line) rather than one long inline table; the parsed
# `loop.handoff` table is identical.
handoff.repo = "./work"
handoff.claim_pings = "adversary"
handoff.review_pings = "builder"
handoff.inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"]
handoff.claim_pattern = "^claim"
handoff.review_pattern = "^review"
# NOTE(review): presumably the directory in the work repo holding STATUS / REVIEW / JOURNAL — confirm.
handoff.state_subdir = "machine-docs"

# When the last phase completes, fire the one-shot reporter (its trigger file under <log_dir>).
on_complete = { trigger_file = ".run-report-on-complete", run = "reporter" }

# Phase sequence. Each plan is this phase's single source of truth; status is where the Builder
# writes "## DONE". The second phase shows a per-phase model override (Builder on opus for it).
phases = [
  { id = "wc",   plan = "plans/wc.md",   status = "STATUS-wc.md" },
  { id = "json", plan = "plans/json.md", status = "STATUS-json.md", models = { builder = "claude-opus-4-8" } },
]