# project-orchestrator/agents.toml

# project-orchestrator — agent-orchestrator harness config (this project's ONLY config).
#
# The PO is just a project that uses the agent-orchestrator harness (vendored as the `engine/`
# submodule). What makes it the project-orchestrator is its *job* — fleet management — not its
# architecture. Run it by hand exactly like any other project:
#
#   nix develop -c python3 engine/agents.py status
#   nix develop -c python3 engine/agents.py up           # start the PO agent + watchdog
#   nix develop -c python3 engine/agents.py down
#
# Runtime state (resume ids, limit windows) lives under <log_dir>/state/, NOT here. There is NO
# fleet/PO metadata in any project's config — the fleet lives only in this repo's fleet.toml.

# ─────────────────────────── global watchdog cadence ───────────────────────────
[watchdog]
# Light pass — seconds between handoff / stall / limit checks.
signal_interval      = 30
# Heavy pass — seconds between heal / phase-advance checks.
heavy_interval       = 300
# Flat probe cadence used when a limit reset time can't be parsed
# (presumably seconds, like the other cadences — confirm against the harness).
limit_probe_fallback = 300
# Seconds to wait past a parsed reset time before probing.
limit_reset_slack    = 45
# Seconds of slack allowed past a WAITING-UNTIL marker before a stall reboot.
stall_grace          = 180
# ─────────────────────────── defaults inherited by every agent ───────────────────────────
[defaults]
# REQUIRED — tmux namespace for the project-orchestrator project.
session_prefix = "po-"
# REQUIRED — holds logs + state/; resolved relative to this file.
log_dir        = ".ao-state"
backend        = "claude"
model          = "claude-opus-4-8"
# Allowed values: none | heal | heal+stall
watch          = "heal"

# ─────────────────────────── backends (declared as data) ───────────────────────────
[backend.claude]
# Backend definition selected by `backend = "claude"` in [defaults] and [[agent]].
bin             = "claude"
flags           = "--dangerously-skip-permissions"
remote_control  = true
supports_resume = true
prompt_delivery = "arg"
process_name    = "claude"
submit_key      = "Enter"
# NOTE(review): presumably seconds of idleness before a stall is declared — confirm against the harness.
stall_idle      = 300
# Regexes are TOML literal strings (single quotes), so backslashes reach the regex engine
# as written and need no double-escaping.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re  = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
# NOTE(review): if the harness uses an unanchored search, `blocks cannot be modified` is already
# covered by `cannot be modified`; the middle alternative looks redundant — confirm before trimming.
fatal_re  = 'redacted_thinking|blocks cannot be modified|cannot be modified'

# ─────────────────────────── the PO agent ───────────────────────────
# A single persistent fleet-management agent is enough to start (the plan: "add a loop only if
# useful"). It is NOT a build loop — it manages a fleet of *other* projects: create / start / stop
# / update / list / status, reading each project's harness docs to work out how to drive it. Its
# startup prompt lives in prompts/. It is operator-driven: NO periodic wake — the PO manages
# projects on request, it does not watch them live.

[[agent]]
# tmux session name: po-project-orchestrator
name        = "project-orchestrator"
kind        = "persistent"
backend     = "claude"
model       = "claude-opus-4-8"
# Resume the agent's session across restarts (--resume <state id>).
resume      = true
# Recover the session if it is dead (crash / wedge / wrong-backend); never reboot it for idle.
watch       = "heal"
# Startup prompt: read your role + fleet, then report.
prompt_file = "prompts/orchestrator.md"
# `wake` is deliberately omitted: the watchdog sends NO periodic prompts. It heals a dead
# session but never nudges a live one.