The PO's job is to manage projects on request, not watch them live. Remove the hourly wake/sweep entirely: - agents.toml: watch="heal" (recover-if-dead), no `wake` field - prompts/supervise.md: deleted - prompts/orchestrator.md, README.md, docs/bootstrap.md, docs/manage-projects.md: drop sweep/wake references; document operator-driven, no periodic sweep Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
60 lines
3.6 KiB
TOML
60 lines
3.6 KiB
TOML
# project-orchestrator — agent-orchestrator harness config (this project's ONLY config).
#
# The PO is just a project that uses the agent-orchestrator harness (vendored as the `engine/`
# submodule). What makes it the project-orchestrator is its *job* — fleet management — not its
# architecture. Run it by hand exactly like any other project:
#
#   nix develop -c python3 engine/agents.py status
#   nix develop -c python3 engine/agents.py up     # start the PO agent + watchdog
#   nix develop -c python3 engine/agents.py down
#
# Runtime state (resume ids, limit windows) lives under <log_dir>/state/, NOT here. There is NO
# fleet/PO metadata in any project's config — the fleet lives only in this repo's fleet.toml.
|
|
|
|
# ─────────────────────────── global watchdog cadence ───────────────────────────
|
|
[watchdog]
# Light pass: seconds between handoff / stall / limit checks.
signal_interval = 30
# Heavy pass: seconds between heal / phase-advance checks.
heavy_interval = 300
# Flat probe cadence used when a reset time can't be parsed
# (presumably seconds, like the neighbouring keys — TODO confirm).
limit_probe_fallback = 300
# Seconds to wait past a parsed reset before probing.
limit_reset_slack = 45
# Seconds of slack past a WAITING-UNTIL marker before a stall reboot.
stall_grace = 180
|
|
|
|
# ─────────────────────────── defaults inherited by every agent ───────────────────────────
|
|
[defaults]
# REQUIRED — tmux namespace for the project-orchestrator project.
session_prefix = "po-"
# REQUIRED — holds logs + state/; resolved relative to this file.
log_dir = ".ao-state"
backend = "claude"
model = "claude-opus-4-8"
# Accepted values: none | heal | heal+stall.
watch = "heal"
|
|
|
|
# ─────────────────────────── backends (declared as data) ───────────────────────────
|
|
[backend.claude]
# Describes the `claude` backend to the harness as plain data. The meaning of each key is
# defined by the engine, not by this file — NOTE(review): confirm against the engine's docs
# before changing any value.
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
prompt_delivery = "arg"
process_name = "claude"
submit_key = "Enter"
stall_idle = 300 # presumably seconds, like the [watchdog] intervals — TODO confirm
# The regex values below are TOML literal strings ('...'): backslashes are taken verbatim, so
# `\d` needs no doubling. The parsed values are identical to the basic-string spellings.
active_re = 'esc to interrupt|Running tool|⠇|⠙|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
# NOTE(review): "blocks cannot be modified" already contains "cannot be modified", so under an
# unanchored search the longer alternative looks redundant — confirm before trimming.
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'
|
|
|
|
# ─────────────────────────── the PO agent ───────────────────────────
|
|
# A single persistent fleet-management agent is enough to start (the plan: "add a loop only if
# useful"). It is NOT a build loop — it manages a fleet of *other* projects: create / start / stop
# / update / list / status, reading each project's harness docs to work out how to drive it. Its
# startup prompt lives in prompts/. It is operator-driven: NO periodic wake — the PO manages
# projects on request, it does not watch them live.
|
|
|
|
[[agent]]
# tmux session name: po-project-orchestrator.
name = "project-orchestrator"
kind = "persistent"
backend = "claude"
model = "claude-opus-4-8"
# Resume its session across restarts (--resume <state id>).
resume = true
# Recover-if-dead only (crash / wedge / wrong-backend); never reboot for idle.
watch = "heal"
# Startup prompt: read your role + fleet, then report.
prompt_file = "prompts/orchestrator.md"
# Deliberately no `wake` key: the watchdog sends NO periodic prompts. It heals a dead session
# but never nudges a live one.
|