From 289ef07df40a8264f3a36b4e91b923d1424c4658 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Sat, 13 Jun 2026 18:39:00 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20agent-orchestrator=20v0.1.0=20=E2=80=94?= =?UTF-8?q?=20generic=20multi-agent=20harness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracted and generalized from a project-specific agent launch engine. No project specifics remain in code: paths, the loop kickoff preamble, handoff conventions, and the on-complete hook are all config/template driven; session_prefix + log_dir are required. - agents.py: driver + watchdog (data-driven backends via prompt_delivery arg|ping|exec; required session_prefix/log_dir; project-rooted path resolution; configurable kickoff template, handoff patterns, on_complete task; tmux-safe; selftest + init verbs) - agent-log.py: config-driven claude transcript renderer - agents.example.toml: self-contained 2-agent example (dependency-free demo backend) - prompts/: generic builder/adversary/kickoff templates - smoke.sh: isolated up+down sandbox proof that cleans up after itself - flake.nix/.lock: devShell (python311 + tmux + git) - README.md: schema + verbs + AI-PO usage + nix Co-Authored-By: Claude Opus 4.8 --- .gitignore | 6 + README.md | 326 +++++++++++++++ agent-log.py | 221 ++++++++++ agents.example.toml | 109 +++++ agents.py | 929 +++++++++++++++++++++++++++++++++++++++++ examples/PLAN-demo1.md | 8 + examples/PLAN-demo2.md | 7 + flake.lock | 27 ++ flake.nix | 39 ++ prompts/adversary.md | 51 +++ prompts/builder.md | 63 +++ prompts/kickoff.md | 19 + smoke.sh | 89 ++++ 13 files changed, 1894 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 agent-log.py create mode 100644 agents.example.toml create mode 100755 agents.py create mode 100644 examples/PLAN-demo1.md create mode 100644 examples/PLAN-demo2.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 
prompts/adversary.md create mode 100644 prompts/builder.md create mode 100644 prompts/kickoff.md create mode 100755 smoke.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..835a235 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# runtime state + logs (never committed) +.ao-state/ +*.log +__pycache__/ +*.pyc +result diff --git a/README.md b/README.md new file mode 100644 index 0000000..4600d02 --- /dev/null +++ b/README.md @@ -0,0 +1,326 @@ +# agent-orchestrator + +A generic, reusable harness for running and supervising a fleet of AI-agent sessions in **tmux**. +One driver script + one declarative config (`agents.toml`) describe every agent — a Builder / +Adversary loop pair, a persistent supervisor, a one-shot task — and a **watchdog** keeps them +alive, healed, paced, and coordinated. The watchdog reads the same config every tick, so there is +never any env-vs-file drift. + +Nothing about any particular project lives in this repo. Paths, the loop **kickoff preamble**, the +**handoff conventions**, and the **on-complete** hook are all supplied by the project's config and +prompt files. A project consumes this repo as a pinned **git submodule** (`engine/`) and keeps its +own config, prompts, state, and tmux namespace — total isolation between projects. 
+ +``` +agents.py the driver + watchdog (pure Python stdlib; needs python >= 3.11 for tomllib) +agent-log.py render claude JSONL transcripts into clean, greppable logs +agents.example.toml a self-contained 2-agent example project +prompts/ generic role + kickoff templates (builder / adversary / kickoff) +smoke.sh bring the example up + tear it down in an isolated sandbox, then clean up +flake.nix/.lock a Nix devShell with the runtime deps (python311, tmux, git) +``` + +--- + +## Quick start + +```bash +nix develop # python311 + tmux + git on PATH (see "Nix" below) + +python3 agents.py selftest # regression-test the activity detector (no config) +python3 agents.py status --config agents.example.toml # one table: every agent + the phase +./smoke.sh # prove up/down works end-to-end, isolated + clean + +python3 agents.py init myproject # scaffold a starter agents.toml + prompts/ +``` + +`up` is **use-or-create**: an already-running session is left alone, never double-started. + +```bash +python3 agents.py --config agents.toml up # start all enabled agents + services + watchdog +python3 agents.py --config agents.toml up builder # start just one agent (by name) +python3 agents.py --config agents.toml down # stop everything +python3 agents.py --config agents.toml logs builder # tail one session's log +python3 agents.py --config agents.toml phase show # where the loop phase machine is +``` + +`--config` defaults to `./agents.toml`, falling back to one next to `agents.py`. + +--- + +## The config: `agents.toml` + +Five section types: `[watchdog]`, `[backend.<name>]`, `[defaults]`, `[[agent]]` / `[[service]]`, +and `[loop]`. See `agents.example.toml` for a complete, runnable example. 
+ +### `[watchdog]` — global supervisor cadence + +```toml +[watchdog] +signal_interval = 30 # seconds between light checks (handoff / stall / limit) +heavy_interval = 300 # seconds between heal + phase-advance checks +limit_probe_fallback = 300 # re-probe cadence for a usage-limited agent when reset time is unparsable +limit_reset_slack = 45 # seconds to wait past a parsed reset before probing +stall_grace = 180 # seconds of slack past a WAITING-UNTIL marker before a stall reboot +``` + +### `[defaults]` — inherited by every agent + +```toml +[defaults] +session_prefix = "myproj-" # REQUIRED: tmux namespace for this project. No implicit default. +log_dir = ".ao-state" # REQUIRED: logs + state/. Relative paths resolve against project_dir. +backend = "claude" +model = "claude-sonnet-4-6" +dir = "." # default working dir for agents (relative → project dir) +watch = "heal" # none | heal | heal+stall +project_dir = "." # OPTIONAL: project root for resolving prompts/paths (default: config's dir) +``` + +`session_prefix` and `log_dir` are **required** — the harness has no project-specific fallbacks. +Every relative path (`log_dir`, an agent's `dir`, `handoff.repo`, prompt/template files) resolves +against `project_dir`, which defaults to the directory holding the config file. When the config +lives in a sandbox but the prompts live elsewhere (as `smoke.sh` does), set `project_dir` +explicitly. + +### `[backend.<name>]` — backends declared as data + +A backend is fully described by config — no code change to add one. 
The one field that selects +behavior is `prompt_delivery`: + +| `prompt_delivery` | how the kickoff reaches the agent | example | +|---|---|---| +| `"arg"` | passed as a CLI argument (claude-style) | `claude … "$(cat kickoff)"` | +| `"ping"` | typed in after a TUI connects (opencode-style) | attach, wait, send-keys | +| `"exec"` | a plain command; the prompt is written to a file | generic / demo | + +```toml +[backend.claude] +bin = "claude" +flags = "--dangerously-skip-permissions" +remote_control = true # add a --remote-control flag +supports_resume = true # honor an agent's resume=true +prompt_delivery = "arg" +process_name = "claude" # the pane process a healthy session runs (backend-mismatch healing) +submit_key = "Enter" # key to submit a typed message +stall_idle = 300 # seconds idle before a heal+stall agent is rebooted +active_re = "esc to interrupt|Running tool|· \\d+" # pane shows the agent is WORKING +limit_re = "usage limit|limit reached|reached your .*limit" # usage/rate-limit banner +fatal_re = "redacted_thinking|cannot be modified" # unrecoverable session state → kill + restart + +[backend.opencode] # a TUI backend +bin = "opencode" +attach = "{bin} attach {server} --dir {dir}" +server = "http://127.0.0.1:4096" +prompt_delivery = "ping" +process_name = "opencode" +footer_ui = true # a static footer lingers after a turn → only the bottom = activity +log_grace = 180 # within this many seconds of a log write, treat as active +connect_delay = 12 # seconds to wait for the TUI before typing +submit_key = "C-m" +model_env = true # pass the model via OPENCODE_CONFIG_CONTENT +preamble = "set -a; . ./.env; set +a" # shell run before launch (e.g. 
load creds) +active_re = "esc interrupt|thinking|running tool|preparing patch" +limit_re = "usage limit|limit reached" + +[backend.demo] # a dependency-free backend for testing the harness mechanics +bin = "echo '[demo] {session} up'; exec sleep 1000000" +prompt_delivery = "exec" # {kickoff}=prompt file, {session}=session name, {model}=model +``` + +For an `"arg"` backend the flag *templates* are configurable (so you can point at a non-claude +CLI): `resume_flag` (default `--resume '{id}'`), `model_flag` (default `--model '{model}'`), +`remote_control_flag` (default `--remote-control '{session}'`). A backend that sets `process_name` +participates in backend-mismatch healing; one that doesn't (e.g. `demo`) never does. + +### `[[agent]]` — one block per agent + +```toml +[[agent]] +name = "builder" # tmux session defaults to ; override with session= +kind = "loop" # loop | persistent | task +backend = "claude" # overrides defaults.backend +model = "claude-opus-4-8" # overrides defaults.model +dir = "." 
# working dir (relative → project dir) +role = "builder" # loop agents only: role prompt = /.md +resume = true # (arg backends with supports_resume) --resume .id> +watch = "heal+stall" # none | heal | heal+stall +enabled = true # false = not started by a bare `up`, not supervised +wake = { interval = 3600, prompt_file = "prompts/supervise.md" } # periodic nudge +prompt = """inline startup text""" # persistent/task agents; OR prompt_file = "path.md" +log_signature = "PROJECT PHASE" # optional: disambiguate agents that share a dir (agent-log.py) +``` + +| kind | prompt source | typical `watch` | +|---|---|---| +| `loop` | auto-built: kickoff template + `prompts/.md` | `heal+stall` | +| `persistent` | `prompt` / `prompt_file` (+ optional `resume`, `wake`) | `heal` | +| `task` | `prompt` (runs once, then idles) | `none`, `enabled=false` | + +**`watch` policy:** + +| value | behavior | +|---|---| +| `none` | ignored by the watchdog entirely | +| `heal` | restart if the session is dead, FATAL-wedged, or running the wrong backend; pause all healing while inside a usage-limit window; **never** reboot just for being idle | +| `heal+stall` | everything in `heal`, **plus** reboot if idle past `stall_idle` — respecting any `WAITING-UNTIL: ` self-wake marker the agent prints as its last line | + +### `[[service]]` — non-AI helper processes + +```toml +[[service]] +name = "cleanlogs" +command = "python3 agent-log.py follow-all" +``` + +Started by a bare `up`, killed by `down`. Just a supervised command in a tmux session. 
+ +### `[loop]` — the phase state machine (governs `kind="loop"` agents) + +```toml +[loop] +state_file = "phase-idx" # under <log_dir>/state/ +resume_phase = true # keep the phase index across restarts (don't reset to 0) +auto_advance = true # advance when the current phase's status file says done_marker +done_marker = "## DONE" +kickoff_template = "prompts/kickoff.md" # project preamble; slots {phase_id}/{plan}/{status}/{role} +roles_dir = "prompts" # role prompt = <roles_dir>/<role>.md +handoff = { repo = ".", claim_pings = "adversary", review_pings = "builder", inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"], claim_pattern = "^claim", review_pattern = "^review", state_subdir = "machine-docs" } +on_complete = { trigger_file = ".run-on-complete", run = "reporter" } # run task agent on completion +phases = [ + { id = "p1", plan = "plans/p1.md", status = "STATUS-p1.md" }, + { id = "p2", plan = "plans/p2.md", status = "STATUS-p2.md", models = { builder = "claude-opus-4-8" } }, +] +``` + +- **Kickoff template.** A loop agent's prompt is `kickoff_template` (with `{phase_id}`, `{plan}`, + `{status}`, `{role}` substituted from the current phase) followed by `<roles_dir>/<role>.md`. + Both are project files; this repo ships generic starters in `prompts/`. There is no built-in + preamble text. +- **Per-phase model override.** A phase's `models = { builder = "...", adversary = "..." }` + overrides those agents' model for just that phase (matched on the agent's `role`). +- **Auto-advance.** Each heavy tick, if the current phase's `status` file (looked up in + `handoff.repo`'s `state_subdir/` then its root) contains a real `done_marker` — not a "Not + yet…" placeholder — the watchdog stops the loops, bumps the phase index, and restarts them on + the next phase. After the last phase it writes a `SEQUENCE-COMPLETE` marker under `log_dir` and + stops the loops (idempotent — no churn). Appending a phase later clears the stale marker and + resumes. 
On completion, an optional `on_complete.run` task agent fires if its `trigger_file` + exists under `log_dir`. +- **Handoff signalling.** The watchdog watches `handoff.repo`'s `origin/main` for commits whose + subject matches `claim_pattern` / `review_pattern`, and watches the two `inboxes` files. When a + claim lands it pings the `claim_pings` agent; a review pings `review_pings`; an inbox change + pings the relevant side. This is how the Builder and Adversary coordinate purely through git. + +--- + +## Config vs state + +- **Config** = `agents.toml` — declarative, version-controlled, the only source of truth. +- **State** = `<log_dir>/state/` — machine-written runtime only: `phase-idx` (current phase), + `<agent>.id` (resume id), `limited-<agent>.json` (active usage-limit window), + `kickoff-<agent>.txt` (the exact prompt last sent). Git-ignore your `log_dir`. +- **Env** = a one-off override for a *single* invocation only: `AGENT_MODEL_<AGENT>=…` / + `AGENT_BACKEND_<AGENT>=…`. The persisted watchdog ignores env and re-reads the file every tick — + deliberately, so env-vs-file drift can never silently revert a backend. 
+ +--- + +## The driver: verbs + +The recommended (not required) verb set — an AI project-orchestrator can rely on these being +present, but a harness is free to add more: + +``` +agents.py up [name…] start enabled agents (+ services + watchdog); use-or-create +agents.py down [name…] stop agents/services/watchdog (all, or named) +agents.py status table of every agent: kind, backend, model, watch, state, phase +agents.py watchdog the supervisor loop (what the watchdog session runs) +agents.py logs <name> tail that session's log +agents.py phase [show|next|set N] inspect / move the loop phase index +agents.py selftest regression-test the backend activity detector (needs no config) +agents.py init [dir] scaffold a starter agents.toml + prompts/ in a project dir + --config PATH use a specific config (default: ./agents.toml, else the one next to agents.py) +``` + +### The watchdog tick + +`agents.py watchdog` runs as the `watchdog` tmux session and **re-reads the config every +tick**. Each loop: + +- **signal tick** (`signal_interval`): handoff pings; for each watched agent the usage-limit check, + and for `heal+stall` agents the stall check; fire any due `wake`. +- **heavy tick** (`heavy_interval`): advance the loop phase if the current one is done; otherwise + heal each watched agent per its `watch` policy. When the sequence is complete the finished loops + stay stopped, but persistent agents stay supervised. + +**Usage-limit handling:** when an agent prints a limit banner, the watchdog parses the reset time, +arms a quiet window (never rebooting a limited agent), and at the end sends one probe to resume it +— re-arming if the banner re-prints. + +--- + +## Driving the harness from an AI project-orchestrator + +This harness is designed to be driven by an AI "project-orchestrator" (PO) that creates and runs +many projects, each pinning its own copy of this engine. The contract is intentionally **not +rigid** — the PO reads these docs and works out how to drive a project. What it can rely on: + +1. 
**One config, one driver.** Everything the PO needs to know about a project's agents is in that + project's `agents.toml`; everything it can *do* is a verb above. To inspect, `status`. To start + or stop, `up` / `down`. To move the phase, `phase`. +2. **Isolation by `session_prefix`.** Two projects never collide as long as their `session_prefix` + differ. The PO assigns each project a unique prefix at creation. +3. **State is on disk, not in the PO.** Phase index, resume ids and limit windows live under the + project's `log_dir`. The PO can restart a project (or the whole host) and the watchdog resumes + from there. +4. **Knowledge is one-directional.** A project repo contains nothing about the PO or the fleet — + it can be run by hand and would have no idea a PO exists. The PO's fleet registry is the only + record of which projects exist and at what engine ref. This repo never reaches "up" toward a PO. +5. **Submodule pin = the engine version.** A project pins this repo at a tag (e.g. `v0.1.0`) as a + submodule under `engine/`. Bumping is per-project and opt-in (`git submodule update --remote`); + one project's bump can't break another. + +A minimal project layout the PO scaffolds: + +``` +my-project/ # its own repo; knows nothing about the PO + agents.toml # harness config (this schema) + engine/ # this repo as a pinned submodule + prompts/ # role prompts + kickoff template + machine-docs/ # the loop pair's coordination files (STATUS/REVIEW/inboxes) + .ao-state/ # runtime state + logs (gitignored) + .env # project creds (never in git) +``` + +Run it by hand with `engine/agents.py up --config agents.toml`. 
+ +--- + +## Nix + +A `flake.nix` provides a reproducible devShell with the runtime deps (`python311` for stdlib +`tomllib`, plus `tmux` and `git`): + +```bash +nix develop # enter the shell +nix develop -c python3 agents.py selftest # or run one command in it +nix flake check # evaluate + build the devShell +``` + +The agent CLIs themselves (`claude`, `opencode`) are **external, non-Nix tools** — install them +per their own docs and make sure they are on `PATH` before launching live agents. The devShell +documents this in its banner. + +--- + +## Adding things + +- **Add an agent** — add an `[[agent]]` block; `agents.py up `. No code change. +- **Add a backend** — add a `[backend.]` block (`bin`, `prompt_delivery`, the regexes); + point an agent at it with `backend = ""`. +- **Add / append a phase** — add an entry to `[loop].phases`; the watchdog advances into it + automatically (clearing a stale `SEQUENCE-COMPLETE` if the sequence had finished). +- **Change a model or backend** — edit the field (or a phase's `models = {}`), then + `agents.py down && agents.py up `. The watchdog re-reads the file; it won't fight you. diff --git a/agent-log.py b/agent-log.py new file mode 100755 index 0000000..fd510cb --- /dev/null +++ b/agent-log.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +"""Clean, greppable transcript logs for agent-orchestrator agents (claude backend). + +The claude CLI writes a structured JSONL transcript of every session under +~/.claude/projects//.jsonl. This renders that into a readable, +greppable one-event-per-line log — WITHOUT touching the agent (read-only on a file it writes +anyway, so no slowdown and zero extra tokens). The raw `tmux pipe-pane` logs are TUI-escape +soup; use these instead. + +Agents are discovered from the harness config (the same agents.toml the driver reads), so there +is nothing project-specific in this file. 
An agent's transcript directory is derived from its +working `dir`; when several agents share a dir, give them a `log_signature = "..."` (a substring +of their kickoff) to disambiguate. + +Usage: + agent-log.py [--config PATH] render print the clean transcript to stdout + agent-log.py [--config PATH] tail [N] print the last N (default 40) rendered events + agent-log.py [--config PATH] follow-all keep .clean.log current for every agent + +Output logs (follow-all): /.clean.log +Each line: `HH:MM:SS [kind] ...` — kinds: user, asst, tool:, result, think (skipped by +default). Long text/results are truncated (full detail stays in the JSONL); newlines → ⏎. +""" +import json, os, re, sys, time, tomllib +from pathlib import Path + +PROJ = os.environ.get("CLAUDE_PROJECTS", os.path.expanduser("~/.claude/projects")) +MAXLEN = 800 # truncate any single rendered block to this many chars + + +def _cfg_path(argv): + if "--config" in argv: + return Path(argv[argv.index("--config") + 1]) + cwd_cfg = Path.cwd() / "agents.toml" + return cwd_cfg if cwd_cfg.exists() else Path(__file__).resolve().parent / "agents.toml" + + +def load_agents(cfg_path): + """Return {agent_name: {"slug": , "sig": }} and the + log_dir, derived from the harness config.""" + with open(cfg_path, "rb") as f: + raw = tomllib.load(f) + defaults = raw.get("defaults", {}) + base = Path(cfg_path).resolve().parent + proj_dir = (base / defaults.get("project_dir", ".")).resolve() + log_dir = os.path.join(str((proj_dir / defaults.get("log_dir", ".ao-state"))), "") + agents = {} + for a in raw.get("agent", []): + d = a.get("dir", defaults.get("dir", ".")) + dp = Path(os.path.expanduser(d)) + dp = dp if dp.is_absolute() else (proj_dir / dp) + slug = re.sub(r"[^a-zA-Z0-9]", "-", str(dp.resolve())) + agents[a["name"]] = {"slug": slug, "sig": a.get("log_signature")} + return agents, str((proj_dir / defaults.get("log_dir", ".ao-state"))) + + +def _first_user_text(path, limit=4000): + try: + with open(path) as f: + for _ in 
range(60): + ln = f.readline() + if not ln: + break + try: + o = json.loads(ln) + except Exception: + continue + if o.get("type") == "user": + c = (o.get("message") or {}).get("content") + if isinstance(c, str): + return c[:limit] + if isinstance(c, list): + return " ".join(b.get("text", "") for b in c + if isinstance(b, dict) and b.get("type") == "text")[:limit] + except FileNotFoundError: + return "" + return "" + + +def active_jsonl(meta): + """The agent's current transcript: newest *.jsonl in its slug dir (optionally filtered by the + kickoff signature, to disambiguate agents that share a working dir).""" + d = os.path.join(PROJ, meta["slug"]) + try: + files = [os.path.join(d, f) for f in os.listdir(d) if f.endswith(".jsonl")] + except FileNotFoundError: + return None + files.sort(key=lambda p: os.path.getmtime(p), reverse=True) + for p in files: + if not meta["sig"] or meta["sig"] in _first_user_text(p): + return p + return None + + +def _clip(s): + s = " ".join(str(s).split()) if s else "" + return s if len(s) <= MAXLEN else s[:MAXLEN] + " …[+%d]" % (len(s) - MAXLEN) + + +def render_line(o, show_think=False): + t = o.get("type") + if t not in ("user", "assistant"): + return [] + ts = (o.get("timestamp") or "")[11:19] or "--:--:--" + m = o.get("message") or {} + c = m.get("content") + out = [] + if isinstance(c, str): + if c.strip(): + out.append(f"{ts} [user] {_clip(c)}") + return out + if not isinstance(c, list): + return out + for b in c: + if not isinstance(b, dict): + continue + bt = b.get("type") + if bt == "text": + txt = b.get("text", "") + if txt.strip(): + out.append(f"{ts} [{'asst' if t=='assistant' else 'user'}] {_clip(txt)}") + elif bt == "thinking": + if show_think: + out.append(f"{ts} [think] {_clip(b.get('thinking',''))}") + elif bt == "tool_use": + inp = b.get("input") or {} + brief = (inp.get("command") or inp.get("file_path") or inp.get("path") + or inp.get("prompt") or json.dumps(inp)[:200]) + out.append(f"{ts} [tool:{b.get('name')}] 
{_clip(brief)}") + elif bt == "tool_result": + rc = b.get("content") + if isinstance(rc, list): + rc = " ".join(x.get("text", "") for x in rc if isinstance(x, dict)) + out.append(f"{ts} [result] {_clip(rc)}") + return out + + +def render_file(path, show_think=False): + lines = [] + with open(path) as f: + for ln in f: + try: + o = json.loads(ln) + except Exception: + continue + lines += render_line(o, show_think) + return lines + + +def cmd_render(agents, agent, show_think=False): + p = active_jsonl(agents[agent]) + if not p: + print(f"(no transcript found for {agent})", file=sys.stderr); sys.exit(1) + print(f"# {agent} ← {p}") + for l in render_file(p, show_think): + print(l) + + +def cmd_tail(agents, agent, n=40): + p = active_jsonl(agents[agent]) + if not p: + print(f"(no transcript found for {agent})", file=sys.stderr); sys.exit(1) + for l in render_file(p)[-n:]: + print(l) + + +def cmd_follow_all(agents, log_dir): + os.makedirs(log_dir, exist_ok=True) + state = {} # agent -> (path, byte_offset) + while True: + for agent, meta in agents.items(): + p = active_jsonl(meta) + if not p: + continue + prev_path, off = state.get(agent, (None, 0)) + if p != prev_path: + off = 0 + try: + size = os.path.getsize(p) + if off > size: + off = 0 + with open(p) as f: + f.seek(off) + chunk = f.read() + new_off = f.tell() + except FileNotFoundError: + continue + if chunk: + rendered = [] + for ln in chunk.splitlines(): + try: + o = json.loads(ln) + except Exception: + continue + rendered += render_line(o) + if rendered: + with open(os.path.join(log_dir, f"{agent}.clean.log"), "a") as out: + out.write("\n".join(rendered) + "\n") + state[agent] = (p, new_off) + time.sleep(5) + + +def main(): + argv = sys.argv[1:] + cfg_path = _cfg_path(argv) + argv = [a for i, a in enumerate(argv) + if a != "--config" and (i == 0 or argv[i-1] != "--config")] + agents, log_dir = load_agents(cfg_path) + cmd = argv[0] if argv else "follow-all" + if cmd == "render": + cmd_render(agents, argv[1], 
"--think" in argv) + elif cmd == "tail": + cmd_tail(agents, argv[1], int(argv[2]) if len(argv) > 2 and argv[2].isdigit() else 40) + elif cmd == "follow-all": + cmd_follow_all(agents, log_dir) + else: + print(__doc__); sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/agents.example.toml b/agents.example.toml new file mode 100644 index 0000000..c0af01e --- /dev/null +++ b/agents.example.toml @@ -0,0 +1,109 @@ +# agent-orchestrator — example project config. +# +# One file declares: which agents exist, their backend, model, prompt, kind, and how the +# watchdog supervises them. Read by agents.py (driver + watchdog). Runtime state (phase index, +# resume ids, limit windows) lives under /state/, NOT here. +# +# This example is self-contained: its agents use a dependency-free `demo` backend (a shell that +# just idles), so the whole project can be brought up and torn down with no external agent CLI — +# see ./smoke.sh. The `claude` and `opencode` backends below are the real ones; point an agent at +# them with `backend = "claude"` for a live run. 
+ +# ─────────────────────────── global watchdog cadence ─────────────────────────── +[watchdog] +signal_interval = 30 # s between handoff / stall / limit checks (light) +heavy_interval = 300 # s between heal / phase-advance checks +limit_probe_fallback = 300 # flat probe cadence when a reset time can't be parsed +limit_reset_slack = 45 # s past a parsed reset before probing +stall_grace = 180 # s of slack past a WAITING-UNTIL marker before a stall reboot + +# ─────────────────────────── defaults inherited by every agent ─────────────────────────── +[defaults] +session_prefix = "ao-example-" # REQUIRED — tmux namespace for this project (no implicit default) +log_dir = ".ao-state" # REQUIRED — logs + state/, resolved relative to this file +backend = "demo" +model = "" +watch = "none" # none | heal | heal+stall + +# ─────────────────────────── backends (declared as data) ─────────────────────────── +# A backend is fully described by config. `prompt_delivery` selects how the kickoff reaches the +# agent: "arg" (CLI argument, claude-style), "ping" (typed in after a TUI connects, opencode), +# or "exec" (a plain command; {kickoff}=prompt file, {session}=session name, {model}=model). 
+ +[backend.demo] # dependency-free backend used by this example's smoke run +bin = "echo '[demo] {session} up (kickoff: {kickoff})'; exec sleep 1000000" +prompt_delivery = "exec" + +[backend.claude] # the real claude backend +bin = "claude" +flags = "--dangerously-skip-permissions" +remote_control = true +supports_resume = true +prompt_delivery = "arg" +process_name = "claude" # used for backend-mismatch healing +submit_key = "Enter" +stall_idle = 300 +active_re = "esc to interrupt|Running tool|⠇|⠙|· \\d+" +limit_re = "spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)" +fatal_re = "redacted_thinking|blocks cannot be modified|cannot be modified" + +[backend.opencode] # the real opencode backend (a TUI; prompt typed after connect) +bin = "opencode" +attach = "{bin} attach {server} --dir {dir}" +server = "http://127.0.0.1:4096" +supports_resume = false +prompt_delivery = "ping" +process_name = "opencode" +footer_ui = true # static footer lingers after a turn → only the bottom = activity +log_grace = 180 +connect_delay = 12 +submit_key = "C-m" +model_env = true # pass the model via OPENCODE_CONFIG_CONTENT +stall_idle = 900 +active_re = "esc interrupt|thinking|inferring|running tool|tool call|preparing patch|reading|searching" +limit_re = "spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)" +fatal_re = "redacted_thinking|blocks cannot be modified|cannot be modified" + +# ─────────────────────────── agents ─────────────────────────── +# A minimal 2-agent loop pair: a Builder that does the work and an Adversary that verifies it. 
+ +[[agent]] +name = "builder" # tmux session: ao-example-builder +kind = "loop" # kickoff = kickoff_template + prompts/builder.md, per phase +role = "builder" + +[[agent]] +name = "adversary" # tmux session: ao-example-adversary +kind = "loop" +role = "adversary" + +# A persistent supervisor and a one-shot task are also supported: +# [[agent]] +# name = "orchestrator" +# kind = "persistent" +# backend = "claude" +# resume = true +# watch = "heal" +# wake = { interval = 3600, prompt_file = "prompts/supervise.md" } +# prompt = "You supervise this project. On startup, check status and report." + +# Non-AI helper services (started by a bare `up`, not AI sessions): +# [[service]] +# name = "cleanlogs" +# command = "python3 agent-log.py follow-all" + +# ─────────────────────────── the phase machine (kind="loop" agents) ─────────────────────────── +[loop] +state_file = "phase-idx" # under /state/ +resume_phase = true # keep the current index across restarts (don't reset to 0) +auto_advance = true # advance when the current phase's status file says the done_marker +done_marker = "## DONE" +kickoff_template = "prompts/kickoff.md" # project preamble; slots {phase_id}/{plan}/{status}/{role} +roles_dir = "prompts" # role prompt = /.md +handoff = { repo = ".", claim_pings = "adversary", review_pings = "builder", inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"], claim_pattern = "^claim", review_pattern = "^review", state_subdir = "machine-docs" } +# on_complete = { trigger_file = ".run-on-complete", run = "reporter" } + +phases = [ + { id = "demo1", plan = "examples/PLAN-demo1.md", status = "STATUS-demo1.md" }, + { id = "demo2", plan = "examples/PLAN-demo2.md", status = "STATUS-demo2.md", models = { builder = "claude-opus-4-8" } }, +] diff --git a/agents.py b/agents.py new file mode 100755 index 0000000..d7eab02 --- /dev/null +++ b/agents.py @@ -0,0 +1,929 @@ +#!/usr/bin/env python3 +"""agent-orchestrator — one driver, one config (agents.toml) for a fleet of agents. 
+ +A generic, reusable harness for running and supervising AI-agent sessions in tmux. Every +agent — a Builder/Adversary loop pair, a persistent supervisor, a one-shot task — is declared +in a single TOML config; the watchdog reads the SAME file, so there is no env-vs-file drift. +Nothing about any particular project lives in this code: paths, the loop kickoff preamble, the +handoff conventions, and the on-complete hook are all supplied by the project's config. + +Usage: + agents.py up [name...] start enabled agents (or just the named ones); use-or-create + agents.py down [name...] stop agents (or all) + agents.py status one table: every agent — kind, backend, model, session, phase + agents.py watchdog the supervisor loop (reads the config every tick) + agents.py logs tail an agent's session log + agents.py phase [set N|next|show] inspect / move the loop phase + agents.py selftest backend activity-detector regression checks (no config needed) + agents.py init [dir] scaffold a starter agents.toml + prompts/ in a project dir + +Options: + --config PATH config file (default: ./agents.toml, else