# cc-ci-orchestrator/cc-ci-plan/agents.toml

# cc-ci unified agent configuration — the single source of truth.
#
# One file declares: which agents exist, their backend, model, prompt, kind, and how the
# watchdog supervises them. Read by agents.py (driver + watchdog). Runtime state (phase
# index, resume ids, limit windows) lives under <log_dir>/state/, NOT here.
#
# Precedence: this file is authoritative. A one-off CLI override (env AGENT_<field>_<name>)
# affects only a single `agents.py` invocation; the persisted watchdog always reads this file.

# ─────────────────────────── global watchdog cadence ───────────────────────────
[watchdog]
# Light checks (handoff / stall / limit): seconds between runs.
signal_interval = 30
# Heavy checks (heal / phase-advance): seconds between runs.
heavy_interval = 300
# Flat probe cadence used when a limit reset time can't be parsed
# (presumably seconds, like the intervals above — confirm).
limit_probe_fallback = 300
# Seconds to wait past a parsed reset time before probing.
limit_reset_slack = 45

# ─────────────────────────── backends (declared as data) ───────────────────────────
[backend.claude]
bin = "claude"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
# "arg": the full prompt is passed as a CLI argument.
prompt_delivery = "arg"
submit_key = "Enter"
# NOTE(review): presumably seconds, and related to stall detection — confirm against agents.py.
stall_idle = 300
# Regular expressions, written as TOML literal strings so backslashes are not doubled.
active_re = 'esc to interrupt|⠋|⠙|⠹|⠸|⠼|⠴|⠦|⠧|⠇|⠏|Running tool|▣|Build ·|· \d+'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'

[backend.opencode]
bin = "/home/loops/.local/bin/opencode"
server = "http://127.0.0.1:4096"
supports_resume = false
# "ping": the prompt is sent after the TUI connects.
prompt_delivery = "ping"
# NOTE(review): presumably seconds to wait for the TUI to connect — confirm.
connect_delay = 12
submit_key = "C-m"
# Shell snippet: exports every variable defined in /srv/cc-ci/.testenv.
preamble = "set -a; . /srv/cc-ci/.testenv; set +a"
# NOTE(review): presumably seconds, and related to stall detection — confirm against agents.py.
stall_idle = 900
# Regular expressions, written as TOML literal strings.
active_re = 'esc interrupt|thinking|inferring|running tool|tool call|preparing patch|reading|searching'
limit_re = 'spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)'
fatal_re = 'redacted_thinking|blocks cannot be modified|cannot be modified'

# ─────────────────────────── defaults for every agent ───────────────────────────
[defaults]
backend = "claude"
model = "claude-sonnet-4-6"
dir = "/srv/cc-ci-orch"
# Allowed values: none | heal | heal+stall
watch = "heal"
log_dir = "/srv/cc-ci/.cc-ci-logs"

# ─────────────────────────── agents ───────────────────────────

[[agent]]
# tmux session: cc-ci-orchestrator
name = "orchestrator"
kind = "persistent"
model = "claude-opus-4-8"
# Resumed with: claude --resume <state/orchestrator.id>
resume = true
# Heal only: restart if dead / FATAL / backend-mismatch; never stall-reboot.
watch = "heal"
wake.interval = 3600
wake.prompt_file = "ai-progress-monitor-prompt.txt"
# NOTE(review): the prompt below tells the agent to run cc-ci-plan/launch.py, while this
# file's header names agents.py as the driver — confirm launch.py is still the right entry point.
prompt = """
STARTUP (auto-launch): you are the cc-ci orchestrator, just (re)launched, likely after a reboot. \
Do your AGENTS.md On-startup routine NOW: read cc-ci-plan/REBOOTS.md and run cc-ci-plan/launch.py \
status, then send a proactive PushNotification that you are online with the current phase and \
reboot count, and confirm cc-ci-loops.service brought the loops + watchdog back (relaunch with \
RESUME_PHASE=1 cc-ci-plan/launch.py start if not). Also read cc-ci-plan/JOURNAL.md for recent \
context before resuming supervision."""

[[agent]]
# tmux session: cc-ci-builder
name = "builder"
kind = "loop"
# Phase kickoff = phase preamble + prompts/builder.md
role = "builder"
dir = "/srv/cc-ci/cc-ci"
watch = "heal+stall"

[[agent]]
name = "adversary"
# Explicit session name: the established convention is abbreviated (logs, remote-control).
session = "cc-ci-adv"
kind = "loop"
role = "adversary"
dir = "/srv/cc-ci/cc-ci-adv"
watch = "heal+stall"

[[agent]]
# tmux session: cc-ci-assistant
name = "assistant"
kind = "persistent"
resume = true
watch = "none"
enabled = false
prompt = """
You are the cc-ci ASSISTANT. Read cc-ci-plan/JOURNAL.md for context, then wait for a specific \
plan/task from the orchestrator or operator. Work autonomously until the assigned task is \
complete, report the result, and then wait for the next assignment."""

[[agent]]
# tmux session: cc-ci-upgrader
name = "upgrader"
# One-shot: run to completion, then idle.
kind = "task"
dir = "/srv/cc-ci"
watch = "none"
# Launched on demand or by [loop].on_complete.
enabled = false
prompt = """
You are the cc-ci weekly UPGRADER. Invoke the /upgrade-all skill in DEFAULT mode (read \
/srv/cc-ci/.claude/skills/upgrade-all/SKILL.md for the full procedure), run it to completion, \
then report the summary of every PR opened and go idle."""

[[agent]]
# tmux session: cc-ci-report
name = "report"
kind = "task"
dir = "/srv/cc-ci"
# The report is authored by opus even when the upgrader runs sonnet.
model = "claude-opus-4-8"
watch = "none"
enabled = false
prompt = """
You generate the public weekly "Recipe Report". Run the /recipe-report skill now (full spec: \
/srv/cc-ci/.claude/skills/recipe-report/SKILL.md; creds in /srv/cc-ci/.testenv), then go idle."""

# Non-AI helper services (started by `up`, not AI sessions)
[[service]]
# tmux session: cc-ci-cleanlogs
name = "cleanlogs"
command = "python3 cc-ci-plan/agent-log.py follow-all"
dir = "/srv/cc-ci-orch"

# ─────────────────────────── the phase machine (kind="loop" agents) ───────────────────────────
[loop]
# File name of the phase index, stored under <log_dir>/state/.
state_file = "phase-idx"
# Keep the current index across restarts (don't reset to 0).
resume_phase = true
auto_advance = true
done_marker = "## DONE"
handoff.repo = "/srv/cc-ci/cc-ci"
handoff.claim_pings = "adversary"
handoff.review_pings = "builder"
handoff.inboxes = ["ADVERSARY-INBOX.md", "BUILDER-INBOX.md"]
on_complete.trigger_file = ".run-upgrade-on-complete"
on_complete.run = "upgrader"

# Ordered phase list. Transcribed verbatim from the live .phases-spec, with the duplicate
# `mailu` (old idx 7) REMOVED. Per-phase model overrides (previously stored in
# .loop-model[-adv]-<id> files) are inline as `models = { builder = ..., adversary = ... }`.
# Phase at the time of the de-dupe: cf55 (0-based index 9 in this list). The live index is
# runtime state — see state/phase-idx, not this comment.
# NOTE(review): the persisted index appears to be a position in this array, so inserting or
# removing an entry ahead of the current phase would shift it — confirm how agents.py handles that.
phases = [
  { id = "rcust",   plan = "recipe-custom-restructure-full-plan.md",     status = "STATUS-rcust.md" },
  { id = "shot",    plan = "plan-phase-shot-screenshots.md",             status = "STATUS-shot.md" },
  { id = "lvl5",    plan = "plan-phase-lvl5-lint-rung.md",               status = "STATUS-lvl5.md" },
  { id = "bsky",    plan = "plan-phase-bsky-fix.md",                     status = "STATUS-bsky.md" },
  # NOTE(review): "opus" is the only short model alias in this list; every other override uses a
  # full id such as "claude-opus-4-8" — confirm the alias is intended (kept as transcribed).
  { id = "dstamp",  plan = "plan-phase-dstamp-discourse-drift.md",       status = "STATUS-dstamp.md", models = { builder = "opus" } },
  { id = "mailu",   plan = "plan-phase-mailu-backup.md",                 status = "STATUS-mailu.md" },
  { id = "kuma",    plan = "plan-phase-kuma-monitor.md",                 status = "STATUS-kuma.md" },
  { id = "drone",   plan = "plan-phase-drone-enroll.md",                 status = "STATUS-drone.md" },
  { id = "cfold",   plan = "plan-phase-cfold-custom-folder.md",          status = "STATUS-cfold.md" },
  { id = "cf55",    plan = "plan-phase-cf55-gpt55-cfold-review.md",      status = "STATUS-cf55.md", models = { builder = "claude-sonnet-4-6", adversary = "claude-sonnet-4-6" } },
  { id = "pvfix",   plan = "plan-phase-pvfix-swarm-proxy.md",            status = "STATUS-pvfix.md" },
  { id = "pvcheck", plan = "plan-phase-pvcheck-post-proxy-verification.md", status = "STATUS-pvcheck.md" },
  { id = "ghost",   plan = "plan-phase-ghost-reeval.md",                 status = "STATUS-ghost.md" },
  { id = "cf48",    plan = "plan-phase-cf48-opus-cfold-review.md",       status = "STATUS-cf48.md", models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  { id = "pxgate",  plan = "plan-phase-pxgate-proxy-healthgate.md",      status = "STATUS-pxgate.md" },
  # agent-orchestrator build (builder=opus, adversary=sonnet) — see plan-agent-orchestrator.md
  { id = "aoeng",   plan = "plan-phase-aoeng-engine.md",                 status = "STATUS-aoeng.md",  models = { builder = "claude-opus-4-8", adversary = "claude-sonnet-4-6" } },
  { id = "aotest",  plan = "plan-phase-aotest-verify.md",                status = "STATUS-aotest.md", models = { builder = "claude-opus-4-8", adversary = "claude-sonnet-4-6" } },
  { id = "porepo",  plan = "plan-phase-porepo-project-orchestrator.md",  status = "STATUS-porepo.md", models = { builder = "claude-opus-4-8", adversary = "claude-sonnet-4-6" } },
  { id = "poe2e",   plan = "plan-phase-poe2e-end-to-end.md",             status = "STATUS-poe2e.md", models = { builder = "claude-opus-4-8", adversary = "claude-sonnet-4-6" } },
  # gitea full-test enrollment + LFS PR #1 verification — see plan-phase-gtea-gitea-fulltests.md (operator 2026-06-15)
  { id = "gtea",    plan = "plan-phase-gtea-gitea-fulltests.md",         status = "STATUS-gtea.md",  models = { builder = "claude-sonnet-4-6", adversary = "claude-sonnet-4-6" } },
  # dynamic upgrade base + per-recipe previous/ config (opus); validated on discourse PR #4 — see plan-phase-prevb-*.md (operator 2026-06-16)
  { id = "prevb",   plan = "plan-phase-prevb-previous-dynamic-base.md",  status = "STATUS-prevb.md",  models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # full all-recipe regression after prevb (sonnet) — see plan-phase-regall-*.md (operator 2026-06-16)
  { id = "regall",  plan = "plan-phase-regall-recipe-regression.md",     status = "STATUS-regall.md", models = { builder = "claude-sonnet-4-6", adversary = "claude-sonnet-4-6" } },
  # same-version upgrade-base gap: step back to newest-older-published when last-green==head (opus, design A; B in IDEAS) — see plan-phase-samever-*.md (operator 2026-06-17)
  { id = "samever", plan = "plan-phase-samever-older-base-fallback.md",   status = "STATUS-samever.md", models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # make the canonical sweep ACTUALLY work (substitute for the hollow nightly sweep) + upstream-sync + skip-unchanged; verify end-to-end (opus) — see plan-phase-canon-*.md (operator 2026-06-17)
  { id = "canon",   plan = "plan-phase-canon-canonical-sweep.md",         status = "STATUS-canon.md",   models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # fix incomplete per-recipe run history on the CI dashboard (capped at latest 100 Drone builds; 362 runs exist) — source from local /var/lib/cc-ci-runs (opus) — see plan-phase-dash-*.md (operator 2026-06-17)
  { id = "dash",    plan = "plan-phase-dash-recipe-history.md",           status = "STATUS-dash.md",    models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # CI-server settings.toml + SKIP_CANONICALS_FOR_UPGRADE + release-tag-first no-canonical fallback (opus) — see plan-phase-settings-*.md (operator 2026-06-17)
  { id = "settings", plan = "plan-phase-settings-ci-server-config.md",    status = "STATUS-settings.md", models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # single-source the harness runtime env so the sweep timer + Drone runner SHARE deps (no duplication) — root-cause fix for DEFECT-3 drift (opus) — see plan-phase-nixenv-*.md (operator 2026-06-17)
  { id = "nixenv",  plan = "plan-phase-nixenv-shared-runtime-env.md",     status = "STATUS-nixenv.md",  models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
  # investigate ALL canon-sweep failures (discourse/mattermost-lts/mumble/bluesky/gitea/keycloak) + FIX each via recipe PR or harness improvement (opus) — see plan-phase-redfix-*.md (operator 2026-06-17)
  { id = "redfix",  plan = "plan-phase-redfix-canon-sweep-failures.md",   status = "STATUS-redfix.md",  models = { builder = "claude-opus-4-8", adversary = "claude-opus-4-8" } },
]