#!/usr/bin/env bash
#
# launch.sh — start and supervise the two cc-ci autonomous loops + a phase-aware watchdog.
#
# Model (see plan.md §6 / §6.1): two INDEPENDENT Claude Code sessions —
#   • Builder   (tmux session: cc-ci-builder)  working clone /srv/cc-ci/cc-ci
#   • Adversary (tmux session: cc-ci-adv)      working clone /srv/cc-ci/cc-ci-adv
# coordinating only through the git repo on git.autonomic.zone.
#
# PHASES: the watchdog runs an ordered sequence of sub-phases (default: 1c then 1b). Each phase
# has its own plan + phase-namespaced loop-state files (STATUS-<id>.md etc.). When a phase's
# STATUS-<id>.md shows "## DONE", the watchdog AUTO-TRANSITIONS to the next phase; after the LAST
# phase it STOPS the loops and exits (a manual gate — e.g. check in before Phase 2).
#
# Four jobs: ITERATION (each agent's /loop), RESILIENCE (restart a dead loop), HANDOFF SIGNALLING
# (ping the waiting loop the moment its counterpart hands off), PHASE SEQUENCING (this file).
#
# Usage:
#   ./launch.sh start            # start the sequence at phase 0 + watchdog (stops/relaunches loops;
#                                #   RESUME_PHASE=1 keeps the saved phase index instead of resetting)
#   ./launch.sh watchdog         # run only the supervision loop in the foreground
#   ./launch.sh status           # show phase + session + DONE state
#   ./launch.sh logs builder|adversary|watchdog   # tail a session/log
#   ./launch.sh stop             # stop both loops + watchdog

set -euo pipefail

# Absolute path to this script, so the watchdog re-invokes it correctly regardless of cwd.
SELF="$(readlink -f "${BASH_SOURCE[0]}")"

# ----- config -------------------------------------------------------------
# Every setting below can be overridden from the environment; the value after ":-" is the default.
PLAN_DIR="${PLAN_DIR:-/srv/cc-ci/cc-ci-plan}"
CLAUDE_BIN="${CLAUDE_BIN:-claude}"
CLAUDE_FLAGS="${CLAUDE_FLAGS:---dangerously-skip-permissions}"
# REMOTE_CONTROL=1 → interactive --remote-control sessions (viewable at claude.ai/code), required
# for /loop. The box must be logged into the claude.ai account. =0 for plain interactive.
REMOTE_CONTROL="${REMOTE_CONTROL:-1}"
BUILDER_DIR="${BUILDER_DIR:-/srv/cc-ci/cc-ci}"          # Builder's repo clone
ADV_DIR="${ADV_DIR:-/srv/cc-ci/cc-ci-adv}"              # Adversary's repo clone
LOG_DIR="${LOG_DIR:-/srv/cc-ci/.cc-ci-logs}"
WATCH_INTERVAL="${WATCH_INTERVAL:-300}"   # seconds between HEAVY checks (phase DONE / restart dead loops)
SIGNAL_INTERVAL="${SIGNAL_INTERVAL:-30}"  # seconds between HANDOFF checks (ping the waiting loop)

# _require_uint NAME VALUE
# Exit 1 with a message on stderr unless VALUE is a non-empty string of decimal digits.
# NAME is used only in the error message.
_require_uint() {
  local name="$1" value="$2"
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    return 0
  fi
  printf '[launch] ERROR: %s must be a non-negative integer number of seconds (got "%s")\n' \
    "$name" "$value" >&2
  exit 1
}

# WATCH_INTERVAL is compared inside (( … )) by the watchdog, so a non-numeric value would be an
# arithmetic error there. Fail fast here instead, and force base 10 so a value such as "090" is
# not parsed as (invalid) octal.
_require_uint WATCH_INTERVAL "$WATCH_INTERVAL"
WATCH_INTERVAL=$(( 10#$WATCH_INTERVAL ))
# NOTE(review): SIGNAL_INTERVAL is only seen being passed to `sleep` in this view, so it is left
# unvalidated; apply _require_uint to it as well if it also feeds shell arithmetic.

BUILDER_SESSION="cc-ci-builder"
ADV_SESSION="cc-ci-adv"
WATCHDOG_SESSION="cc-ci-watchdog"

# Ordered phase sequence: each entry "id|planfile|statusbasename". The watchdog runs them in order,
# auto-transitions on the phase's "## DONE" (in BUILDER_DIR/<statusbasename>), and STOPS after the
# last one (manual gate). Override PHASES_SPEC (semicolon-separated) to change the sequence.
PHASES_SPEC="${PHASES_SPEC:-1c|plan-phase1c-full-reproducibility.md|STATUS-1c.md;1b|plan-phase1b-review-lint.md|STATUS-1b.md}"
# Split the spec on ';' into the PHASES array (one "id|planfile|statusbasename" string per phase).
IFS=';' read -r -a PHASES <<< "$PHASES_SPEC"
# File holding the index (0-based) of the phase currently being run.
PHASE_IDX_FILE="${PHASE_IDX_FILE:-$LOG_DIR/.phase-idx}"
# --------------------------------------------------------------------------

# log MSG...  — print a timestamped line on stdout.
log()  { printf '[launch %(%H:%M:%S)T] %s\n' -1 "$*"; }
# die MSG...  — print a timestamped error on stderr and exit 1.
die()  { log "ERROR: $*" >&2; exit 1; }
# need CMD    — die unless CMD is found on PATH.
need() { command -v "$1" >/dev/null 2>&1 || die "missing dependency: $1"; }

# ----- phase helpers ------------------------------------------------------

# cur_idx — print the current phase index read from PHASE_IDX_FILE.
# A missing, unreadable, empty or non-numeric file yields 0. Leading zeros are stripped so the
# result is always safe in $(( … )). An index past the end of PHASES is clamped to the last phase
# (rather than left out of range, where "${PHASES[idx]}" would be unset and abort under `set -u`).
cur_idx() {
  local raw last
  raw="$(cat "$PHASE_IDX_FILE" 2>/dev/null || echo 0)"
  [[ "$raw" =~ ^[0-9]+$ ]] || raw=0
  raw="${raw#"${raw%%[!0]*}"}"   # drop leading zeros ("08" would be invalid octal in arithmetic)
  raw="${raw:-0}"
  if (( ${#raw} > 9 )); then raw=999999999; fi   # keep absurdly long digit strings from overflowing
  last=$(( ${#PHASES[@]} - 1 ))
  if (( last >= 0 && raw > last )); then raw=$last; fi
  echo "$raw"
}

# _phase_field IDX N — print field N (1=id, 2=planfile, 3=statusbasename) of PHASES[IDX].
# Prints an empty line when the entry or the field does not exist. Uses the `read` builtin instead
# of forking `echo | cut` on every call.
_phase_field() {
  local entry="${PHASES[$1]-}" id="" plan="" status="" _rest=""
  IFS='|' read -r id plan status _rest <<< "$entry" || true
  case "$2" in
    1) printf '%s\n' "$id" ;;
    2) printf '%s\n' "$plan" ;;
    3) printf '%s\n' "$status" ;;
  esac
}

phase_id()     { _phase_field "$1" 1; }                    # $1 = phase index → phase id
phase_plan()   { _phase_field "$1" 2; }                    # $1 = phase index → plan file name
phase_status() { _phase_field "$1" 3; }                    # $1 = phase index → status file basename
phase_review() { echo "REVIEW-$(phase_id "$1").md"; }      # $1 = phase index → review file basename

# phase_done STATUS_BASENAME — succeed when $BUILDER_DIR/STATUS_BASENAME (read locally) contains a
# line starting with "##", whitespace, then "DONE". A missing/unreadable file counts as not done.
phase_done()   { grep -qE '^##[[:space:]]+DONE' "$BUILDER_DIR/$1" 2>/dev/null; }

# all_ids — print every phase id in sequence order, each followed by one space (no newline).
all_ids()      { local p; for p in "${PHASES[@]}"; do printf '%s ' "${p%%|*}"; done; }

# preflight — verify everything the loops need before anything is started; dies on the first
# problem. Checks: tmux and the claude CLI are on PATH, every PHASES entry has a non-empty id,
# plan file and status basename, every plan file exists under PLAN_DIR, and both role prompts
# exist. Creates LOG_DIR as a side effect.
preflight() {
  need tmux
  command -v "$CLAUDE_BIN" >/dev/null 2>&1 || die "claude CLI not found (set CLAUDE_BIN)"
  (( ${#PHASES[@]} > 0 )) || die "PHASES_SPEC defines no phases"
  local entry id plan status _rest
  for entry in "${PHASES[@]}"; do
    id="" plan="" status="" _rest=""
    IFS='|' read -r id plan status _rest <<< "$entry" || true
    # An entry without a status basename would make phase_done look at the directory itself and
    # never report DONE, so reject malformed entries up front.
    [[ -n "$id" && -n "$plan" && -n "$status" ]] \
      || die "malformed PHASES_SPEC entry '$entry' (want id|planfile|statusbasename)"
    [[ -f "$PLAN_DIR/$plan" ]] || die "missing phase plan $PLAN_DIR/$plan"
  done
  [[ -f "$PLAN_DIR/prompts/builder.md" ]]   || die "missing $PLAN_DIR/prompts/builder.md"
  [[ -f "$PLAN_DIR/prompts/adversary.md" ]] || die "missing $PLAN_DIR/prompts/adversary.md"
  mkdir -p "$LOG_DIR"
}

# session_alive NAME — succeed when a tmux session matching NAME exists.
session_alive() { tmux has-session -t "$1" 2>/dev/null; }

# Build the per-session kickoff (phase preamble + base role prompt) and launch claude interactively.
# role ∈ {builder, adversary}.
Passed as a POSITIONAL arg via inner $(cat ...) — never stdin +# (piping forces print mode and breaks /loop + remote-control). start_agent() { - local session="$1" workdir="$2" prompt_file="$3" - if session_alive "$session"; then - log "$session already running — leaving it" - return 0 - fi + local role="$1" session="$2" workdir="$3" + if session_alive "$session"; then log "$session already running — leaving it"; return 0; fi mkdir -p "$workdir" - log "starting $session (cwd=$workdir, remote_control=$REMOTE_CONTROL)" - # tmux gives claude a real PTY, so we run claude INTERACTIVELY (required for /loop + - # ScheduleWakeup). The kickoff prompt is passed as a POSITIONAL argument via an inner - # `$(cat ...)` — NOT piped on stdin, because piping forces print/headless mode which - # breaks both interactivity and --remote-control. The `\$(...)` defers to the inner shell - # so the whole multi-line prompt arrives as a single argument. + local idx pid plan status kf + idx="$(cur_idx)"; pid="$(phase_id "$idx")"; plan="$(phase_plan "$idx")"; status="$(phase_status "$idx")" + kf="$LOG_DIR/.kickoff-$session.txt" + { + cat < "$kf" + log "starting $session (phase=$pid, plan=$plan, cwd=$workdir, rc=$REMOTE_CONTROL)" local rc="" [[ "$REMOTE_CONTROL" == "1" ]] && rc="--remote-control '$session'" tmux new-session -d -s "$session" -c "$workdir" \ - "$CLAUDE_BIN $rc $CLAUDE_FLAGS \"\$(cat '$prompt_file')\"" - # Log the pane WITHOUT redirecting claude's stdout: a `>>log` redirect makes stdout a - # non-tty and drops claude out of interactive/remote-control mode. pipe-pane mirrors the - # live pane to the log file while claude keeps the PTY tmux gave it. 
+ "$CLAUDE_BIN $rc $CLAUDE_FLAGS \"\$(cat '$kf')\"" tmux pipe-pane -o -t "$session" "cat >> '$LOG_DIR/$session.log'" } start_loops() { - start_agent "$BUILDER_SESSION" "$BUILDER_DIR" "$PLAN_DIR/prompts/builder.md" - start_agent "$ADV_SESSION" "$ADV_DIR" "$PLAN_DIR/prompts/adversary.md" + start_agent builder "$BUILDER_SESSION" "$BUILDER_DIR" + start_agent adversary "$ADV_SESSION" "$ADV_DIR" } -# Returns 0 (true) if the repo's STATUS.md contains a "## DONE" heading. -is_done() { - [[ -n "$CC_CI_REPO" ]] || return 1 - if [[ ! -d "$WATCH_DIR/.git" ]]; then - git clone --depth 1 --branch "$CC_CI_BRANCH" "$CC_CI_REPO" "$WATCH_DIR" >/dev/null 2>&1 || return 1 - fi - git -C "$WATCH_DIR" fetch --depth 1 origin "$CC_CI_BRANCH" >/dev/null 2>&1 || return 1 - git -C "$WATCH_DIR" reset --hard "origin/$CC_CI_BRANCH" >/dev/null 2>&1 || return 1 - grep -qE '^##[[:space:]]+DONE' "$WATCH_DIR/STATUS.md" 2>/dev/null +stop_loops() { + local s + for s in "$BUILDER_SESSION" "$ADV_SESSION"; do + if session_alive "$s"; then log "killing $s"; tmux kill-session -t "$s" || true; fi + done } # Wake a loop by typing a one-line message into its tmux session (queues if mid-turn). @@ -120,44 +133,32 @@ ping_session() { tmux send-keys -t "$s" -l -- "$msg" 2>/dev/null && { sleep 0.3; tmux send-keys -t "$s" Enter 2>/dev/null; } } -# Edge-triggered handoff signalling: the moment one loop produces the artifact the other is -# waiting on, ping the waiting loop so it wakes immediately instead of idling out its sleep. -# Reads the loops' local working clones (same host) for the fastest signal; the pinged loop -# still pulls the real state on wake. -# -# IMPORTANT: STATUS.md keeps *historical* gate lines ("Gate: Mn — CLAIMED, awaiting Adversary") -# even after they PASS (the Builder appends "→ Mn PASS"). So we cannot ping on the mere presence -# of "CLAIMED". 
We track the set of gates that are **claimed-and-awaiting but NOT yet PASS** (by -# gate id), and ping the Adversary ONLY when a gate *newly enters* that set — never on the -# watchdog's first observation (baseline), never when a line is merely edited or marked PASS. -_wd_awaiting="" # current set of unverified-claimed gate ids (newline-separated) -_wd_baselined="" # set once the first observation has established a baseline (no ping then) -_wd_last_review="" +# Edge-triggered handoff signalling for the CURRENT phase. Reads the loops' local clones. +# Ping the Adversary only when a gate id NEWLY appears on a "CLAIMED … awaiting" line (never on +# the baseline / restart / a passed-but-kept line). Ping the Builder when the phase REVIEW changes. +_wd_awaiting=""; _wd_baselined=""; _wd_last_review="" +handoff_reset() { _wd_awaiting=""; _wd_baselined=""; _wd_last_review=""; } # call on phase transition handoff_check() { - local sf="$BUILDER_DIR/STATUS.md" rf="$ADV_DIR/REVIEW.md" cur now added - # Builder -> Adversary: a gate newly CLAIMED & awaiting verification (and not already PASS). + local idx sf rf cur now added + idx="$(cur_idx)" + sf="$BUILDER_DIR/$(phase_status "$idx")"; rf="$ADV_DIR/$(phase_review "$idx")" if [[ -f "$sf" ]]; then - # gate ids appearing on any "CLAIMED … awaiting" line. We ping only when an id NEWLY appears - # vs the previous observation, so: a new claim pings; a gate passing (its line is kept, not - # removed) does not re-ping; editing evidence does not ping; watchdog restart re-baselines silently. - now="$(grep -iE 'CLAIMED.*awaiting' "$sf" 2>/dev/null \ - | grep -oiE 'M[0-9]+(\.[0-9]+)?' 
| tr '[:lower:]' '[:upper:]' | sort -u)" + now="$(grep -iE 'CLAIMED.*awaiting' "$sf" 2>/dev/null | grep -oiE 'M[0-9]+(\.[0-9]+)?|[A-Z][0-9]+' | tr '[:lower:]' '[:upper:]' | sort -u)" if [[ -n "$_wd_baselined" ]]; then added="$(comm -13 <(printf '%s\n' "$_wd_awaiting" | sort -u) <(printf '%s\n' "$now" | sort -u) | grep -vE '^$' || true)" if [[ -n "$added" ]]; then log "handoff: gate(s) newly awaiting verification: $(echo $added) -> pinging Adversary" - ping_session "$ADV_SESSION" "watchdog ping: the Builder has CLAIMED milestone gate(s) [$(echo $added)] in STATUS.md and is awaiting your verification. Pull and verify now." + ping_session "$ADV_SESSION" "watchdog ping: the Builder CLAIMED gate(s) [$(echo $added)] in $(phase_status "$idx") and is awaiting your verification. Pull and verify now." fi fi _wd_awaiting="$now"; _wd_baselined=1 fi - # Adversary -> Builder: REVIEW.md changed (a verdict/PASS/FAIL or a new finding). if [[ -f "$rf" ]]; then cur="$(md5sum "$rf" 2>/dev/null | awk '{print $1}')" if [[ -n "$cur" && "$cur" != "$_wd_last_review" ]]; then [[ -n "$_wd_last_review" ]] && { - log "handoff: REVIEW.md changed -> pinging Builder" - ping_session "$BUILDER_SESSION" "watchdog ping: the Adversary updated REVIEW.md (a verdict or finding). Pull and act now — if it PASSes your gate, proceed; if it's a finding, address it." + log "handoff: $(phase_review "$idx") changed -> pinging Builder" + ping_session "$BUILDER_SESSION" "watchdog ping: the Adversary updated $(phase_review "$idx") (a verdict or finding). Pull and act now — if it PASSes your gate, proceed; if it's a finding, address it." 
} _wd_last_review="$cur" fi @@ -165,27 +166,33 @@ handoff_check() { } watchdog_loop() { - log "watchdog up (signal=${SIGNAL_INTERVAL}s, heavy=${WATCH_INTERVAL}s, repo=${CC_CI_REPO:-})" - local elapsed="$WATCH_INTERVAL" # run a heavy check on the first tick too + local idx pid status next + idx="$(cur_idx)"; pid="$(phase_id "$idx")" + log "watchdog up (phase=$pid [$((idx+1))/${#PHASES[@]}], seq='$(all_ids)', signal=${SIGNAL_INTERVAL}s, heavy=${WATCH_INTERVAL}s)" + local elapsed="$WATCH_INTERVAL" while true; do - # Fast path every tick: ping a loop the moment its counterpart hands off. handoff_check - # Heavy path every WATCH_INTERVAL: DONE detection + restart dead loops. if (( elapsed >= WATCH_INTERVAL )); then elapsed=0 - if is_done; then - log "STATUS.md reports ## DONE — stopping loops." - stop_loops - log "watchdog exiting (project complete)." - exit 0 - fi - if ! session_alive "$BUILDER_SESSION"; then - log "builder session gone — restarting" - start_agent "$BUILDER_SESSION" "$BUILDER_DIR" "$PLAN_DIR/prompts/builder.md" - fi - if ! session_alive "$ADV_SESSION"; then - log "adversary session gone — restarting" - start_agent "$ADV_SESSION" "$ADV_DIR" "$PLAN_DIR/prompts/adversary.md" + idx="$(cur_idx)"; pid="$(phase_id "$idx")"; status="$(phase_status "$idx")" + if phase_done "$status"; then + next=$((idx + 1)) + if (( next < ${#PHASES[@]} )); then + log "PHASE $pid DONE (## DONE in $status) — auto-transitioning to $(phase_id "$next")." + stop_loops + echo "$next" > "$PHASE_IDX_FILE" + handoff_reset + start_loops + else + log "PHASE SEQUENCE COMPLETE (last phase $pid DONE). Stopping loops — MANUAL CHECK-IN required before Phase 2." + stop_loops + printf 'cc-ci phase sequence complete %(%F %T)T. Phases: %s. Loops stopped; manual check-in required before Phase 2.\n' -1 "$(all_ids)" > "$LOG_DIR/SEQUENCE-COMPLETE" + log "watchdog exiting." 
+ exit 0 + fi + else + session_alive "$BUILDER_SESSION" || { log "builder gone — restarting (phase $pid)"; start_agent builder "$BUILDER_SESSION" "$BUILDER_DIR"; } + session_alive "$ADV_SESSION" || { log "adversary gone — restarting (phase $pid)"; start_agent adversary "$ADV_SESSION" "$ADV_DIR"; } fi fi sleep "$SIGNAL_INTERVAL" @@ -194,38 +201,33 @@ watchdog_loop() { } start_watchdog() { - if session_alive "$WATCHDOG_SESSION"; then - log "watchdog already running" - return 0 - fi + if session_alive "$WATCHDOG_SESSION"; then log "watchdog already running"; return 0; fi log "starting watchdog" tmux new-session -d -s "$WATCHDOG_SESSION" -c "$PLAN_DIR" \ "exec >>'$LOG_DIR/watchdog.log' 2>&1; '$SELF' watchdog" } -stop_loops() { - for s in "$BUILDER_SESSION" "$ADV_SESSION"; do - if session_alive "$s"; then log "killing $s"; tmux kill-session -t "$s" || true; fi - done -} - cmd_status() { + local idx pid; idx="$(cur_idx)"; pid="$(phase_id "$idx")" + echo " phase: $pid [$((idx+1))/${#PHASES[@]}] plan=$(phase_plan "$idx") status=$(phase_status "$idx")" + local s for s in "$BUILDER_SESSION" "$ADV_SESSION" "$WATCHDOG_SESSION"; do if session_alive "$s"; then echo " $s: RUNNING"; else echo " $s: stopped"; fi done - if [[ -n "$CC_CI_REPO" ]]; then - if is_done; then echo " project: ## DONE"; else echo " project: in progress"; fi - else - echo " project: (CC_CI_REPO unset — DONE-detection disabled)" - fi + if phase_done "$(phase_status "$idx")"; then echo " phase $pid: ## DONE"; else echo " phase $pid: in progress"; fi + [[ -f "$LOG_DIR/SEQUENCE-COMPLETE" ]] && echo " >>> $(cat "$LOG_DIR/SEQUENCE-COMPLETE")" } case "${1:-}" in start) preflight + # Fresh sequence: stop any running loops, reset to phase 0 (unless RESUME_PHASE=1 keeps the idx). + stop_loops + if [[ "${RESUME_PHASE:-}" != "1" ]]; then echo 0 > "$PHASE_IDX_FILE"; fi + rm -f "$LOG_DIR/SEQUENCE-COMPLETE" start_loops start_watchdog - log "started. 
inspect with: ./launch.sh status | attach: tmux attach -t $BUILDER_SESSION" + log "started at phase $(phase_id "$(cur_idx)"). status: ./launch.sh status | attach: tmux attach -t $BUILDER_SESSION" ;; watchdog) preflight; watchdog_loop ;; status) cmd_status ;; @@ -244,22 +246,19 @@ case "${1:-}" in ;; *) cat <} - CLAUDE_BIN = $CLAUDE_BIN - CLAUDE_FLAGS = $CLAUDE_FLAGS - REMOTE_CONTROL = $REMOTE_CONTROL (1 = interactive --remote-control, viewable at claude.ai/code) - BUILDER_DIR = $BUILDER_DIR - ADV_DIR = $ADV_DIR - WATCH_INTERVAL = ${WATCH_INTERVAL}s +Phase sequence (auto-transition on per-phase ## DONE; STOP after the last = manual gate): + $(all_ids) +Env: CLAUDE_BIN=$CLAUDE_BIN REMOTE_CONTROL=$REMOTE_CONTROL WATCH_INTERVAL=${WATCH_INTERVAL}s SIGNAL_INTERVAL=${SIGNAL_INTERVAL}s + PHASES_SPEC='$PHASES_SPEC' + RESUME_PHASE=1 to keep the current phase index instead of resetting to 0. EOF ;; esac