Reboot survival for the Pi orchestrator host: - systemd unit cc-ci-plan/systemd/cc-ci-loops.service (installed + enabled): on boot records the reboot, starts loops+watchdog (RESUME_PHASE=1), and resumes the orchestrator session. - reboot-log.sh: boot_id-gated reboot record -> REBOOTS.md (manual restarts don't count). - launch-orchestrator.sh: injects an AGENTS.md startup nudge so an auto-resumed orchestrator announces itself (PushNotification) + reports reboots. - AGENTS.md: on-startup notify routine documented. Plans/tooling accumulated this session: - plan-phase1d (generic suite), 1e (harness corrections), phase4 (final review), sso-dep-testing, orchestrator-migration (parked), test-e2e-testme-acceptance. - launch.sh: 1d/1e/2/2b/3/4 phase sequence, machine-docs-aware state resolution, limit-stall re-nudge, INBOX side-channel detection. - plan.md §6.1/§7: artifact-layer isolation, INBOX, 5-min long-run polling, DEFERRED. - prompts: isolation discipline + INBOX + pacing. - .gitignore: harden (.sops/, cc-ci-secrets/, .claude/, *.tmp.*). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
118 lines
6.1 KiB
Bash
Executable File
118 lines
6.1 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
# launch-orchestrator.sh — start/resume the cc-ci ORCHESTRATOR session in tmux under remote-control.
#
# The orchestrator (see /srv/cc-ci/AGENTS.md) is the long-lived SUPERVISORY session: it watches the
# Builder/Adversary loops, reads their logs/STATUS, edits the plan/prompts, restarts stuck loops, and
# owns the VM-level fallback. It is SEPARATE from the loops that launch.sh manages — this script only
# brings the orchestrator back (e.g. after a reboot, which kills the tmux server and every session in
# it). The conversation itself survives on disk across exits/reboots; remote-control only stays
# connected while the process is alive, so recovery = relaunch the process and re-attach by --resume.
|
|
#
|
|
# Naming: tmux session AND remote-control name are both "cc-ci-orchestrator", matching the loop
# sessions cc-ci-builder / cc-ci-adv / cc-ci-watchdog.
|
|
#
|
|
# Usage:
#   ./launch-orchestrator.sh start    # resume the persistent orchestrator session (DEFAULT)
#   ./launch-orchestrator.sh fresh    # start a NEW orchestrator session (no --resume)
#   ./launch-orchestrator.sh status   # show tmux + remote-control state
#   ./launch-orchestrator.sh attach   # tmux attach to the session (Ctrl-b d to detach)
#   ./launch-orchestrator.sh stop     # kill the tmux session (conversation persists on disk)
|
|
#
|
|
# The persistent session id is read from $ID_FILE (seeded on first run with DEFAULT_ID). A Claude
# session keeps the SAME id across --resume, so this stays valid across reboots. To point the script
# at a different session, edit that file or export ORCH_SESSION_ID.
|
|
|
|
# Strict mode: abort on unhandled errors, on unset variables, and on a failure anywhere in a pipeline.
set -euo pipefail

# ----- config -------------------------------------------------------------
# Each setting takes its value from the named environment variable when that is set, otherwise the
# default shown. DEFAULT_ID is the only fixed value.
SESSION="${ORCH_SESSION:-cc-ci-orchestrator}"   # tmux session name == remote-control name
WORKDIR="${ORCH_DIR:-/srv/cc-ci}"               # orchestrator cwd (its claude project dir)
CLAUDE_BIN="${CLAUDE_BIN:-claude}"
CLAUDE_FLAGS="${CLAUDE_FLAGS:---dangerously-skip-permissions}"
# REMOTE_CONTROL=1 → --remote-control session, viewable/steerable at claude.ai/code. Needs the box
# logged into the claude.ai account. =0 for a plain local interactive session.
REMOTE_CONTROL="${REMOTE_CONTROL:-1}"
LOG_DIR="${LOG_DIR:-/srv/cc-ci/.cc-ci-logs}"
ID_FILE="${ORCH_ID_FILE:-$LOG_DIR/.orchestrator-session-id}"
DEFAULT_ID="34a80a99-b37e-4809-b8da-ccc9fafe785e" # the orchestrator session as of 2026-05-28
# Startup nudge injected as the resumed session's first turn, so an AUTO-launched orchestrator (e.g.
# cc-ci-loops.service ExecStartPost after a reboot) actually RUNS its AGENTS.md startup routine —
# announce itself + report reboots — instead of resuming silently and waiting. Set empty to disable:
# the expansion uses '-' (not ':-'), so an explicitly empty ORCH_STARTUP_PROMPT is kept as empty.
# Must contain NO single quotes (it is single-quoted into the tmux command).
STARTUP_PROMPT="${ORCH_STARTUP_PROMPT-STARTUP (auto-launch): you are the cc-ci orchestrator, just (re)launched, likely after a reboot. Do your AGENTS.md On-startup routine NOW: read cc-ci-plan/REBOOTS.md and run cc-ci-plan/launch.sh status, then send a proactive PushNotification that you are online with the current phase and reboot count, and confirm cc-ci-loops.service brought the loops + watchdog back (relaunch with RESUME_PHASE=1 cc-ci-plan/launch.sh start if not). Then resume supervising.}"
# --------------------------------------------------------------------------
|
|
|
|
# log MSG... — print one line on stdout: "[orchestrator HH:MM:SS] MSG", stamped with the current time.
log() {
  local stamp
  # printf's %(fmt)T with argument -1 formats the current time without forking date(1).
  printf -v stamp '%(%H:%M:%S)T' -1
  printf '[orchestrator %s] %s\n' "$stamp" "$*"
}
|
|
# die MSG... — report MSG through log() with an "ERROR: " prefix, then exit the script with status 1.
die() {
  local reason="$*"
  log "ERROR: $reason"
  exit 1
}
|
|
# session_alive — succeed (status 0) iff a tmux session named exactly $SESSION exists.
# The '=' prefix forces an exact-name match; a bare name also matches by prefix or fnmatch pattern,
# which could report some other session (e.g. "$SESSION-old") as the orchestrator.
# tmux's own error output (no server / no such session) is discarded; only the status matters.
session_alive() {
  tmux has-session -t "=$SESSION" 2>/dev/null
}
|
|
|
|
# preflight — verify everything start() needs and create on-disk state; exits via die() on failure.
# Globals read: CLAUDE_BIN, WORKDIR, LOG_DIR, ID_FILE, DEFAULT_ID.
# Effects:      creates $LOG_DIR; seeds $ID_FILE with $DEFAULT_ID when the file does not exist yet
#               (an existing file is never overwritten).
preflight() {
  command -v tmux >/dev/null 2>&1 || die "missing dependency: tmux"
  command -v "$CLAUDE_BIN" >/dev/null 2>&1 || die "claude CLI not found (set CLAUDE_BIN)"
  [[ -d "$WORKDIR" ]] || die "workdir not found: $WORKDIR"
  mkdir -p -- "$LOG_DIR" || die "cannot create log dir: $LOG_DIR"
  if [[ ! -f "$ID_FILE" ]]; then
    # ID_FILE can be pointed outside LOG_DIR (ORCH_ID_FILE), so make sure its own directory exists
    # before seeding it.
    mkdir -p -- "$(dirname -- "$ID_FILE")" || die "cannot create directory for id file: $ID_FILE"
    printf '%s\n' "$DEFAULT_ID" > "$ID_FILE" || die "cannot write id file: $ID_FILE"
  fi
}
|
|
|
|
# resume_id — print the session id to resume.
# Precedence: a non-empty $ORCH_SESSION_ID wins; otherwise the contents of $ID_FILE; if that file
# cannot be read, $DEFAULT_ID.
resume_id() {
  local chosen="${ORCH_SESSION_ID:-}"
  if [[ -z "$chosen" ]]; then
    chosen="$(cat "$ID_FILE" 2>/dev/null || echo "$DEFAULT_ID")"
  fi
  echo "$chosen"
}
|
|
|
|
# _sh_squote STR — print STR wrapped in single quotes so a POSIX shell re-parses it as one literal
# word. Each embedded single quote is rewritten as '\'' (close quote, escaped quote, reopen quote).
_sh_squote() {
  local text="$1" sq="'" esc="'\\''"
  text=${text//$sq/$esc}
  printf "'%s'" "$text"
}

# start [MODE] — launch claude in a detached tmux session named $SESSION.
#   MODE  "resume" (default): pass --resume with the id printed by resume_id (skipped if empty).
#         anything else ("fresh"): start without --resume.
# Runs preflight first. If the session already exists it is left alone and start returns 0.
# The pane output is appended to $LOG_DIR/$SESSION.log via tmux pipe-pane.
start() {
  local mode="${1:-resume}"
  preflight
  if session_alive; then
    log "$SESSION already running — leaving it (use '$0 stop' first to relaunch)"
    return 0
  fi

  local rc="" resume="" id="" prompt_arg="" log_file=""
  if [[ "$REMOTE_CONTROL" == "1" ]]; then
    rc="--remote-control $(_sh_squote "$SESSION")"
  fi
  if [[ "$mode" == "resume" ]]; then
    id="$(resume_id)"
    if [[ -n "$id" ]]; then
      resume="--resume $(_sh_squote "$id")"
    fi
    log "starting $SESSION (resume=$id, cwd=$WORKDIR, rc=$REMOTE_CONTROL)"
  else
    log "starting $SESSION FRESH (no resume, cwd=$WORKDIR, rc=$REMOTE_CONTROL)"
  fi

  # Startup nudge as a POSITIONAL prompt (not stdin — stdin would force print mode and break
  # remote-control). On --resume this appends as the session's next turn, triggering the AGENTS.md
  # startup routine (announce + report reboots). Empty STARTUP_PROMPT => clean resume, no nudge.
  if [[ -n "$STARTUP_PROMPT" ]]; then
    prompt_arg="$(_sh_squote "$STARTUP_PROMPT")"
  fi

  # tmux hands this single string to a shell, so every embedded value is quoted by _sh_squote.
  # CLAUDE_BIN and CLAUDE_FLAGS stay unquoted on purpose: CLAUDE_FLAGS may carry several flags.
  tmux new-session -d -s "$SESSION" -c "$WORKDIR" \
    "$CLAUDE_BIN $resume $rc $CLAUDE_FLAGS $prompt_arg"

  log_file="$(_sh_squote "$LOG_DIR/$SESSION.log")"
  # -o: only open the pipe if none is open yet. A failure here usually means the session is gone
  # already (e.g. claude exited immediately), so report it instead of aborting without a message.
  tmux pipe-pane -o -t "$SESSION" "cat >> $log_file" \
    || die "could not attach log pipe to $SESSION (did the session exit immediately?)"
  log "started. status: $0 status | attach: tmux attach -t $SESSION"
}
|
|
|
|
# usage — print the command summary and the current effective settings on stdout.
usage() {
  cat <<EOF
cc-ci orchestrator launcher

  $0 start    resume the persistent orchestrator session in tmux + remote-control (default)
  $0 fresh    start a NEW orchestrator session (no --resume)
  $0 status   show tmux + remote-control state and the resume id
  $0 attach   tmux attach to the session
  $0 stop     kill the tmux session (conversation persists on disk)

Env (current values): ORCH_SESSION=$SESSION ORCH_DIR=$WORKDIR REMOTE_CONTROL=$REMOTE_CONTROL CLAUDE_BIN=$CLAUDE_BIN
EOF
}

# Command dispatch: the first argument selects the action; no argument means "start".
case "${1:-start}" in
  start) start resume ;;
  fresh) start fresh ;;
  stop)
    if session_alive; then
      log "killing $SESSION"
      # Best effort: the session may vanish between the check and the kill.
      tmux kill-session -t "$SESSION" || true
    else
      log "$SESSION not running"
    fi
    ;;
  status)
    if session_alive; then
      log "$SESSION: RUNNING"
      # [r] keeps grep from matching its own command line; finding no process is not an error.
      ps -eo pid,etime,args | grep "[r]emote-control $SESSION" || true
    else
      log "$SESSION: stopped"
    fi
    # Report the id through resume_id so status shows what 'start' would actually resume,
    # including an ORCH_SESSION_ID override.
    if [[ -n "${ORCH_SESSION_ID:-}" ]]; then
      log "resume id: $(resume_id) (from ORCH_SESSION_ID; file: $ID_FILE)"
    else
      log "resume id: $(resume_id) (file: $ID_FILE)"
    fi
    ;;
  attach) exec tmux attach -t "$SESSION" ;;
  help | -h | --help) usage ;;
  *)
    # Unknown subcommand: show usage on stderr and fail so callers (e.g. a unit file) notice typos.
    usage >&2
    exit 2
    ;;
esac
|