From 04fdefcd3989e3514d8c6f9968b9b7dd3f42e3a2 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 2 Jun 2026 02:10:13 +0000 Subject: [PATCH] =?UTF-8?q?plan:=20overnight=20run=20=E2=80=94=20after=20a?= =?UTF-8?q?ssistant,=20run=20/upgrade-all=20+=20morning=20report?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bash runner (cheap polling, no claude budget) that gates on the assistant's PR-consolidation done-marker, waits past the usage-limit reset (~03:30 UTC) and for the loops to idle, runs the weekly /upgrade-all (DEFAULT, never merges), then writes overnight-report-YYYY-MM-DD.md and pings the orchestrator to notify. One-off; the Sunday 02:00 timer is unchanged. Co-Authored-By: Claude Opus 4.8 --- cc-ci-plan/overnight-run.sh | 92 ++++++++++++++++++++++++++++++++ cc-ci-plan/plan-overnight-run.md | 32 +++++++++++ 2 files changed, 124 insertions(+) create mode 100644 cc-ci-plan/overnight-run.sh create mode 100644 cc-ci-plan/plan-overnight-run.md diff --git a/cc-ci-plan/overnight-run.sh b/cc-ci-plan/overnight-run.sh new file mode 100644 index 0000000..1bae2e8 --- /dev/null +++ b/cc-ci-plan/overnight-run.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# Overnight runner — after the Assistant finishes consolidating recipe PRs, run the weekly +# /upgrade-all (full test run over every enrolled recipe) and collect a morning report. +# +# Polls in pure bash (NO claude) so it doesn't burn the shared claude usage budget — only the +# /upgrade-all run itself spends tokens. Runs detached in tmux session `cc-ci-overnight`. 
+#
+#   tmux new-session -d -s cc-ci-overnight 'bash /srv/cc-ci/cc-ci-plan/overnight-run.sh'
+set -uo pipefail  # no -e: a failing command does not abort the run
+
+LOG=/srv/cc-ci/.cc-ci-logs/overnight-run.log
+DONE_MARKER=/srv/cc-ci/.cc-ci-logs/pr-consolidation.done   # Assistant writes this when consolidation is finished
+PLAN=/srv/cc-ci/cc-ci-plan
+ORCH=cc-ci-orchestrator-vm
+TODAY=$(date -u +%Y-%m-%d)  # UTC date, fixed once at start; used in the report and summary file names
+REPORT=/srv/cc-ci/.cc-ci-logs/overnight-report-${TODAY}.md
+SUMMARY=/srv/cc-ci/.cc-ci-logs/upgrades/upgrade-all-${TODAY}.md
+
+log(){ printf '[overnight %s] %s\n' "$(date -u +%FT%TZ)" "$*" >>"$LOG"; }  # append one UTC-timestamped line to $LOG
+active(){ local pane; pane=$(tmux capture-pane -pt "$1" 2>/dev/null) || return 1; [[ $pane == *'esc to interrupt'* ]]; }  # session busy? capture first, then match in-shell: `tmux | grep -q` under pipefail can report failure if tmux takes SIGPIPE after an early match
+
+log "=== overnight runner started ==="
+
+# --- Gate A: wait for the Assistant's PR-consolidation done-marker (max 4h) ---
+log "Gate A: waiting for assistant done-marker ($DONE_MARKER)..."
+for _ in $(seq 1 48); do  # 48 x 300s = 4h
+  [ -f "$DONE_MARKER" ] && break
+  sleep 300
+done
+if [ -f "$DONE_MARKER" ]; then log "Gate A passed — assistant done: $(head -1 "$DONE_MARKER" 2>/dev/null)"
+else log "Gate A TIMED OUT (4h) — proceeding without confirmation."; fi
+
+# --- Gate B: only run /upgrade-all after the usage-limit reset (~03:30 UTC) so it has budget ---
+log "Gate B: waiting until >= 03:35 UTC (post usage-limit reset)..."
+for _ in $(seq 1 36); do   # cap ~3h
+  now=$((10#$(date -u +%H%M)))  # HHMM as a base-10 integer; 10# stops a leading zero being read as octal
+  [ "$now" -ge 335 ] && [ "$now" -lt 1200 ] && break   # morning window, past reset
+  [ "$now" -ge 1200 ] && break                         # safety: never wait into the afternoon
+  sleep 300
+done
+log "Gate B passed (now $(date -u +%FT%TZ))."
+
+# --- Gate C: let the build loops go idle so /upgrade-all doesn't contend on the Swarm (max 1h) ---
+log "Gate C: waiting for builder/adversary to be idle (Swarm free)..."
+for _ in $(seq 1 12); do  # 12 x 300s = 1h
+  active cc-ci-builder || active cc-ci-adv || break  # leave the loop as soon as neither session is busy
+  sleep 300
+done
+if active cc-ci-builder || active cc-ci-adv; then log "Gate C: loops still active after 1h — proceeding anyway."; else log "Gate C passed — loops idle."; fi
+
+# --- Run the weekly /upgrade-all (DEFAULT: recipe PRs verified by !testme, NEVER merges) ---
+log "launching /upgrade-all via launch-upgrader.py fresh ..."
+LOOP_BACKEND=claude LOOP_MODEL=sonnet python3 "$PLAN/launch-upgrader.py" fresh >>"$LOG" 2>&1 || log "WARNING: launch-upgrader.py fresh exited with status $? — polling its status anyway."
+
+# --- Wait for it to finish (max ~8h). Only `launch-upgrader.py status` is polled; the dated summary is read by the report below ---
+log "waiting for /upgrade-all to complete (summary $SUMMARY or upgrader stop)..."
+for _ in $(seq 1 96); do  # 96 x 300s = 8h
+  sleep 300
+  st=$(python3 "$PLAN/launch-upgrader.py" status 2>/dev/null)
+  if [[ "${st,,}" == *stopped* ]]; then log "upgrader reports stopped."; break; fi  # case-insensitive substring match on the status text
+done
+[[ "${st,,}" == *stopped* ]] || log "upgrader not reporting stopped after ~8h — writing the report anyway."
+
+# --- Collect the morning report (everything the group prints, stderr included, goes to $REPORT) ---
+log "writing report $REPORT ..."
+{
+  echo "# cc-ci overnight run — ${TODAY}"
+  echo
+  echo "Generated $(date -u +%FT%TZ) by overnight-run.sh. See $LOG for the run trace."
+  echo
+  echo "## /upgrade-all summary"
+  if [ -f "$SUMMARY" ]; then cat "$SUMMARY"; else echo "(no dated summary at $SUMMARY — /upgrade-all may have stalled on the usage limit or errored; check the cc-ci-upgrader session + $LOG)"; fi
+  printf '\n%s\n' "## Open PRs per recipe (post-run)"
+  set -a; . /srv/cc-ci/.testenv 2>/dev/null; set +a  # NOTE(review): expected to define GITEA_USERNAME / GITEA_PASSWORD / GITEA_URL — confirm
+  G="https://${GITEA_USERNAME:-}:${GITEA_PASSWORD:-}@${GITEA_URL:-}/api/v1"  # :- defaults: under set -u an unset GITEA_* would abort the script before the notification is sent
+  [ -n "${GITEA_URL:-}" ] || echo "(GITEA_URL is unset — /srv/cc-ci/.testenv missing or incomplete; the PR survey below will be empty)"
+  for r in $(curl -fsS "$G/orgs/recipe-maintainers/repos?limit=100" 2>/dev/null | python3 -c "import sys,json;print('\n'.join(sorted(x['name'] for x in json.load(sys.stdin))))" 2>/dev/null); do
+    case "$r" in cc-ci|cc-ci-orchestrator|cc-ci-secrets|archived-*) continue;; esac  # skip these repos in the survey
+    n=$(curl -fsS "$G/repos/recipe-maintainers/$r/pulls?state=open&limit=50" 2>/dev/null | python3 -c "import sys,json;d=json.load(sys.stdin);print(len(d));[print(' - #%d %s'%(p['number'],p['title'][:60])) for p in d]" 2>/dev/null)
+    echo " - $r: ${n%%$'\n'*} open"   # first line of $n is the open-PR count
+    printf '%s\n' "$n" | tail -n +2   # remaining lines are the per-PR bullets
+  done
+  printf '\n%s\n' "## Phase / loop status"
+  python3 "$PLAN/launch.py" status 2>&1
+} > "$REPORT" 2>&1
+
+touch /srv/cc-ci/.cc-ci-logs/.overnight-done
+log "report written. pinging orchestrator to notify the operator."
+# Ask the orchestrator (which has PushNotification) to deliver the morning notification + journal it.
+MSG="OVERNIGHT RUN COMPLETE — read ${REPORT} and the /upgrade-all summary, then send the operator a proactive PushNotification with the headline (did /upgrade-all complete, how many recipe PRs, overall CI state), append a completion event to cc-ci-plan/JOURNAL.md, and rm /srv/cc-ci/.cc-ci-logs/.overnight-done."
+tmux send-keys -t "$ORCH" -l -- "$MSG" 2>/dev/null; sleep 1; tmux send-keys -t "$ORCH" Enter 2>/dev/null  # -l sends the text literally; Enter is sent separately after a 1s pause
+log "=== overnight runner finished ==="
diff --git a/cc-ci-plan/plan-overnight-run.md b/cc-ci-plan/plan-overnight-run.md
new file mode 100644
index 0000000..8f8d3ef
--- /dev/null
+++ b/cc-ci-plan/plan-overnight-run.md
@@ -0,0 +1,32 @@
+# Plan — overnight run (consolidate → /upgrade-all → morning report)
+
+**Created:** 2026-06-02 ~02:10 UTC. **Runs via:** `cc-ci-plan/overnight-run.sh` in tmux `cc-ci-overnight`.
+ +## Goal +By morning, have a full picture of CI + all recipes: let things settle overnight, then run the weekly +**`/upgrade-all`** as a full test run and leave a report — so we can see whether it makes it through, +how many recipe PRs it produced, and the overall state. + +## Sequence (the runner enforces it; polls in pure bash so it doesn't burn the shared claude budget) +1. **Gate A — Assistant done.** Wait for the PR-consolidation task to finish (marker + `/srv/cc-ci/.cc-ci-logs/pr-consolidation.done`, which the assistant writes as its last step). This way + `/upgrade-all` starts from a clean one-open-PR-per-recipe state. (4h timeout → proceed anyway.) +2. **Gate B — usage reset.** Don't launch `/upgrade-all` until after the claude usage-limit reset + (~03:30 UTC) so it has budget. (The loops + assistant are at ~90% now and may stall until reset; + the watchdog resumes the loops on lift.) +3. **Gate C — Swarm free.** Wait (≤1h) for the Builder/Adversary to be idle so `/upgrade-all` doesn't + contend with the regression canaries on the shared Swarm; proceed anyway after the cap. +4. **Run `/upgrade-all`** (`launch-upgrader.py fresh` → DEFAULT mode: surveys every enrolled recipe, + opens/updates a recipe PR per upgradeable one verified by `!testme`, **NEVER merges**). Wait for it + to finish (it self-terminates + writes `…/upgrades/upgrade-all-YYYY-MM-DD.md`; ≤8h). +5. **Morning report** → `/srv/cc-ci/.cc-ci-logs/overnight-report-YYYY-MM-DD.md`: the `/upgrade-all` + summary, open-PR-per-recipe survey, and `launch.py status`. Then ping the orchestrator to send the + operator a proactive PushNotification + journal it. + +## Guardrails / notes +- `/upgrade-all` DEFAULT **never merges** anything and never weakens tests. +- Shared claude budget: a full `/upgrade-all` is heavy; if it re-exhausts the limit it may stall + mid-run — the report will show how far it got (a partial result is still a result, per the ask). 
+- Neither the upgrader nor the assistant is watchdog-managed; if either limit-stalls, it stays stalled + until nudged. The runner has bounded timeouts so it never blocks forever. +- This is a one-off overnight run (not the recurring Sunday-02:00 timer, which stays as-is).