Two gaps for the scheduled Thursday glm-5.2 run:

1. Survival: the watchdog was a Popen child of the Type=oneshot service, which systemd's cgroup cleanup kills on exit. Spawn it under the persistent tmux server instead (_spawn_watchdog), like the run sessions, so it survives the oneshot.

2. The report runs on glm-5.2, sharing the same opencode-go budget the upgrade run drains, so it can 429-stall with no recovery. launch-report.py now spawns the SAME watchdog pointed at the cc-ci-report session (generic via UPGRADER_SESSION/_MODEL/_DONE_MARKER/_RESUME_FILE), with a report-specific resume prompt.

Also: _run_pids() is now scoped to the managed session (title or -s <sid>) so the report watchdog can't kill the idle upgrader process and vice versa; resume() adds --dir and honors a custom resume prompt file.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
137 lines
7.4 KiB
Python
Executable File
137 lines
7.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""cc-ci recipe-report launcher — one-shot agent that runs /recipe-report after the weekly upgrade.
|
|
|
|
The report agent's model is configured SEPARATELY from the upgrader (REPORT_BACKEND/REPORT_MODEL),
|
|
but defaults to the same OpenCode Go subscription on glm-5.2. The model default tracks the backend
|
|
(opencode→opencode-go/glm-5.2, claude→opus).
|
|
|
|
Usage:
|
|
launch-report.py start [DATE] use-or-create the session; runs /recipe-report [DATE]
|
|
launch-report.py fresh [DATE] always start a new session
|
|
launch-report.py stop kill the session
|
|
launch-report.py status show session state
|
|
|
|
Env: REPORT_BACKEND (default opencode), REPORT_MODEL (default opencode-go/glm-5.2 | opus for claude),
|
|
OPENCODE_SHARE (1=attach web server + public --share link), REPORT_SESSION, REPORT_DIR.
|
|
"""
|
|
import os, subprocess, sys, time
|
|
from pathlib import Path
|
|
from datetime import datetime, timezone
|
|
|
|
# --- Configuration: every value below can be overridden through the environment. ---

# tmux session name of the report agent.
SESSION = os.getenv("REPORT_SESSION", "cc-ci-report")
# Checkout referenced by the kickoff prompt; also the working directory for the claude backend.
WORKDIR = os.getenv("REPORT_DIR", "/srv/cc-ci")
# Directory that receives the kickoff/resume prompt files and the pane logs.
LOG_DIR = os.getenv("LOG_DIR", "/srv/cc-ci/.cc-ci-logs")

# Agent backend ("opencode" or "claude"); the default model follows the backend.
BACKEND = os.getenv("REPORT_BACKEND", "opencode")
_DEFAULT_MODEL = "opus" if BACKEND != "opencode" else "opencode-go/glm-5.2"
MODEL = os.getenv("REPORT_MODEL", _DEFAULT_MODEL)

# claude backend: executable and the extra flags spliced verbatim into its command line.
CLAUDE_BIN = os.getenv("CLAUDE_BIN", "claude")
CLAUDE_FLAGS = os.getenv("CLAUDE_FLAGS", "--dangerously-skip-permissions")

# opencode backend: executable, the server passed to --attach, and whether --share is added.
OPENCODE_BIN = os.getenv("OPENCODE_BIN", "/home/loops/.local/bin/opencode")
OPENCODE_SERVER = os.getenv("OPENCODE_SERVER", "http://127.0.0.1:4096")
OPENCODE_SHARE = os.getenv("OPENCODE_SHARE", "1") == "1"
|
|
|
|
|
|
def log(m):
    """Print *m* on stdout prefixed with ``[report HH:MM:SS]`` (UTC), flushing immediately."""
    stamp = datetime.now(timezone.utc).strftime("%H:%M:%S")
    print(f"[report {stamp}] {m}", flush=True)
|
|
def die(m):
    """Log *m* with an ``ERROR:`` prefix, then terminate the process with exit status 1."""
    log(f"ERROR: {m}")
    raise SystemExit(1)
|
|
def _sh(c):
    """Run the argv list *c* and return the CompletedProcess with stdout/stderr captured as text."""
    return subprocess.run(
        c,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
|
|
def session_alive():
    """Return True if a tmux session named exactly SESSION exists.

    The target is written as ``=SESSION`` because a bare name makes tmux fall back to
    prefix/fnmatch matching when there is no exact match. Without the ``=``, the companion
    ``<SESSION>-watchdog`` session created by start() would make this probe succeed even
    though the report session itself is gone.
    """
    probe = _sh(["tmux", "has-session", "-t", f"={SESSION}"])
    return probe.returncode == 0
|
|
def kill_session():
    """Kill the tmux session named exactly SESSION; a missing session is silently ignored.

    The ``=`` prefix forces an exact-name match. A bare name would let tmux fall back to
    prefix matching and kill a different session that merely starts with SESSION (for
    example ``<SESSION>-watchdog``) when the report session does not exist.
    """
    # Output is captured and the return code ignored: "no such session" is not an error here.
    subprocess.run(["tmux", "kill-session", "-t", f"={SESSION}"], capture_output=True)
|
|
def _busy():
    """Return True when the captured SESSION pane text contains "esc to interrupt".

    NOTE(review): presumably that phrase is the agent UI's "working" indicator — confirm.
    """
    pane_text = _sh(["tmux", "capture-pane", "-pt", SESSION]).stdout
    return "esc to interrupt" in pane_text
|
|
|
|
|
|
def build_kickoff(date):
    """Build the kickoff prompt handed to the report agent.

    Args:
        date: optional date string appended as the argument of ``/recipe-report``;
            a falsy value invokes the skill without an argument.

    Returns:
        The multi-line prompt text (no trailing newline).
    """
    suffix = f" {date}" if date else ""
    header = "*** cc-ci RECIPE-REPORT — one-shot ***"
    intro = 'You generate the public weekly "Recipe Report". Run the /recipe-report skill now:'
    invocation = f" invoke /recipe-report{suffix}"
    pointers = (
        f"Full spec: {WORKDIR}/.claude/skills/recipe-report/SKILL.md. Creds in {WORKDIR}/.testenv; "
        "reach the CI server with `ssh cc-ci`."
    )
    rules = (
        "You are READ-ONLY: review the latest /upgrade-all run + every recipe's open PRs + CI verdicts, "
        "order the wire table by priority-to-address (CVE recipes first), and publish one HTML page per "
        "run to report.ci.commoninternet.net (+ regenerate the index). Public page — NO secrets/tokens/raw logs. "
        "Never merge/edit/comment on PRs. When done, print the report URL + 'RECIPE REPORT COMPLETE' and "
        "go idle (do NOT loop)."
    )
    return "\n".join([header, intro, invocation, pointers, rules])
|
|
|
|
|
|
def _build_agent_command(kf):
    """Return ``(shell_command, working_dir)`` for the configured BACKEND.

    *kf* is the path of the kickoff prompt file; its content is passed to the agent via
    ``"$(cat '<kf>')"``. Exits through die() when BACKEND is not recognised.
    """
    model_flag = f"--model '{MODEL}'" if MODEL else ""
    if BACKEND == "claude":
        cmd = f"{CLAUDE_BIN} --remote-control '{SESSION}' {model_flag} {CLAUDE_FLAGS} \"$(cat '{kf}')\""
        return cmd, WORKDIR
    if BACKEND == "opencode":
        # -m/--model/--attach/--title/--share are `run` SUBCOMMAND flags — must come AFTER `run`.
        cwd = "/srv/cc-ci-orch/cc-ci"
        share_flag = "--share" if OPENCODE_SHARE else ""
        cmd = (
            f"set -a; . /srv/cc-ci/.testenv; set +a; "
            f"{OPENCODE_BIN} run {model_flag} {share_flag} --attach '{OPENCODE_SERVER}' "
            f"--title '{SESSION}' --dir {cwd} \"$(cat '{kf}')\""
        )
        return cmd, cwd
    die(f"unknown REPORT_BACKEND '{BACKEND}'")


def _spawn_report_watchdog(cwd):
    """Start launch-upgrader.py's watchdog in its own tmux session, aimed at the report session.

    Writes the report-specific resume prompt, replaces any previous ``<SESSION>-watchdog``
    session, and returns True when tmux accepted the new session, False otherwise.
    """
    import shlex

    launcher = "/srv/cc-ci/cc-ci-plan/launch-upgrader.py"
    rf = Path(LOG_DIR) / f".kickoff-{SESSION}-resume.txt"
    rf.write_text(
        "The opencode-go usage limit has reset (or the report stalled). CONTINUE generating and "
        "publishing this week's public Recipe Report per /recipe-report: survey → write the spec "
        "JSON → render with recipe-report.py → publish. Do NOT hand-write HTML (render() owns all "
        "formatting). When the page is live, print 'RECIPE REPORT COMPLETE' and go idle.")
    # The watchdog is configured purely through UPGRADER_* variables, re-pointed at this session.
    wenv = {
        "HOME": os.environ.get("HOME") or "/home/loops",
        "UPGRADER_SESSION": SESSION, "UPGRADER_DIR": cwd, "LOG_DIR": LOG_DIR,
        "UPGRADER_BACKEND": "opencode", "UPGRADER_MODEL": MODEL,
        "OPENCODE_BIN": OPENCODE_BIN, "OPENCODE_SERVER": OPENCODE_SERVER,
        "OPENCODE_SHARE": "1" if OPENCODE_SHARE else "0",
        "UPGRADER_DONE_MARKER": "RECIPE REPORT COMPLETE",
        "UPGRADER_RESUME_FILE": str(rf),
    }
    envstr = " ".join(f"{k}={shlex.quote(str(v))}" for k, v in wenv.items())
    wlog = f"{LOG_DIR}/{SESSION}-watchdog.log"
    wcmd = f"env {envstr} python3 {shlex.quote(launcher)} watchdog >> {shlex.quote(wlog)} 2>&1"
    watchdog_session = f"{SESSION}-watchdog"
    # Replace any previous watchdog; "=" forces an exact-name match so nothing else is hit.
    subprocess.run(["tmux", "kill-session", "-t", f"={watchdog_session}"], capture_output=True)
    spawned = subprocess.run(["tmux", "new-session", "-d", "-s", watchdog_session, "-c", cwd, wcmd])
    return spawned.returncode == 0


def start(mode, date):
    """Launch the one-shot report agent in a detached tmux session.

    Args:
        mode: ``"use-or-create"`` leaves an existing session alone when it is busy;
            any other value (``"fresh"``) always replaces an existing session.
        date: optional date forwarded to ``/recipe-report``; empty means no argument.

    Exits via die() when tmux is missing, the backend is unknown, or tmux refuses to
    create the session. Previously a failed ``tmux new-session`` was ignored and the
    launcher still reported "started" with exit status 0.
    """
    import shutil

    if not shutil.which("tmux"):
        die("tmux not found")
    Path(LOG_DIR).mkdir(parents=True, exist_ok=True)
    if session_alive():
        # The report is a one-shot. Leave the session ONLY if it's actively producing a report;
        # an idle/leftover session (e.g. last week's, gone idle) is killed so a new run starts.
        if mode == "use-or-create" and _busy():
            log(f"{SESSION} busy with a report — leaving it")
            return
        log(f"{SESSION} exists (idle/leftover) — killing first")
        kill_session()
        time.sleep(1)

    kf = Path(LOG_DIR) / f".kickoff-{SESSION}.txt"
    kf.write_text(build_kickoff(date))
    cmd, cwd = _build_agent_command(kf)

    log(f"starting {SESSION} (backend={BACKEND}, model={MODEL}, date={date or 'today'})")
    launched = subprocess.run(["tmux", "new-session", "-d", "-s", SESSION, "-c", cwd, cmd])
    if launched.returncode != 0:
        # tmux's own error text goes to our stderr (output is not captured above).
        die(f"tmux new-session for '{SESSION}' failed (exit {launched.returncode})")
    # Mirror the pane output into the session log file.
    subprocess.run(["tmux", "pipe-pane", "-o", "-t", SESSION, f"cat >> '{LOG_DIR}/{SESSION}.log'"])

    if BACKEND == "opencode":
        if OPENCODE_SHARE:
            log(f" attached to {OPENCODE_SERVER} → http://oc.commoninternet.net +public --share link")
        # Watchdog for the report too: it runs on glm-5.2 sharing the same opencode-go budget the
        # upgrade run just drained, so a 429 stall is likely. Reuse launch-upgrader.py's watchdog,
        # pointed at THIS (cc-ci-report) session with a report-specific marker + resume prompt. It
        # runs under the tmux server (survives the systemd oneshot like the run sessions).
        if os.environ.get("REPORT_WATCHDOG", "1") == "1":
            if _spawn_report_watchdog(cwd):
                log(f" report watchdog spawned in tmux '{SESSION}-watchdog' — auto-resume on usage-limit stalls")
            else:
                # The report itself is running; a missing watchdog only loses auto-resume.
                log(f" WARNING: could not spawn report watchdog '{SESSION}-watchdog' — no auto-resume")
    log(f"started. attach: tmux attach -t {SESSION}")
|
|
|
|
|
|
def main():
    """Dispatch the command line: ``start`` (default), ``fresh``, ``stop`` or ``status``.

    The optional second argument is the report date, used by ``start``/``fresh`` only.
    Any other command prints the module usage text and exits with status 2.
    """
    args = sys.argv[1:]
    cmd = args[0] if args else "start"
    date = args[1] if len(args) > 1 else ""
    if cmd == "start":
        start("use-or-create", date)
    elif cmd == "fresh":
        start("fresh", date)
    elif cmd == "stop":
        kill_session()
        # start() spawns a companion "<SESSION>-watchdog" tmux session; stop it as well so it
        # is not left running against a report session the operator just killed. "=" forces
        # an exact-name match, and a missing watchdog session is silently ignored.
        subprocess.run(["tmux", "kill-session", "-t", f"={SESSION}-watchdog"], capture_output=True)
        log("stopped.")
    elif cmd == "status":
        log(f"{SESSION}: {'RUNNING' if session_alive() else 'stopped'} (backend={BACKEND} model={MODEL})")
    else:
        print(__doc__)
        sys.exit(2)
|
|
|
|
|
|
# Script entry point: dispatch the CLI only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|