fix(watchdog): detect idle opencode turns
This commit is contained in:
@ -591,3 +591,19 @@ session cc-ci-orchestrator-stale can be killed; recipe-mirrors org still private
|
||||
- Bounced only `cc-ci-watchdog` so it loaded the 14-phase queue without interrupting the active builder
  and adversary sessions. Verified `launch.sh status`: `cfold [10/14]`, builder/adv/watchdog RUNNING;
  watched sessions are still `opencode` for builder/adv/orchestrator.
|
||||
|
||||
## 2026-06-12 ~21:45 — OpenCode watchdog idle detection fixed; stalled cfold loops recovered
|
||||
- Operator correctly observed the OpenCode loops had stalled after cfold M1. Root cause: watchdog
  activity detection reused the Claude-oriented `ACTIVE_RE`, whose tokens (`▣`, `Build ·`) also appear
  in OpenCode's static completed-turn footer. Finished OpenCode turns therefore looked active forever,
  so the idle-stall branch never fired.
|
||||
- Fixed `launch.py` with an OpenCode-specific classifier: Claude still uses the old `ACTIVE_RE` path;
  only `BACKEND=opencode` uses `opencode_pane_active()`, which checks the bottom prompt/status area
  plus recent log mtime and ignores the static completed footer.
|
||||
- Added a `launch.py selftest-opencode-activity` regression test. It verifies that a completed OpenCode
  footer is idle, a live `esc interrupt` footer is active, and a limit banner plus completed footer is
  not active. `python3 -m py_compile cc-ci-plan/launch.py` and the selftest both passed.
|
||||
- Restarted `cc-ci-watchdog` on the patched code. Builder was already stuck in a stale input mode after
  a manual nudge landed as a shell command, so restarted only `cc-ci-builder` via `start_agent`; it came
  back on OpenCode GPT-5.4 and is actively planning the M2 sweep. Adversary accepted its nudge and wrote
  `WAITING-UNTIL: 2026-06-12T21:55:28Z` while awaiting Builder's formal M2 claim.
|
||||
|
||||
@ -127,6 +127,8 @@ PHASE_IDX_FILE = os.environ.get("PHASE_IDX_FILE", f"{LOG_DIR}/.phase-idx")
|
||||
|
||||
# Regex patterns for session-state detection
|
||||
ACTIVE_RE = re.compile(r"esc to interrupt|⠋|⠙|⠹|⠸|⠼|⠴|⠦|⠧|⠇|⠏|Running tool|▣|Build ·|· \d+")
|
||||
OPENCODE_ACTIVE_RE = re.compile(
|
||||
r"esc interrupt|thinking|inferring|running tool|tool call|preparing patch|reading|searching", re.I)
|
||||
LIMIT_RE = re.compile(r"spend limit|usage limit|limit reached|reached your .*limit|out of (credits|tokens)", re.I)
|
||||
FATAL_RE = re.compile(r"redacted_thinking|blocks cannot be modified|cannot be modified", re.I)
|
||||
RECENT_ACTIVITY_RE = re.compile(r"thinking|inferring|running tool|remote control (active|connecting)|tool call|schedulewake?up", re.I)
|
||||
@ -175,6 +177,20 @@ def _log_recently_touched(session, age_seconds):
|
||||
except FileNotFoundError:
|
||||
return False
|
||||
|
||||
def opencode_pane_active(session, pane, *, use_log=True):
    """Decide whether an OpenCode pane currently shows a running turn.

    Completed OpenCode turns leave a static footer such as
    "▣ Build · GPT-5.4" in the pane; that footer is not activity. Only the
    last ten lines of ``pane`` (the bottom prompt/status area) are matched
    against ``OPENCODE_ACTIVE_RE``.

    Args:
        session: Session name, forwarded to ``_log_recently_touched``.
        pane: Captured pane text.
        use_log: When truthy and the pane tail shows no activity marker,
            fall back to ``_log_recently_touched(session,
            OPENCODE_LOG_GRACE)``.

    Returns:
        ``True`` when the pane tail matches an activity marker; otherwise
        the value of ``use_log and _log_recently_touched(...)``.
    """
    status_area = "\n".join(pane.splitlines()[-10:])
    if OPENCODE_ACTIVE_RE.search(status_area):
        return True
    # No live marker on screen: optionally trust a recently-written session log.
    return use_log and _log_recently_touched(session, OPENCODE_LOG_GRACE)
|
||||
|
||||
def pane_active(session, pane, *, use_log=True):
    """Return whether ``pane`` shows an actively working agent.

    Dispatches on the module-level ``BACKEND``: the ``"opencode"`` backend
    uses ``opencode_pane_active`` (which receives ``session`` and
    ``use_log``); every other backend searches the whole pane with
    ``ACTIVE_RE`` and ignores ``session`` and ``use_log``.
    """
    if BACKEND != "opencode":
        return bool(ACTIVE_RE.search(pane))
    return opencode_pane_active(session, pane, use_log=use_log)
|
||||
|
||||
def _last_nonempty_line(text):
|
||||
for line in reversed(text.splitlines()):
|
||||
if line.strip():
|
||||
@ -433,7 +449,7 @@ def limit_tick(role, session, pane):
|
||||
limited_now = bool(LIMIT_RE.search(pane))
|
||||
|
||||
if state is None:
|
||||
if not limited_now or ACTIVE_RE.search(pane):
|
||||
if not limited_now or pane_active(session, pane, use_log=False):
|
||||
return False
|
||||
now = time.time()
|
||||
until, parsed = _next_limit_until(pane, now)
|
||||
@ -447,7 +463,7 @@ def limit_tick(role, session, pane):
|
||||
return True
|
||||
|
||||
# Armed window: a spinner or a vanished banner means the limit lifted.
|
||||
if ACTIVE_RE.search(pane) or not limited_now:
|
||||
if pane_active(session, pane, use_log=False) or not limited_now:
|
||||
log(f"limit lifted on {role} ({session}) — clearing limit state")
|
||||
_clear_limit_state(session)
|
||||
return False
|
||||
@ -471,7 +487,7 @@ def limit_tick(role, session, pane):
|
||||
# banner = still limited → re-arm from the fresh banner (else flat fallback).
|
||||
time.sleep(3)
|
||||
pane2 = capture_pane(session, 40)
|
||||
if ACTIVE_RE.search(pane2) and not LIMIT_RE.search(pane2):
|
||||
if pane_active(session, pane2, use_log=False) and not LIMIT_RE.search(pane2):
|
||||
log(f"limit lifted on {role} ({session}) — probe resumed it")
|
||||
_clear_limit_state(session)
|
||||
# Return True ("handled this tick") so the caller does NOT continue evaluating
|
||||
@ -496,7 +512,7 @@ def heal_session(role, session, workdir):
|
||||
return
|
||||
|
||||
pane = capture_pane(session, 25)
|
||||
if ACTIVE_RE.search(pane):
|
||||
if pane_active(session, pane):
|
||||
return # actively working — leave it alone
|
||||
|
||||
if limit_tick(role, session, pane):
|
||||
@ -545,9 +561,7 @@ def stall_check_one(role, session, workdir):
|
||||
_idle_since[session] = 0.0
|
||||
return
|
||||
|
||||
if ACTIVE_RE.search(pane) or (BACKEND == "opencode" and (
|
||||
RECENT_ACTIVITY_RE.search(pane) or _log_recently_touched(session, OPENCODE_LOG_GRACE)
|
||||
)):
|
||||
if pane_active(session, pane):
|
||||
_idle_since[session] = 0.0
|
||||
return
|
||||
|
||||
@ -609,7 +623,7 @@ def heal_orchestrator():
|
||||
subprocess.run([ORCH_LAUNCHER, "start"], capture_output=True)
|
||||
return
|
||||
pane = capture_pane(ORCH_SESSION, 25)
|
||||
if ACTIVE_RE.search(pane):
|
||||
if pane_active(ORCH_SESSION, pane):
|
||||
return
|
||||
if limit_tick("orchestrator", ORCH_SESSION, pane):
|
||||
return # limit window — never kill/restart the orchestrator over a limit
|
||||
@ -639,7 +653,7 @@ def wake_orchestrator():
|
||||
# the orchestrator on track even if the limit-state machinery breaks. If the
|
||||
# limit is genuinely still in force the wake is harmless (banner re-prints).
|
||||
log(f"orchestrator ({ORCH_SESSION}) is in a limit window — sending hourly wake anyway (fallback)")
|
||||
if ACTIVE_RE.search(capture_pane(ORCH_SESSION, 25)):
|
||||
if pane_active(ORCH_SESSION, capture_pane(ORCH_SESSION, 25)):
|
||||
return False # busy — don't interrupt; retry when idle
|
||||
try:
|
||||
msg = " ".join(Path(ORCH_WAKE_PROMPT).read_text().split())
|
||||
@ -875,6 +889,43 @@ def cmd_status():
|
||||
if seq.exists():
|
||||
print(f" >>> {seq.read_text().strip()}")
|
||||
|
||||
def cmd_selftest_opencode_activity():
    """Run the regression self-test for OpenCode activity detection.

    Feeds three canned pane captures to ``opencode_pane_active`` with
    ``use_log=False`` and checks that a completed-turn footer is idle, a
    live ``esc interrupt`` footer is active, and a limit banner appended to
    a completed footer is not active. Prints one ``FAIL:`` line per failed
    check and exits with status 1 if any failed; otherwise prints a PASS line.
    """
    completed_pane = """
- M1: PASS
- M2: IN PROGRESS

▣ Build · GPT-5.4 · 2m 19s

┃
┃
┃
╹▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
178.4K (17%) ctrl+p commands
"""
    running_pane = """
~ Preparing patch...

▣ Build · GPT-5.4

┃
┃
┃
╹▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀
⬝⬝⬝■■■■■ esc interrupt 137.6K (13%) ctrl+p commands
"""
    limited_pane = completed_pane + "\nYou've hit your weekly limit · resets Jun 16, 10pm (UTC)\n"

    def is_active(pane_text):
        # The log fallback is disabled so only the pane text decides the result.
        return opencode_pane_active("__selftest__", pane_text, use_log=False)

    # All expectations are evaluated before any output is produced.
    expectations = (
        ("completed OpenCode footer is idle", not is_active(completed_pane)),
        ("live OpenCode footer is active", is_active(running_pane)),
        ("limit banner plus completed footer is not active", not is_active(limited_pane)),
    )

    any_failed = False
    for label, passed in expectations:
        if not passed:
            print(f"FAIL: {label}")
            any_failed = True
    if any_failed:
        sys.exit(1)
    print("opencode activity selftest PASS")
|
||||
|
||||
# ── main ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
|
||||
@ -908,6 +959,9 @@ def main():
|
||||
elif cmd == "status":
|
||||
cmd_status()
|
||||
|
||||
elif cmd == "selftest-opencode-activity":
|
||||
cmd_selftest_opencode_activity()
|
||||
|
||||
elif cmd == "stop":
|
||||
stop_loops()
|
||||
if session_alive(WATCHDOG_SESSION):
|
||||
|
||||
Reference in New Issue
Block a user