fix(watchdog): seed stall clock from pane's real last-activity, not watchdog start

Stall detection tracked idle time in an in-memory _idle_since map seeded to now()
on first observation, so a freshly-(re)started watchdog reset every agent's stall
clock and had to wait a full stall_idle before it could nudge — an agent idle for
an hour looked freshly-idle after a watchdog restart. Seed it from the tmux
window's last-activity timestamp (#{window_activity}) instead, so idle duration
reflects the agent's real last activity regardless of when the watchdog started.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01UWTdUq2bsic7JZGqJp3nD6
This commit is contained in:
2026-06-23 04:40:34 +00:00
parent 57082acc05
commit 65ceeb3a7b

View File

@ -467,6 +467,18 @@ def limit_tick(cfg, agent, pane):
# Per-session stall clock, kept in memory only: maps a session name to the timestamp from which
# that session is considered idle. A value of 0.0 is written when the pane is seen active; it is
# falsy, so the next idle check's `or` chain re-seeds the entry instead of using 0.0 as a time.
_idle_since: dict[str, float] = {}
def _pane_last_active(session):
    """Return the tmux window's last-activity time as a Unix timestamp, or None.

    Queries tmux for ``#{window_activity}`` of the target ``session``. The value is
    used to seed idle-duration from the agent's REAL last activity rather than `now`,
    so stalls are detected regardless of when the watchdog process started —
    restarting the watchdog no longer resets every agent's stall clock.

    Args:
        session: tmux target (session name) to query.

    Returns:
        The timestamp as a float, or None when tmux cannot be run or prints
        nothing parseable (e.g. unknown session, no server).
    """
    # Pass an argv list instead of a shell string: the session name reaches tmux
    # verbatim, so quotes, `$(...)` or backticks in it can never be interpreted by
    # a shell (Python's repr() is not shell quoting).
    argv = ["tmux", "display-message", "-p", "-t", str(session), "#{window_activity}"]
    try:
        r = subprocess.run(argv, capture_output=True, text=True)
    except OSError:
        # tmux missing / not executable: keep this best-effort, as the shell form
        # was (it yielded empty stdout and therefore None).
        return None
    try:
        return float(r.stdout.strip())
    except ValueError:
        # Empty or non-numeric output means tmux had no answer for this target.
        return None
def _last_nonempty_line(text):
for line in reversed(text.splitlines()):
if line.strip():
@ -502,7 +514,9 @@ def stall_check_one(cfg, agent):
if pane_active(cfg, agent, pane):
_idle_since[session] = 0.0
return
since = _idle_since.get(session) or now
# Seed from the pane's real last-activity (not `now`), so a watchdog that just (re)started still
# sees an already-idle pane as idle-for-its-true-duration instead of resetting the clock.
since = _idle_since.get(session) or _pane_last_active(session) or now
_idle_since[session] = since
idle = now - since
grace = int(cfg["watchdog"].get("stall_grace", 180))