From 65ceeb3a7b6d0e3addc72a267653b40e7e2202d9 Mon Sep 17 00:00:00 2001
From: mfowler
Date: Tue, 23 Jun 2026 04:40:34 +0000
Subject: [PATCH] fix(watchdog): seed stall clock from pane's real last-activity, not watchdog start
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stall detection tracked idle time in an in-memory _idle_since map seeded
to now() on first observation, so a freshly-(re)started watchdog reset
every agent's stall clock and had to wait a full stall_idle before it
could nudge — an agent idle for an hour looked freshly-idle after a
watchdog restart. Seed from the tmux window's last-activity timestamp
(#{window_activity}) instead, so idle duration reflects the agent's real
last activity regardless of when the watchdog started.

Co-Authored-By: Claude Opus 4.8
Claude-Session: https://claude.ai/code/session_01UWTdUq2bsic7JZGqJp3nD6
---
 agents.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/agents.py b/agents.py
index 5ce9af7..733429e 100755
--- a/agents.py
+++ b/agents.py
@@ -467,6 +467,18 @@ def limit_tick(cfg, agent, pane):
 
 _idle_since: dict[str, float] = {}
 
+def _pane_last_active(session):
+    """Unix timestamp of the tmux window's last activity (last output change), or None.
+    Seeds idle-duration from the agent's REAL last activity rather than `now`, so stalls are
+    detected regardless of when the watchdog process started — restarting the watchdog no longer
+    resets every agent's stall clock."""
+    r = subprocess.run(f"tmux display-message -p -t {session!r} '#{{window_activity}}'",
+                       shell=True, capture_output=True, text=True)
+    try:
+        return float(r.stdout.strip())
+    except (ValueError, AttributeError):
+        return None
+
 def _last_nonempty_line(text):
     for line in reversed(text.splitlines()):
         if line.strip():
@@ -502,7 +514,9 @@ def stall_check_one(cfg, agent):
     if pane_active(cfg, agent, pane):
         _idle_since[session] = 0.0
         return
-    since = _idle_since.get(session) or now
+    # Seed from the pane's real last-activity (not `now`), so a watchdog that just (re)started still
+    # sees an already-idle pane as idle-for-its-true-duration instead of resetting the clock.
+    since = _idle_since.get(session) or _pane_last_active(session) or now
     _idle_since[session] = since
     idle = now - since
     grace = int(cfg["watchdog"].get("stall_grace", 180))