feat: builder-solo control runner (run after campaign) + limit-detect for it

run-solo-bench.sh runs the builder-solo variant (single builder, self-verify, no adversary) 5× on the same calculator and appends rows to the shared campaign data file (adversary col = 0). It is a separate script so that the live campaign runner is untouched. analyze.py limit-detection now also covers the solo run layout. Engine example builder-solo committed at a0f7652; the benchmark engine is to be re-pinned to it before running solo (after the main campaign completes).
2026-06-15 02:36:58 +00:00
parent 25a77f5d3c
commit fc0608ede1
2 changed files with 166 additions and 7 deletions
--- a/analyze.py
+++ b/analyze.py
@@ -30,14 +30,22 @@ for line in open(DATA):
 # flag runs whose watchdog log shows a usage-limit hit — their duration (and thus tok/sec) is
 # inflated by the idle pause, even though the token total is unaffected. Look in the newest campaign
 # run-root (repos are kept).
-_roots = sorted(glob.glob("/tmp/ao-campaign-*"))
-_root = _roots[-1] if _roots else ""
+_camp = sorted(glob.glob("/tmp/ao-campaign-*"))
+_camp_root = _camp[-1] if _camp else ""
+_solo = sorted(glob.glob("/tmp/ao-solo-*"))
+_solo_root = _solo[-1] if _solo else ""
 def _limit_hit(v, rep):
-    for wl in glob.glob(f"{_root}/{v}/r{rep}/.ao-state/*watchdog*.log"):
-        try:
-            if "limit hit" in open(wl, errors="ignore").read(): return True
-        except OSError:
-            pass
+    pats = []
+    if _camp_root:
+        pats.append(f"{_camp_root}/{v}/r{rep}/.ao-state/*watchdog*.log")   # campaign layout
+    if v == "builder-solo" and _solo_root:
+        pats.append(f"{_solo_root}/r{rep}/.ao-state/*watchdog*.log")        # solo layout (no variant subdir)
+    for pat in pats:
+        for wl in glob.glob(pat):
+            try:
+                if "limit hit" in open(wl, errors="ignore").read(): return True
+            except OSError:
+                pass
    return False
 for r in rows:
    r["limit"] = "LIMIT" if _limit_hit(r["v"], r["rep"]) else ""