feat: builder-solo control runner (run after campaign) + limit-detect for it

run-solo-bench.sh runs the builder-solo variant (single builder, self-verify,
no adversary) 5× on the same calculator and appends rows to the shared campaign
data file (adversary col = 0). Separate script so the live campaign runner is
untouched. analyze.py limit-detection now also covers the solo run layout.
The engine example builder-solo is committed at a0f7652; the benchmark engine is to be
re-pinned to it before running solo (after the main campaign completes).
This commit is contained in:
2026-06-15 02:36:58 +00:00
parent 25a77f5d3c
commit fc0608ede1
2 changed files with 166 additions and 7 deletions

View File

@@ -30,14 +30,22 @@ for line in open(DATA):
# Flag runs whose watchdog log shows a usage-limit hit — their duration (and thus tok/sec) is
# inflated by the idle pause, even though the token total is unaffected. Logs are looked up in the
# newest campaign run-root and, for the builder-solo variant, also in the newest solo run-root
# (repos are kept).
# NOTE(review): "newest" here means the lexicographically last glob match — presumably the
# directory names end in a sortable timestamp; confirm against the runner scripts.
_camp = sorted(glob.glob("/tmp/ao-campaign-*"))
_camp_root = _camp[-1] if _camp else ""
_solo = sorted(glob.glob("/tmp/ao-solo-*"))
_solo_root = _solo[-1] if _solo else ""


def _limit_hit(v, rep):
    """Return True if any watchdog log of run (v, rep) contains the text "limit hit".

    Args:
        v: Variant name; used as the sub-directory under the campaign run-root.
        rep: Repetition identifier; the run directory is named ``r{rep}``.

    Returns:
        True when at least one ``.ao-state/*watchdog*.log`` file of the run contains
        "limit hit"; False otherwise, including when no run-root or log exists or a
        log cannot be read.
    """
    pats = []
    if _camp_root:
        pats.append(f"{_camp_root}/{v}/r{rep}/.ao-state/*watchdog*.log")  # campaign layout
    if v == "builder-solo" and _solo_root:
        pats.append(f"{_solo_root}/r{rep}/.ao-state/*watchdog*.log")  # solo layout (no variant subdir)
    for pat in pats:
        for wl in glob.glob(pat):
            try:
                # Context manager closes the handle promptly; undecodable bytes are dropped so a
                # partially binary log can still be scanned.
                with open(wl, encoding="utf-8", errors="ignore") as fh:
                    if "limit hit" in fh.read():
                        return True
            except OSError:
                # Best-effort: an unreadable log is treated as "no limit hit seen".
                pass
    return False
# Tag every row: "LIMIT" when the run's watchdog log recorded a usage-limit hit, else "".
for r in rows:
    r["limit"] = "LIMIT" if _limit_hit(r["v"], r["rep"]) else ""