feat: builder-solo control runner (run after campaign) + limit-detect for it
run-solo-bench.sh runs the builder-solo variant (single builder, self-verify, no adversary) 5× on the same calculator and appends rows to the shared campaign data file (adversary col = 0). Separate script so the live campaign runner is untouched. analyze.py limit-detection now also covers the solo run layout. Engine example builder-solo committed at a0f7652; benchmark engine to be re-pinned to it before running solo (after the main campaign completes).
This commit is contained in:
22
analyze.py
22
analyze.py
@@ -30,14 +30,22 @@ for line in open(DATA):
|
||||
# flag runs whose watchdog log shows a usage-limit hit — their duration (and thus tok/sec) is
|
||||
# inflated by the idle pause, even though the token total is unaffected. Look in the newest campaign
|
||||
# run-root (repos are kept).
|
||||
_roots = sorted(glob.glob("/tmp/ao-campaign-*"))
|
||||
_root = _roots[-1] if _roots else ""
|
||||
_camp = sorted(glob.glob("/tmp/ao-campaign-*"))
|
||||
_camp_root = _camp[-1] if _camp else ""
|
||||
_solo = sorted(glob.glob("/tmp/ao-solo-*"))
|
||||
_solo_root = _solo[-1] if _solo else ""
|
||||
def _limit_hit(v, rep):
|
||||
for wl in glob.glob(f"{_root}/{v}/r{rep}/.ao-state/*watchdog*.log"):
|
||||
try:
|
||||
if "limit hit" in open(wl, errors="ignore").read(): return True
|
||||
except OSError:
|
||||
pass
|
||||
pats = []
|
||||
if _camp_root:
|
||||
pats.append(f"{_camp_root}/{v}/r{rep}/.ao-state/*watchdog*.log") # campaign layout
|
||||
if v == "builder-solo" and _solo_root:
|
||||
pats.append(f"{_solo_root}/r{rep}/.ao-state/*watchdog*.log") # solo layout (no variant subdir)
|
||||
for pat in pats:
|
||||
for wl in glob.glob(pat):
|
||||
try:
|
||||
if "limit hit" in open(wl, errors="ignore").read(): return True
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
for r in rows:
|
||||
r["limit"] = "LIMIT" if _limit_hit(r["v"], r["rep"]) else ""
|
||||
|
||||
Reference in New Issue
Block a user