feat: benchmark builder-adversary-deferred (4-phase incl review); limit-detect across all roots

This commit is contained in:
2026-06-16 00:07:43 +00:00
parent aeee484395
commit 819000417b
4 changed files with 32 additions and 10 deletions

View File

@ -30,16 +30,11 @@ for line in open(DATA):
# Flag runs whose watchdog log shows a usage-limit hit — their duration (and thus tok/sec) is
# inflated by the idle pause, even though the token total is unaffected. Look in every campaign
# run-root, plus every solo run-root for builder-solo (repos are kept).
_camp = sorted(glob.glob("/tmp/ao-campaign-*"))
_camp_root = _camp[-1] if _camp else ""
_solo = sorted(glob.glob("/tmp/ao-solo-*"))
_solo_root = _solo[-1] if _solo else ""
def _limit_hit(v, rep):
pats = []
if _camp_root:
pats.append(f"{_camp_root}/{v}/r{rep}/.ao-state/*watchdog*.log") # campaign layout
if v == "builder-solo" and _solo_root:
pats.append(f"{_solo_root}/r{rep}/.ao-state/*watchdog*.log") # solo layout (no variant subdir)
# search ALL run roots (a variant/rep may live in any campaign root; solo has its own layout)
pats = [f"/tmp/ao-campaign-*/{v}/r{rep}/.ao-state/*watchdog*.log"]
if v == "builder-solo":
pats.append(f"/tmp/ao-solo-*/r{rep}/.ao-state/*watchdog*.log")
for pat in pats:
for wl in glob.glob(pat):
try: