feat: benchmark builder-adversary-deferred (4-phase incl review); limit-detect across all roots

This commit is contained in:
2026-06-16 00:07:43 +00:00
parent aeee484395
commit 819000417b
4 changed files with 32 additions and 10 deletions

View File

@ -63,6 +63,9 @@ PY
# gen_config <variant> <run> <prefix>
# Writes "$run/agents.toml" from the here-document below.
#   $1 (v)      - variant name; "builder-adversary-deferred" gets one extra
#                 `review` entry appended after the lex/parse/eval phases.
#   $2 (run)    - directory that receives agents.toml.
#   $3 (prefix) - NOTE(review): not referenced in the lines visible here; part
#                 of the here-document is elided in this view, so it is
#                 presumably used there — confirm.
# Reads $PLANS, which is not defined in the visible lines (presumably set by
# the surrounding script — verify).
gen_config() { # <variant> <run> <prefix>
local v="$1" run="$2" prefix="$3"
# builder-adversary-deferred adds a final comprehensive `review` phase (4 phases, not 3)
# For every other variant review_phase stays empty, so the $review_phase line
# in the here-document expands to an empty line inside the phases array.
local review_phase=""
[ "$v" = "builder-adversary-deferred" ] && review_phase=" { id = \"review\", plan = \"$PLANS/review.md\", status = \"STATUS-review.md\" },"
# Unquoted EOF delimiter: $PLANS and $review_phase are expanded while the
# file is being written.
cat > "$run/agents.toml" <<EOF
[watchdog]
signal_interval = 15
@ -118,6 +121,7 @@ phases = [
{ id = "lex", plan = "$PLANS/lex.md", status = "STATUS-lex.md" },
{ id = "parse", plan = "$PLANS/parse.md", status = "STATUS-parse.md" },
{ id = "eval", plan = "$PLANS/eval.md", status = "STATUS-eval.md" },
$review_phase
]
EOF
}