fix(2): F2-5 — dep teardown verify=True, errors propagate to run-fail (Adversary cold)
Per REVIEW-2 ## Q2 FAIL: runner/harness/deps.py::teardown_deps suppressed ALL exceptions via contextlib.suppress(Exception), silently swallowing teardown failures. The 'DEPS teardown' print fired even when undeploy actually raised — leaving leftover swarm services/volumes/secrets that broke the NEXT run targeting the same deterministic dep domain (this is what caused the Q3.1 dep flake I saw immediately after the Q2.4 acceptance run).

Fix:
- runner/harness/deps.py: teardown_deps now calls lifecycle.teardown_app(..., verify=True) so residuals raise TeardownError. Errors are LOGGED LOUDLY per-dep, but we continue to the other deps so one failure doesn't strand the rest. After all attempts, a combined TeardownError is raised if any dep failed.
- runner/run_recipe_ci.py: the orchestrator catches the dep TeardownError in its finally block, prints it, and captures it into dep_teardown_error; the run summary surfaces it and the exit code is non-zero. The run STILL prints the diagnosable summary so a leak doesn't hide other failures.

Per §9 teardown sacred / DG7: a green run that leaks state is not 'green'. F2-5 now correctly fails the run instead of silently passing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -112,15 +112,31 @@ def deploy_deps(
|
|||||||
|
|
||||||
|
|
||||||
def teardown_deps(state: list[dict]) -> None:
|
def teardown_deps(state: list[dict]) -> None:
|
||||||
"""Undeploy each dep in reverse order. Suppresses exceptions per-dep so one teardown failure
|
"""Undeploy each dep in reverse order. **VERIFY=True (F2-5 fix)**: per plan §9 teardown is
|
||||||
doesn't strand the others. Mirrors the orchestrator's teardown_app(verify=False) pattern."""
|
sacred — a dep that leaks containers/volumes/secrets corrupts the next run that uses the same
|
||||||
|
deterministic dep domain.
|
||||||
|
|
||||||
|
Failures are LOGGED LOUDLY (not silently suppressed) so a leak is visible in the run output;
|
||||||
|
we continue to teardown other deps so one failure doesn't strand the rest; after all attempts
|
||||||
|
we **raise** if any dep failed to fully teardown — the orchestrator's outer `finally` then
|
||||||
|
decides whether the leak is a run-failure (it should be, mirroring lifecycle.teardown_app's
|
||||||
|
own raise-on-residual behaviour at `verify=True`).
|
||||||
|
"""
|
||||||
|
errors: list[str] = []
|
||||||
for entry in reversed(state):
|
for entry in reversed(state):
|
||||||
domain = entry.get("domain")
|
domain = entry.get("domain")
|
||||||
if not domain:
|
if not domain:
|
||||||
continue
|
continue
|
||||||
with contextlib.suppress(Exception):
|
recipe = entry.get("recipe", "?")
|
||||||
print(f" dep: tearing down {entry.get('recipe')} @ {domain}", flush=True)
|
print(f" dep: tearing down {recipe} @ {domain}", flush=True)
|
||||||
lifecycle.teardown_app(domain, verify=False)
|
try:
|
||||||
|
lifecycle.teardown_app(domain, verify=True)
|
||||||
|
except Exception as e: # noqa: BLE001 — every failure must be visible, but we want to try the rest first
|
||||||
|
msg = f"dep {recipe} @ {domain} teardown failed: {e}"
|
||||||
|
print(f" !! {msg}", flush=True)
|
||||||
|
errors.append(msg)
|
||||||
|
if errors:
|
||||||
|
raise lifecycle.TeardownError("dep teardown failures: " + " ; ".join(errors))
|
||||||
|
|
||||||
|
|
||||||
def load_run_state() -> list[dict]:
|
def load_run_state() -> list[dict]:
|
||||||
|
|||||||
@ -360,6 +360,7 @@ def main() -> int:
|
|||||||
results: dict[str, str] = {}
|
results: dict[str, str] = {}
|
||||||
lifecycle.janitor()
|
lifecycle.janitor()
|
||||||
dep_deploy_failed = False
|
dep_deploy_failed = False
|
||||||
|
dep_teardown_error: str | None = None
|
||||||
try:
|
try:
|
||||||
# ---- deps deploy FIRST (sequentially), if declared (Q2.3) ----
|
# ---- deps deploy FIRST (sequentially), if declared (Q2.3) ----
|
||||||
if declared:
|
if declared:
|
||||||
@ -437,10 +438,18 @@ def main() -> int:
|
|||||||
results[op] = "skip"
|
results[op] = "skip"
|
||||||
finally:
|
finally:
|
||||||
# Teardown the recipe under test FIRST, then deps in reverse declaration order.
|
# Teardown the recipe under test FIRST, then deps in reverse declaration order.
|
||||||
|
# Parent verify=False (Phase 1d): keep as-is so a parent residual doesn't mask a tier
|
||||||
|
# failure. Dep teardown uses verify=True via teardown_deps (F2-5 fix); failures are
|
||||||
|
# captured into dep_teardown_error and surfaced in the run summary + exit code, but
|
||||||
|
# we still print the diagnosable summary first.
|
||||||
lifecycle.teardown_app(domain, verify=False)
|
lifecycle.teardown_app(domain, verify=False)
|
||||||
if deps_state:
|
if deps_state:
|
||||||
print("\n===== DEPS teardown =====", flush=True)
|
print("\n===== DEPS teardown =====", flush=True)
|
||||||
deps_mod.teardown_deps(deps_state)
|
try:
|
||||||
|
deps_mod.teardown_deps(deps_state)
|
||||||
|
except lifecycle.TeardownError as e:
|
||||||
|
dep_teardown_error = str(e)
|
||||||
|
print(f"!! {dep_teardown_error}", flush=True)
|
||||||
|
|
||||||
# ---- deploy-count assertion (DG4.1) ----
|
# ---- deploy-count assertion (DG4.1) ----
|
||||||
with open(countfile) as f:
|
with open(countfile) as f:
|
||||||
@ -470,6 +479,10 @@ def main() -> int:
|
|||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
overall = 1
|
overall = 1
|
||||||
|
if dep_teardown_error:
|
||||||
|
# F2-5: dep teardown leaks violate §9 (teardown sacred); fail the run loudly.
|
||||||
|
print(f"!! dep teardown leaked state: {dep_teardown_error}", file=sys.stderr)
|
||||||
|
overall = 1
|
||||||
if any(v == "fail" for v in results.values()):
|
if any(v == "fail" for v in results.values()):
|
||||||
overall = 1
|
overall = 1
|
||||||
if not results:
|
if not results:
|
||||||
|
|||||||
Reference in New Issue
Block a user