diff --git a/runner/harness/deps.py b/runner/harness/deps.py
index f28069a..73c40a0 100644
--- a/runner/harness/deps.py
+++ b/runner/harness/deps.py
@@ -112,15 +112,41 @@ def deploy_deps(
 
 
 def teardown_deps(state: list[dict]) -> None:
-    """Undeploy each dep in reverse order. Suppresses exceptions per-dep so one teardown failure
-    doesn't strand the others. Mirrors the orchestrator's teardown_app(verify=False) pattern."""
+    """Undeploy each dep in reverse order, verifying that nothing is left behind.
+
+    **VERIFY=True (F2-5 fix)**: per plan §9 teardown is sacred — a dep that leaks
+    containers/volumes/secrets corrupts the next run that uses the same deterministic dep domain.
+
+    Failures are LOGGED LOUDLY (not silently suppressed) so a leak is visible in the run output;
+    we continue to tear down the other deps so one failure doesn't strand the rest. Entries
+    without a "domain" are skipped.
+
+    Raises:
+        lifecycle.TeardownError: after all attempts, if any dep failed to tear down. The message
+            lists every failure and ``__cause__`` is the first underlying exception, so its
+            traceback survives. The orchestrator's outer `finally` then decides whether the leak
+            is a run-failure (it should be, mirroring lifecycle.teardown_app's own
+            raise-on-residual behaviour at `verify=True`).
+    """
+    errors: list[str] = []
+    first_exc: Exception | None = None
     for entry in reversed(state):
         domain = entry.get("domain")
         if not domain:
             continue
-        with contextlib.suppress(Exception):
-            print(f" dep: tearing down {entry.get('recipe')} @ {domain}", flush=True)
-            lifecycle.teardown_app(domain, verify=False)
+        recipe = entry.get("recipe", "?")
+        print(f" dep: tearing down {recipe} @ {domain}", flush=True)
+        try:
+            lifecycle.teardown_app(domain, verify=True)
+        except Exception as e:  # noqa: BLE001 — every failure must be visible, but we want to try the rest first
+            msg = f"dep {recipe} @ {domain} teardown failed: {e}"
+            print(f" !! {msg}", flush=True)
+            errors.append(msg)
+            if first_exc is None:
+                first_exc = e  # kept so the aggregate error can chain to a real traceback
+    if errors:
+        summary = "dep teardown failures: " + " ; ".join(errors)
+        raise lifecycle.TeardownError(summary) from first_exc
 
 
 def load_run_state() -> list[dict]:
diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py
index da01e64..fb11a0f 100644
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@@ -360,6 +360,7 @@ def main() -> int:
     results: dict[str, str] = {}
     lifecycle.janitor()
     dep_deploy_failed = False
+    dep_teardown_error: str | None = None
     try:
         # ---- deps deploy FIRST (sequentially), if declared (Q2.3) ----
         if declared:
@@ -437,10 +438,18 @@ def main() -> int:
                 results[op] = "skip"
     finally:
         # Teardown the recipe under test FIRST, then deps in reverse declaration order.
+        # Parent verify=False (Phase 1d): keep as-is so a parent residual doesn't mask a tier
+        # failure. Dep teardown uses verify=True via teardown_deps (F2-5 fix); failures are
+        # captured into dep_teardown_error and surfaced in the run summary + exit code, but
+        # we still print the diagnosable summary first.
         lifecycle.teardown_app(domain, verify=False)
         if deps_state:
             print("\n===== DEPS teardown =====", flush=True)
-            deps_mod.teardown_deps(deps_state)
+            try:
+                deps_mod.teardown_deps(deps_state)
+            except lifecycle.TeardownError as e:
+                dep_teardown_error = str(e)
+                print(f"!! {dep_teardown_error}", flush=True)
 
     # ---- deploy-count assertion (DG4.1) ----
     with open(countfile) as f:
@@ -470,6 +479,10 @@ def main() -> int:
             file=sys.stderr,
         )
         overall = 1
+    if dep_teardown_error:
+        # F2-5: dep teardown leaks violate §9 (teardown sacred); fail the run loudly.
+        print(f"!! dep teardown leaked state: {dep_teardown_error}", file=sys.stderr)
+        overall = 1
     if any(v == "fail" for v in results.values()):
         overall = 1
     if not results: