cc-ci/runner/run_recipe_ci.py

#!/usr/bin/env python3
"""Top-level CI orchestrator (plan §4.3 + Phase 1d/1e), invoked by the Drone pipeline (or by hand).

Model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment, then ONE
teardown in `finally`. Per Phase 1e the orchestrator OWNS each mutating op (HC3): for a tier it runs
the optional pre-op seed hook (recipe ops.py `pre_<op>`), performs the op exactly ONCE
(upgrade/backup/restore — install has none), then runs BOTH the generic assertion file (the floor,
unless explicitly opted out) AND the recipe overlay assertion file (if any) against the shared
post-op state — generic and overlay are ADDITIVE, not override (HC3). Op results an assertion needs
(pre-upgrade identity, snapshot_id) pass op→assertion via a run-scoped JSON state file
($CCCI_OP_STATE_FILE). The upgrade op deploys the PR-HEAD code under test via `abra app deploy
--chaos` (HC1). Repo-local (PR-authored) overlays/hooks run only for allowlist-approved recipes (HC2,
gated in harness.discovery). The generic is the default for every op, so ANY recipe is testable with
zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic), not per-recipe
(DG7 DRY).

Run parameters from env (set by the comment-bridge via Drone build params):
  RECIPE   recipe name (e.g. custom-html)        [required]
  REF      PR head commit sha                     [optional; used for fetch + run-domain hash]
  PR       PR number                              [optional, default 0]
  SRC      head repo full_name on the mirror      [optional]
  VERSION  upgrade target tag (else newest published) [optional]
  STAGES   comma filter of tiers to run            [optional, default install,upgrade,backup,restore,custom]

Run env (python + pytest + playwright) is provided by `cc-ci-run` (nix/modules/harness.nix);
invoke as:  cc-ci-run runner/run_recipe_ci.py
"""

from __future__ import annotations

import contextlib
import glob
import importlib.util
import json
import os
import shutil
import subprocess
import sys
import tempfile
import time

ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(ROOT, "runner"))
from harness import (  # noqa: E402
    abra,
    canonical,
    discovery,
    generic,
    lifecycle,
    naming,
    warm,
    warmsnap,
)
from harness import (  # noqa: E402
    card as card_mod,
)
from harness import (  # noqa: E402
    deps as deps_mod,
)
from harness import (  # noqa: E402
    results as results_mod,
)
from harness import (  # noqa: E402
    screenshot as screenshot_mod,
)

# The tier names; identical to the default value documented for the STAGES env filter in the
# module docstring.
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")


def sso_dep_unverified(declared, deps_ready: bool, requires_deps_skipped: int) -> bool:
    """F2-11 gate predicate (pure, unit-tested).

    Returns True only when all three hold: the recipe declares DEPS, the deps were NOT ready
    (setup_custom_tests failed), and at least one `requires_deps` (SSO/OIDC) test was skipped
    because of that. Such a run did not verify the recipe's SSO claim, so it must not be reported
    GREEN even though a skip-only pytest file exits 0. The skip>0 condition keeps a
    deps-declaring recipe that has no `requires_deps` tests from being false-failed.
    """
    # Nothing declared, or deps came up fine: there is no unverified SSO claim.
    if deps_ready or not declared:
        return False
    return requires_deps_skipped > 0


def _truthy(v: str | None) -> bool:
    """Interpret an env-style flag: True for 1/true/yes/on (case-insensitive, surrounding
    whitespace ignored); False for anything else, including None and the empty string."""
    normalized = str(v if v else "").strip().lower()
    return normalized in {"1", "true", "yes", "on"}


def _redact_values() -> list[str]:
    """Values to scrub from published logs (D6 redaction filter, plan §4.4).

    Reads every file matching /run/secrets/* and collects its stripped content so that, should a
    subprocess ever echo one, it can be masked. Only values of 8+ characters are kept, so the
    filter never false-positives on short strings / SHAs.

    Files that cannot be read (OSError, e.g. a directory or a permission problem) or that are not
    valid text (UnicodeDecodeError, e.g. a binary key blob) are skipped rather than aborting: this
    function runs at import time, so an unreadable secret must not take the whole run down.

    Returns the distinct values sorted longest-first, so a secret that contains another secret as
    a substring is masked in full before the shorter one is considered.
    """
    vals: set[str] = set()
    for p in glob.glob("/run/secrets/*"):
        try:
            with open(p) as f:
                v = f.read().strip()
        except (OSError, UnicodeDecodeError):
            # Unreadable or non-text secret: nothing we could match against log text anyway.
            continue
        if len(v) >= 8:
            vals.add(v)
    return sorted(vals, key=len, reverse=True)


# Secret values to mask, collected once at import time by _redact_values() (which reads
# /run/secrets/*); _scrub() iterates this list.
_REDACT = _redact_values()


def _scrub(text: str) -> str:
    """Return `text` with every known infra-secret value replaced by ***REDACTED*** (D6
    redaction, plan §4.4). Secrets are applied in the order held by _REDACT."""
    masked = text
    for secret in _REDACT:
        # str.replace is a no-op when the secret does not occur, so no membership pre-check.
        masked = masked.replace(secret, "***REDACTED***")
    return masked


def run_redacted(cmd: list[str], env: dict | None = None) -> int:
    """Run a subprocess, streaming output live (so Drone logs stay tail-able) but masking any known
    infra-secret value first. Belt-and-suspenders: the harness never prints secrets and abra doesn't
    echo generated ones.

    The child runs with cwd=ROOT; its stderr is merged into stdout so both streams are scrubbed
    and interleaved in order. `env` is passed to the child as-is (None inherits this process's
    environment). Returns the child's exit code.

    The Popen is used as a context manager so the pipe is closed and the child reaped even when
    writing to our own stdout raises mid-stream.
    """
    with subprocess.Popen(
        cmd,
        cwd=ROOT,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,  # line-buffered, so each line is forwarded as soon as the child emits it
    ) as proc:
        assert proc.stdout is not None  # guaranteed by stdout=PIPE; narrows the type
        for line in proc.stdout:
            sys.stdout.write(_scrub(line))
            sys.stdout.flush()
        return proc.wait()


def _gitea_token() -> str | None:
    """Return the Gitea bot token, or None when none is available.

    A non-empty $GITEA_TOKEN wins; otherwise the stripped content of
    /run/secrets/bridge_gitea_token is used if that file exists. An empty result is reported as
    None.
    """
    secret_path = "/run/secrets/bridge_gitea_token"
    token = os.environ.get("GITEA_TOKEN")
    if token:
        return token
    if os.path.exists(secret_path):
        with open(secret_path) as fh:
            token = fh.read().strip()
    return token or None


def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
    """Make the recipe available at the code under test, under ~/.abra/recipes/<recipe>.

    Three paths:
    - CCCI_SKIP_FETCH=1: leave the local clone untouched (lets a test/Adversary stage a
      fake/broken ref — e.g. a simulated broken PR head for the --quick rollback proof — without
      a re-fetch clobbering it). Never set in production CI.
    - SRC and REF both given: clone the mirror repo and check out REF. Private mirror repos need
      the bot token, which is passed via a per-command http.extraHeader (not persisted in
      .git/config, not printed). Published version tags are then fetched from the public upstream.
    - otherwise: wipe the local copy and fetch the catalogue copy with `abra recipe fetch`.

    Clone, checkout and the catalogue fetch raise CalledProcessError on failure; the `rm -rf` and
    the upstream tag fetch are best-effort.
    """
    recipes_root = os.path.expanduser("~/.abra/recipes")
    os.makedirs(recipes_root, exist_ok=True)
    checkout = os.path.join(recipes_root, recipe)

    if os.environ.get("CCCI_SKIP_FETCH") == "1":
        print(f"[fetch] CCCI_SKIP_FETCH=1 — using local {recipe} recipe clone as-is", flush=True)
        return

    if not (src and ref):
        # Clean re-fetch from the catalogue. Remove first so a leftover dir from a prior SRC+REF
        # run (origin → private mirror, maybe lacking tags) can't poison the catalogue fetch.
        subprocess.run(["rm", "-rf", checkout], check=False)
        subprocess.run(["abra", "recipe", "fetch", recipe, "-n"], check=True)
        return

    mirror_url = f"https://git.autonomic.zone/{src}.git"
    token = _gitea_token()
    authed_git = ["git"]
    if token:
        authed_git = [*authed_git, "-c", f"http.extraHeader=Authorization: token {token}"]

    subprocess.run(["rm", "-rf", checkout], check=False)
    subprocess.run([*authed_git, "clone", "--quiet", mirror_url, checkout], check=True)
    subprocess.run([*authed_git, "-C", checkout, "checkout", "--quiet", ref], check=True)

    # Bring in published version TAGS from the public upstream so the upgrade tier can deploy a
    # previous published version (mirror PR branches carry no release tags). Read-only + plain git
    # (no bot token to a foreign host). Non-fatal: if unreachable, upgrade degrades to a skip.
    upstream_url = f"https://git.coopcloud.tech/coop-cloud/{recipe}.git"
    tag_fetch = ["git", "-C", checkout, "fetch", "--quiet", upstream_url, "refs/tags/*:refs/tags/*"]
    subprocess.run(tag_fetch, check=False)


def snapshot_recipe_tests(recipe: str) -> str | None:
    """Copy the recipe-shipped tests/ directory to a stable temp location, so it survives abra
    re-checking-out the recipe to a version tag during the run.

    Returns the snapshot path (<tmpdir>/ccci-recipe-tests-<recipe>, replaced if it already
    exists), or None when the recipe has no tests/ directory or that directory holds neither a
    test_*.py file nor an install_steps.sh.
    """
    tests_dir = os.path.expanduser(f"~/.abra/recipes/{recipe}/tests")
    if not os.path.isdir(tests_dir):
        return None

    overlay_tests = glob.glob(os.path.join(tests_dir, "test_*.py"))
    install_hook = os.path.join(tests_dir, "install_steps.sh")
    if not overlay_tests and not os.path.isfile(install_hook):
        return None

    snapshot = os.path.join(tempfile.gettempdir(), f"ccci-recipe-tests-{recipe}")
    shutil.rmtree(snapshot, ignore_errors=True)  # drop any snapshot left by an earlier run
    shutil.copytree(tests_dir, snapshot)
    return snapshot


def _load_meta(recipe: str) -> dict:
    """Load the per-recipe config from <ROOT>/tests/<recipe>/recipe_meta.py, mirroring
    tests/conftest._recipe_meta so the orchestrator's deploy/wait uses the same settings the tiers
    see (timeouts, health path/codes).

    Starts from built-in defaults for the health/timeout keys; when the meta file exists it is
    executed and any recognised name it defines overrides a default or is added. Names the file
    defines that are not in the recognised set are ignored.
    """
    meta: dict = {
        "HEALTH_PATH": "/",
        "HEALTH_OK": (200, 301, 302),
        "DEPLOY_TIMEOUT": 600,
        "HTTP_TIMEOUT": 300,
    }
    meta_file = os.path.join(ROOT, "tests", recipe, "recipe_meta.py")
    if not os.path.exists(meta_file):
        return meta

    namespace: dict = {}
    with open(meta_file) as fh:
        exec(compile(fh.read(), meta_file, "exec"), namespace)  # noqa: S102 (trusted, in-repo)

    # Keys with no default: present in the result only when the recipe defines them.
    optional_keys = (
        "BACKUP_CAPABLE",
        "SKIP_GENERIC",
        "EXPECTED_NA",
        "OIDC_AT_INSTALL",
        "READY_PROBE",
        "UPGRADE_BASE_VERSION",
        "BACKUP_VERIFY",
        "UPGRADE_EXTRA_ENV",
    )
    for key in (*meta, *optional_keys):
        if key in namespace:
            meta[key] = namespace[key]
    return meta


def _tier_env(domain: str) -> dict:
    """Build the environment for a tier's pytest subprocess: a copy of the current process
    environment plus CCCI_APP_DOMAIN (the domain) and CCCI_BASE_URL (https://<domain>)."""
    env = dict(os.environ)
    env["CCCI_APP_DOMAIN"] = domain
    env["CCCI_BASE_URL"] = f"https://{domain}"
    return env


def _skip_generic(op: str, meta: dict) -> bool:
    """Whether the generic assertion for `op` is opted out (Phase 1e HC3). Default: run (additive).

    Opt-out is any of: env CCCI_SKIP_GENERIC (all ops), env CCCI_SKIP_GENERIC_<OP>, or the
    recipe's declarative recipe_meta.SKIP_GENERIC list containing the op name, "all" or "*"
    (list entries are compared case-insensitively).
    """
    env_switches = ("CCCI_SKIP_GENERIC", f"CCCI_SKIP_GENERIC_{op.upper()}")
    if any(_truthy(os.environ.get(name)) for name in env_switches):
        return True
    declared = {str(item).lower() for item in (meta.get("SKIP_GENERIC") or [])}
    return bool(declared & {"all", "*", op})


def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None:
    """Run the optional pre-op seed hook (recipe ops.py `pre_<op>`) BEFORE the harness performs the
    op (HC3 op/assertion split): overlays seed data-continuity markers / the backup→restore
    mutation here, then assert post-op in test_<op>.py.

    The hook is resolved by discovery.pre_op_hook, which is also where the HC2 allowlist gate for
    repo-local ops.py lives; nothing happens when it resolves to no hook. The ops.py is imported
    in-process and called as pre_<op>(domain, meta). Its directory is placed at the front of
    sys.path for the duration so it can import sibling helpers, and removed again afterwards even
    if the hook raises. Any exception from loading or running the hook propagates.
    """
    resolved = discovery.pre_op_hook(recipe, op, repo_local)
    if not resolved:
        return
    source, path = resolved
    hook_dir = os.path.dirname(path)
    sys.path.insert(0, hook_dir)
    try:
        module_name = f"ccci_ops_{recipe}_{op}"
        spec = importlib.util.spec_from_file_location(module_name, path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        print(f"  pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True)
        seed = getattr(module, f"pre_{op}")
        seed(domain, meta)
    finally:
        # The hook itself may already have removed the entry; tolerate that.
        with contextlib.suppress(ValueError):
            sys.path.remove(hook_dir)


def _perform_op(
    op: str,
    domain: str,
    recipe: str,
    head_ref: str | None,
    op_state: dict,
    deploy_timeout: int = 900,
    meta: dict | None = None,
) -> None:
    """Perform the single mutating op ONCE (the harness owns the op, HC3). install has no op. Records
    what the assertions need (pre-upgrade identity, backup snapshot_id) into op_state. None of these
    call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place chaos upgrade is not a
    new install (HC1 reconciliation). `deploy_timeout` (recipe DEPLOY_TIMEOUT) is plumbed to the
    upgrade chaos redeploy so a heavy reconverge isn't SIGKILLed by the 900s default mid-wait; `meta`
    lets the upgrade op own a recipe-aware convergence+health wait (F2-12, READY_PROBE).

    Effects on `op_state` (mutated in place):
    - upgrade: op_state["upgrade"] = {"before": <perform_upgrade result>, "head_ref": head_ref}
    - backup:  op_state["backup"] = {"snapshot_id": <result of the last perform_backup>}
    - restore / install / anything else: op_state is left untouched.

    Exceptions raised by the underlying generic.perform_* call or by BACKUP_VERIFY propagate. A
    backup that still fails verification after the last attempt is reported on stdout but does
    NOT raise; the snapshot id of the last attempt is recorded regardless.
    """
    if op == "upgrade":
        before = generic.perform_upgrade(
            domain, recipe, head_ref, deploy_timeout=deploy_timeout, meta=meta
        )
        op_state["upgrade"] = {"before": before, "head_ref": head_ref}
    elif op == "backup":
        # Backup integrity + retry (F2-14b). A recipe may define BACKUP_VERIFY(domain) -> bool that
        # confirms the backup actually captured the recipe's critical data AFTER the op. This guards a
        # real race: a DB recipe dumps its data in a backupbot pre-hook, but if the DB container cycles
        # mid-dump (intermittent under host load) the dump is truncated/absent, so restic snapshots an
        # empty path — `abra app backup create` still "succeeds", yet a later restore silently loses the
        # data (ghost: backup.sql.gz never written → restore can't reimport → seeded row gone). When
        # verify fails we re-run the WHOLE backup (fresh restic snapshot) with a re-stabilised DB, up to
        # 3 attempts. Recipes without BACKUP_VERIFY are unaffected (single backup, as before).
        snap = generic.perform_backup(domain)
        verify = meta.get("BACKUP_VERIFY") if meta else None
        if callable(verify):
            # Verify exactly once per backup and reuse that verdict: re-asking after the loop would
            # double the verification work and could contradict the answer the loop acted on.
            attempt = 1
            verified = bool(verify(domain))
            while not verified and attempt < 3:
                print(
                    f"  backup-verify FAILED (attempt {attempt}/3) — backup did not capture the "
                    f"recipe's critical data (e.g. DB cycled mid-dump); re-running backup",
                    flush=True,
                )
                attempt += 1
                snap = generic.perform_backup(domain)
                verified = bool(verify(domain))
            if not verified:
                print(
                    f"  !! backup-verify still FAILED after {attempt} attempts — backup is incomplete",
                    flush=True,
                )
        op_state["backup"] = {"snapshot_id": snap}
    elif op == "restore":
        generic.perform_restore(domain)
    # install: already deployed; no op


def run_lifecycle_tier(
    recipe: str,
    op: str,
    repo_local: str | None,
    domain: str,
    meta: dict,
    head_ref: str | None,
    op_state: dict,
    records: list[dict] | None = None,
    junit_dir: str | None = None,
) -> str:
    """Run one additive lifecycle tier (HC3) and return 'pass' | 'fail' | 'skip'.

    Sequence: seed via the pre-op hook → perform the op ONCE → persist op_state to
    $CCCI_OP_STATE_FILE → run the generic assertion file (unless opted out) and then the overlay
    assertion file, each as its own pytest process against the shared post-op deployment. The
    upgrade op redeploys the PR head (head_ref) via chaos (HC1).

    - 'skip': generic is opted out and there is no overlay, so no op is performed at all.
    - 'fail': the hook, the op or the state-file write raised, or any pytest file exited non-zero.
      All assertion files are still run after one of them fails.

    Phase 3 (R1/R3): when `junit_dir` is given each pytest file is run with --junitxml, and when
    `records` is given a {tier, source, file, rc, junit} entry is appended per file, so the run can
    assemble per-stage/per-test results afterwards. Purely additive — does not change the verdict.
    """
    overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
    generic_opted_out = _skip_generic(op, meta)

    assertion_files: list[tuple[str, str]] = []
    if not generic_opted_out:
        assertion_files.append(discovery.generic_op(op))
    if overlay:
        assertion_files.append(overlay)

    if not assertion_files:
        # Nothing would assert, so don't perform a pointless mutating op.
        print(f"\n===== TIER: {op} — SKIP (generic opted out, no overlay) =====", flush=True)
        return "skip"

    if overlay:
        overlay_label = f"{overlay[0]}:{os.path.relpath(overlay[1], ROOT)}"
    else:
        overlay_label = "none"
    generic_label = "skip" if generic_opted_out else "run"
    print(
        f"\n===== TIER: {op} (generic={generic_label}, overlay={overlay_label}) =====",
        flush=True,
    )

    # Steps 1+2: seed hook, then the harness-owned op. Any failure here fails the tier.
    try:
        _run_pre_hook(recipe, op, repo_local, domain, meta)
        _perform_op(
            op,
            domain,
            recipe,
            head_ref,
            op_state,
            deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", 900)),
            meta=meta,
        )
        with open(os.environ["CCCI_OP_STATE_FILE"], "w") as state_fh:
            json.dump(op_state, state_fh)
    except Exception as exc:  # noqa: BLE001 — a failed op is a reported tier failure, not a crash
        print(f"!! {op} op failed: {_scrub(str(exc))}", flush=True)
        return "fail"

    # Step 3: assertions. Generic comes first in the list so an overlay may assume readiness.
    any_failed = False
    for source, path in assertion_files:
        rel_path = os.path.relpath(path, ROOT)
        print(f"  assert ({source}): {rel_path}", flush=True)
        pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", path]
        junit_path = None
        if junit_dir is not None:
            junit_path = results_mod.junit_file(junit_dir, op, source, path)
            pytest_cmd.append(f"--junitxml={junit_path}")
        exit_code = run_redacted(pytest_cmd, env=_tier_env(domain))
        if records is not None:
            records.append(
                {
                    "tier": op,
                    "source": source,
                    "file": os.path.relpath(path, ROOT),
                    "rc": exit_code,
                    "junit": junit_path,
                }
            )
        if exit_code != 0:
            any_failed = True
    return "fail" if any_failed else "pass"


def _enrich_deps_with_sso(parent_recipe: str, parent_domain: str, deps_list) -> dict[str, dict]:
    """Provision SSO for each dep and return a recipe→entry dict carrying domain + admin +
    realm/client/user info — the shape the `setup_custom_tests.sh` hook (and dependent tests) read.

    Entries lacking a recipe or a domain are dropped. Provider routing: today only `keycloak` is
    supported; any other dep is passed through as its bare entry. authentik will need a parallel
    `setup_authentik_realm` when an authentik-dep recipe enrolls (DEFERRED.md #9).

    For keycloak, the realm is the per-run isolation unit on a (possibly shared live-warm)
    instance: it is named by warm.realm_for(parent, parent_domain) so concurrent dependents —
    even two PRs of the SAME recipe — never collide on a realm (WC1). client_id stays the parent
    recipe name (isolated within the unique realm; predictable for debugging).
    """
    from harness import sso, warm  # local import — sso may not be needed for dep-less runs

    # Fields copied verbatim from the credentials returned by the realm setup, in output order.
    cred_fields = (
        "realm",
        "client_id",
        "client_secret",
        "user",
        "password",
        "email",
        "discovery_url",
        "token_url",
        "auth_url",
        "userinfo_url",
    )

    enriched: dict[str, dict] = {}
    for dep in deps_list or []:
        name = dep.get("recipe")
        dep_domain = dep.get("domain")
        if not (name and dep_domain):
            continue
        if name != "keycloak":
            # Provider not yet supported — record bare entry; setup_custom_tests.sh / tests will
            # raise if they need realm/client info they don't see.
            enriched[name] = dep
            continue

        creds = sso.setup_keycloak_realm(
            dep_domain,
            realm=warm.realm_for(parent_recipe, parent_domain),
            client_id=parent_recipe,
            redirect_uris=[f"https://{parent_domain}/*"],
            web_origins=[f"https://{parent_domain}"],
        )
        record: dict = {"recipe": name, "domain": dep_domain, "warm": bool(dep.get("warm"))}
        for field in cred_fields:
            record[field] = creds[field]
        record["admin_user"] = "admin"
        record["admin_password"] = sso.admin_password_inside(dep_domain)
        enriched[name] = record
    return enriched


def _provision_deps(
    recipe: str, domain: str, ref: str | None, declared: list[str]
) -> dict[str, dict]:
    """Provision a run's declared deps, write `$CCCI_DEPS_FILE`, and return the recipe→entry
    deps_state.

    Each declared dep is classified as live-warm (a warm domain exists and is up: shared provider
    at a stable domain + a per-run realm) or cold (co-deployed for this run; also the fallback
    when a warm provider is configured but down). Cold deps are deployed, orphan realms are
    reaped on warm providers, then every dep's SSO realm/client/user is provisioned and the
    enriched dict persisted for the `setup_custom_tests.sh`/`install_steps.sh` hooks + dependent
    tests. Raises on any failure (the caller marks deps-not-ready). Used by BOTH wiring paths:
    - post-deploy (legacy): provision AFTER generic tiers, then `setup_custom_tests.sh` does an
      in-place OIDC redeploy.
    - install-time (`OIDC_AT_INSTALL`, Q3.2a): provision BEFORE the single deploy so the
      install-tier `install_steps.sh` hook wires OIDC env into that one deploy — no reconverge.
    """
    warm_deps: list[str] = []
    cold_deps: list[str] = []
    for dep in declared:
        warm_dom = warm.warm_domain(dep)
        if warm_dom and warm.is_warm_up(dep, warm_dom):
            warm_deps.append(dep)
            continue
        if warm_dom:
            print(f"  dep: {dep} warm provider {warm_dom} not up — cold fallback", flush=True)
        cold_deps.append(dep)

    dep_metas = {dep: _load_meta(dep) for dep in cold_deps}
    if cold_deps:
        deps_list = deps_mod.deploy_deps(
            recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas
        )
    else:
        deps_list = []

    for dep in warm_deps:
        warm_dom = warm.warm_domain(dep)
        reaped = warm.reap_orphan_realms(dep, warm_dom)
        if reaped:
            print(
                f"  dep: reaped {len(reaped)} orphan realm(s) on warm {dep}: {reaped}", flush=True
            )
        deps_list.append({"recipe": dep, "domain": warm_dom, "warm": True})
        print(f"  dep: using live-warm {dep} @ {warm_dom} (per-run realm)", flush=True)

    deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
    deps_mod.write_run_state(deps_state)
    return deps_state


def _run_setup_custom_tests_hook(recipe: str, domain: str, deps_file: str) -> None:
    """Run `tests/<recipe>/setup_custom_tests.sh` if present (operator-2026-05-28 SSO-dep plan
    §3.2). The hook reads `$CCCI_DEPS_FILE`, sets OIDC env via `abra app config set` + secret
    insert, and triggers an in-place `abra app deploy --force --chaos`.

    The script is run with bash and receives CCCI_APP_DOMAIN, CCCI_RECIPE and CCCI_DEPS_FILE on
    top of the current environment. A missing hook is not an error (the recipe needs no post-deps
    wiring). Raises RuntimeError when the script exits non-zero, which the caller treats as
    deps-not-ready.
    """
    hook = os.path.join(ROOT, "tests", recipe, "setup_custom_tests.sh")
    hook_rel = os.path.relpath(hook, ROOT)
    if not os.path.isfile(hook):
        # No hook = recipe doesn't need post-deps wiring; deps are deployed + creds available
        # via deps_apps fixture as-is.
        print(
            f"  setup_custom_tests: no hook at {hook_rel} (deps creds ready in $CCCI_DEPS_FILE)",
            flush=True,
        )
        return

    print(f"  setup_custom_tests hook: {hook_rel}", flush=True)
    hook_env = dict(
        os.environ, CCCI_APP_DOMAIN=domain, CCCI_RECIPE=recipe, CCCI_DEPS_FILE=deps_file
    )
    completed = subprocess.run(["bash", hook], check=False, env=hook_env)
    if completed.returncode == 0:
        return
    raise RuntimeError(
        f"setup_custom_tests.sh exited {completed.returncode} (deps env not wired into parent)"
    )


def run_custom(
    recipe: str,
    repo_local: str | None,
    domain: str,
    records: list[dict] | None = None,
    junit_dir: str | None = None,
) -> str:
    """Run every discovered non-lifecycle custom test_*.py (both locations, additive), each as its
    own pytest process.

    Returns 'skip' when none are defined, 'fail' when any file exits non-zero (the remaining files
    still run), else 'pass'. Phase 3: with `junit_dir` each file is run with --junitxml, and with
    `records` a {tier, source, file, rc, junit} entry is appended per file.
    """
    custom_files = discovery.custom_tests(recipe, repo_local)
    if not custom_files:
        return "skip"

    print("\n===== TIER: custom =====", flush=True)
    any_failed = False
    for source, path in custom_files:
        rel_path = os.path.relpath(path, ROOT)
        print(f"  custom ({source}): {rel_path}", flush=True)
        pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", path]
        junit_path = None
        if junit_dir is not None:
            junit_path = results_mod.junit_file(junit_dir, "custom", source, path)
            pytest_cmd.append(f"--junitxml={junit_path}")
        exit_code = run_redacted(pytest_cmd, env=_tier_env(domain))
        if records is not None:
            records.append(
                {
                    "tier": "custom",
                    "source": source,
                    "file": rel_path,
                    "rc": exit_code,
                    "junit": junit_path,
                }
            )
        any_failed = any_failed or exit_code != 0
    return "fail" if any_failed else "pass"


def _wait_undeployed(domain: str, timeout: int = 120) -> None:
    """Block until the stack's services are gone after an undeploy (so warmsnap.restore, which
    requires undeployed, doesn't race a half-removed stack).

    Polls every 2 seconds for up to `timeout` seconds. The deadline is measured on the monotonic
    clock so a wall-clock adjustment cannot shorten or stretch the wait. If services are still
    listed when the deadline passes, a warning is printed and the function returns anyway
    (best-effort: the caller proceeds and surfaces any resulting failure itself).
    """
    stack = lifecycle._stack_name(domain)  # noqa: SLF001
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if not lifecycle._docker_names("service", stack):  # noqa: SLF001
            return
        time.sleep(2)
    print(
        f"!! stack {stack} still has services {timeout}s after undeploy — continuing anyway",
        flush=True,
    )


def run_quick(
    recipe: str, ref: str | None, head_ref: str | None, repo_local: str | None, meta: dict
) -> int:
    """WC4 `--quick` opt-in fast lane (plan §2). Reattach the data-warm canonical (known-good volume)
    → upgrade IN PLACE to the PR head (chaos) → assert generic UPGRADE (reconverge+moved+serving) +
    overlay + custom. PASS → undeploy-keep-volume, **known-good UNCHANGED (NEVER promote)**; FAIL →
    restore the last-known-good snapshot + undeploy (roll back, data safe). Lower-confidence; does
    NOT gate merge (WC7). Caller has confirmed a canonical exists.

    NB: the deps wiring + temp-state scaffolding intentionally mirror main()'s cold path rather than
    refactoring it — keeping the gate-passed cold flow byte-identical (zero regression risk).

    Side effects: sets CCCI_OP_STATE_FILE, CCCI_DEPS_FILE, CCCI_DEPS_SKIP_REPORT (and, when the
    canonical came up, CCCI_DEPS_READY / CCCI_DEPS_NOT_READY_REASON) in os.environ, and creates then
    removes the per-domain temp files behind the first three.

    Returns the process exit code: 0 when the canonical came up, no tier failed, the SSO claim was
    verified (F2-11) and teardown was clean; 1 otherwise.
    """
    domain = canonical.canonical_domain(recipe)
    reg = canonical.read_registry(recipe) or {}
    print(
        f"\n== cc-ci run [MODE=quick]: recipe={recipe} canonical={domain} "
        f"known-good={reg.get('version')} ref={ref}\n"
        "   quick = LOWER-CONFIDENCE opt-in fast lane; does NOT gate merge; NEVER promotes the canonical",
        flush=True,
    )

    # Run-scoped scratch files, advertised to the tiers/hooks through the environment.
    statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
    with open(statefile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_OP_STATE_FILE"] = statefile
    depsfile = os.path.join(tempfile.gettempdir(), f"ccci-deps-{domain}.json")
    with open(depsfile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_DEPS_FILE"] = depsfile
    skipfile = os.path.join(tempfile.gettempdir(), f"ccci-depskip-{domain}.txt")
    with contextlib.suppress(OSError):
        os.remove(skipfile)  # a stale report from an earlier run must not count against this one
    os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile

    op_state: dict = {}
    results: dict[str, str] = {}
    declared = deps_mod.declared_deps(recipe)
    deps_state: dict = {}
    deps_ready = True
    deps_not_ready_reason = ""
    # Every teardown/rollback problem is collected (not overwritten) so none is lost in the summary.
    teardown_errors: list[str] = []
    warm_ok = False
    rolled_back = False

    lifecycle.janitor()
    try:
        # 1) reattach the canonical (warm boot at the known-good version + retained volume)
        try:
            canonical.deploy_canonical(recipe, timeout=int(meta.get("DEPLOY_TIMEOUT", 900)))
            lifecycle.wait_healthy(
                domain,
                ok_codes=tuple(meta["HEALTH_OK"]),
                path=meta["HEALTH_PATH"],
                deploy_timeout=meta["DEPLOY_TIMEOUT"],
                http_timeout=meta["HTTP_TIMEOUT"],
            )
            warm_ok = True
        except Exception as e:  # noqa: BLE001
            print(f"!! canonical reattach/readiness failed: {_scrub(str(e))}", flush=True)

        if warm_ok:
            # 2) deps (warm keycloak + per-run realm) — mirrors main()'s warm/cold split
            if declared:
                print(f"\n===== setup_custom_tests (quick): deps {declared} =====", flush=True)
                try:
                    warm_deps, cold_deps = [], []
                    for d in declared:
                        wd = warm.warm_domain(d)
                        (warm_deps if (wd and warm.is_warm_up(d, wd)) else cold_deps).append(d)
                    dep_metas = {d: _load_meta(d) for d in cold_deps}
                    deps_list = (
                        deps_mod.deploy_deps(
                            recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas
                        )
                        if cold_deps
                        else []
                    )
                    for d in warm_deps:
                        wd = warm.warm_domain(d)
                        warm.reap_orphan_realms(d, wd)
                        deps_list.append({"recipe": d, "domain": wd, "warm": True})
                        print(f"  dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
                    deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
                    deps_mod.write_run_state(deps_state)
                    _run_setup_custom_tests_hook(recipe, domain, depsfile)
                except Exception as e:  # noqa: BLE001
                    deps_ready = False
                    deps_not_ready_reason = _scrub(str(e))[:300]
                    print(
                        f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
                        flush=True,
                    )

            # 3) UPGRADE to PR head (chaos) + assert (generic reconverge+moved+serving + overlay)
            results["upgrade"] = run_lifecycle_tier(
                recipe, "upgrade", repo_local, domain, meta, head_ref, op_state
            )
            # 4) custom tier
            os.environ["CCCI_DEPS_READY"] = "1" if deps_ready else "0"
            os.environ["CCCI_DEPS_NOT_READY_REASON"] = deps_not_ready_reason
            results["custom"] = run_custom(recipe, repo_local, domain)
        else:
            results["upgrade"] = "fail"
            results["custom"] = "skip"
    finally:
        # F2-11 skip count (read before deciding pass/fail). Parsing is tolerant: an unreadable or
        # malformed report must never raise here, because that would abort this `finally` before
        # the dep teardown and the canonical rollback below get a chance to run.
        requires_deps_skipped = 0
        try:
            with open(skipfile) as f:
                skip_tokens = f.read().split()
        except (OSError, ValueError):  # missing/unreadable file, or undecodable content
            skip_tokens = []
        for tok in skip_tokens:
            try:
                requires_deps_skipped += int(tok)
            except ValueError:
                print(
                    f"!! ignoring malformed entry in requires_deps skip report: {tok!r}",
                    flush=True,
                )
        sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
        passed = (
            warm_ok
            and bool(results)
            and all(v != "fail" for v in results.values())
            and not sso_unverified
        )

        # dep teardown: delete per-run warm realms; undeploy cold deps (mirrors cold)
        if deps_state:
            ordered = [deps_state[d] for d in declared if d in deps_state]
            for e in [x for x in ordered if x.get("warm")]:
                try:
                    from harness import sso

                    sso.delete_keycloak_realm(e["domain"], e["realm"])
                    print(
                        f"  dep: deleted per-run realm {e['realm']} on warm {e['recipe']}",
                        flush=True,
                    )
                except Exception as ex:  # noqa: BLE001
                    msg = f"warm realm delete failed for {e.get('realm')}: {_scrub(str(ex))}"
                    teardown_errors.append(msg)
                    print(f"!! {msg}", flush=True)
            try:
                deps_mod.teardown_deps([x for x in ordered if not x.get("warm")])
            except lifecycle.TeardownError as e:
                msg = _scrub(str(e))
                teardown_errors.append(msg)
                print(f"!! {msg}", flush=True)

        # canonical teardown — the WC4 contract:
        #   PASS → undeploy, KEEP volume, known-good UNCHANGED (never promote)
        #   FAIL → restore last-known-good snapshot (data safe) then leave undeployed (idle)
        try:
            if warm_ok and passed:
                canonical.undeploy_keep_volume(recipe)
                print(
                    "  quick PASS → canonical undeployed, volume retained, known-good UNCHANGED",
                    flush=True,
                )
            elif warm_ok:
                print(
                    "  quick FAIL → rolling back canonical to last-known-good snapshot", flush=True
                )
                abra.undeploy(domain)
                _wait_undeployed(domain)
                warmsnap.restore(recipe, domain)
                # reset recorded version to the known-good (the failed upgrade set TYPE to the broken
                # PR commit) so the idle canonical's .env agrees with the registry + re-warms cleanly.
                if reg.get("version"):
                    abra.env_set(domain, "TYPE", f"{recipe}:{reg['version']}")
                canonical._set_status(recipe, "idle")  # noqa: SLF001
                rolled_back = True
                print(
                    "  quick FAIL → restored known-good data; canonical idle (NOT promoted)",
                    flush=True,
                )
        except Exception as e:  # noqa: BLE001
            msg = _scrub(str(e))
            teardown_errors.append(f"quick teardown/rollback: {msg}")
            print(f"!! quick teardown/rollback error: {msg}", flush=True)

    with contextlib.suppress(OSError):
        os.remove(statefile)
    with contextlib.suppress(OSError):
        os.remove(depsfile)
    with contextlib.suppress(OSError):
        os.remove(skipfile)

    print("\n===== RUN SUMMARY =====", flush=True)
    print("mode = quick (LOWER-CONFIDENCE; opt-in; does not gate merge)")
    print(
        f"canonical = {domain}  known-good = {reg.get('version')} (UNCHANGED; quick never promotes)"
    )
    if rolled_back:
        print("rolled-back = yes (restored last-known-good snapshot)")
    for op in ("upgrade", "custom"):
        if op in results:
            suffix = ""
            if op == "custom" and requires_deps_skipped:
                suffix = f"  ({requires_deps_skipped} requires_deps SKIPPED — SSO UNVERIFIED)"
            print(f"  {op:8s}: {results[op]}{suffix}")

    overall = 0
    if any(v == "fail" for v in results.values()) or not warm_ok:
        overall = 1
    if sso_unverified:
        print(
            f"!! DEPS={declared} but setup_custom_tests failed and {requires_deps_skipped} "
            "requires_deps SKIPPED — SSO NOT verified (F2-11)",
            file=sys.stderr,
        )
        overall = 1
    if teardown_errors:
        print(f"!! teardown leaked/erred: {' | '.join(teardown_errors)}", file=sys.stderr)
        overall = 1
    if not results:
        print("no tiers ran", file=sys.stderr)
        return 1
    return overall


def should_promote_canonical(recipe: str, ref: str | None, overall: int, quick: bool) -> bool:
    """Decide (WC5 gate, no side effects of its own) whether this run may advance the canonical.

    All four conditions must hold:
      * the recipe is enrolled, per `canonical.is_enrolled(recipe)` (WARM_CANONICAL);
      * the run was GREEN (`overall == 0`);
      * the run was COLD (`quick` is falsy);
      * the run was on LATEST, i.e. `ref` is empty/None (no PR head: nightly sweep or a manual
        `RECIPE=<r>` run). A PR `!testme` carries REF=PR-head and must never promote.
    """
    enrolled = canonical.is_enrolled(recipe)
    # Plain comparisons/truthiness only — safe to evaluate regardless of enrolment.
    green_cold_on_latest = overall == 0 and not (quick or ref)
    return enrolled and green_cold_on_latest


def promote_canonical(recipe: str, head_ref: str | None) -> None:
    """WC5: (re)seed the canonical for `recipe` at the newest published version.

    Sequence: fetch the recipe and resolve its latest version tag (return early, with a log line,
    when there is none); deploy the canonical domain at that version; wait until healthy; undeploy
    and wait for the undeploy to finish; then call `canonical.seed_canonical` with the version and
    `head_ref` as the commit. Per the WC5 design the previous known-good is replaced only here,
    after a green run. Exceptions from any step propagate to the caller.
    """
    import warm_reconcile as wr

    canonical_domain = canonical.canonical_domain(recipe)
    wr.fetch_recipe(recipe)
    tags = wr.recipe_tags(recipe)
    newest = wr.latest_version(tags)
    if not newest:
        print(f"WC5 promote: no version tags for {recipe} — skip", flush=True)
        return

    recipe_meta = _load_meta(recipe)
    # The cold run already asserted its deploy-count and removed the countfile; drop the env var so
    # this extra deploy does not perturb it.
    os.environ.pop("CCCI_DEPLOY_COUNT_FILE", None)
    banner = f"\n===== WC5 promote-on-green-cold: (re)seed canonical {recipe} @ {newest} ====="
    print(banner, flush=True)

    timeout = int(recipe_meta.get("DEPLOY_TIMEOUT", 900))
    lifecycle.deploy_app(
        recipe, canonical_domain, version=newest, secrets=True, deploy_timeout=timeout
    )

    # Health parameters are read only after the deploy returned (same order as the lookups
    # would happen when passed inline).
    health_kwargs = {
        "ok_codes": tuple(recipe_meta["HEALTH_OK"]),
        "path": recipe_meta["HEALTH_PATH"],
        "deploy_timeout": recipe_meta["DEPLOY_TIMEOUT"],
        "http_timeout": recipe_meta["HTTP_TIMEOUT"],
    }
    lifecycle.wait_healthy(canonical_domain, **health_kwargs)

    abra.undeploy(canonical_domain)
    _wait_undeployed(canonical_domain)
    canonical.seed_canonical(recipe, newest, commit=head_ref)
    done = f"WC5 promote: canonical {recipe} advanced to known-good {newest} (idle, volume retained)"
    print(done, flush=True)


def main() -> int:
    """Run one recipe CI run end to end and return the process exit code.

    Parameters come from the environment: RECIPE (required), REF, SRC, VERSION, STAGES, PR, and
    CCCI_QUICK / MODE for the opt-in quick lane.

    Flow:
      1. Take the per-recipe lock, fetch the recipe, snapshot its repo-local tests, load its meta.
      2. If quick mode is requested and a canonical exists, delegate to `run_quick` and return its
         result; otherwise continue with the full COLD run.
      3. Create the run-scoped temp files (deploy counter, op state, deps state, skip report) and
         export their paths via env vars.
      4. Deploy the recipe once, run the requested tiers against that deployment, and ALWAYS tear
         down (recipe first, then deps) in `finally`.
      5. Assert the deploy count, print the run summary, and compute the verdict.
      6. Best-effort extras that never change the verdict: results.json, summary card + badge, and
         (for an eligible green cold run) canonical promotion.

    Returns:
        2 when RECIPE is unset; 1 when any tier failed, the deploy count is off, teardown leaked,
        the SSO claim went unverified, or no tier ran; otherwise 0 (or whatever `run_quick`
        returned on the quick path).
    """
    recipe = os.environ.get("RECIPE")
    if not recipe:
        print("RECIPE env is required", file=sys.stderr)
        return 2
    ref = os.environ.get("REF") or None
    src = os.environ.get("SRC") or None
    target = os.environ.get("VERSION") or None
    stages = {
        s.strip() for s in os.environ.get("STAGES", ",".join(ALL_STAGES)).split(",") if s.strip()
    }

    print(
        f"== cc-ci run: recipe={recipe} ref={ref} pr={os.environ.get('PR', '0')} stages={sorted(stages)}"
    )
    # Concurrent-run safety: runs of the SAME recipe serialise on a per-recipe flock — they share
    # ONE ~/.abra/recipes/<recipe> working tree which fetch_recipe (below) rm-rf's/reclones and the
    # upgrade tier git-checkouts mid-run. Must be taken BEFORE fetch_recipe. Different recipes run
    # in parallel (capacity=2). The reference must stay alive for the whole run: the kernel drops
    # the flock when the fd closes (including on any crash/SIGKILL — no stale-lock failure mode).
    _recipe_lock = lifecycle.acquire_recipe_lock(recipe)  # noqa: F841
    fetch_recipe(recipe, ref, src)
    # The PR-head commit the upgrade tier re-checks out for the chaos redeploy to the code under test
    # (HC1). Prefer the explicit PR head sha ($REF) — robust + exact; fall back to the recipe checkout
    # HEAD (the catalogue current) for a non-PR `!testme`. Captured before any version-tag checkout.
    head_ref = ref or lifecycle.recipe_head_commit(recipe)
    repo_local = snapshot_recipe_tests(recipe)
    meta = _load_meta(recipe)

    # WC4/WC7: opt-in `--quick` fast lane. Requires an existing data-warm canonical; if none, fall
    # back cleanly to the full COLD run below so the PR is still tested (DECISIONS Phase-2w).
    if os.environ.get("CCCI_QUICK") == "1" or os.environ.get("MODE") == "quick":
        if canonical.has_canonical(recipe):
            return run_quick(recipe, ref, head_ref, repo_local, meta)
        print(
            f"MODE=quick requested but no canonical for {recipe} — falling back to COLD run "
            "(no-canonical fallback, WC7)",
            flush=True,
        )

    domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)

    # Deploy-once base version: previous published version when the upgrade tier will run and one
    # exists (so upgrade goes previous→target in place), else the target (current/$REF). (DECISIONS.)
    # A recipe may override the base via recipe_meta UPGRADE_BASE_VERSION when the harness default
    # (recipe_versions[-2]) is NOT the PR's true predecessor — e.g. a PR that adds a version ABOVE the
    # newest published tag, where the correct base is [-1] (the newest published), not [-2]. The
    # override must be an exact published version tag (deployed as a pinned base). (Adversary §7.1.)
    want_upgrade = "upgrade" in stages
    prev = (
        (meta.get("UPGRADE_BASE_VERSION") or lifecycle.previous_version(recipe))
        if want_upgrade
        else None
    )
    base = prev or target
    backup_cap = generic.backup_capable(recipe, meta)
    hook = discovery.install_steps(recipe, repo_local)

    # Deploy-count guard (DG4.1): exactly one deploy_app() per run.
    countfile = os.path.join(tempfile.gettempdir(), f"ccci-deploys-{domain}")
    with open(countfile, "w") as f:
        f.write("0")
    os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile

    # Phase 3 (R1/R3): per-run artifact dir + JUnit dir. The tiers emit JUnit per file and append a
    # {tier,source,file,rc,junit} record; after the run we assemble results.json (per-stage/per-test +
    # level) into the artifact dir. Best-effort — never changes the verdict (R7).
    run_artifact_dir = os.path.join(results_mod.runs_dir(), results_mod.run_id())
    junit_dir = os.path.join(run_artifact_dir, "junit")
    records: list[dict] = []
    with contextlib.suppress(OSError):
        os.makedirs(junit_dir, exist_ok=True)

    # Run-scoped op state (HC3): the orchestrator records op results (pre-upgrade identity, backup
    # snapshot_id) here for the assertion tiers (generic + overlay) to read via generic.op_state().
    statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
    with open(statefile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_OP_STATE_FILE"] = statefile
    op_state: dict = {}

    # Run-scoped dep state (Phase 2 Q2.3, refined per operator-2026-05-28 SSO-dep plan §1):
    # deps now deploy AFTER generic tiers (between RESTORE and CUSTOM) so a failed dep deploy
    # cannot break the generic-tier signal. The `setup_custom_tests` step deploys each dep + runs
    # `tests/<recipe>/setup_custom_tests.sh` to wire OIDC env via in-place redeploy.
    # `$CCCI_DEPS_FILE` is written with the full creds dict the hook script needs (jq-readable).
    depsfile = os.path.join(tempfile.gettempdir(), f"ccci-deps-{domain}.json")
    with open(depsfile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_DEPS_FILE"] = depsfile
    # F2-11: conftest appends the count of requires_deps tests it skips (deps-not-ready) here.
    skipfile = os.path.join(tempfile.gettempdir(), f"ccci-depskip-{domain}.txt")
    with contextlib.suppress(OSError):
        os.remove(skipfile)
    os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile
    declared = deps_mod.declared_deps(recipe)
    # Q3.2a: a recipe that tolerates OIDC env at first boot AND whose deps are live-warm wires OIDC
    # at INSTALL time (provision the realm BEFORE the single deploy; install_steps.sh writes the env
    # into it) instead of the post-deploy in-place `--chaos` redeploy — which is flaky on the heavy
    # 12-service lasuite-drive stack (collabora WOPI race; see JOURNAL Step 0). Opt-in per recipe.
    oidc_at_install = bool(meta.get("OIDC_AT_INSTALL")) and bool(declared)
    if declared:
        when = "BEFORE deploy (install-time OIDC)" if oidc_at_install else "AFTER generic tiers"
        print(f"\n===== DEPS declared (provision {when}): {declared} =====", flush=True)
    deps_state: dict[str, dict] = {}  # new shape: recipe→entry dict (sso-dep plan §1)
    deps_ready = True
    deps_not_ready_reason: str = ""

    results: dict[str, str] = {}
    lifecycle.janitor()
    dep_teardown_error: str | None = None
    screenshot_rel: str | None = None  # Phase 3 U1 (R4): set once the app screenshot is captured
    try:
        # ---- (Q3.2a) install-time OIDC: provision the warm-dep realm BEFORE the single deploy so
        # install_steps.sh can read $CCCI_DEPS_FILE and wire the OIDC env into that one deploy. On
        # failure we mark deps-not-ready but STILL deploy the recipe alone (install_steps.sh no-ops
        # on an empty deps file) so the generic tiers run; the OIDC custom test then skips → F2-11. ----
        if oidc_at_install:
            print(
                f"\n===== install-time OIDC: provisioning deps {declared} BEFORE deploy =====",
                flush=True,
            )
            try:
                deps_state = _provision_deps(recipe, domain, ref, declared)
                print(
                    "  install-time OIDC: deps provisioned; install_steps.sh will wire OIDC env",
                    flush=True,
                )
            except Exception as e:  # noqa: BLE001 — isolated; recipe still deploys, OIDC test skips
                deps_ready = False
                deps_not_ready_reason = _scrub(str(e))[:300]
                print(
                    f"!! install-time dep provisioning failed (deps-not-ready): {deps_not_ready_reason}",
                    flush=True,
                )

        # ---- deploy RECIPE FIRST, alone (no deps yet — generic tiers run recipe-only) ----
        try:
            lifecycle.deploy_app(
                recipe,
                domain,
                version=base,
                secrets=True,
                install_steps_hook=hook,
                deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", 900)),
            )
            lifecycle.wait_healthy(
                domain,
                ok_codes=tuple(meta["HEALTH_OK"]),
                path=meta["HEALTH_PATH"],
                deploy_timeout=meta["DEPLOY_TIMEOUT"],
                http_timeout=meta["HTTP_TIMEOUT"],
            )
            # Recipe READY_PROBE (e.g. lasuite-drive collabora WOPI discovery) — readiness beyond
            # replica convergence + app HEALTH_PATH; no-op for recipes without one.
            lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.get("DEPLOY_TIMEOUT", 900)))
            deploy_ok = True
        except Exception as e:  # noqa: BLE001 — a failed deploy is a reported INSTALL failure
            print(f"!! deploy/readiness failed: {e}", flush=True)
            deploy_ok = False

        # ---- Phase 3 U1 (R4): capture a real app screenshot while the app is up, at the cleanest
        # "freshly installed + healthy" moment (before any tier mutates state and before teardown).
        # Placed OUTSIDE the deploy try/except so a screenshot issue can NEVER flip deploy_ok.
        # Secret-safe by default (landing page, never a credentials page; recipes opt into a
        # post-login view via a SCREENSHOT meta hook). Best-effort — capture() swallows all errors and
        # returns None, so this never blocks or fails the run (R7). None → results.json `screenshot`
        # stays null → the card shows the "no screenshot" placeholder (cosmetics never change verdict).
        if deploy_ok:
            # capture() already swallows all errors → None; the extra try/except is defense-in-depth
            # (U5 R7 hardening) so a screenshot can NEVER fail/crash the run even if that internal
            # contract regresses or a recipe SCREENSHOT hook raises. Cosmetics never change the verdict.
            try:
                shot = screenshot_mod.capture(
                    domain, screenshot_mod.screenshot_path(run_artifact_dir), recipe_meta=meta
                )
                screenshot_rel = os.path.basename(shot) if shot else None
            except Exception as e:  # noqa: BLE001 — screenshot is cosmetic; never fail a run on it (R7)
                print(
                    f"!! screenshot capture raised (non-fatal, verdict unaffected): {_scrub(str(e))}",
                    flush=True,
                )

        # ---- INSTALL tier (always; additive generic + overlay, no op) ----
        if "install" in stages:
            results["install"] = (
                run_lifecycle_tier(
                    recipe,
                    "install",
                    repo_local,
                    domain,
                    meta,
                    head_ref,
                    op_state,
                    records=records,
                    junit_dir=junit_dir,
                )
                if deploy_ok
                else "fail"
            )

        if deploy_ok:
            # ---- UPGRADE tier (op once → generic + overlay assert) ----
            if "upgrade" in stages:
                results["upgrade"] = (
                    run_lifecycle_tier(
                        recipe,
                        "upgrade",
                        repo_local,
                        domain,
                        meta,
                        head_ref,
                        op_state,
                        records=records,
                        junit_dir=junit_dir,
                    )
                    if prev
                    else "skip"  # only one published version → nothing to upgrade from
                )
            # ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
            if "backup" in stages:
                results["backup"] = (
                    run_lifecycle_tier(
                        recipe,
                        "backup",
                        repo_local,
                        domain,
                        meta,
                        head_ref,
                        op_state,
                        records=records,
                        junit_dir=junit_dir,
                    )
                    if backup_cap
                    else "skip"
                )
            if "restore" in stages:
                results["restore"] = (
                    run_lifecycle_tier(
                        recipe,
                        "restore",
                        repo_local,
                        domain,
                        meta,
                        head_ref,
                        op_state,
                        records=records,
                        junit_dir=junit_dir,
                    )
                    if backup_cap
                    else "skip"
                )
            # ---- setup_custom_tests step (NEW, operator-2026-05-28 SSO-dep plan §3.2) ----
            # Deploy each declared dep + wire OIDC env into the parent app via the per-recipe
            # setup_custom_tests.sh hook + in-place redeploy. Failure here marks deps-not-ready
            # but does NOT abort the run — @pytest.mark.requires_deps tests skip with reason;
            # non-deps custom tests still run normally.
            if declared and not oidc_at_install:
                # LEGACY post-deploy path: provision deps AFTER generic tiers, then wire OIDC env
                # into the parent via the setup_custom_tests.sh hook + an in-place `--chaos` redeploy.
                print("\n===== setup_custom_tests: deps + OIDC wiring =====", flush=True)
                try:
                    deps_state = _provision_deps(recipe, domain, ref, declared)
                    # Run the per-recipe post-deps hook (jq-driven OIDC wiring + in-place redeploy)
                    _run_setup_custom_tests_hook(recipe, domain, depsfile)
                except Exception as e:  # noqa: BLE001 — setup failure is ISOLATED to dep-marked tests
                    deps_ready = False
                    deps_not_ready_reason = _scrub(str(e))[:300]
                    print(
                        f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
                        flush=True,
                    )
            elif declared and oidc_at_install and deps_ready:
                # INSTALL-TIME path (Q3.2a): deps were provisioned BEFORE the single deploy and the
                # install-tier install_steps.sh hook already wired OIDC env into that one deploy —
                # so NO re-provision, NO reconverge here. Run only the post-deploy setup hook
                # (e.g. lasuite-drive's minio-createbuckets one-shot), which needs the live stack.
                print("\n===== post-deploy setup (OIDC already wired at install) =====", flush=True)
                try:
                    _run_setup_custom_tests_hook(recipe, domain, depsfile)
                except Exception as e:  # noqa: BLE001 — isolated to dep-marked / state-dependent tests
                    deps_ready = False
                    deps_not_ready_reason = _scrub(str(e))[:300]
                    print(
                        f"!! post-deploy setup failed: {deps_not_ready_reason}",
                        flush=True,
                    )

            # ---- CUSTOM tier ----
            if "custom" in stages:
                # Pass deps-ready state via env; conftest.py skips @pytest.mark.requires_deps
                # tests when CCCI_DEPS_READY=0.
                os.environ["CCCI_DEPS_READY"] = "1" if deps_ready else "0"
                os.environ["CCCI_DEPS_NOT_READY_REASON"] = deps_not_ready_reason
                results["custom"] = run_custom(
                    recipe, repo_local, domain, records=records, junit_dir=junit_dir
                )
        else:
            # install failed → the shared deployment is dead; remaining tiers cannot run on it.
            for op in ("upgrade", "backup", "restore", "custom"):
                if op in stages:
                    results[op] = "skip"
    finally:
        # Teardown the recipe under test FIRST, then deps in reverse declaration order.
        # Parent verify=False (Phase 1d): keep as-is so a parent residual doesn't mask a tier
        # failure. Dep teardown uses verify=True via teardown_deps (F2-5 fix); failures are
        # captured into dep_teardown_error and surfaced in the run summary + exit code, but
        # we still print the diagnosable summary first.
        lifecycle.teardown_app(domain, verify=False)
        if deps_state:
            print("\n===== DEPS teardown =====", flush=True)
            # Flatten the dict-shape state in declaration order; teardown_deps reverses for cold.
            if isinstance(deps_state, dict):
                ordered = [deps_state[d] for d in declared if d in deps_state]
            else:
                ordered = deps_state
            # WC1: warm deps are NOT undeployed — we only delete the per-run realm on the shared
            # live-warm provider (the app stays up for the next run). Cold deps undeploy as before.
            warm_entries = [e for e in ordered if e.get("warm")]
            cold_entries = [e for e in ordered if not e.get("warm")]
            for e in warm_entries:
                try:
                    from harness import sso

                    sso.delete_keycloak_realm(e["domain"], e["realm"])
                    print(
                        f"  dep: deleted per-run realm {e['realm']} on warm {e['recipe']}",
                        flush=True,
                    )
                except Exception as ex:  # noqa: BLE001 — a leaked realm is a teardown failure (§9)
                    # ACCUMULATE rather than overwrite: with several warm deps (or a later cold
                    # teardown failure) every leak must reach the final summary, not just the last.
                    msg = f"warm realm delete failed for {e.get('realm')}: {ex}"
                    dep_teardown_error = (
                        f"{dep_teardown_error} | {msg}" if dep_teardown_error else msg
                    )
                    print(f"!! {msg}", flush=True)
            try:
                deps_mod.teardown_deps(cold_entries)
            except lifecycle.TeardownError as e:
                # Append to any warm-realm failure recorded above instead of replacing it.
                msg = str(e)
                dep_teardown_error = (
                    f"{dep_teardown_error} | {msg}" if dep_teardown_error else msg
                )
                print(f"!! {msg}", flush=True)

    # ---- deploy-count assertion (DG4.1) ----
    with open(countfile) as f:
        deploy_count = int(f.read().strip() or "0")
    os.remove(countfile)
    with contextlib.suppress(OSError):
        os.remove(statefile)
    with contextlib.suppress(OSError):
        os.remove(depsfile)
    # F2-11: sum the requires_deps skip counts conftest recorded across the custom files.
    requires_deps_skipped = 0
    try:
        with open(skipfile) as f:
            requires_deps_skipped = sum(int(x) for x in f.read().split() if x.strip())
    except OSError:
        pass
    with contextlib.suppress(OSError):
        os.remove(skipfile)

    # ---- per-op summary (DG6 feed) ----
    # SSO-dep plan §1: DG4.1 generalised — one `abra app new` per app in the run (recipe + each
    # COLD dep). In-place reconfigure-and-redeploy (the setup_custom_tests step's
    # `abra app deploy --force --chaos`) is NOT a fresh `app_new` and does NOT increment the count.
    # WC1: a live-warm dep (keycloak) is NOT deployed by the run — it only gets a per-run realm — so
    # warm deps contribute 0. So expected = 1 + (number of COLD deps that actually got deployed).
    _dep_entries = deps_state.values() if isinstance(deps_state, dict) else (deps_state or [])
    deps_deployed_count = sum(
        1 for e in _dep_entries if not (isinstance(e, dict) and e.get("warm"))
    )
    expected_deploy_count = 1 + deps_deployed_count
    print("\n===== RUN SUMMARY =====", flush=True)
    print(f"deploy-count = {deploy_count} (expect {expected_deploy_count})")
    if deps_state:
        deps_list_for_summary = (
            list(deps_state.keys())
            if isinstance(deps_state, dict)
            else [d.get("recipe", "?") for d in deps_state]
        )
        print(f"  deps deployed: {deps_list_for_summary}")
        if not deps_ready:
            print(f"  deps-not-ready: {deps_not_ready_reason}")
    order = [s for s in ALL_STAGES if s in results]
    for op in order:
        suffix = ""
        # F2-11: annotate the custom tier when requires_deps (SSO) tests were skipped, so a reader
        # of the summary can't mistake a green custom tier for "SSO verified".
        if op == "custom" and requires_deps_skipped:
            suffix = f"  ({requires_deps_skipped} requires_deps SKIPPED: deps-not-ready — SSO UNVERIFIED)"
        print(f"  {op:8s}: {results[op]}{suffix}")

    overall = 0
    if deploy_count != expected_deploy_count:
        print(
            f"!! deploy-count {deploy_count} != {expected_deploy_count} (DG4.1 violation)",
            file=sys.stderr,
        )
        overall = 1
    if dep_teardown_error:
        # F2-5: dep teardown leaks violate §9 (teardown sacred); fail the run loudly.
        print(f"!! dep teardown leaked state: {dep_teardown_error}", file=sys.stderr)
        overall = 1
    if any(v == "fail" for v in results.values()):
        overall = 1
    # F2-11: a deps-declaring recipe whose setup_custom_tests failed has NOT verified its SSO/OIDC
    # claim — its requires_deps tests SKIPPED (a skip-only file exits 0, so without this the run
    # would report GREEN). Fail the run for that recipe; generic-tier results above are untouched.
    if sso_dep_unverified(declared, deps_ready, requires_deps_skipped):
        print(
            f"!! recipe declares DEPS={declared} but setup_custom_tests failed and "
            f"{requires_deps_skipped} requires_deps (SSO) test(s) were SKIPPED — SSO claim NOT "
            f"verified; failing run (F2-11). deps-not-ready: {deps_not_ready_reason}",
            file=sys.stderr,
        )
        overall = 1
    if not results:
        print("no tiers ran", file=sys.stderr)
        return 1

    # ---- Phase 3 (R1/R3): assemble results.json (per-stage/per-test + computed level). Best-effort:
    # a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ----
    data: dict | None = None
    try:
        clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error
        data = results_mod.build_results(
            recipe=recipe,
            version=target or (head_ref[:12] if head_ref else None),
            pr=os.environ.get("PR", "0"),
            ref=ref,
            records=records,
            results=results,
            backup_capable=backup_cap,
            clean_teardown=clean_teardown,
            no_secret_leak=True,  # narrowed below by an actual scan of the serialised artifact
            screenshot=screenshot_rel,  # Phase 3 U1 (R4): relative PNG name iff capture succeeded
            finished_ts=time.time(),
            expected_na=meta.get("EXPECTED_NA"),  # declared intentional-skip map (recipe_meta)
        )
        # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
        blob = json.dumps(data)
        leaked = any(v in blob for v in _REDACT)
        data["flags"]["no_secret_leak"] = not leaked
        if leaked:
            print(
                "!! results.json leak-scan: a known secret value appeared — scrubbing flag set False",
                file=sys.stderr,
            )
        path = results_mod.write_results(data)
        print(
            f"results.json written: {path} (level={data['level']}"
            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
            flush=True,
        )
        # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
        # but is not in the recipe's intentional list — either add the missing coverage or declare it.
        for rung in data.get("skips", {}).get("unintentional", []):
            print(
                f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
                f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
                f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
                flush=True,
            )
    except Exception as e:  # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
        print(
            f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
            file=sys.stderr,
        )

    # ---- Phase 3 U2 (R3/R6): render the summary CARD (HTML→PNG) + level BADGE (SVG) from the
    # results dict into the run artifact dir, alongside results.json + screenshot.png. The card
    # REPORTS results.json verbatim — it computes nothing, so it can never look greener than the tiers
    # (cardinal invariant, plan §6). Separate best-effort block (results.json is already written by
    # here) — any failure is swallowed and NEVER changes `overall` (R7); a render failure simply means
    # no summary.png, and U3/U4 fall back to text. ----
    if data is not None:
        try:
            html_path = os.path.join(run_artifact_dir, "summary.html")
            with open(html_path, "w", encoding="utf-8") as f:
                f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
            png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
            capped = data.get("level_cap_rung")
            sk = data.get("skips", {})
            cap_skip = (
                "intentional"
                if capped in (sk.get("intentional") or {})
                else "unintentional"
                if capped in (sk.get("unintentional") or [])
                else ""
            )
            with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
                f.write(
                    card_mod.level_badge_svg(
                        data["level"], data.get("level_cap_reason", ""), cap_skip
                    )
                )
            print(
                f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
                f"badge.svg written into {run_artifact_dir}",
                flush=True,
            )
        except Exception as e:  # noqa: BLE001 — card/badge are cosmetic; never fail a run (R7)
            print(f"!! summary card/badge render failed (non-fatal): {_scrub(str(e))}", flush=True)

    # WC5 promote-on-green-cold: a GREEN COLD run on LATEST (no PR head) of an enrolled
    # (WARM_CANONICAL) recipe advances/seeds the canonical. ONLY cold-on-latest advances it (a PR
    # `!testme` carries REF and must NOT promote; `--quick` never promotes — handled in run_quick).
    # Non-fatal: a promote failure leaves the OLD known-good intact (never lose it) and is logged.
    if should_promote_canonical(recipe, ref, overall, quick=False):
        try:
            promote_canonical(recipe, head_ref)
        except Exception as e:  # noqa: BLE001 — promote is a post-green bonus; never fail a green run
            print(
                f"!! WC5 promote failed (non-fatal; known-good unchanged): {_scrub(str(e))}",
                flush=True,
            )

    return overall


if __name__ == "__main__":
    # Script entry point: main()'s integer return value becomes the process exit status.
    sys.exit(main())