Revert "feat(lvl5): P1 — 5-rung ladder (L5=abra recipe lint) + de-capped level semantics"

This reverts commit e219a7891d.
2026-06-11 07:46:57 +00:00
parent 589943f46e
commit cd62743055
12 changed files with 336 additions and 1065 deletions
--- a/runner/harness/level.py
+++ b/runner/harness/level.py
@ -1,67 +1,67 @@
-"""The level ladder — five rungs, no capping (phase lvl5, plan-phase-lvl5-lint-rung.md).
+"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1).

-A single integer **level** summarising how far up the quality ladder a recipe run climbed:
+A single integer **level** summarising how far up the quality ladder a recipe run climbed, with
+YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean
+PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
+the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
+a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
+— with a recorded reason so the level is *fair*, not inflated.
+
+The ladder is the FOUR essential rungs every recipe is held to:
  L0 — install failed / app never became healthy.
  L1 — Installs: deploys + passes health/readiness.
  L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
  L3 — Backup/restore: seeded data survives backup → wipe → restore.
  L4 — Functional: recipe-specific functional tests pass.
-  L5 — Lint: `abra recipe lint` passes against the exact ref under test.

-Semantics (operator-decided 2026-06-11, recorded in DECISIONS.md — replaces the Phase-3
-"N/A caps" rule):
+Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL**
+capabilities — they are NOT part of the level ladder and never cap it. They still run when present
+(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a
+recipe without an SSO surface or without repo-local tests is simply not penalised on the level.

-    level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"; 0 if none.
+This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
+test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
+(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status
+dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).

-A rung has one of FOUR statuses:
-  "pass"  — exercised and passed.
-  "fail"  — exercised and failed. Blocks: no rung above it can count.
-  "skip"  — INTENTIONAL skip: the rung genuinely does not apply to this recipe, from a
-            declared or structural fact (not backup-capable; only one published version;
-            declared in recipe_meta.EXPECTED_NA). Does NOT stop the climb.
-  "unver" — UNINTENTIONAL not-verified: the rung SHOULD have run but didn't (infra error,
-            missing tool, harness exception, prior-stage abort, timeout). Blocks exactly
-            like a fail — the level never rises above a rung that wasn't actually checked.
-
-The per-rung table (results.json `rungs`, card, dashboard) is the SOLE carrier of "why isn't
-this level higher" — there is no cap_reason. The classification of every N/A source into
-skip-vs-unver lives in derive_rungs (results.py) and is tabulated in DECISIONS.md; anything
-unclassifiable defaults to "unver" (conservative: never claim what wasn't checked).
-
-Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) remain
-OPTIONAL capabilities — not rungs, never counted (SSO is still enforced for the run VERDICT
-via the deps/SSO checks in run_recipe_ci.py).
-
-This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the
-unit test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`).
+Rung status vocabulary (each rung ∈ these three):
+  "pass" — the rung was exercised and passed.
+  "fail" — the rung was exercised and failed.
+  "na"   — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
+           not backup-capable) or was not verified (tier skipped / never ran). N/A is NOT a failure,
+           but it DOES cap the climb (distinct cap_reason) so the level never overstates what was verified.
 """

 from __future__ import annotations

-# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install
-# itself did not pass. These five are the ESSENTIAL rungs — integration/recipe-local are
-# optional and deliberately NOT in this tuple.
-RUNGS = ("install", "upgrade", "backup_restore", "functional", "lint")
+# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
+# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the
+# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple.
+RUNGS = ("install", "upgrade", "backup_restore", "functional")

-# Human-readable label per rung level, for the summary card / docs.
+# Human-readable label per rung level, for cap_reason + the summary card.
 RUNG_LABEL = {
    1: "install (deploy + health)",
    2: "upgrade (prev published → PR)",
    3: "backup/restore (data integrity)",
    4: "functional (recipe-specific tests)",
-    5: "lint (abra recipe lint)",
 }

-VALID = {"pass", "fail", "skip", "unver"}
+VALID = {"pass", "fail", "na"}


-def compute_level(rungs: dict[str, str]) -> int:
-    """Map a rung-status dict → level 0..5.
+def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
+    """Map a rung-status dict → (level 0..4, cap_reason).

-    `rungs` must contain a status in VALID for every name in RUNGS. The level is the highest
-    i such that rungs[i] == "pass" and every rung below i is "pass" or "skip" (an intentional
-    skip does not stop the climb). A "fail" or "unver" rung blocks: rungs above it cannot
-    count, however green. 0 when no rung qualifies.
+    `rungs` must hold a status in {"pass","fail","na"} for every name in RUNGS, else ValueError. The
+    level is the highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the
+    climb. L0 is returned when the install rung itself is not "pass" (install failed / never healthy).
+
+    cap_reason explains where the climb stopped:
+      - "" (empty) when the recipe earned the top rung (L4, full clean climb).
+      - "L<k> <label> FAILED" when a rung was exercised and failed.
+      - "L<k> <label> N/A" when a rung does not apply to this recipe.
+    Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
    """
    for name in RUNGS:
        st = rungs.get(name)
@ -69,44 +69,52 @@ def compute_level(rungs: dict[str, str]) -> int:
            raise ValueError(
                f"rung {name!r} has invalid status {st!r} (expect one of {sorted(VALID)})"
            )
+
+    # L0: install did not pass.
+    if rungs["install"] != "pass":
+        if rungs["install"] == "fail":
+            return 0, "L1 " + RUNG_LABEL[1] + " FAILED"
+        # install N/A is not a real-world state for a deploy run, but handle it for totality.
+        return 0, "L1 " + RUNG_LABEL[1] + " N/A"
+
+    # Climb: stop at the first rung that is not a clean pass.
    level = 0
    for idx, name in enumerate(RUNGS, start=1):
-        st = rungs[name]
-        if st == "pass":
+        if rungs[name] == "pass":
            level = idx
-        elif st == "skip":
            continue
-        else:  # fail / unver — nothing above this rung can count
-            break
-    return level
+        # first non-pass rung — caps the climb
+        kind = "FAILED" if rungs[name] == "fail" else "N/A"
+        return level, f"L{idx} {RUNG_LABEL[idx]} {kind}"
+
+    # Full clean climb to the top rung.
+    return level, ""


 def backup_restore_status(backup: str | None, restore: str | None, backup_capable: bool) -> str:
    """Collapse the backup + restore tier results into the single L3 rung status.

-    Not backup-capable (a declared/structural fact: no backupbot labels, or
-    recipe_meta.BACKUP_CAPABLE=False) → "skip" — the rung genuinely does not apply.
-    Otherwise both tiers must pass for the rung to pass; a fail in either tier fails it; any
-    other shape (tier skipped or never ran while backup-capable — e.g. a prior-stage abort)
-    is "unver": the rung should have been verified and wasn't.
+    Both tiers must pass for the rung to pass (the rung is "seeded data survives backup→wipe→restore",
+    which is only verified if BOTH the backup and the restore tier are green). If the recipe is not
+    backup-capable the rung is N/A (caps at L2, recorded). Otherwise a fail in either tier fails the
+    rung, and any other shape (a tier skipped or never run) is unverified and is also reported as N/A.
    """
    if not backup_capable:
-        return "skip"
+        return "na"
    vals = {backup, restore}
    if "fail" in vals:
        return "fail"
    if backup == "pass" and restore == "pass":
        return "pass"
-    return "unver"
+    # any skip/None while backup-capable → not verified → treat as N/A (cannot claim L3)
+    return "na"


 def tier_to_rung(status: str | None) -> str:
-    """Map a single tier result ('pass'|'fail'|'skip'|None) to a rung status, with NO
-    intentionality information: a tier that did not produce a pass/fail is "unver" (it should
-    have run and wasn't verified). The caller (derive_rungs) upgrades "unver" to "skip" where
-    a declared/structural fact makes the skip intentional — never the other way around."""
+    """Map a single tier result ('pass'|'fail'|'skip'|None) to a rung status. 'skip'/None → 'na'
+    (the tier did not apply / did not run), so it caps the climb without being counted as a failure."""
    if status == "pass":
        return "pass"
    if status == "fail":
        return "fail"
-    return "unver"
+    return "na"