feat(3 U0.1): pure level() ladder mapper (L0-L6, gap-caps) + unit tests

Phase-3 R1 foundation. harness.level.compute_level(rungs)->(level,cap_reason) with YunoHost gap-caps semantics: level = highest rung 1..L all clean PASS; first non-PASS (FAIL or N/A) caps, recorded in cap_reason. N/A caps like fail but distinctly (L5 'no integration surface' example). Helpers backup_restore_status + tier_to_rung. 15 unit tests incl U0 gate cases (L4-pass, L2-cap). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 05:46:23 +00:00
parent 805fbba2ad
commit 9773e3ff63
2 changed files with 251 additions and 0 deletions
--- a/runner/harness/level.py
+++ b/runner/harness/level.py
@ -0,0 +1,118 @@
+"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1).
+
+A single integer **level** summarising how far up the quality ladder a recipe run climbed, with
+YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean
+PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
+the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
+a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
+(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly
+this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
+
+The ladder (§4.1):
+  L0 — install failed / app never became healthy.
+  L1 — Installs: deploys + passes health/readiness.
+  L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
+  L3 — Backup/restore: seeded data survives backup → wipe → restore.
+  L4 — Functional: recipe-specific functional tests pass.
+  L5 — Integration: SSO/OIDC + cross-app integration tests pass.
+  L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged.
+
+This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
+test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
+(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into
+the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
+
+Rung status vocabulary (each rung ∈ these three):
+  "pass" — the rung was exercised and passed.
+  "fail" — the rung was exercised and failed.
+  "na"   — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
+           not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a
+           failure, but it DOES cap the climb (with a distinct cap_reason) so the level never
+           overstates what was actually verified.
+"""
+
+from __future__ import annotations
+
+# The climbable rungs in ascending order (1-based position = level). install (L1) is the
+# foundation; L0 means install did not pass. Each later rung needs every earlier rung to PASS.
+RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local")
+
+# Label per rung level (key = 1-based position in RUNGS), for cap_reason + the summary card.
+RUNG_LABEL = {
+    1: "install (deploy + health)",
+    2: "upgrade (prev published → PR)",
+    3: "backup/restore (data integrity)",
+    4: "functional (recipe-specific tests)",
+    5: "integration (SSO/OIDC + cross-app)",
+    6: "recipe-local (recipe repo tests/)",
+}
+
+VALID = {"pass", "fail", "na"}  # the complete rung-status vocabulary; anything else is rejected
+
+
+def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
+    """Collapse per-rung statuses into `(level, cap_reason)` using gap-caps semantics.
+
+    Every name in RUNGS must map to one of "pass", "fail" or "na" in `rungs`. Rungs are walked in
+    ladder order and the level is the number of leading rungs that are "pass"; the walk ends at
+    the first rung that is anything else, even if later rungs passed. Level 0 therefore means the
+    install rung itself was not a "pass".
+
+    cap_reason names the rung that ended the walk (the binding constraint):
+      - "" when all six rungs passed (level 6).
+      - "L<k> <label> FAILED" when rung k was exercised and failed.
+      - "L<k> <label> N/A" when rung k does not apply to this recipe.
+
+    Raises:
+        ValueError: if any rung in RUNGS is missing from `rungs` or has a status outside VALID.
+    """
+    # Validate the whole dict up front so a bad status on a high rung is reported even when a
+    # lower rung would already have capped the climb.
+    for rung_name in RUNGS:
+        status = rungs.get(rung_name)
+        if status not in VALID:
+            raise ValueError(
+                f"rung {rung_name!r} has invalid status {status!r} (expect one of {sorted(VALID)})"
+            )
+
+    # One ascending pass covers every case, L0 included: if install (position 1) is not a
+    # pass, `earned` is still 0 when we return.
+    earned = 0
+    for position, rung_name in enumerate(RUNGS, start=1):
+        status = rungs[rung_name]
+        if status != "pass":
+            verdict = "FAILED" if status == "fail" else "N/A"
+            return earned, f"L{position} {RUNG_LABEL[position]} {verdict}"
+        earned = position
+
+    # No rung stopped the climb: top of the ladder, nothing to report.
+    return earned, ""
+
+
+def backup_restore_status(backup: str | None, restore: str | None, backup_capable: bool) -> str:
+    """Fold the backup and restore tier results into one status for the L3 rung.
+
+    L3 means "seeded data survives backup→wipe→restore", so it is only a "pass" when both tiers
+    passed. Checks run in this order: a recipe that is not backup-capable is "na" whatever the
+    tiers report; otherwise a "fail" in either tier is "fail"; otherwise two passes are "pass".
+    Anything left over (a skip or None on either side) is "na": the rung was not verified, so
+    L3 cannot be claimed.
+    """
+    if not backup_capable:
+        return "na"
+    outcomes = frozenset((backup, restore))
+    if "fail" in outcomes:
+        return "fail"
+    # Both tiers being "pass" collapses the set to exactly {"pass"}.
+    verified = outcomes == {"pass"}
+    return "pass" if verified else "na"
+
+
+def tier_to_rung(status: str | None) -> str:
+    """Translate one tier result into a rung status: "pass" and "fail" carry over unchanged,
+    and anything else ('skip', None: the tier did not apply or did not run) becomes "na", which
+    caps the climb without being counted as a failure."""
+    for carried_over in ("pass", "fail"):
+        if status == carried_over:
+            return carried_over
+    return "na"
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@ -0,0 +1,133 @@
+"""Unit tests for the Phase-3 level ladder (harness.level), plan-phase3-results-ux.md §4.1 / R1.
+
+Pure function — no I/O. Proves the YunoHost gap-caps-the-level semantics, including the U0 gate
+acceptance: a recipe that climbs through L4 reports 4, and one that fails at L2 is capped at 1
+(the level just below the failed rung). Run cold with:  cc-ci-run -m pytest tests/unit/test_level.py -q
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
+from harness import level as L  # noqa: E402
+
+
+def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pass",
+           integration="pass", recipe_local="pass"):
+    """Build a complete rung-status dict for compute_level.
+
+    Every rung defaults to "pass"; tests override only the rungs they care about.
+    """
+    return dict(
+        install=install, upgrade=upgrade, backup_restore=backup_restore,
+        functional=functional, integration=integration, recipe_local=recipe_local,
+    )
+
+
+# ---- full clean climb, then the U0 gate: L4-pass and L2-cap ----
+
+def test_full_clean_climb_to_L6():
+    """All six rungs pass: top level, and nothing to report as a cap."""
+    level, why = L.compute_level(_rungs())
+    assert (level, why) == (6, "")
+
+
+def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
+    """U0 gate: functional passes, but with no SSO/integration surface the climb stops at L4."""
+    level, why = L.compute_level(_rungs(integration="na", recipe_local="na"))
+    assert level == 4
+    assert all(marker in why for marker in ("L5", "N/A"))
+
+
+def test_fails_at_L2_capped_at_L1():
+    """U0 gate: a failed upgrade caps the level at L1 even though higher rungs are green."""
+    statuses = _rungs(upgrade="fail", backup_restore="pass", functional="pass")
+    level, why = L.compute_level(statuses)
+    assert level == 1 and "L2" in why and "FAILED" in why
+
+
+# ---- L0 / install ----
+
+def test_install_fail_is_L0():
+    """A failed install earns nothing: level 0, with the cap pinned on L1."""
+    level, why = L.compute_level(_rungs(install="fail"))
+    assert level == 0 and "L1" in why and "FAILED" in why
+
+
+# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
+
+def test_higher_pass_does_not_rescue_lower_na():
+    """Stateless app: backup/restore N/A caps at L2 even though functional passes above it."""
+    statuses = _rungs(backup_restore="na", functional="pass", integration="na")
+    level, why = L.compute_level(statuses)
+    assert level == 2 and "L3" in why and "N/A" in why
+
+
+def test_upgrade_na_caps_at_L1():
+    """With only one published version there is nothing to upgrade from: N/A caps at L1."""
+    level, why = L.compute_level(_rungs(upgrade="na"))
+    assert level == 1
+    assert all(marker in why for marker in ("L2", "N/A"))
+
+
+def test_integration_fail_caps_at_L4():
+    """SSO declared but its verification failed: the integration rung fails and caps at L4."""
+    statuses = _rungs(integration="fail", recipe_local="na")
+    level, why = L.compute_level(statuses)
+    assert level == 4 and "L5" in why and "FAILED" in why
+
+
+def test_recipe_local_na_caps_at_L5():
+    """Integration passes but the recipe has no local tests: L6 is N/A, so the cap is L5."""
+    level, why = L.compute_level(_rungs(recipe_local="na"))
+    assert level == 5
+    assert all(marker in why for marker in ("L6", "N/A"))
+
+
+def test_functional_fail_caps_at_L3():
+    """A functional failure caps at L3; the N/A integration rung above it is never reached."""
+    level, why = L.compute_level(_rungs(functional="fail", integration="na"))
+    assert level == 3 and "L4" in why and "FAILED" in why
+
+
+# ---- input validation ----
+
+def test_invalid_status_raises():
+    """A status outside the pass/fail/na vocabulary ("passed") must raise ValueError."""
+    try:
+        L.compute_level(_rungs(functional="passed"))
+    except ValueError:
+        pass
+    else:
+        raise AssertionError("expected ValueError on invalid rung status")
+
+
+# ---- helpers: backup_restore_status ----
+
+def test_backup_restore_status_pass():
+    assert L.backup_restore_status(backup="pass", restore="pass", backup_capable=True) == "pass"
+
+
+def test_backup_restore_status_not_capable_is_na():
+    assert L.backup_restore_status(backup="skip", restore="skip", backup_capable=False) == "na"
+
+
+def test_backup_restore_status_fail_on_either():
+    for backup, restore in (("pass", "fail"), ("fail", "pass")):
+        assert L.backup_restore_status(backup, restore, True) == "fail"
+
+
+def test_backup_restore_partial_is_na():
+    """Backup-capable, backup passed, restore skipped (neither pass nor fail): no L3 claim."""
+    assert L.backup_restore_status(backup="pass", restore="skip", backup_capable=True) == "na"
+
+
+# ---- helpers: tier_to_rung ----
+
+def test_tier_to_rung_mapping():
+    """pass/fail carry over unchanged; 'skip' and None both become 'na'."""
+    expected = (("pass", "pass"), ("fail", "fail"), ("skip", "na"), (None, "na"))
+    for tier_status, rung_status in expected:
+        assert L.tier_to_rung(tier_status) == rung_status