diff --git a/runner/harness/level.py b/runner/harness/level.py
new file mode 100644
index 0000000..834f9f9
--- /dev/null
+++ b/runner/harness/level.py
@@ -0,0 +1,118 @@
+"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1).
+
+A single integer **level** summarising how far up the quality ladder a recipe run climbed, with
+YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean
+PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
+the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
+a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
+(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly
+this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
+
+The ladder (§4.1):
+  L0 — install failed / app never became healthy.
+  L1 — Installs: deploys + passes health/readiness.
+  L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
+  L3 — Backup/restore: seeded data survives backup → wipe → restore.
+  L4 — Functional: recipe-specific functional tests pass.
+  L5 — Integration: SSO/OIDC + cross-app integration tests pass.
+  L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged.
+
+This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
+test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
+(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into
+the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
+
+Rung status vocabulary (each rung ∈ these three):
+  "pass" — the rung was exercised and passed.
+  "fail" — the rung was exercised and failed.
+  "na"   — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
+           not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a
+           failure, but it DOES cap the climb (with a distinct cap_reason) so the level never
+           overstates what was actually verified.
+"""
+
+from __future__ import annotations
+
+# The climbable rungs in ascending order: RUNGS[i] is ladder level i+1 (install = L1, the
+# foundation). L0 means install itself did not pass; each later rung needs every earlier one to PASS.
+RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local")
+
+# Human-readable label per rung level (keys 1..6 follow RUNGS order), for cap_reason + the summary card.
+RUNG_LABEL = {
+    1: "install (deploy + health)",
+    2: "upgrade (prev published → PR)",
+    3: "backup/restore (data integrity)",
+    4: "functional (recipe-specific tests)",
+    5: "integration (SSO/OIDC + cross-app)",
+    6: "recipe-local (recipe repo tests/)",
+}
+
+VALID = {"pass", "fail", "na"}  # the full rung-status vocabulary described in the module docstring
+
+
+def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
+    """Map a rung-status dict → (level 0..6, cap_reason).
+
+    `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
+    highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
+    returned when the install rung itself is not "pass" (install failed / never healthy).
+
+    cap_reason explains where the climb stopped:
+      - "" (empty) when the recipe earned the top rung (L6, full clean climb).
+      - "L