feat(3 U0.1): pure level() ladder mapper (L0-L6, gap-caps) + unit tests

Phase-3 R1 foundation. harness.level.compute_level(rungs)->(level,cap_reason) with YunoHost
gap-caps semantics: level = highest rung 1..L all clean PASS; first non-PASS (FAIL or N/A) caps,
recorded in cap_reason. N/A caps like fail but distinctly (L5 'no integration surface' example).
Helpers backup_restore_status + tier_to_rung. 16 unit tests incl U0 gate cases (L4-pass, L2-cap).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-05-31 05:46:23 +00:00
parent 805fbba2ad
commit 9773e3ff63
2 changed files with 251 additions and 0 deletions

118
runner/harness/level.py Normal file
View File

@ -0,0 +1,118 @@
"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1).
A single integer **level** summarising how far up the quality ladder a recipe run climbed, with
YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean
PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly
this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
The ladder (§4.1):
L0 — install failed / app never became healthy.
L1 — Installs: deploys + passes health/readiness.
L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
L3 — Backup/restore: seeded data survives backup → wipe → restore.
L4 — Functional: recipe-specific functional tests pass.
L5 — Integration: SSO/OIDC + cross-app integration tests pass.
L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged.
This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into
the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
Rung status vocabulary (each rung ∈ these three):
"pass" — the rung was exercised and passed.
"fail" — the rung was exercised and failed.
"na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a
failure, but it DOES cap the climb (with a distinct cap_reason) so the level never
overstates what was actually verified.
"""
from __future__ import annotations
# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
# did not pass. Each later rung requires every earlier rung to be a clean PASS.
RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local")
# Human-readable label per rung level, for cap_reason + the summary card.
RUNG_LABEL = {
1: "install (deploy + health)",
2: "upgrade (prev published → PR)",
3: "backup/restore (data integrity)",
4: "functional (recipe-specific tests)",
5: "integration (SSO/OIDC + cross-app)",
6: "recipe-local (recipe repo tests/)",
}
VALID = {"pass", "fail", "na"}
def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
"""Map a rung-status dict → (level 0..6, cap_reason).
`rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
returned when the install rung itself is not "pass" (install failed / never healthy).
cap_reason explains where the climb stopped:
- "" (empty) when the recipe earned the top rung (L6, full clean climb).
- "L<k> <label> FAILED" when a rung was exercised and failed.
- "L<k> <label> N/A" when a rung does not apply to this recipe.
Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
"""
for name in RUNGS:
st = rungs.get(name)
if st not in VALID:
raise ValueError(f"rung {name!r} has invalid status {st!r} (expect one of {sorted(VALID)})")
# L0: install did not pass.
if rungs["install"] != "pass":
if rungs["install"] == "fail":
return 0, "L1 " + RUNG_LABEL[1] + " FAILED"
# install N/A is not a real-world state for a deploy run, but handle it for totality.
return 0, "L1 " + RUNG_LABEL[1] + " N/A"
# Climb: stop at the first rung that is not a clean pass.
level = 0
for idx, name in enumerate(RUNGS, start=1):
if rungs[name] == "pass":
level = idx
continue
# first non-pass rung — caps the climb
kind = "FAILED" if rungs[name] == "fail" else "N/A"
return level, f"L{idx} {RUNG_LABEL[idx]} {kind}"
# Full clean climb to the top rung.
return level, ""
def backup_restore_status(backup: str | None, restore: str | None, backup_capable: bool) -> str:
"""Collapse the backup + restore tier results into the single L3 rung status.
Both tiers must pass for the rung to pass (the rung is "seeded data survives backup→wipe→restore",
which is only verified if BOTH the backup and the restore tier are green). If the recipe is not
backup-capable, both tiers skip → the rung is N/A (caps at L2, recorded). A fail in either tier
fails the rung.
"""
if not backup_capable:
return "na"
vals = {backup, restore}
if "fail" in vals:
return "fail"
if backup == "pass" and restore == "pass":
return "pass"
# any skip/None while backup-capable → not verified → treat as N/A (cannot claim L3)
return "na"
def tier_to_rung(status: str | None) -> str:
"""Map a single tier result ('pass'|'fail'|'skip'|None) to a rung status. 'skip'/None → 'na'
(the tier did not apply / did not run), so it caps the climb without being counted as a failure."""
if status == "pass":
return "pass"
if status == "fail":
return "fail"
return "na"

133
tests/unit/test_level.py Normal file
View File

@ -0,0 +1,133 @@
"""Unit tests for the Phase-3 level ladder (harness.level), plan-phase3-results-ux.md §4.1 / R1.
Pure function — no I/O. Proves the YunoHost gap-caps-the-level semantics, including the U0 gate
acceptance: a recipe that climbs through L4 reports 4, and one that fails at L2 is capped at 1
(the level just below the failed rung). Run cold with: cc-ci-run -m pytest tests/unit/test_level.py -q
"""
from __future__ import annotations
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import level as L # noqa: E402
def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pass",
integration="pass", recipe_local="pass"):
return {
"install": install,
"upgrade": upgrade,
"backup_restore": backup_restore,
"functional": functional,
"integration": integration,
"recipe_local": recipe_local,
}
# ---- the U0 gate: L4-pass and L2-cap ----
def test_full_clean_climb_to_L6():
lvl, reason = L.compute_level(_rungs())
assert lvl == 6
assert reason == ""
def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
# GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
assert lvl == 4
assert "L5" in reason and "N/A" in reason
def test_fails_at_L2_capped_at_L1():
# GATE: upgrade fails → capped at L1 even though higher rungs would pass.
lvl, reason = L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass"))
assert lvl == 1
assert "L2" in reason and "FAILED" in reason
# ---- L0 / install ----
def test_install_fail_is_L0():
lvl, reason = L.compute_level(_rungs(install="fail"))
assert lvl == 0
assert "L1" in reason and "FAILED" in reason
# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
def test_higher_pass_does_not_rescue_lower_na():
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
assert lvl == 2
assert "L3" in reason and "N/A" in reason
def test_upgrade_na_caps_at_L1():
# only one published version → no upgrade possible → N/A caps at L1.
lvl, reason = L.compute_level(_rungs(upgrade="na"))
assert lvl == 1
assert "L2" in reason and "N/A" in reason
def test_integration_fail_caps_at_L4():
# SSO declared but unverified (failed) → integration rung fails → cap at L4.
lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na"))
assert lvl == 4
assert "L5" in reason and "FAILED" in reason
def test_recipe_local_na_caps_at_L5():
# SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
lvl, reason = L.compute_level(_rungs(recipe_local="na"))
assert lvl == 5
assert "L6" in reason and "N/A" in reason
def test_functional_fail_caps_at_L3():
lvl, reason = L.compute_level(_rungs(functional="fail", integration="na"))
assert lvl == 3
assert "L4" in reason and "FAILED" in reason
# ---- input validation ----
def test_invalid_status_raises():
bad = _rungs()
bad["functional"] = "passed" # not in the vocabulary
try:
L.compute_level(bad)
except ValueError:
return
raise AssertionError("expected ValueError on invalid rung status")
# ---- helpers: backup_restore_status ----
def test_backup_restore_status_pass():
assert L.backup_restore_status("pass", "pass", True) == "pass"
def test_backup_restore_status_not_capable_is_na():
assert L.backup_restore_status("skip", "skip", False) == "na"
def test_backup_restore_status_fail_on_either():
assert L.backup_restore_status("pass", "fail", True) == "fail"
assert L.backup_restore_status("fail", "pass", True) == "fail"
def test_backup_restore_partial_is_na():
# backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3
assert L.backup_restore_status("pass", "skip", True) == "na"
# ---- helpers: tier_to_rung ----
def test_tier_to_rung_mapping():
assert L.tier_to_rung("pass") == "pass"
assert L.tier_to_rung("fail") == "fail"
assert L.tier_to_rung("skip") == "na"
assert L.tier_to_rung(None) == "na"