diff --git a/runner/harness/card.py b/runner/harness/card.py index 21418dc..6f44d2a 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -141,14 +141,12 @@ def _stage_rows(stages: list[dict]) -> str: return "\n".join(rows) or 'no stages' -# Friendly rung labels for the skip rows. +# Friendly rung labels for the skip rows (the four essential rungs). RUNG_LABEL = { "install": "install", "upgrade": "upgrade", "backup_restore": "backup/restore", "functional": "functional", - "integration": "integration", - "recipe_local": "recipe-local", } SKIP_GREEN = "#57ab5a" # muted green — an intentional skip reads like a pass (but labelled, never inflating) @@ -241,7 +239,7 @@ tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0
{FLOWER_SVG}

{recipe}

{version}
{level}level
-
{("capped: " + cap) if cap else "full clean climb — top level (6)"}
+
{("capped: " + cap) if cap else "full clean climb — top level (4)"}
{rows}
{shot_html}
{"".join(flag_bits)}
""" diff --git a/runner/harness/level.py b/runner/harness/level.py index f88d802..46f5ba6 100644 --- a/runner/harness/level.py +++ b/runner/harness/level.py @@ -5,37 +5,39 @@ YunoHost semantics: **a gap caps the level** — you only earn level L if every PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail -(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly -this: N/A caps, with a recorded reason so the level is *fair*, not inflated). +— with a recorded reason so the level is *fair*, not inflated. -The ladder (§4.1): +The ladder is the FOUR essential rungs every recipe is held to: L0 — install failed / app never became healthy. L1 — Installs: deploys + passes health/readiness. L2 — Upgrades: previous published version → PR version, stays healthy, data intact. L3 — Backup/restore: seeded data survives backup → wipe → restore. L4 — Functional: recipe-specific functional tests pass. - L5 — Integration: SSO/OIDC + cross-app integration tests pass. - L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged. + +Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL** +capabilities — they are NOT part of the level ladder and never cap it. They still run when present +(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a +recipe without an SSO surface or without repo-local tests is simply not penalised on the level. This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator -(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into -the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3). +(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status +dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3). Rung status vocabulary (each rung ∈ these three): "pass" — the rung was exercised and passed. "fail" — the rung was exercised and failed. "na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade; - not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a - failure, but it DOES cap the climb (with a distinct cap_reason) so the level never - overstates what was actually verified. + not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct + cap_reason) so the level never overstates what was actually verified. """ from __future__ import annotations # The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself -# did not pass. Each later rung requires every earlier rung to be a clean PASS. -RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local") +# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the +# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple. +RUNGS = ("install", "upgrade", "backup_restore", "functional") # Human-readable label per rung level, for cap_reason + the summary card. RUNG_LABEL = { @@ -43,22 +45,20 @@ RUNG_LABEL = { 2: "upgrade (prev published → PR)", 3: "backup/restore (data integrity)", 4: "functional (recipe-specific tests)", - 5: "integration (SSO/OIDC + cross-app)", - 6: "recipe-local (recipe repo tests/)", } VALID = {"pass", "fail", "na"} def compute_level(rungs: dict[str, str]) -> tuple[int, str]: - """Map a rung-status dict → (level 0..6, cap_reason). + """Map a rung-status dict → (level 0..4, cap_reason). `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is returned when the install rung itself is not "pass" (install failed / never healthy). cap_reason explains where the climb stopped: - - "" (empty) when the recipe earned the top rung (L6, full clean climb). + - "" (empty) when the recipe earned the top rung (L4, full clean climb). - "L