diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py index 48de3d3..18fcf14 100644 --- a/dashboard/dashboard.py +++ b/dashboard/dashboard.py @@ -38,6 +38,7 @@ _RUN_FILES = { "screenshot.png": "image/png", "badge.svg": "image/svg+xml", "summary.html": "text/html; charset=utf-8", + "lint.txt": "text/plain; charset=utf-8", } _RUN_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") @@ -71,8 +72,7 @@ _LEVEL_COLOR = { 2: "#e0823d", 3: "#d9b343", 4: "#a0b93f", - 5: "#57ab5a", - 6: "#3fb950", + 5: "#3fb950", # bright green — full 5-rung climb incl. lint (phase lvl5) } @@ -152,7 +152,6 @@ def _build_row(b): "ref": ref[:8], "version": res.get("version") or ref[:12] or "—", "level": res.get("level"), - "level_cap_reason": res.get("level_cap_reason") or "", "has_screenshot": bool(res.get("screenshot")), "flags": res.get("flags") or {}, "finished": b.get("finished") or 0, @@ -220,7 +219,6 @@ a{color:#58a6ff;text-decoration:none} a:hover{text-decoration:underline} .name{font-weight:700;font-size:1.05rem;color:#e6edf3} .row{display:flex;align-items:center;gap:.5rem;flex-wrap:wrap;font-size:.82rem} .pill{color:#fff;padding:.08rem .5rem;border-radius:.5rem;font-size:.75rem;font-weight:600} -.cap{color:#8b949e;font-size:.75rem} code{background:#0d1117;border:1px solid #21262d;border-radius:.3rem;padding:0 .3rem;font-size:.78rem;color:#c9d1d9} .flags{display:flex;gap:.4rem;font-size:.72rem;color:#8b949e} .foot{margin-top:auto;display:flex;justify-content:space-between;font-size:.8rem;padding-top:.3rem;border-top:1px solid #21262d} @@ -274,17 +272,12 @@ def _card(r): f'' f'no screenshot{_level_pill(r["level"])}' ) - cap = ( - f'
{html.escape(r["level_cap_reason"])}
' - if r["level_cap_reason"] - else "" - ) return ( f'
{shot}
' f'
{html.escape(r["recipe"])}
' f'
{html.escape(r["status"])}' f'{html.escape(r["version"])}
' - f"{cap}{_flags_html(r['flags'])}" + f"{_flags_html(r['flags'])}" f'' f"
" diff --git a/docs/recipe-customization.md b/docs/recipe-customization.md index ef2fea9..a562288 100644 --- a/docs/recipe-customization.md +++ b/docs/recipe-customization.md @@ -115,8 +115,8 @@ _This table is GENERATED from the `runner/harness/meta.py` KEYS registry by `scr | `HEALTH_OK` | `tuple[int]` | `(200, 301, 302)` | Acceptable HTTP status codes for health. | | `DEPLOY_TIMEOUT` | `int` | `600` | Max seconds to wait for swarm convergence per deploy. | | `HTTP_TIMEOUT` | `int` | `300` | Max seconds to wait for HTTP health after convergence. | -| `BACKUP_CAPABLE` | `bool` | `None` | Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces N/A; `True` forces the tier on; unset = auto-detect. | -| `EXPECTED_NA` | `dict` | `None` | Declare an N/A rung intentional: `{rung: reason}`. The cap stands either way; only the report wording changes. | +| `BACKUP_CAPABLE` | `bool` | `None` | Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces an intentional skip of the backup/restore rung; `True` forces the tier on; unset = auto-detect. | +| `EXPECTED_NA` | `dict` | `None` | Declare a non-run rung an INTENTIONAL skip: `{rung: reason}` — the level climbs past it; an undeclared non-run rung is *unverified* and blocks the level above it (classification table: machine-docs/DECISIONS.md phase lvl5). Never overrides an exercised pass/fail; the `lint` rung has no escape hatch. | | `READY_PROBE` | `hook` | `None` | Callable `(ctx) -> [probe, ...]` returning extra readiness probes, run after install AND after upgrade: HTTP `{host, path, ok}` or TCP `{tcp_host, tcp_port, stable}`. | | `UPGRADE_BASE_VERSION` | `str` | `None` | Exact published tag overriding the upgrade tier's base (default: `recipe_versions[-2]`). | | `BACKUP_VERIFY` | `hook` | `None` | Callable `(ctx) -> bool` post-backup data-capture check; `False` re-runs the backup (truncated-dump race guard), retried up to 3 attempts. 
| diff --git a/docs/results-ux.md b/docs/results-ux.md index 270daba..6ca980e 100644 --- a/docs/results-ux.md +++ b/docs/results-ux.md @@ -10,12 +10,9 @@ It is the R8 reference for Phase 3 (`plan-phase3-results-ux.md`). --- -## 1. The level ladder (R1) +## 1. The level ladder (phase lvl5 semantics, operator-decided 2026-06-11) -Every run earns a single integer **level 0–6**. The ladder is cumulative with **YunoHost -gap-caps-the-level** semantics: you earn level `L` only if **every rung 1..L was a clean PASS**. The -first rung that is not a clean PASS — a real **FAIL** *or* genuinely **N/A** for this recipe — stops -the climb, and `level_cap_reason` records which rung and why. +Every run earns a single integer **level 0–5** over the FIVE essential rungs: | Level | Rung | Earned when | |------:|------|-------------| @@ -24,42 +21,52 @@ the climb, and `level_cap_reason` records which rung and why. | **L2** | upgrade | previous published version → PR/latest, stays healthy, data intact. | | **L3** | backup/restore | seeded data survives backup → wipe → restore. | | **L4** | functional | the recipe-specific functional tests pass. | -| **L5** | integration | SSO/OIDC + cross-app integration tests pass. | -| **L6** | recipe-local | the recipe repo's own `tests/` (D4) pass and are merged. | +| **L5** | lint | `abra recipe lint` passes against the exact ref under test. | -**N/A caps, fairly.** A rung that does not apply to a recipe (only one published version → no -upgrade; not backup-capable; no SSO/integration surface; no recipe-local tests) is **N/A**, which -caps the climb at the rung below it with a recorded reason — it is *not* counted as a failure. This is -the only fair reading of "a missing lower rung caps the level": e.g. a recipe with **no integration -surface caps at L4 by definition**, shown as `level_cap_reason = "L5 integration … N/A"`. 
A stateless -app whose functional tests pass but which cannot be backed up is honestly capped at **L2** (`"L3 -backup/restore … N/A"`) rather than shown as L4 — understating is safe; overstating is forbidden. +Each rung has one of FOUR statuses, and the level is: -Worked examples (real runs): -- `uptime-kuma` — install+upgrade+backup+restore+functional all pass, no SSO surface → **L4** - (`cap = "L5 integration (SSO/OIDC + cross-app) N/A"`). -- `custom-html-tiny` — stateless, not backup-capable: install+upgrade pass, backup/restore N/A → - **L2** (`cap = "L3 backup/restore (data integrity) N/A"`). + level = the highest rung that PASSED, where every rung below it is "pass" or an intentional skip + +- **pass / fail** — the rung was exercised. A FAIL blocks: no rung above it counts, however green. +- **skip (intentional)** — the rung *genuinely does not apply*, from a declared or structural fact: + not backup-capable (declared), only one published version (no upgrade target), or a declared + `EXPECTED_NA`. Intentional skips are **climbed past** — a stateless recipe with passing + functional tests and a clean lint reaches **L5**, not the old "capped at 2". +- **unver (unverified)** — the rung *should* have run but didn't: infra error, missing tool, + harness exception, prior-stage abort, timeout. **The level cannot rise above an unverified + rung** — it blocks exactly like a fail (we never claim what we didn't check). Anything + unclassifiable defaults to unver (conservative). + +There is **no capping concept** (no `cap_reason`, no `capped`): the per-rung table +(✔ / ✘ / intentional-skip / unverified) on the card and in `results.json.rungs` is the sole +carrier of "why isn't this level higher". Worked examples: + +- install ✔, upgrade ✘, backup ✔, functional ✔, lint ✔ → **level 1** (fail blocks). +- install ✔, upgrade ✔, backup skip (not capable), functional ✔, lint ✔ → **level 5**. 
+- install ✔, upgrade ✔, backup unver (harness error), functional ✔, lint ✔ → **level 2**. +- all four ✔, lint unver (abra missing) → **level 4** (an unverified top rung isn't earned). + +Integration (SSO/OIDC + cross-app) and recipe-local tests are **optional capabilities**, not +rungs — they never affect the level (SSO remains enforced for the run VERDICT). ### How tiers map to rungs (the translation layer) `run_recipe_ci.py` holds the run's per-tier results (`install/upgrade/backup/restore/custom`) + -deps/SSO signals; `runner/harness/results.py::derive_rungs` maps them to the rung-status dict that -`runner/harness/level.py::compute_level` scores. The mapping (also in `DECISIONS.md`, Phase 3): +structural signals; `runner/harness/results.py::derive_rungs` maps them to the rung-status dict +that `runner/harness/level.py::compute_level` scores. The full intentional-vs-unintentional +classification table for every N/A source is in `machine-docs/DECISIONS.md` (phase lvl5). Summary: -- **install** ← install tier (pass/fail). -- **upgrade** ← upgrade tier; `skip` → **na** (only one published version). +- **install** ← install tier (pass/fail; a non-run is unver — install always applies). +- **upgrade** ← upgrade tier; tier skipped with no upgrade target (single published version, + structural) → skip; declared `EXPECTED_NA` → skip; otherwise unver. - **backup_restore** ← backup AND restore tiers both pass → pass; either fail → fail; not - backup-capable → **na**. -- **functional** ← the custom tier minus its SSO tests; a custom failure conservatively fails this - rung (we don't split functional-vs-SSO failure → never inflate); no custom tests → **na**. -- **integration** ← applies only if the recipe declares deps; pass iff deps wired and SSO verified and - custom didn't fail; recipes with no declared deps → **na** (the "caps at L4" rule). -- **recipe_local** ← the recipe repo's own `tests/` (discovery source `repo-local`) ran and passed; - none present → **na**. 
- -The pure scorer is exhaustively unit-tested + fuzz-verified (all 729 rung combinations: level == -count of leading consecutive passes, zero inflation). + backup-capable (structural/declared) → skip; unverified-while-capable → unver. +- **functional** ← the custom tier; a custom failure conservatively fails this rung; no custom + tests is a coverage GAP → unver, unless declared `EXPECTED_NA["functional"]` → skip. +- **lint** ← the lint executor (`runner/harness/lint.py`): `abra recipe lint` on a pristine + scratch clone of the run's recipe tree at the exact tested sha, 60s hard budget, full output in + the run artifact `lint.txt`. pass/fail only — when lint can't run the rung is **unver** (never + a silent pass, never an intentional skip). Lint never changes the run verdict. ### Invariant flags (shown, not climbed) @@ -77,19 +84,29 @@ build number, or the run's unique app domain for a hand-run). Schema: ```json { - "schema": 1, "run_id": "...", "recipe": "...", "version": "...", "pr": "...", "ref": "...", + "schema": 2, "run_id": "...", "recipe": "...", "version": "...", "pr": "...", "ref": "...", "finished": 0.0, - "level": 4, "level_cap_reason": "L5 integration (SSO/OIDC + cross-app) N/A", - "rungs": {"install":"pass","upgrade":"pass","backup_restore":"pass","functional":"pass", - "integration":"na","recipe_local":"na"}, + "level": 5, + "rungs": {"install":"pass","upgrade":"pass","backup_restore":"skip","functional":"pass", + "lint":"pass"}, + "lint": {"status":"pass","detail":"","rules_failed":[]}, + "skips": {"intentional": {"backup_restore": "not backup-capable (no backupbot labels / declared)"}, + "unintentional": []}, "stages": [{"name":"install","status":"pass", "tests":[{"name":"test_serving","status":"pass","ms":168,"source":"generic"}]}], - "results": {"install":"pass","upgrade":"pass","backup":"pass","restore":"pass","custom":"pass"}, + "results": {"install":"pass","upgrade":"pass","backup":"skip","restore":"skip","custom":"pass"}, "flags": 
{"clean_teardown": true, "no_secret_leak": true}, "screenshot": "screenshot.png", "summary_card": "summary.png" } ``` +`rungs` carries the four-status vocabulary above; `skips.intentional` maps each intentionally +skipped rung to its (declared or structural) reason and `skips.unintentional` lists the +unverified rungs. `lint` carries the L5 rung outcome + failing rule ids; the full +`abra recipe lint` output is served at `/runs/<run_id>/lint.txt`. Pre-lvl5 artifacts +(`"schema": 1`, 4-rung ladder, `level_cap_reason`/`level_cap_rung` present, `"na"` statuses) +are still rendered as-is by the dashboard/card — their stored level is never recomputed. + Assembly is **best-effort**: a failure to build/write `results.json` is logged but never changes the run's exit code (cosmetics never block the pipeline, R7). diff --git a/runner/harness/card.py b/runner/harness/card.py index 28be566..562f8ba 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -21,23 +21,24 @@ from __future__ import annotations import html import os -# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top. +# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top (L5 = full +# clean climb incl. lint — phase lvl5). LEVEL_COLOR = { 0: "#e5534b", # red — install failed 1: "#e0823d", # orange 2: "#e0823d", 3: "#d9b343", # amber - 4: "#a0b93f", # yellow-green - 5: "#57ab5a", # green - 6: "#3fb950", # bright green — full climb + 4: "#a0b93f", # yellow-green — above functional, lint not earned + 5: "#3fb950", # bright green — full climb (lint passed) } -STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–"} +STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–", "unver": "⊘"} STATUS_COLOR = { "pass": "#3fb950", "fail": "#f85149", "error": "#f85149", "skip": "#8b949e", "na": "#8b949e", + "unver": "#d29922", # amber — exercised? 
no: should have run and wasn't verified } @@ -79,44 +80,15 @@ def render_badge_svg(label: str, message: str, color: str) -> str: ) -# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not -# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an -# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels -# (no emoji) so the SVG renders anywhere. +# Amber for UNVERIFIED rung rows in the table (a rung that should have run and wasn't checked). GAP_COLOR = "#d29922" -EXPECT_COLOR = "#6e7681" -def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str: - """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment - that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`): - - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'. - - "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'. - - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it). - The badge never inflates — it only annotates the cap the level already reflects.""" - label, msg = "cc-ci", f"level {int(level)}" - lw, mw = _text_width(label), _text_width(msg) - third: tuple[str, str] | None = None - if cap_skip == "unintentional": - third = ("gap?", GAP_COLOR) - elif cap_skip == "intentional": - third = ("expected", EXPECT_COLOR) - if third is None: - return render_badge_svg(label, msg, level_color(level)) - txt, tcolor = third - tw = _text_width(txt) - w = lw + mw + tw - return ( - f'' - f'' - f'' - f'' - f'' - f'{html.escape(label)}' - f'{html.escape(msg)}' - f'{html.escape(txt)}' - ) +def level_badge_svg(level: int) -> str: + """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level — NUMBER + COLOUR ONLY + (operator-specified, phase lvl5). 
'Why isn't it higher' lives in the card's per-rung table, + never on the badge.""" + return render_badge_svg("cc-ci", f"level {int(level)}", level_color(level)) def _stage_rows(stages: list[dict]) -> str: @@ -141,12 +113,13 @@ def _stage_rows(stages: list[dict]) -> str: return "\n".join(rows) or 'no stages' -# Friendly rung labels for the skip rows (the four essential rungs). +# Friendly rung labels for the skip/unverified rows (the five essential rungs). RUNG_LABEL = { "install": "install", "upgrade": "upgrade", "backup_restore": "backup/restore", "functional": "functional", + "lint": "lint", } SKIP_GREEN = ( "#57ab5a" # muted green — an intentional skip reads like a pass (but labelled, never inflating) @@ -154,9 +127,10 @@ SKIP_GREEN = ( def _skip_rows(skips: dict) -> str: - """Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row - but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below; - an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it.""" + """Render the non-run rungs as stage-like rows (phase lvl5 semantics). 
An INTENTIONAL skip + (declared/structural — the rung does not apply, the climb continues past it) is muted green + with its reason on the line below; an UNVERIFIED rung (should have run, wasn't checked — the + level cannot rise above it) is amber 'unverified'.""" rows = [] for rung, reason in (skips.get("intentional") or {}).items(): rows.append( @@ -171,11 +145,11 @@ def _skip_rows(skips: dict) -> str: rows.append( f'' f"{html.escape(RUNG_LABEL.get(rung, rung))}" - f'unintentional skip' + f'unverified' ) rows.append( - 'not declared in EXPECTED_NA — add the ' - "missing test/label, or declare the skip with a reason" + 'rung did not run / could not be ' + "checked — the level cannot rise above an unverified rung" ) return "\n".join(rows) @@ -184,13 +158,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to the screenshot PNG (same dir as the card) — omitted from the card if None / absent. - The card shows exactly what the data says: recipe + version, the level badge + cap reason, the - per-stage/per-test ✔/✘ table, the invariant flags, and the app screenshot. No computation here.""" + The card shows exactly what the data says: recipe + version, the level, the per-stage/per-test + ✔/✘ table (+ skip/unverified rung rows — the SOLE carrier of "why isn't the level higher"), + the invariant flags, and the app screenshot. No computation here. Tolerates old (schema-1) + artifacts: the ladder height is read off the rungs the artifact actually has.""" recipe = html.escape(str(data.get("recipe", "?"))) version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) - cap_reason = str(data.get("level_cap_reason") or "") - cap = html.escape(cap_reason) + # Old (pre-lvl5) artifacts have a 4-rung ladder — render their "of N" honestly. 
+ ladder_top = 5 if "lint" in (data.get("rungs") or {}) else 4 sk = data.get("skips", {}) or {} color = level_color(level) flags = data.get("flags", {}) or {} @@ -221,7 +197,7 @@ body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;backgroun .lvl .num{{display:inline-block;min-width:64px;padding:.3rem .7rem;border-radius:10px; font-size:1.6rem;font-weight:700;color:#0d1117;background:{color}}} .lvl .lbl{{display:block;color:#8b949e;font-size:.72rem;text-transform:uppercase;margin-top:.2rem}} -.cap{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}} +.ladder{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}} .body{{display:flex;gap:1rem;padding:1rem 1.3rem}} .tbl{{flex:1}} table{{border-collapse:collapse;width:100%;font-size:.85rem}} @@ -238,12 +214,12 @@ tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0 .shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}} .flags{{display:flex;gap:.6rem;padding:.6rem 1.3rem 1rem}} .flag{{border:1px solid;border-radius:6px;padding:.15rem .5rem;font-size:.78rem;color:#c9d1d9}} -.cap b{{color:#c9d1d9}} +.ladder b{{color:#c9d1d9}}
{FLOWER_SVG}

{recipe}

{version}
{level}level
-
{("capped: " + cap) if cap else "full clean climb — top level (4)"}
+
level {level} of {ladder_top}
{rows}
{shot_html}
{"".join(flag_bits)}
""" diff --git a/runner/harness/level.py b/runner/harness/level.py index 46f5ba6..1bc08b2 100644 --- a/runner/harness/level.py +++ b/runner/harness/level.py @@ -1,67 +1,67 @@ -"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1). +"""The level ladder — five rungs, no capping (phase lvl5, plan-phase-lvl5-lint-rung.md). -A single integer **level** summarising how far up the quality ladder a recipe run climbed, with -YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean -PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops -the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make -a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail -— with a recorded reason so the level is *fair*, not inflated. - -The ladder is the FOUR essential rungs every recipe is held to: +A single integer **level** summarising how far up the quality ladder a recipe run climbed: L0 — install failed / app never became healthy. L1 — Installs: deploys + passes health/readiness. L2 — Upgrades: previous published version → PR version, stays healthy, data intact. L3 — Backup/restore: seeded data survives backup → wipe → restore. L4 — Functional: recipe-specific functional tests pass. + L5 — Lint: `abra recipe lint` passes against the exact ref under test. -Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL** -capabilities — they are NOT part of the level ladder and never cap it. They still run when present -(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a -recipe without an SSO surface or without repo-local tests is simply not penalised on the level. 
+Semantics (operator-decided 2026-06-11, recorded in DECISIONS.md — replaces the Phase-3 +"N/A caps" rule): -This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit -test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator -(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status -dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3). + level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"; 0 if none. -Rung status vocabulary (each rung ∈ these three): - "pass" — the rung was exercised and passed. - "fail" — the rung was exercised and failed. - "na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade; - not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct - cap_reason) so the level never overstates what was actually verified. +A rung has one of FOUR statuses: + "pass" — exercised and passed. + "fail" — exercised and failed. Blocks: no rung above it can count. + "skip" — INTENTIONAL skip: the rung genuinely does not apply to this recipe, from a + declared or structural fact (not backup-capable; only one published version; + declared in recipe_meta.EXPECTED_NA). Does NOT stop the climb. + "unver" — UNINTENTIONAL not-verified: the rung SHOULD have run but didn't (infra error, + missing tool, harness exception, prior-stage abort, timeout). Blocks exactly + like a fail — the level never rises above a rung that wasn't actually checked. + +The per-rung table (results.json `rungs`, card, dashboard) is the SOLE carrier of "why isn't +this level higher" — there is no cap_reason. The classification of every N/A source into +skip-vs-unver lives in derive_rungs (results.py) and is tabulated in DECISIONS.md; anything +unclassifiable defaults to "unver" (conservative: never claim what wasn't checked). 
+ +Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) remain +OPTIONAL capabilities — not rungs, never counted (SSO is still enforced for the run VERDICT +via the deps/SSO checks in run_recipe_ci.py). + +This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the +unit test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). """ from __future__ import annotations -# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself -# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the -# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple. -RUNGS = ("install", "upgrade", "backup_restore", "functional") +# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install +# itself did not pass. These five are the ESSENTIAL rungs — integration/recipe-local are +# optional and deliberately NOT in this tuple. +RUNGS = ("install", "upgrade", "backup_restore", "functional", "lint") -# Human-readable label per rung level, for cap_reason + the summary card. +# Human-readable label per rung level, for the summary card / docs. RUNG_LABEL = { 1: "install (deploy + health)", 2: "upgrade (prev published → PR)", 3: "backup/restore (data integrity)", 4: "functional (recipe-specific tests)", + 5: "lint (abra recipe lint)", } -VALID = {"pass", "fail", "na"} +VALID = {"pass", "fail", "skip", "unver"} -def compute_level(rungs: dict[str, str]) -> tuple[int, str]: - """Map a rung-status dict → (level 0..4, cap_reason). +def compute_level(rungs: dict[str, str]) -> int: + """Map a rung-status dict → level 0..5. - `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the - highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. 
L0 is - returned when the install rung itself is not "pass" (install failed / never healthy). - - cap_reason explains where the climb stopped: - - "" (empty) when the recipe earned the top rung (L4, full clean climb). - - "L