feat(lvl5): merge phase-lvl5 → main after M1 PASS (review cfc87fd) — implementation content taken verbatim from the Adversary-verified branch tip 3d8d286

2026-06-11 07:56:34 +00:00
parent cfc87fd8d3 3d8d286cf3
commit 08e6cc8273
14 changed files with 1125 additions and 379 deletions
--- a/dashboard/dashboard.py
+++ b/dashboard/dashboard.py
@ -38,6 +38,7 @@ _RUN_FILES = {
    "screenshot.png": "image/png",
    "badge.svg": "image/svg+xml",
    "summary.html": "text/html; charset=utf-8",
+    "lint.txt": "text/plain; charset=utf-8",
 }
 _RUN_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")

@ -71,8 +72,7 @@ _LEVEL_COLOR = {
    2: "#e0823d",
    3: "#d9b343",
    4: "#a0b93f",
-    5: "#57ab5a",
-    6: "#3fb950",
+    5: "#3fb950",  # bright green — full 5-rung climb incl. lint (phase lvl5)
 }


@ -152,7 +152,6 @@ def _build_row(b):
        "ref": ref[:8],
        "version": res.get("version") or ref[:12] or "—",
        "level": res.get("level"),
-        "level_cap_reason": res.get("level_cap_reason") or "",
        "has_screenshot": bool(res.get("screenshot")),
        "flags": res.get("flags") or {},
        "finished": b.get("finished") or 0,
@ -220,7 +219,6 @@ a{color:#58a6ff;text-decoration:none} a:hover{text-decoration:underline}
 .name{font-weight:700;font-size:1.05rem;color:#e6edf3}
 .row{display:flex;align-items:center;gap:.5rem;flex-wrap:wrap;font-size:.82rem}
 .pill{color:#fff;padding:.08rem .5rem;border-radius:.5rem;font-size:.75rem;font-weight:600}
-.cap{color:#8b949e;font-size:.75rem}
 code{background:#0d1117;border:1px solid #21262d;border-radius:.3rem;padding:0 .3rem;font-size:.78rem;color:#c9d1d9}
 .flags{display:flex;gap:.4rem;font-size:.72rem;color:#8b949e}
 .foot{margin-top:auto;display:flex;justify-content:space-between;font-size:.8rem;padding-top:.3rem;border-top:1px solid #21262d}
@ -274,17 +272,12 @@ def _card(r):
            f'<a class="shot" href="{run_url}" title="open run">'
            f'<span class="ph">no screenshot</span>{_level_pill(r["level"])}</a>'
        )
-    cap = (
-        f'<div class="cap">{html.escape(r["level_cap_reason"])}</div>'
-        if r["level_cap_reason"]
-        else ""
-    )
    return (
        f'<div class="card">{shot}<div class="body">'
        f'<div class="name">{html.escape(r["recipe"])}</div>'
        f'<div class="row"><span class="pill" style="background:{color}">{html.escape(r["status"])}</span>'
        f'<code>{html.escape(r["version"])}</code></div>'
-        f"{cap}{_flags_html(r['flags'])}"
+        f"{_flags_html(r['flags'])}"
        f'<div class="foot"><a href="{run_url}">run #{num} · {_ago(r["finished"])}</a>'
        f'<a href="/recipe/{html.escape(r["recipe"])}">history →</a></div>'
        f"</div></div>"
--- a/docs/recipe-customization.md
+++ b/docs/recipe-customization.md
@ -115,8 +115,8 @@ _This table is GENERATED from the `runner/harness/meta.py` KEYS registry by `scr
 | `HEALTH_OK` | `tuple[int]` | `(200, 301, 302)` | Acceptable HTTP status codes for health. |
 | `DEPLOY_TIMEOUT` | `int` | `600` | Max seconds to wait for swarm convergence per deploy. |
 | `HTTP_TIMEOUT` | `int` | `300` | Max seconds to wait for HTTP health after convergence. |
-| `BACKUP_CAPABLE` | `bool` | `None` | Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces N/A; `True` forces the tier on; unset = auto-detect. |
-| `EXPECTED_NA` | `dict` | `None` | Declare an N/A rung intentional: `{rung: reason}`. The cap stands either way; only the report wording changes. |
+| `BACKUP_CAPABLE` | `bool` | `None` | Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces an intentional skip of the backup/restore rung; `True` forces the tier on; unset = auto-detect. |
+| `EXPECTED_NA` | `dict` | `None` | Declare a non-run rung an INTENTIONAL skip: `{rung: reason}` — the level climbs past it; an undeclared non-run rung is *unverified* and blocks the level above it (classification table: machine-docs/DECISIONS.md phase lvl5). Never overrides an exercised pass/fail; the `lint` rung has no escape hatch. |
 | `READY_PROBE` | `hook` | `None` | Callable `(ctx) -> [probe, ...]` returning extra readiness probes, run after install AND after upgrade: HTTP `{host, path, ok}` or TCP `{tcp_host, tcp_port, stable}`. |
 | `UPGRADE_BASE_VERSION` | `str` | `None` | Exact published tag overriding the upgrade tier's base (default: `recipe_versions[-2]`). |
 | `BACKUP_VERIFY` | `hook` | `None` | Callable `(ctx) -> bool` post-backup data-capture check; `False` re-runs the backup (truncated-dump race guard), retried up to 3 attempts. |
--- a/docs/results-ux.md
+++ b/docs/results-ux.md
@ -10,12 +10,9 @@ It is the R8 reference for Phase 3 (`plan-phase3-results-ux.md`).

 ---

-## 1. The level ladder (R1)
+## 1. The level ladder (phase lvl5 semantics, operator-decided 2026-06-11)

-Every run earns a single integer **level 0–6**. The ladder is cumulative with **YunoHost
-gap-caps-the-level** semantics: you earn level `L` only if **every rung 1..L was a clean PASS**. The
-first rung that is not a clean PASS — a real **FAIL** *or* genuinely **N/A** for this recipe — stops
-the climb, and `level_cap_reason` records which rung and why.
+Every run earns a single integer **level 0–5** over the FIVE essential rungs:

 | Level | Rung | Earned when |
 |------:|------|-------------|
@ -24,42 +21,52 @@ the climb, and `level_cap_reason` records which rung and why.
 | **L2** | upgrade | previous published version → PR/latest, stays healthy, data intact. |
 | **L3** | backup/restore | seeded data survives backup → wipe → restore. |
 | **L4** | functional | the recipe-specific functional tests pass. |
-| **L5** | integration | SSO/OIDC + cross-app integration tests pass. |
-| **L6** | recipe-local | the recipe repo's own `tests/` (D4) pass and are merged. |
+| **L5** | lint | `abra recipe lint` passes against the exact ref under test. |

-**N/A caps, fairly.** A rung that does not apply to a recipe (only one published version → no
-upgrade; not backup-capable; no SSO/integration surface; no recipe-local tests) is **N/A**, which
-caps the climb at the rung below it with a recorded reason — it is *not* counted as a failure. This is
-the only fair reading of "a missing lower rung caps the level": e.g. a recipe with **no integration
-surface caps at L4 by definition**, shown as `level_cap_reason = "L5 integration … N/A"`. A stateless
-app whose functional tests pass but which cannot be backed up is honestly capped at **L2** (`"L3
-backup/restore … N/A"`) rather than shown as L4 — understating is safe; overstating is forbidden.
+Each rung has one of FOUR statuses, and the level is:

-Worked examples (real runs):
- `uptime-kuma` — install+upgrade+backup+restore+functional all pass, no SSO surface → **L4**
-  (`cap = "L5 integration (SSO/OIDC + cross-app) N/A"`).
- `custom-html-tiny` — stateless, not backup-capable: install+upgrade pass, backup/restore N/A →
-  **L2** (`cap = "L3 backup/restore (data integrity) N/A"`).
+    level = the highest rung that PASSED, where every rung below it is "pass" or an intentional skip
+
+- **pass / fail** — the rung was exercised. A FAIL blocks: no rung above it counts, however green.
+- **skip (intentional)** — the rung *genuinely does not apply*, from a declared or structural fact:
+  not backup-capable (declared), only one published version (no upgrade target), or a declared
+  `EXPECTED_NA`. Intentional skips are **climbed past** — a stateless recipe with passing
+  functional tests and a clean lint reaches **L5**, not the old "capped at 2".
+- **unver (unverified)** — the rung *should* have run but didn't: infra error, missing tool,
+  harness exception, prior-stage abort, timeout. **The level cannot rise above an unverified
+  rung** — it blocks exactly like a fail (we never claim what we didn't check). Anything
+  unclassifiable defaults to unver (conservative).
+
+There is **no capping concept** (no `cap_reason`, no `capped`): the per-rung table
+(✔ / ✘ / intentional-skip / unverified) on the card and in `results.json.rungs` is the sole
+carrier of "why isn't this level higher". Worked examples:
+
+- install ✔, upgrade ✘, backup ✔, functional ✔, lint ✔ → **level 1** (fail blocks).
+- install ✔, upgrade ✔, backup skip (not capable), functional ✔, lint ✔ → **level 5**.
+- install ✔, upgrade ✔, backup unver (harness error), functional ✔, lint ✔ → **level 2**.
+- all four ✔, lint unver (abra missing) → **level 4** (an unverified top rung isn't earned).
+
+Integration (SSO/OIDC + cross-app) and recipe-local tests are **optional capabilities**, not
+rungs — they never affect the level (SSO remains enforced for the run VERDICT).

 ### How tiers map to rungs (the translation layer)

 `run_recipe_ci.py` holds the run's per-tier results (`install/upgrade/backup/restore/custom`) +
-deps/SSO signals; `runner/harness/results.py::derive_rungs` maps them to the rung-status dict that
-`runner/harness/level.py::compute_level` scores. The mapping (also in `DECISIONS.md`, Phase 3):
+structural signals; `runner/harness/results.py::derive_rungs` maps them to the rung-status dict
+that `runner/harness/level.py::compute_level` scores. The full intentional-vs-unintentional
+classification table for every N/A source is in `machine-docs/DECISIONS.md` (phase lvl5). Summary:

- **install** ← install tier (pass/fail).
- **upgrade** ← upgrade tier; `skip` → **na** (only one published version).
+- **install** ← install tier (pass/fail; a non-run is unver — install always applies).
+- **upgrade** ← upgrade tier; tier skipped with no upgrade target (single published version,
+  structural) → skip; declared `EXPECTED_NA` → skip; otherwise unver.
 - **backup_restore** ← backup AND restore tiers both pass → pass; either fail → fail; not
-  backup-capable → **na**.
- **functional** ← the custom tier minus its SSO tests; a custom failure conservatively fails this
-  rung (we don't split functional-vs-SSO failure → never inflate); no custom tests → **na**.
- **integration** ← applies only if the recipe declares deps; pass iff deps wired and SSO verified and
-  custom didn't fail; recipes with no declared deps → **na** (the "caps at L4" rule).
- **recipe_local** ← the recipe repo's own `tests/` (discovery source `repo-local`) ran and passed;
-  none present → **na**.
-
-The pure scorer is exhaustively unit-tested + fuzz-verified (all 729 rung combinations: level ==
-count of leading consecutive passes, zero inflation).
+  backup-capable (structural/declared) → skip; unverified-while-capable → unver.
+- **functional** ← the custom tier; a custom failure conservatively fails this rung; no custom
+  tests is a coverage GAP → unver, unless declared `EXPECTED_NA["functional"]` → skip.
+- **lint** ← the lint executor (`runner/harness/lint.py`): `abra recipe lint` on a pristine
+  scratch clone of the run's recipe tree at the exact tested sha, 60s hard budget, full output in
+  the run artifact `lint.txt`. Outcome is pass/fail when lint runs; when it can't run, the rung is **unver** (never
+  a silent pass, never an intentional skip). Lint never changes the run verdict.

 ### Invariant flags (shown, not climbed)

@ -77,19 +84,29 @@ build number, or the run's unique app domain for a hand-run). Schema:

 ```json
 {
-  "schema": 1, "run_id": "...", "recipe": "...", "version": "...", "pr": "...", "ref": "...",
+  "schema": 2, "run_id": "...", "recipe": "...", "version": "...", "pr": "...", "ref": "...",
  "finished": 0.0,
-  "level": 4, "level_cap_reason": "L5 integration (SSO/OIDC + cross-app) N/A",
-  "rungs": {"install":"pass","upgrade":"pass","backup_restore":"pass","functional":"pass",
-            "integration":"na","recipe_local":"na"},
+  "level": 5,
+  "rungs": {"install":"pass","upgrade":"pass","backup_restore":"skip","functional":"pass",
+            "lint":"pass"},
+  "lint": {"status":"pass","detail":"","rules_failed":[]},
+  "skips": {"intentional": {"backup_restore": "not backup-capable (no backupbot labels / declared)"},
+            "unintentional": []},
  "stages": [{"name":"install","status":"pass",
              "tests":[{"name":"test_serving","status":"pass","ms":168,"source":"generic"}]}],
-  "results": {"install":"pass","upgrade":"pass","backup":"pass","restore":"pass","custom":"pass"},
+  "results": {"install":"pass","upgrade":"pass","backup":"skip","restore":"skip","custom":"pass"},
  "flags": {"clean_teardown": true, "no_secret_leak": true},
  "screenshot": "screenshot.png", "summary_card": "summary.png"
 }
 ```

+`rungs` carries the four-status vocabulary above; `skips.intentional` maps each intentionally
+skipped rung to its (declared or structural) reason and `skips.unintentional` lists the
+unverified rungs. `lint` carries the L5 rung outcome + failing rule ids; the full
+`abra recipe lint` output is served at `/runs/<run_id>/lint.txt`. Pre-lvl5 artifacts
+(`"schema": 1`, 4-rung ladder, `level_cap_reason`/`level_cap_rung` present, `"na"` statuses)
+are still rendered as-is by the dashboard/card — their stored level is never recomputed.
+
 Assembly is **best-effort**: a failure to build/write `results.json` is logged but never changes the
 run's exit code (cosmetics never block the pipeline, R7).

--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@ -21,23 +21,24 @@ from __future__ import annotations
 import html
 import os

-# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top.
+# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top (L5 = full
+# clean climb incl. lint — phase lvl5).
 LEVEL_COLOR = {
    0: "#e5534b",  # red — install failed
    1: "#e0823d",  # orange
    2: "#e0823d",
    3: "#d9b343",  # amber
-    4: "#a0b93f",  # yellow-green
-    5: "#57ab5a",  # green
-    6: "#3fb950",  # bright green — full climb
+    4: "#a0b93f",  # yellow-green — above functional, lint not earned
+    5: "#3fb950",  # bright green — full climb (lint passed)
 }
-STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–"}
+STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–", "unver": "⊘"}
 STATUS_COLOR = {
    "pass": "#3fb950",
    "fail": "#f85149",
    "error": "#f85149",
    "skip": "#8b949e",
    "na": "#8b949e",
+    "unver": "#d29922",  # amber — not exercised: should have run but wasn't verified
 }


@ -79,44 +80,15 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
    )


-# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not
-# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an
-# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels
-# (no emoji) so the SVG renders anywhere.
+# Amber for UNVERIFIED rung rows in the table (a rung that should have run and wasn't checked).
 GAP_COLOR = "#d29922"
-EXPECT_COLOR = "#6e7681"


-def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str:
-    """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment
-    that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`):
-      - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'.
-      - "intentional"   (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'.
-      - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it).
-    The badge never inflates — it only annotates the cap the level already reflects."""
-    label, msg = "cc-ci", f"level {int(level)}"
-    lw, mw = _text_width(label), _text_width(msg)
-    third: tuple[str, str] | None = None
-    if cap_skip == "unintentional":
-        third = ("gap?", GAP_COLOR)
-    elif cap_skip == "intentional":
-        third = ("expected", EXPECT_COLOR)
-    if third is None:
-        return render_badge_svg(label, msg, level_color(level))
-    txt, tcolor = third
-    tw = _text_width(txt)
-    w = lw + mw + tw
-    return (
-        f'<svg xmlns="http://www.w3.org/2000/svg" width="{w}" height="20" role="img" '
-        f'aria-label="{html.escape(label)}: {html.escape(msg)} ({html.escape(txt)})">'
-        f'<rect width="{lw}" height="20" fill="#555"/>'
-        f'<rect x="{lw}" width="{mw}" height="20" fill="{level_color(level)}"/>'
-        f'<rect x="{lw + mw}" width="{tw}" height="20" fill="{tcolor}"/>'
-        f'<g fill="#fff" font-family="Verdana,Geneva,sans-serif" font-size="11">'
-        f'<text x="6" y="14">{html.escape(label)}</text>'
-        f'<text x="{lw + 6}" y="14">{html.escape(msg)}</text>'
-        f'<text x="{lw + mw + 6}" y="14">{html.escape(txt)}</text></g></svg>'
-    )
+def level_badge_svg(level: int) -> str:
+    """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level — NUMBER + COLOUR ONLY
+    (operator-specified, phase lvl5). 'Why isn't it higher' lives in the card's per-rung table,
+    never on the badge."""
+    return render_badge_svg("cc-ci", f"level {int(level)}", level_color(level))


 def _stage_rows(stages: list[dict]) -> str:
@ -141,12 +113,13 @@ def _stage_rows(stages: list[dict]) -> str:
    return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'


-# Friendly rung labels for the skip rows (the four essential rungs).
+# Friendly rung labels for the skip/unverified rows (the five essential rungs).
 RUNG_LABEL = {
    "install": "install",
    "upgrade": "upgrade",
    "backup_restore": "backup/restore",
    "functional": "functional",
+    "lint": "lint",
 }
 SKIP_GREEN = (
    "#57ab5a"  # muted green — an intentional skip reads like a pass (but labelled, never inflating)
@ -154,9 +127,10 @@ SKIP_GREEN = (


 def _skip_rows(skips: dict) -> str:
-    """Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row
-    but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below;
-    an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it."""
+    """Render the non-run rungs as stage-like rows (phase lvl5 semantics). An INTENTIONAL skip
+    (declared/structural — the rung does not apply, the climb continues past it) is muted green
+    with its reason on the line below; an UNVERIFIED rung (should have run, wasn't checked — the
+    level cannot rise above it) is amber 'unverified'."""
    rows = []
    for rung, reason in (skips.get("intentional") or {}).items():
        rows.append(
@ -171,11 +145,11 @@ def _skip_rows(skips: dict) -> str:
        rows.append(
            f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{GAP_COLOR}">⊘</span>'
            f"<b>{html.escape(RUNG_LABEL.get(rung, rung))}</b></td>"
-            f'<td class="st" style="color:{GAP_COLOR}">unintentional skip</td></tr>'
+            f'<td class="st" style="color:{GAP_COLOR}">unverified</td></tr>'
        )
        rows.append(
-            '<tr class="skipreason"><td></td><td colspan="2">not declared in EXPECTED_NA — add the '
-            "missing test/label, or declare the skip with a reason</td></tr>"
+            '<tr class="skipreason"><td></td><td colspan="2">rung did not run / could not be '
+            "checked — the level cannot rise above an unverified rung</td></tr>"
        )
    return "\n".join(rows)

@ -184,13 +158,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
    """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to
    the screenshot PNG (same dir as the card) — omitted from the card if None / absent.

-    The card shows exactly what the data says: recipe + version, the level badge + cap reason, the
-    per-stage/per-test ✔/✘ table, the invariant flags, and the app screenshot. No computation here."""
+    The card shows exactly what the data says: recipe + version, the level, the per-stage/per-test
+    ✔/✘ table (+ skip/unverified rung rows — the SOLE carrier of "why isn't the level higher"),
+    the invariant flags, and the app screenshot. No computation here. Tolerates old (schema-1)
+    artifacts: the ladder height is read off the rungs the artifact actually has."""
    recipe = html.escape(str(data.get("recipe", "?")))
    version = html.escape(str(data.get("version") or data.get("ref") or ""))
    level = int(data.get("level", 0))
-    cap_reason = str(data.get("level_cap_reason") or "")
-    cap = html.escape(cap_reason)
+    # Old (pre-lvl5) artifacts have a 4-rung ladder — render their "of N" honestly.
+    ladder_top = 5 if "lint" in (data.get("rungs") or {}) else 4
    sk = data.get("skips", {}) or {}
    color = level_color(level)
    flags = data.get("flags", {}) or {}
@ -221,7 +197,7 @@ body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;backgroun
 .lvl .num{{display:inline-block;min-width:64px;padding:.3rem .7rem;border-radius:10px;
  font-size:1.6rem;font-weight:700;color:#0d1117;background:{color}}}
 .lvl .lbl{{display:block;color:#8b949e;font-size:.72rem;text-transform:uppercase;margin-top:.2rem}}
-.cap{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}}
+.ladder{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}}
 .body{{display:flex;gap:1rem;padding:1rem 1.3rem}}
 .tbl{{flex:1}}
 table{{border-collapse:collapse;width:100%;font-size:.85rem}}
@ -238,12 +214,12 @@ tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0
 .shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}}
 .flags{{display:flex;gap:.6rem;padding:.6rem 1.3rem 1rem}}
 .flag{{border:1px solid;border-radius:6px;padding:.15rem .5rem;font-size:.78rem;color:#c9d1d9}}
-.cap b{{color:#c9d1d9}}
+.ladder b{{color:#c9d1d9}}
 </style></head><body><div class="card">
 <div class="hd">{FLOWER_SVG}
 <div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div>
 <div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div>
-<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (4)"}</div>
+<div class="ladder"><b>level {level} of {ladder_top}</b></div>
 <div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div>
 <div class="flags">{"".join(flag_bits)}</div>
 </div></body></html>"""
--- a/runner/harness/level.py
+++ b/runner/harness/level.py
@ -1,67 +1,67 @@
-"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1).
+"""The level ladder — five rungs, no capping (phase lvl5, plan-phase-lvl5-lint-rung.md).

-A single integer **level** summarising how far up the quality ladder a recipe run climbed, with
-YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean
-PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
-the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
-a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
-— with a recorded reason so the level is *fair*, not inflated.
-
-The ladder is the FOUR essential rungs every recipe is held to:
+A single integer **level** summarising how far up the quality ladder a recipe run climbed:
  L0 — install failed / app never became healthy.
  L1 — Installs: deploys + passes health/readiness.
  L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
  L3 — Backup/restore: seeded data survives backup → wipe → restore.
  L4 — Functional: recipe-specific functional tests pass.
+  L5 — Lint: `abra recipe lint` passes against the exact ref under test.

-Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL**
-capabilities — they are NOT part of the level ladder and never cap it. They still run when present
-(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a
-recipe without an SSO surface or without repo-local tests is simply not penalised on the level.
+Semantics (operator-decided 2026-06-11, recorded in DECISIONS.md — replaces the Phase-3
+"N/A caps" rule):

-This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
-test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
-(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status
-dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
+    level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"; 0 if none.

-Rung status vocabulary (each rung ∈ these three):
-  "pass" — the rung was exercised and passed.
-  "fail" — the rung was exercised and failed.
-  "na"   — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
-           not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct
-           cap_reason) so the level never overstates what was actually verified.
+A rung has one of FOUR statuses:
+  "pass"  — exercised and passed.
+  "fail"  — exercised and failed. Blocks: no rung above it can count.
+  "skip"  — INTENTIONAL skip: the rung genuinely does not apply to this recipe, from a
+            declared or structural fact (not backup-capable; only one published version;
+            declared in recipe_meta.EXPECTED_NA). Does NOT stop the climb.
+  "unver" — UNINTENTIONAL not-verified: the rung SHOULD have run but didn't (infra error,
+            missing tool, harness exception, prior-stage abort, timeout). Blocks exactly
+            like a fail — the level never rises above a rung that wasn't actually checked.
+
+The per-rung table (results.json `rungs`, card, dashboard) is the SOLE carrier of "why isn't
+this level higher" — there is no cap_reason. The classification of every N/A source into
+skip-vs-unver lives in derive_rungs (results.py) and is tabulated in DECISIONS.md; anything
+unclassifiable defaults to "unver" (conservative: never claim what wasn't checked).
+
+Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) remain
+OPTIONAL capabilities — not rungs, never counted (SSO is still enforced for the run VERDICT
+via the deps/SSO checks in run_recipe_ci.py).
+
+This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the
+unit test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`).
 """

 from __future__ import annotations

-# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
-# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the
-# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple.
-RUNGS = ("install", "upgrade", "backup_restore", "functional")
+# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install
+# itself did not pass. These five are the ESSENTIAL rungs — integration/recipe-local are
+# optional and deliberately NOT in this tuple.
+RUNGS = ("install", "upgrade", "backup_restore", "functional", "lint")

-# Human-readable label per rung level, for cap_reason + the summary card.
+# Human-readable label per rung level, for the summary card / docs.
 RUNG_LABEL = {
    1: "install (deploy + health)",
    2: "upgrade (prev published → PR)",
    3: "backup/restore (data integrity)",
    4: "functional (recipe-specific tests)",
+    5: "lint (abra recipe lint)",
 }

-VALID = {"pass", "fail", "na"}
+VALID = {"pass", "fail", "skip", "unver"}


-def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
-    """Map a rung-status dict → (level 0..4, cap_reason).
+def compute_level(rungs: dict[str, str]) -> int:
+    """Map a rung-status dict → level 0..5.

-    `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
-    highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
-    returned when the install rung itself is not "pass" (install failed / never healthy).
-
-    cap_reason explains where the climb stopped:
-      - "" (empty) when the recipe earned the top rung (L4, full clean climb).
-      - "L<k> <label> FAILED" when a rung was exercised and failed.
-      - "L<k> <label> N/A" when a rung does not apply to this recipe.
-    Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
+    `rungs` must contain a status in VALID for every name in RUNGS. The level is the highest
+    i such that rungs[i] == "pass" and every rung below i is "pass" or "skip" (an intentional
+    skip does not stop the climb). A "fail" or "unver" rung blocks: rungs above it cannot
+    count, however green. 0 when no rung qualifies.
    """
    for name in RUNGS:
        st = rungs.get(name)
@ -69,52 +69,44 @@ def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
            raise ValueError(
                f"rung {name!r} has invalid status {st!r} (expect one of {sorted(VALID)})"
            )
-
-    # L0: install did not pass.
-    if rungs["install"] != "pass":
-        if rungs["install"] == "fail":
-            return 0, "L1 " + RUNG_LABEL[1] + " FAILED"
-        # install N/A is not a real-world state for a deploy run, but handle it for totality.
-        return 0, "L1 " + RUNG_LABEL[1] + " N/A"
-
-    # Climb: stop at the first rung that is not a clean pass.
    level = 0
    for idx, name in enumerate(RUNGS, start=1):
-        if rungs[name] == "pass":
+        st = rungs[name]
+        if st == "pass":
            level = idx
+        elif st == "skip":
            continue
-        # first non-pass rung — caps the climb
-        kind = "FAILED" if rungs[name] == "fail" else "N/A"
-        return level, f"L{idx} {RUNG_LABEL[idx]} {kind}"
-
-    # Full clean climb to the top rung.
-    return level, ""
+        else:  # fail / unver — nothing above this rung can count
+            break
+    return level


 def backup_restore_status(backup: str | None, restore: str | None, backup_capable: bool) -> str:
    """Collapse the backup + restore tier results into the single L3 rung status.

-    Both tiers must pass for the rung to pass (the rung is "seeded data survives backup→wipe→restore",
-    which is only verified if BOTH the backup and the restore tier are green). If the recipe is not
-    backup-capable, both tiers skip → the rung is N/A (caps at L2, recorded). A fail in either tier
-    fails the rung.
+    Not backup-capable (a declared/structural fact: no backupbot labels, or
+    recipe_meta.BACKUP_CAPABLE=False) → "skip" — the rung genuinely does not apply.
+    Otherwise both tiers must pass for the rung to pass; a fail in either tier fails it; any
+    other shape (tier skipped or never ran while backup-capable — e.g. a prior-stage abort)
+    is "unver": the rung should have been verified and wasn't.
    """
    if not backup_capable:
-        return "na"
+        return "skip"
    vals = {backup, restore}
    if "fail" in vals:
        return "fail"
    if backup == "pass" and restore == "pass":
        return "pass"
-    # any skip/None while backup-capable → not verified → treat as N/A (cannot claim L3)
-    return "na"
+    return "unver"


 def tier_to_rung(status: str | None) -> str:
-    """Map a single tier result ('pass'|'fail'|'skip'|None) to a rung status. 'skip'/None → 'na'
-    (the tier did not apply / did not run), so it caps the climb without being counted as a failure."""
+    """Map a single tier result ('pass'|'fail'|'skip'|None) to a rung status, with NO
+    intentionality information: a tier that did not produce a pass/fail is "unver" (it should
+    have run and wasn't verified). The caller (derive_rungs) upgrades "unver" to "skip" where
+    a declared/structural fact makes the skip intentional — never the other way around."""
    if status == "pass":
        return "pass"
    if status == "fail":
        return "fail"
-    return "na"
+    return "unver"
--- a/runner/harness/lint.py
+++ b/runner/harness/lint.py
@ -0,0 +1,174 @@
+"""L5 lint rung — run `abra recipe lint` against the exact ref under test (phase lvl5).
+
+Executor + classifier for the fifth ladder rung. Design constraints (plan-phase-lvl5 §2):
+
+- **Lints the recipe's CONTENT, not the harness plumbing.** abra lint reads every
+  `compose*.yml` in the tree (including the CI's untracked install_steps overlays) and
+  force-fetches tags from `origin` (which on PR runs is the private mirror, unauthenticated
+  here → FATA). Both are harness artifacts, so the executor lints a PRISTINE scratch clone of
+  the per-run tree, checked out at the exact tested ref: `origin` becomes a local path (tag
+  fetch works offline, no auth) and the run's true tag set rides along (fetch_recipe pulls the
+  upstream version tags into the per-run tree). No lint rule is filtered or ignored.
+- **rc is not the verdict.** `abra recipe lint` exits non-zero only when it cannot lint
+  (FATA); rule outcomes live in its table — error-severity ❌ rows print a trailing
+  "WARN critical errors present …" sentinel but still exit 0. So the classifier parses the
+  table: FAIL iff an error-severity rule is unsatisfied (or the FATA is content-attributable:
+  "unable to validate recipe" — the recipe config itself is invalid). PASS iff the table
+  rendered and no error rule failed. ANYTHING else — timeout, abra/script missing, tag-fetch
+  FATA, unparseable output — is "unver": loud, never a silent pass, never an intentional skip.
+- **Best-effort + time-bounded.** Hard ~60s timeout (observed runtime ≈0.7s); the caller
+  wraps run_lint in try/except besides — a wedged lint can never hang or fail a run, and the
+  run VERDICT is untouched by any lint outcome (lint is a level rung, not a gate).
+- Full command output (+ cmd, rc, ref header) is captured to `lint.txt` in the run artifact
+  dir; results.json carries status + short excerpt (failing rule ids).
+
+abra needs a PTY even with -n ("inappropriate ioctl on device") → run via util-linux
+`script -qec`, same trick as harness.abra._run_pty.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import tempfile
+
+from . import abra
+
+LINT_TIMEOUT = 60  # hard budget, seconds; observed ~0.7s per recipe
+
+# Strip ANSI escape sequences from PTY output before parsing.
+_ANSI = re.compile(r"\x1b\[[0-9;?]*[A-Za-z]")
+
+# A table row: ┃ R014 ┃ description ┃ error ┃ ✅/❌ ┃ skipped ┃ how-to-fix ┃ — abra renders the
+# grid with HEAVY box-drawing verticals (┃ U+2503); accept the light variant (│ U+2502) too.
+_ROW = re.compile(
+    r"^\s*[│┃]\s*(R\d+)\s*[│┃](.*?)[│┃]\s*(warn|error)\s*[│┃]\s*(✅|❌)\s*[│┃]\s*([^│┃]*)[│┃]"
+)
+
+# abra's trailing sentinel when any error-severity rule is unsatisfied (cross-check only).
+_SENTINEL = "critical errors present"
+
+# FATA classes that are the RECIPE's fault (its config cannot even be validated) — a lint
+# FAIL, not an unverified rung. Everything else non-zero is environmental → unver.
+_CONTENT_FATA = "unable to validate recipe"
+
+
+def parse_table(output: str) -> list[dict]:
+    """Extract the rule rows of an `abra recipe lint` table from raw command output.
+
+    ANSI escapes are removed and carriage returns are treated as line breaks before matching.
+    Every line that matches `_ROW` becomes
+    {rule, desc, severity, satisfied(bool), skipped(bool)}; any other line is dropped, so
+    output in which no table rendered yields []."""
+    cleaned = _ANSI.sub("", output).replace("\r", "\n")
+    matches = (_ROW.match(line) for line in cleaned.splitlines())
+    return [
+        {
+            "rule": m.group(1),
+            "desc": m.group(2).strip(),
+            "severity": m.group(3),
+            "satisfied": m.group(4) == "✅",
+            # a blank or "-" cell in the skipped column counts as NOT skipped
+            "skipped": m.group(5).strip() not in ("", "-"),
+        }
+        for m in matches
+        if m
+    ]
+
+
+def classify(rc: int | None, output: str) -> tuple[str, str, list[str]]:
+    """(status, detail, failed_rule_ids) from a finished lint invocation.
+
+    status ∈ {"pass","fail","unver"}; never a silent pass: pass requires a parsed table with
+    zero unsatisfied error-severity rules AND no sentinel. `rc=None` means the run itself blew
+    up (timeout/missing binary) — always unver; the caller supplies the detail.
+
+    `output` is raw PTY text and may carry colour escapes. The phrase checks (content FATA,
+    sentinel) therefore look at BOTH the raw and the ANSI-stripped text, so an escape sequence
+    landing inside a phrase can never hide it — the checks only ever get stricter.
+    """
+    if rc is None:
+        return "unver", "lint did not run", []
+    text = _ANSI.sub("", output)
+
+    def _seen(phrase: str) -> bool:
+        # raw OR stripped: stripping must never lose a hit the raw text already had
+        return phrase in output or phrase in text
+
+    if rc != 0:
+        first = next((ln for ln in text.splitlines() if "FATA" in ln), "").strip()
+        if _seen(_CONTENT_FATA):
+            # The recipe config itself failed validation — attributable to recipe content.
+            return "fail", first or "recipe config failed validation", []
+        return "unver", first or f"abra recipe lint exited {rc} with no table", []
+    rows = parse_table(output)
+    if not rows:
+        return "unver", "no lint table in output (rc=0)", []
+    # Only error-severity rules that were evaluated (not skipped) and came back ❌ fail the rung.
+    failed = [
+        r["rule"]
+        for r in rows
+        if r["severity"] == "error" and not r["satisfied"] and not r["skipped"]
+    ]
+    if failed:
+        return "fail", f"error rule(s) unsatisfied: {', '.join(failed)}", failed
+    if _seen(_SENTINEL):
+        # abra says critical errors but our parse found none — distrust the parse, never inflate.
+        return "fail", "abra reported critical errors (table parse found none)", []
+    return "pass", "", []
+
+
+def run_lint(recipe: str, ref: str | None, out_dir: str | None) -> dict:
+    """Execute the lint rung for `recipe` at exactly `ref` (a sha; None → the per-run tree's
+    current HEAD). Returns {"status","detail","rules_failed"} and writes lint.txt into
+    `out_dir` (when given). Never raises: every failure mode is caught into status "unver".
+
+    LINT_TIMEOUT is ONE budget shared by the clone, the checkout and the lint itself, so the
+    whole rung is bounded by it (not once per step). Whatever a timed-out or failed step
+    printed is kept as `output` and lands in lint.txt."""
+    import time  # function-scope import: only this executor needs the monotonic clock
+
+    scratch = None
+    rc: int | None = None
+    output = ""
+    deadline = time.monotonic() + LINT_TIMEOUT
+
+    def _as_text(chunk) -> str:
+        # Exception payloads may be None, str, or (on timeout) still-undecoded bytes.
+        if chunk is None:
+            return ""
+        if isinstance(chunk, bytes):
+            return chunk.decode("utf-8", errors="replace")
+        return str(chunk)
+
+    def _run(cmd: list[str], **kw) -> subprocess.CompletedProcess:
+        # Every step draws on what is left of the shared budget.
+        left = deadline - time.monotonic()
+        if left <= 0:
+            raise subprocess.TimeoutExpired(cmd, LINT_TIMEOUT)
+        return subprocess.run(cmd, capture_output=True, text=True, timeout=left, **kw)
+
+    try:
+        src_tree = abra.recipe_dir(recipe)
+        scratch = tempfile.mkdtemp(prefix="ccci-lint-")
+        lint_abra = os.path.join(scratch, "abra")
+        os.makedirs(os.path.join(lint_abra, "recipes"))
+        clone = os.path.join(lint_abra, "recipes", recipe)
+        _run(["git", "clone", "--quiet", src_tree, clone], check=True)
+        if ref:
+            _run(["git", "-C", clone, "checkout", "-f", "--quiet", ref], check=True)
+        # catalogue: R006 (published catalogue version) reads it; servers: harmless, some abra
+        # paths stat it. Symlink the live ones (read-only use).
+        for shared in ("catalogue", "servers"):
+            src = os.path.join(abra.abra_dir(), shared)
+            if os.path.exists(src):
+                os.symlink(os.path.realpath(src), os.path.join(lint_abra, shared))
+        env = dict(os.environ, ABRA_DIR=lint_abra)
+        # abra needs a PTY even with -n → run it under util-linux `script`.
+        proc = _run(
+            ["script", "-qec", f"abra recipe lint -n {shlex.quote(recipe)}", "/dev/null"],
+            env=env,
+        )
+        rc, output = proc.returncode, proc.stdout + proc.stderr
+        status, detail, failed = classify(rc, output)
+    except subprocess.TimeoutExpired as e:
+        # keep the partial output (if any) so lint.txt shows where the step got stuck
+        output = _as_text(e.stdout) + _as_text(e.stderr)
+        status, detail, failed = "unver", f"lint timed out after {LINT_TIMEOUT}s", []
+    except Exception as e:  # noqa: BLE001 — rung must never break the run; unver is the honest floor
+        if isinstance(e, subprocess.CalledProcessError):
+            # a failed git step: keep what git printed so lint.txt explains the unver
+            output = _as_text(e.stdout) + _as_text(e.stderr)
+        status, detail, failed = "unver", f"lint executor error: {e.__class__.__name__}: {e}", []
+    finally:
+        if scratch:
+            shutil.rmtree(scratch, ignore_errors=True)
+    if status == "unver":
+        print(f"!! lint rung UNVERIFIED for {recipe}: {detail}", flush=True)
+    if out_dir:
+        try:
+            os.makedirs(out_dir, exist_ok=True)
+            with open(os.path.join(out_dir, "lint.txt"), "w", encoding="utf-8") as f:
+                f.write(
+                    f"$ abra recipe lint -n {recipe}  (ref={ref or 'HEAD'})\n"
+                    f"rc={rc}  status={status}  {detail}\n\n{output}"
+                )
+        except OSError as e:
+            print(f"  lint: could not write lint.txt (non-fatal): {e}", flush=True)
+    return {"status": status, "detail": detail, "rules_failed": failed}
--- a/runner/harness/meta.py
+++ b/runner/harness/meta.py
@ -70,13 +70,13 @@ KEYS: tuple[Key, ...] = (
        "BACKUP_CAPABLE",
        "bool",
        None,
-        "Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces N/A; `True` forces the tier on; unset = auto-detect.",
+        "Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces an intentional skip of the backup/restore rung; `True` forces the tier on; unset = auto-detect.",
    ),
    Key(
        "EXPECTED_NA",
        "dict",
        None,
-        "Declare an N/A rung intentional: `{rung: reason}`. The cap stands either way; only the report wording changes.",
+        "Declare a non-run rung an INTENTIONAL skip: `{rung: reason}` — the level climbs past it; an undeclared non-run rung is *unverified* and blocks the level above it (classification table: machine-docs/DECISIONS.md phase lvl5). Never overrides an exercised pass/fail; the `lint` rung has no escape hatch.",
    ),
    Key(
        "READY_PROBE",
--- a/runner/harness/results.py
+++ b/runner/harness/results.py
@ -1,20 +1,22 @@
-"""Phase 3 — structured run results + results.json (plan-phase3-results-ux.md §4.2, R1/R3).
+"""Structured run results + results.json (Phase 3 §4.2 R1/R3; level semantics: phase lvl5).

-Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
+Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying:
  { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
-    level, level_cap_reason, level_cap_rung, rungs,
+    level, rungs, lint:{status,detail,rules_failed},
    skips:{intentional:{rung:reason}, unintentional:[rung]},
    flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }

-`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
-it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
-UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
-never claims a rung it did not verify; this only labels *why* a skip happened.
+Rung statuses (phase lvl5, operator-decided — see harness.level + DECISIONS.md): every rung is
+"pass" | "fail" | "skip" (INTENTIONAL — a declared/structural fact says the rung does not apply)
+| "unver" (UNINTENTIONAL — the rung should have run and wasn't verified; blocks the level like a
+fail). `derive_rungs` is the single place every N/A source is classified; anything it cannot
+attribute to a declared/structural fact defaults to "unver" (conservative). `skips` mirrors that
+split into results.json: intentional {rung: reason} / unintentional [rung] (= the unver rungs).

 The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
 parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
-rung-status dict derived here (`derive_rungs`) from the tier results + deps/SSO signals the
-orchestrator holds; that mapping is documented in DECISIONS.md (Phase 3).
+rung-status dict derived here (`derive_rungs`) from the tier results + structural signals the
+orchestrator holds; the classification table is in DECISIONS.md (phase lvl5).

 This module is import-pure (no side effects at import). `write_results` is the only writer; the
 orchestrator calls the build/write path inside a try/except so a results failure NEVER changes the
@ -138,53 +140,90 @@ def derive_rungs(
    results: dict[str, str],
    *,
    backup_capable: bool,
-    has_custom: bool,
+    has_upgrade_target: bool,
+    expected_na: dict | None = None,
+    lint_status: str | None = None,
 ) -> dict[str, str]:
-    """Translate the orchestrator's tier results into the rung-status dict harness.level consumes —
-    the FOUR essential rungs only. Conservative by design — never reports a rung 'pass' it can't
-    substantiate (cardinal guardrail: presentation never inflates).
+    """Translate the orchestrator's tier results + structural signals into the rung-status dict
+    harness.level consumes — the FIVE essential rungs. This is the SINGLE place every N/A source
+    is classified intentional ("skip") vs unintentional ("unver"); the table lives in DECISIONS.md
+    (phase lvl5). Conservative by design: never reports "pass" it can't substantiate, and any
+    rung that did not produce a pass/fail and has NO declared/structural reason is "unver".

-      L1 install    : install tier pass.
-      L2 upgrade    : upgrade tier (skip → N/A: only one published version).
-      L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable).
-      L4 functional : recipe-specific functional tests pass — the custom tier. N/A if none ran.
+      L1 install    : install tier pass. Always applies — never "skip" (non-run → unver).
+      L2 upgrade    : upgrade tier. Tier skipped + no upgrade target (only one published
+                      version, structural) → "skip"; declared in EXPECTED_NA → "skip";
+                      anything else non-pass/fail (prior-stage abort, tier excluded) → "unver".
+      L3 backup/res : backup AND restore tiers pass. Not backup-capable (declared/structural)
+                      → "skip"; EXPECTED_NA → "skip"; unverified-while-capable → "unver".
+      L4 functional : the custom tier. No custom tests / tier skipped → EXPECTED_NA-declared
+                      "skip", else "unver" (absent functional coverage is a gap, not an
+                      intentional property of the recipe).
+      L5 lint       : from the lint executor (harness.lint). pass/fail only — every recipe can
+                      be linted, so there is NO intentional-skip escape hatch: a lint that
+                      could not run (timeout, abra missing, executor error) is "unver".

    Integration (SSO/OIDC) and recipe-local are OPTIONAL and intentionally NOT rungs here — they
-    never cap the level (SSO is still enforced for the run VERDICT in run_recipe_ci.py).
+    never affect the level (SSO is still enforced for the run VERDICT in run_recipe_ci.py).
    """
+    expected = set((expected_na or {}).keys())
    rungs: dict[str, str] = {}
    rungs["install"] = level_mod.tier_to_rung(results.get("install"))
-    rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade"))
-    rungs["backup_restore"] = level_mod.backup_restore_status(
+
+    up = results.get("upgrade")
+    if up in ("pass", "fail"):
+        rungs["upgrade"] = up
+    elif up == "skip" and not has_upgrade_target:
+        # The orchestrator skipped the tier for the structural reason: nothing to upgrade from.
+        rungs["upgrade"] = "skip"
+    elif "upgrade" in expected:
+        rungs["upgrade"] = "skip"
+    else:
+        rungs["upgrade"] = "unver"
+
+    br = level_mod.backup_restore_status(
        results.get("backup"), results.get("restore"), backup_capable
    )
+    if br == "unver" and "backup_restore" in expected:
+        br = "skip"
+    rungs["backup_restore"] = br

    custom = results.get("custom")
-    if not has_custom or custom == "skip" or custom is None:
-        rungs["functional"] = "na"
-    elif custom == "fail":
-        rungs["functional"] = "fail"
-    else:  # custom == "pass"
-        rungs["functional"] = "pass"
+    if custom in ("pass", "fail"):
+        rungs["functional"] = custom
+    elif "functional" in expected:
+        rungs["functional"] = "skip"
+    else:
+        rungs["functional"] = "unver"
+
+    rungs["lint"] = lint_status if lint_status in ("pass", "fail") else "unver"
    return rungs


-def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
-    """Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model).
+# Reasons attached to STRUCTURAL intentional skips (no EXPECTED_NA declaration needed — the
+# fact is read off the recipe itself).
+_STRUCTURAL_REASON = {
+    "upgrade": "only one published version — no upgrade target",
+    "backup_restore": "not backup-capable (no backupbot labels / declared)",
+}

-    A recipe lists the rungs it intentionally skips, each with a reason, in
-    `recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is
-    **intentional** iff it is in that list; any rung that is skipped and NOT in the list is
-    **unintentional** (a coverage gap someone should either fill or declare). N/A still caps the
-    level either way — the harness never claims a rung it did not verify — this only labels *why* a
-    skip happened. Returns:
-      { "intentional": {rung: reason, ...},   # skipped AND declared in EXPECTED_NA
-        "unintentional": [rung, ...] }         # skipped but NOT declared
-    """
+
+def skips(
+    rungs: dict[str, str],
+    expected_na: dict | None,
+) -> dict:
+    """Mirror the rung classification into results.json's `skips` block:
+      { "intentional": {rung: reason, ...},   # status "skip" — declared/structural, with why
+        "unintentional": [rung, ...] }         # status "unver" — should have run, wasn't verified
+    The reason is the recipe's EXPECTED_NA declaration when present, else the structural fact
+    derive_rungs skipped on. Purely descriptive — the level math lives in harness.level."""
    expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
-    na = [r for r, st in rungs.items() if st == "na"]
-    intentional = {r: expected[r] for r in na if r in expected}
-    unintentional = sorted(r for r in na if r not in expected)
+    intentional = {
+        r: expected.get(r) or _STRUCTURAL_REASON.get(r, "declared intentional")
+        for r, st in rungs.items()
+        if st == "skip"
+    }
+    unintentional = sorted(r for r, st in rungs.items() if st == "unver")
    return {"intentional": intentional, "unintentional": unintentional}


@ -200,6 +239,8 @@ def build_results(
    clean_teardown: bool,
    no_secret_leak: bool,
    finished_ts: float | None,
+    has_upgrade_target: bool = True,
+    lint: dict | None = None,
    screenshot: str | None = None,
    summary_card: str | None = None,
    expected_na: dict | None = None,
@ -207,17 +248,41 @@ def build_results(
 ) -> dict:
    """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
    stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
-    declared intentional-skip map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
-    accidentally-missing coverage."""
+    declared intentional-skip map (recipe_meta.EXPECTED_NA); `has_upgrade_target` is the structural
+    "a previous published version exists" fact; `lint` is harness.lint.run_lint's result dict
+    (None — e.g. an old caller — derives the lint rung as "unver": never a silent pass)."""
    stages = collect_stages(records)
-    has_custom = any(r["tier"] == "custom" for r in records)
-    rungs = derive_rungs(results, backup_capable=backup_capable, has_custom=has_custom)
-    lvl, cap_reason = level_mod.compute_level(rungs)
-    # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
-    # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
-    capped = level_mod.RUNGS[lvl] if cap_reason else None
+    lint = lint or {}
+    lint_status = lint.get("status")
+    rungs = derive_rungs(
+        results,
+        backup_capable=backup_capable,
+        has_upgrade_target=has_upgrade_target,
+        expected_na=expected_na,
+        lint_status=lint_status,
+    )
+    # Surface lint in the per-stage table too (it has no pytest/JUnit tier), so the card's
+    # stage breakdown carries all five rungs.
+    if rungs["lint"] != "skip":  # lint is never "skip", but stay defensive
+        stages.append(
+            {
+                "name": "lint",
+                "status": rungs["lint"],
+                "tests": [
+                    {
+                        "name": "abra recipe lint",
+                        "classname": "lint",
+                        "source": "harness",
+                        "status": rungs["lint"],
+                        "ms": 0,
+                        "message": str(lint.get("detail") or ""),
+                    }
+                ],
+            }
+        )
+    lvl = level_mod.compute_level(rungs)
    return {
-        "schema": 1,
+        "schema": 2,
        "run_id": run_id(),
        "recipe": recipe,
        "version": version,
@ -225,9 +290,12 @@ def build_results(
        "ref": (ref or "")[:12],
        "finished": finished_ts,
        "level": lvl,
-        "level_cap_reason": cap_reason,
-        "level_cap_rung": capped,
        "rungs": rungs,
+        "lint": {
+            "status": rungs["lint"],
+            "detail": str(lint.get("detail") or ""),
+            "rules_failed": list(lint.get("rules_failed") or []),
+        },
        "skips": skips(rungs, expected_na),
        "stages": stages,
        "results": results,
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@ -58,6 +58,9 @@ from harness import (  # noqa: E402
 from harness import (  # noqa: E402
    deps as deps_mod,
 )
+from harness import (  # noqa: E402
+    lint as lint_mod,
+)
 from harness import (  # noqa: E402
    manifest as manifest_mod,
 )
@ -928,6 +931,24 @@ def main() -> int:
    run_artifact_dir = os.path.join(results_mod.runs_dir(), results_mod.run_id())
    junit_dir = os.path.join(run_artifact_dir, "junit")
    records: list[dict] = []
+
+    # L5 lint rung (phase lvl5): `abra recipe lint` against the EXACT tested ref, in a pristine
+    # scratch clone (harness.lint — the per-run tree is still at head_ref here, before any
+    # version-pinning checkout). Level rung only — NEVER the verdict: run_lint catches every
+    # failure mode into status "unver" (60s hard budget) and this belt-and-braces wrap makes a
+    # crashed executor identical to "could not verify".
+    lint_result = {"status": "unver", "detail": "lint executor crashed", "rules_failed": []}
+    try:
+        lint_result = lint_mod.run_lint(recipe, head_ref, run_artifact_dir)
+    except Exception as e:  # noqa: BLE001 — lint is a rung, not a gate; never touches the verdict
+        print(
+            f"!! lint rung executor crashed (non-fatal, rung=unver): {_scrub(str(e))}", flush=True
+        )
+    print(
+        f"lint rung: {lint_result['status']}"
+        f"{' — ' + lint_result['detail'] if lint_result.get('detail') else ''}",
+        flush=True,
+    )
    with contextlib.suppress(OSError):
        os.makedirs(junit_dir, exist_ok=True)

@ -1253,6 +1274,8 @@ def main() -> int:
            records=records,
            results=results,
            backup_capable=backup_cap,
+            has_upgrade_target=prev is not None,  # structural: a previous published version exists
+            lint=lint_result,  # L5 rung (phase lvl5)
            clean_teardown=clean_teardown,
            no_secret_leak=True,  # narrowed below by an actual scan of the serialised artifact
            screenshot=screenshot_rel,  # Phase 3 U1 (R4): relative PNG name iff capture succeeded
@ -1270,17 +1293,15 @@ def main() -> int:
                file=sys.stderr,
            )
        path = results_mod.write_results(data)
-        print(
-            f"results.json written: {path} (level={data['level']}"
-            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
-            flush=True,
-        )
-        # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
-        # but is not in the recipe's intentional list — either add the missing coverage or declare it.
+        print(f"results.json written: {path} (level={data['level']} of 5)", flush=True)
+        # Surface UNVERIFIED rungs in the CI log (non-blocking, R7): a rung that should have run
+        # and wasn't verified blocks the level above it — fill the coverage, or (where a
+        # declared/structural reason genuinely applies) declare it in EXPECTED_NA.
        for rung in data.get("skips", {}).get("unintentional", []):
            print(
-                f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
-                f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
+                f"⚠ coverage: rung '{rung}' is UNVERIFIED (did not run / could not be checked) — "
+                f"the level cannot rise above it. Add the missing test/coverage, or declare a "
+                f"genuine inapplicability in tests/{recipe}/recipe_meta.py "
                f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
                flush=True,
            )
@ -1302,21 +1323,10 @@ def main() -> int:
            with open(html_path, "w", encoding="utf-8") as f:
                f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
            png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
-            capped = data.get("level_cap_rung")
-            sk = data.get("skips", {})
-            cap_skip = (
-                "intentional"
-                if capped in (sk.get("intentional") or {})
-                else "unintentional"
-                if capped in (sk.get("unintentional") or [])
-                else ""
-            )
+            # Badge = level only (number + colour) — the per-rung table on the card is the sole
+            # carrier of "why isn't this higher" (operator-specified, phase lvl5).
            with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
-                f.write(
-                    card_mod.level_badge_svg(
-                        data["level"], data.get("level_cap_reason", ""), cap_skip
-                    )
-                )
+                f.write(card_mod.level_badge_svg(data["level"]))
            print(
                f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
                f"badge.svg written into {run_artifact_dir}",
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@ -1,8 +1,11 @@
-"""Unit tests for the pure card/badge renderers (harness.card), Phase 3 U2 (R3/R6).
+"""Unit tests for the pure card/badge renderers (harness.card) — phase lvl5 semantics.

-Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised in
-the U2 live demo). The cardinal check: the card REPORTS the data verbatim — level/marks come straight
-from the dict, never recomputed. Run cold:  cc-ci-run -m pytest tests/unit/test_card.py -q
+Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised
+live). The cardinal check: the card REPORTS the data verbatim — level/marks come straight from the
+dict, never recomputed — the badge is NUMBER + COLOUR ONLY, and the per-rung table rows (incl.
+intentional-skip / unverified) are the sole carrier of "why isn't the level higher". Old schema-1
+artifacts (4-rung ladder, cap fields present) must render without error and without relabeling.
+Run cold:  cc-ci-run -m pytest tests/unit/test_card.py -q
 """

 from __future__ import annotations
@ -14,12 +17,19 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
 from harness import card as C  # noqa: E402


-def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
-    return {
+def _data(level=5, **kw):
+    d = {
+        "schema": 2,
        "recipe": "uptime-kuma",
        "version": "1.23.0",
        "level": level,
-        "level_cap_reason": cap,
+        "rungs": {
+            "install": "pass",
+            "upgrade": "pass",
+            "backup_restore": "pass",
+            "functional": "pass",
+            "lint": "pass",
+        },
        "flags": {"clean_teardown": True, "no_secret_leak": True},
        "screenshot": "screenshot.png",
        "stages": [
@ -36,46 +46,54 @@ def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
                    {"name": "test_broken", "status": "fail", "ms": 5},
                ],
            },
+            {
+                "name": "lint",
+                "status": "pass",
+                "tests": [{"name": "abra recipe lint", "status": "pass", "ms": 0}],
+            },
        ],
    }
+    d.update(kw)
+    return d


 def test_level_color_ramp():
-    assert C.level_color(0) != C.level_color(6)
-    assert C.level_color(6) == "#3fb950"
-    assert C.level_color(99) == "#8b949e"  # unknown → grey
+    # 0 (red) … 5 (bright green — full 5-rung climb); unknown → grey.
+    assert C.level_color(0) != C.level_color(5)
+    assert C.level_color(5) == "#3fb950"
+    assert C.level_color(99) == "#8b949e"


-def test_badge_svg_wellformed():
+def test_badge_svg_is_number_and_color_only():
    svg = C.level_badge_svg(4)
    assert svg.startswith("<svg") and svg.endswith("</svg>")
    assert "level 4" in svg
    assert C.level_color(4) in svg
-    # plain cap (no intent) → two-box badge, no third segment
-    assert "expected" not in svg and "gap?" not in svg
+    # operator-specified (phase lvl5): NOTHING but the level on the badge — no annotation
+    # segment of any kind, whatever the rung situation.
+    assert "expected" not in svg and "gap?" not in svg and "skip" not in svg


-def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
-    # an intentional (declared) skip capped the climb → muted "expected" third segment
-    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
-    assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
-    assert "gap?" not in exp
-    # an unintentional skip (not declared) → amber "gap?" third segment
-    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
-    assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
-    assert "expected" not in gap
+def test_badge_svg_level5():
+    svg = C.level_badge_svg(5)
+    assert "level 5" in svg and "#3fb950" in svg


-def test_skip_rows_intentional_and_unintentional():
+def test_skip_rows_intentional_and_unverified():
    html_out = C._skip_rows(
        {"intentional": {"backup_restore": "no persistent data"}, "unintentional": ["functional"]}
    )
    # intentional skip: labelled row (muted green) + the reason on its own line
    assert "intentional skip" in html_out and C.SKIP_GREEN in html_out
    assert "backup/restore" in html_out and "no persistent data" in html_out
-    # unintentional skip: amber row + prompt to declare/add coverage
-    assert "unintentional skip" in html_out and C.GAP_COLOR in html_out
-    assert "functional" in html_out and "EXPECTED_NA" in html_out
+    # unverified rung: amber row + the blocks-the-level explanation
+    assert "unverified" in html_out and C.GAP_COLOR in html_out
+    assert "functional" in html_out and "cannot rise above" in html_out
+
+
+def test_skip_rows_lint_label_known():
+    html_out = C._skip_rows({"intentional": {}, "unintentional": ["lint"]})
+    assert ">lint<" in html_out.replace("</b>", "<")  # rung label renders, not a KeyError


 def test_skip_rows_empty_when_no_skips():
@ -83,22 +101,68 @@ def test_skip_rows_empty_when_no_skips():


 def test_card_html_reports_level_verbatim():
-    html = C.render_card_html(_data(level=2, cap="L3 backup/restore (data integrity) N/A"))
+    html = C.render_card_html(_data(level=2))
    assert "uptime-kuma" in html
    assert "1.23.0" in html
    # the level shown is exactly what was passed (no recompute)
    assert ">2<" in html
-    assert "L3 backup/restore" in html
+    assert "level 2 of 5" in html
    assert C.level_color(2) in html


-def test_card_html_shows_stage_and_test_marks():
+def test_card_html_no_cap_language():
+    """The rendered card contains no cap wording and states "level 5 of 5"."""
+    card = C.render_card_html(_data())
+    for banned in ("capped", "cap_reason"):
+        assert banned not in card
+    assert "level 5 of 5" in card
+
+
+def test_card_html_old_schema1_artifact_renders():
+    """History compatibility: a pre-lvl5 (schema 1) results dict still renders.
+
+    The artifact has a 4-rung ladder and the old cap fields; the card must show that
+    ladder's own height ("level 4 of 4", no retroactive relabeling) and no cap wording.
+    """
+    four_rungs = dict.fromkeys(("install", "upgrade", "backup_restore", "functional"), "pass")
+    legacy = {
+        "schema": 1,
+        "recipe": "legacy",
+        "version": "0.9",
+        "level": 4,
+        "level_cap_reason": "",
+        "level_cap_rung": None,
+        "rungs": four_rungs,
+        "skips": {"intentional": {}, "unintentional": []},
+        "flags": {"clean_teardown": True, "no_secret_leak": True},
+        "screenshot": None,
+        "stages": [],
+    }
+    card = C.render_card_html(legacy)
+    assert "legacy" in card
+    # the old top of the ladder, not 5
+    assert "level 4 of 4" in card
+    assert "capped" not in card
+
+
+def test_card_html_shows_stage_and_test_marks_incl_lint():
    html = C.render_card_html(_data())
    assert "install" in html and "custom" in html
+    assert "abra recipe lint" in html
    assert "test_serving" in html and "test_broken" in html
    assert C.STATUS_MARK["pass"] in html and C.STATUS_MARK["fail"] in html


+def test_card_html_unver_stage_mark_renders():
+    """A stage with status "unver" shows the unver mark and colour on the card."""
+    data = _data()
+    timed_out_case = {
+        "name": "abra recipe lint",
+        "status": "unver",
+        "ms": 0,
+        "message": "timed out",
+    }
+    # overwrite the third stage entry with an unverified lint stage
+    data["stages"][2] = {"name": "lint", "status": "unver", "tests": [timed_out_case]}
+    card = C.render_card_html(data)
+    assert C.STATUS_MARK["unver"] in card
+    assert C.STATUS_COLOR["unver"] in card
+
+
 def test_card_html_flags_rendered():
    html = C.render_card_html(_data())
    assert "clean teardown" in html and "no secret leak" in html
--- a/tests/unit/test_dashboard.py
+++ b/tests/unit/test_dashboard.py
@ -28,7 +28,6 @@ def _row(**kw):
        "ref": "db9a9502",
        "version": "db9a95024e9d",
        "level": 4,
-        "level_cap_reason": "",
        "has_screenshot": True,
        "flags": {"clean_teardown": True, "no_secret_leak": True},
        "finished": 0,
@ -40,7 +39,7 @@ def _row(**kw):

 def test_level_color_ramp_and_fallback():
    assert dashboard.level_color(0) == "#e5534b"
-    assert dashboard.level_color(6) == "#3fb950"
+    assert dashboard.level_color(5) == "#3fb950"  # full 5-rung climb (phase lvl5)
    assert dashboard.level_color(4) == "#a0b93f"
    assert dashboard.level_color(99) == "#8b949e"
    assert dashboard.level_color(None) == "#8b949e"
@ -61,20 +60,12 @@ def test_overview_grid_mirrors_results():
 def test_overview_never_greener_than_data():
    # A failed run at level 0 must show level 0 + the failure pill — never a green/high level.
    out = dashboard.render_overview(
-        [
-            _row(
-                status="failure",
-                level=0,
-                has_screenshot=False,
-                flags={},
-                level_cap_reason="L1 install FAILED",
-            )
-        ]
+        [_row(status="failure", level=0, has_screenshot=False, flags={})]
    )
    assert "level 0" in out
    assert dashboard.level_color(0) in out  # red
    assert dashboard._COLORS["failure"] in out
-    assert "level 4" not in out and "level 5" not in out and "level 6" not in out
+    assert "level 4" not in out and "level 5" not in out
    assert "no screenshot" in out  # placeholder, no broken image


@ -104,7 +95,6 @@ def test_build_row_projects_results(monkeypatch):
        lambda n: {
            "version": "1.2.3",
            "level": 2,
-            "level_cap_reason": "cap",
            "screenshot": "screenshot.png",
            "flags": {"clean_teardown": True},
        },
@ -123,6 +113,38 @@ def test_build_row_projects_results(monkeypatch):
    assert r["url"].endswith("/cc-ci/7")


+def test_build_row_old_schema1_artifact_renders(monkeypatch):
+    """History compatibility (phase lvl5) for a pre-lvl5 results.json.
+
+    The old artifact still carries cap fields; it must project and render without
+    KeyError, with its level shown VERBATIM (no retroactive relabeling), and the old cap
+    text must not resurface in the overview.
+    """
+
+    def _legacy_results(n):
+        return {
+            "schema": 1,
+            "version": "0.9.1",
+            "level": 2,
+            "level_cap_reason": "L3 backup/restore (data integrity) N/A",
+            "level_cap_rung": "backup_restore",
+            "screenshot": "screenshot.png",
+            "flags": {"clean_teardown": True, "no_secret_leak": True},
+        }
+
+    monkeypatch.setattr(dashboard, "_results_for", _legacy_results)
+    build = {
+        "number": 11,
+        "status": "success",
+        "event": "custom",
+        "params": {"RECIPE": "legacy", "REF": "abc123"},
+        "finished": 5,
+    }
+    row = dashboard._build_row(build)
+    overview = dashboard.render_overview([row])
+    assert "level 2" in overview
+    assert dashboard.level_color(2) in overview
+    # cap language gone from the surface
+    assert "N/A" not in overview
+    assert "capped" not in overview
+    history = dashboard.render_history("legacy", [row])
+    assert "L2" in history
+
+
 def test_build_row_degrades_without_results(monkeypatch):
    # No results.json (e.g. an old run): grid still renders from Drone fields, level absent.
    monkeypatch.setattr(dashboard, "_results_for", lambda n: {})
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@ -1,8 +1,14 @@
-"""Unit tests for the Phase-3 level ladder (harness.level), plan-phase3-results-ux.md §4.1 / R1.
+"""Unit tests for the level ladder (harness.level) — phase lvl5 semantics.

-Pure function — no I/O. Proves the YunoHost gap-caps-the-level semantics, including the U0 gate
-acceptance: a recipe that climbs through L4 reports 4, and one that fails at L2 is capped at 1
-(the level just below the failed rung). Run cold with:  cc-ci-run -m pytest tests/unit/test_level.py -q
+Pure function — no I/O. Proves the operator-decided rule (plan-phase-lvl5-lint-rung.md,
+DECISIONS.md phase lvl5):
+
+    level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"
+
+— a real FAIL blocks, an UNVERIFIED rung blocks exactly like a fail, an INTENTIONAL skip is
+climbed past. Includes the mission's four worked examples verbatim, and the old N/A cases
+(single-published-version recipe, non-backup-capable recipe) now climbing past their former
+caps. Run cold with:  cc-ci-run -m pytest tests/unit/test_level.py -q
 """

 from __future__ import annotations
@ -19,69 +25,115 @@ def _rungs(
    upgrade="pass",
    backup_restore="pass",
    functional="pass",
+    lint="pass",
 ):
    return {
        "install": install,
        "upgrade": upgrade,
        "backup_restore": backup_restore,
        "functional": functional,
+        "lint": lint,
    }


-# ---- the ladder: four essential rungs, top is L4 (functional) ----
+# ---- the ladder: five essential rungs, top is L5 (lint) ----


-def test_full_clean_climb_to_L4():
-    # All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
-    lvl, reason = L.compute_level(_rungs())
-    assert lvl == 4
-    assert reason == ""
+def test_full_clean_climb_is_L5():
+    """All five rungs pass (the _rungs defaults) → level 5."""
+    every_rung_passing = _rungs()
+    assert L.compute_level(every_rung_passing) == 5


-def test_fails_at_L2_capped_at_L1():
-    # GATE: upgrade fails → capped at L1 even though higher rungs would pass.
-    lvl, reason = L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass"))
-    assert lvl == 1
-    assert "L2" in reason and "FAILED" in reason
+def test_ladder_is_five_rungs_lint_on_top():
+    """RUNGS lists the five rungs in order, and the label for rung 5 mentions lint."""
+    expected_order = ("install", "upgrade", "backup_restore", "functional", "lint")
+    assert L.RUNGS == expected_order
+    top_label = L.RUNG_LABEL[5]
+    assert "lint" in top_label


-# ---- L0 / install ----
+# ---- mission worked examples (operator Q&A 2026-06-11, verbatim) ----
+
+
+def test_mission_example_fail_blocks():
+    """Mission example: install ✔, upgrade ✘, backup ✔, functional ✔, lint ✔ → level 1."""
+    rungs = _rungs(upgrade="fail")
+    assert L.compute_level(rungs) == 1
+
+
+def test_mission_example_intentional_skip_climbs():
+    """Mission example: backup skipped (not capable), every other rung ✔ → level 5.
+
+    This case was previously capped at 2 — the confusing part the operator removed.
+    """
+    rungs = _rungs(backup_restore="skip")
+    assert L.compute_level(rungs) == 5
+
+
+def test_mission_example_unverified_blocks():
+    """Mission example: backup UNVER (harness error), every other rung ✔ → level 2."""
+    rungs = _rungs(backup_restore="unver")
+    # we cannot claim what we didn't check, so the climb stops below the unverified rung
+    assert L.compute_level(rungs) == 2
+
+
+def test_mission_example_unverified_top_rung_not_earned():
+    """Mission example: all four lower rungs ✔, lint unver (abra missing) → level 4."""
+    rungs = _rungs(lint="unver")
+    assert L.compute_level(rungs) == 4
+
+
+# ---- blocking semantics ----


 def test_install_fail_is_L0():
-    lvl, reason = L.compute_level(_rungs(install="fail"))
-    assert lvl == 0
-    assert "L1" in reason and "FAILED" in reason
+    assert L.compute_level(_rungs(install="fail")) == 0


-# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
+def test_install_unver_is_L0():
+    """An unverified install rung leaves the level at 0."""
+    rungs = _rungs(install="unver")
+    assert L.compute_level(rungs) == 0


-def test_higher_pass_does_not_rescue_lower_na():
-    # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
-    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
-    assert lvl == 2
-    assert "L3" in reason and "N/A" in reason
+def test_higher_pass_never_rescues_a_fail():
+    """Everything above a failed rung is dead, however green: upgrade fail → level 1."""
+    rungs = _rungs(upgrade="fail", backup_restore="pass", functional="pass")
+    assert L.compute_level(rungs) == 1


-def test_upgrade_na_caps_at_L1():
-    # only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
-    lvl, reason = L.compute_level(_rungs(upgrade="na"))
-    assert lvl == 1
-    assert "L2" in reason and "N/A" in reason
+def test_lint_fail_blocks_at_4():
+    """A failing lint rung with everything below it passing → level 4."""
+    rungs = _rungs(lint="fail")
+    assert L.compute_level(rungs) == 4


-def test_functional_na_caps_at_L3():
-    # no recipe-specific functional tests → functional N/A caps at L3.
-    lvl, reason = L.compute_level(_rungs(functional="na"))
-    assert lvl == 3
-    assert "L4" in reason and "N/A" in reason
+def test_unver_blocks_even_after_a_skip():
+    """The skip at L2 is climbed past, but the unver at L3 still blocks → level 1."""
+    rungs = _rungs(upgrade="skip", backup_restore="unver")
+    assert L.compute_level(rungs) == 1


-def test_functional_fail_caps_at_L3():
-    lvl, reason = L.compute_level(_rungs(functional="fail"))
-    assert lvl == 3
-    assert "L4" in reason and "FAILED" in reason
+# ---- intentional-skip climbing (the de-cap) ----
+
+
+def test_single_version_recipe_climbs_past_upgrade_skip():
+    """Upgrade skipped, all else passing → level 5 (the old rule capped upgrade N/A at L1)."""
+    rungs = _rungs(upgrade="skip")
+    assert L.compute_level(rungs) == 5
+
+
+def test_stateless_recipe_climbs_past_backup_skip_to_lint():
+    """Upgrade and backup/restore both skipped, the remaining rungs passing → level 5."""
+    rungs = _rungs(upgrade="skip", backup_restore="skip")
+    assert L.compute_level(rungs) == 5
+
+
+def test_skip_does_not_count_as_pass():
+    """ALL rungs skipped → nothing passed → level 0 (a skip climbs, but never earns)."""
+    rung_names = ("install", "upgrade", "backup_restore", "functional", "lint")
+    all_skipped = _rungs(**{name: "skip" for name in rung_names})
+    assert L.compute_level(all_skipped) == 0
+
+
+def test_skip_then_pass_earns_the_higher_rung():
+    """Skip at L4, pass at L5 → level 5 (the skip below doesn't stop the climb)."""
+    rungs = _rungs(functional="skip")
+    assert L.compute_level(rungs) == 5
+
+
+def test_trailing_skip_keeps_last_pass():
+    """Passes up to L3 with skips above → level stays 3 (skips never raise the level)."""
+    rungs = _rungs(functional="skip", lint="skip")
+    assert L.compute_level(rungs) == 3


 # ---- input validation ----
@ -89,7 +141,7 @@ def test_functional_fail_caps_at_L3():

 def test_invalid_status_raises():
    bad = _rungs()
-    bad["functional"] = "passed"  # not in the vocabulary
+    bad["functional"] = "na"  # the OLD vocabulary is no longer valid — every N/A is classified
    try:
        L.compute_level(bad)
    except ValueError:
@ -97,6 +149,16 @@ def test_invalid_status_raises():
    raise AssertionError("expected ValueError on invalid rung status")


+def test_missing_rung_raises():
+    """compute_level raises ValueError when the "lint" rung key is absent."""
+    incomplete = _rungs()
+    del incomplete["lint"]
+    try:
+        L.compute_level(incomplete)
+    except ValueError:
+        pass
+    else:
+        raise AssertionError("expected ValueError on missing rung")
+
+
 # ---- helpers: backup_restore_status ----


@ -104,8 +166,8 @@ def test_backup_restore_status_pass():
    assert L.backup_restore_status("pass", "pass", True) == "pass"


-def test_backup_restore_status_not_capable_is_na():
-    assert L.backup_restore_status("skip", "skip", False) == "na"
+def test_backup_restore_status_not_capable_is_intentional_skip():
+    """Backup and restore both "skip" on a recipe that is not backup-capable → "skip"."""
+    status = L.backup_restore_status("skip", "skip", False)
+    assert status == "skip"


 def test_backup_restore_status_fail_on_either():
@ -113,16 +175,20 @@ def test_backup_restore_status_fail_on_either():
    assert L.backup_restore_status("fail", "pass", True) == "fail"


-def test_backup_restore_partial_is_na():
-    # backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3
-    assert L.backup_restore_status("pass", "skip", True) == "na"
+def test_backup_restore_partial_is_unverified():
+    """Backup-capable but restore neither passed nor failed → cannot claim L3.
+
+    The non-run is NOT intentional, so the status is "unver" (blocks the level above it).
+    """
+    partial_runs = (("pass", "skip"), (None, None))
+    for backup, restore in partial_runs:
+        assert L.backup_restore_status(backup, restore, True) == "unver"


 # ---- helpers: tier_to_rung ----


-def test_tier_to_rung_mapping():
+def test_tier_to_rung_mapping_defaults_unverified():
    assert L.tier_to_rung("pass") == "pass"
    assert L.tier_to_rung("fail") == "fail"
-    assert L.tier_to_rung("skip") == "na"
-    assert L.tier_to_rung(None) == "na"
+    # no intentionality information here — a non-run is unver; derive_rungs upgrades to "skip"
+    # only on a declared/structural fact, never the other way.
+    assert L.tier_to_rung("skip") == "unver"
+    assert L.tier_to_rung(None) == "unver"
--- a/tests/unit/test_lint.py
+++ b/tests/unit/test_lint.py
@ -0,0 +1,196 @@
+"""Unit tests for the L5 lint executor (harness.lint) — phase lvl5.
+
+Covers the table parser + classifier against real abra-0.13 output shapes (probed on the CI
+host 2026-06-11, JOURNAL-lvl5), and run_lint's never-raise / never-silent-pass guarantees via
+a fake-PATH `script` shim (no real abra needed). Run cold:
+  cc-ci-run -m pytest tests/unit/test_lint.py -q
+"""
+
+from __future__ import annotations
+
+import os
+import stat
+import subprocess
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
+from harness import lint as L  # noqa: E402
+
+# Realistic abra lint table rows, as captured on cc-ci: abra renders HEAVY box-drawing
+# verticals (┃ U+2503) — the parser must match those, not just the light │.
+# Fixture contents: two warn-severity rules (R001 satisfied ✅, R015 unsatisfied ❌) and two
+# error-severity rules (R008, R014, both satisfied ✅), none marked skipped, followed by one
+# trailing WARN log line. Every line ends in "\r\n".
+TABLE_OK = (
+    "┏━━━━━━┳━━━━━━┓\r\n"
+    "┃ R001 ┃ compose config has expected version         ┃ warn     ┃ ✅        ┃ -       ┃ ensure ┃\r\n"
+    "┃ R015 ┃ long secret names                           ┃ warn     ┃ ❌        ┃ -       ┃ reduce ┃\r\n"
+    "┃ R008 ┃ .env.sample provided                        ┃ error    ┃ ✅        ┃ -       ┃ create ┃\r\n"
+    "┃ R014 ┃ only annotated tags used for recipe version ┃ error    ┃ ✅        ┃ -       ┃ retag  ┃\r\n"
+    "┗━━━━━━┻━━━━━━┛\r\n"
+    "WARN secret session_secret is longer than 12 characters\r\n"
+)
+
+# The light-vertical variant must parse identically (defensive: abra theme/version drift).
+TABLE_OK_LIGHT = TABLE_OK.replace("┃", "│")
+
+# TABLE_OK with the error-severity rule R014 flipped from ✅ to ❌, plus the
+# "critical errors present" WARN line appended after the table.
+TABLE_R014_FAIL = (
+    TABLE_OK.replace(
+        "┃ R014 ┃ only annotated tags used for recipe version ┃ error    ┃ ✅",
+        "┃ R014 ┃ only annotated tags used for recipe version ┃ error    ┃ ❌",
+    )
+    + "WARN critical errors present in hedgedoc config\r\n"
+)
+
+# TABLE_OK with R014 shown as ❌ AND "skipped" in the column after the mark (where the other
+# rows carry "-"); no "critical errors present" line is appended here.
+TABLE_SKIPPED_ERROR = TABLE_OK.replace(
+    "┃ R014 ┃ only annotated tags used for recipe version ┃ error    ┃ ✅        ┃ -       ┃",
+    "┃ R014 ┃ only annotated tags used for recipe version ┃ error    ┃ ❌        ┃ skipped ┃",
+)
+
+
+# ---- parse_table ----
+
+
+def test_parse_table_rows_and_marks():
+    """parse_table returns one row per rule with its severity, satisfied and skipped fields."""
+    parsed = L.parse_table(TABLE_OK)
+    by_rule = {row["rule"]: row for row in parsed}
+    assert set(by_rule) == {"R001", "R015", "R008", "R014"}
+    r001 = by_rule["R001"]
+    assert r001["severity"] == "warn" and r001["satisfied"]
+    r015 = by_rule["R015"]
+    assert r015["severity"] == "warn" and not r015["satisfied"]
+    r014 = by_rule["R014"]
+    assert r014["severity"] == "error" and r014["satisfied"]
+    assert not any(row["skipped"] for row in parsed)
+
+
+def test_parse_table_strips_ansi():
+    """ANSI escape sequences wrapped around the table do not change the four parsed rows."""
+    wrapped = "\x1b[1m" + TABLE_OK + "\x1b[0m"
+    parsed = L.parse_table(wrapped)
+    assert len(parsed) == 4
+
+
+def test_parse_table_light_verticals_too():
+    """The light-vertical (│) table parses to exactly the same rows as the heavy (┃) one."""
+    heavy_rows = L.parse_table(TABLE_OK)
+    light_rows = L.parse_table(TABLE_OK_LIGHT)
+    assert light_rows == heavy_rows
+
+
+def test_parse_table_garbage_is_empty():
+    """Output without any table rows (a FATA line, or nothing at all) parses to []."""
+    for tableless in ("FATA something exploded\r\n", ""):
+        assert L.parse_table(tableless) == []
+
+
+# ---- classify ----
+
+
+def test_classify_pass_with_warn_misses_only():
+    """A warn-severity ❌ (R015) does NOT fail the rung — only error-severity rules do."""
+    verdict = L.classify(0, TABLE_OK)
+    assert verdict == ("pass", "", [])
+
+
+def test_classify_error_rule_fails():
+    """An unsatisfied error-severity rule yields "fail" and is named in failed and detail."""
+    verdict = L.classify(0, TABLE_R014_FAIL)
+    status, detail, failed = verdict
+    assert status == "fail"
+    assert failed == ["R014"]
+    assert "R014" in detail
+
+
+def test_classify_skipped_error_rule_does_not_fail_but_sentinel_guards():
+    """A skipped error rule isn't counted as failed, but abra's sentinel line forces fail."""
+    status, _detail, failed = L.classify(0, TABLE_SKIPPED_ERROR)
+    assert failed == []
+    assert status == "pass"
+    # same table plus abra's own sentinel line — the classifier never out-greens abra
+    with_sentinel = TABLE_SKIPPED_ERROR + "WARN critical errors present in x config\r\n"
+    sentinel_status, sentinel_detail, _failed = L.classify(0, with_sentinel)
+    assert sentinel_status == "fail"
+    assert "critical errors" in sentinel_detail
+
+
+def test_classify_rc0_without_table_is_unver():
+    """rc=0 but nothing parseable in the output → cannot claim pass, status is "unver"."""
+    status = L.classify(0, "weird output")[0]
+    assert status == "unver"
+
+
+def test_classify_content_fata_is_fail():
+    """An "unable to validate recipe" FATA with rc=1 classifies as fail, echoed in detail."""
+    fata_output = "FATA unable to validate recipe: .env.sample for x couldn't be read\r\n"
+    status, detail, _failed = L.classify(1, fata_output)
+    assert status == "fail"
+    assert "unable to validate recipe" in detail
+
+
+def test_classify_environment_fata_is_unver():
+    """An "unable to fetch tags" FATA with rc=1 classifies as unver, echoed in detail."""
+    fata_output = "FATA unable to fetch tags in /x: repository not found: Not found.\r\n"
+    status, detail, _failed = L.classify(1, fata_output)
+    assert status == "unver"
+    assert "fetch tags" in detail
+
+
+def test_classify_did_not_run_is_unver():
+    """A None return code with empty output classifies as "unver"."""
+    status = L.classify(None, "")[0]
+    assert status == "unver"
+
+
+# ---- run_lint: never raises, never silently passes ----
+
+
+def _mkrecipe(tmp_path):
+    """Create a one-commit git repo at <tmp_path>/abra/recipes/fakerec and return its path.
+
+    The repo holds a single compose.yml; the commit is made with a throwaway identity
+    passed via `git -c`.
+    """
+    recipe_dir = tmp_path / "abra" / "recipes" / "fakerec"
+    recipe_dir.mkdir(parents=True)
+    (recipe_dir / "compose.yml").write_text("version: '3.8'\n")
+    identity = ["-c", "user.email=t@t", "-c", "user.name=t"]
+    git_steps = (
+        ["init", "-q"],
+        ["add", "."],
+        [*identity, "commit", "-qm", "x"],
+    )
+    for step in git_steps:
+        subprocess.run(["git", *step], cwd=recipe_dir, check=True)
+    return recipe_dir
+
+
+def _shim(tmp_path, body):
+    """Drop a fake `script` executable on PATH (run_lint invokes `script -qec "abra ..."`).
+
+    Writes ``<tmp_path>/bin/script`` as a ``/bin/sh`` script whose commands are ``body``,
+    marks it executable for the owner, and returns the ``bin`` directory as a string so
+    the caller can prepend it to PATH.
+
+    The file is written as UTF-8 explicitly: the bodies used by these tests embed
+    box-drawing characters and emoji, which a non-UTF-8 locale default cannot encode.
+    """
+    bindir = tmp_path / "bin"
+    bindir.mkdir(exist_ok=True)
+    sh = bindir / "script"
+    sh.write_text("#!/bin/sh\n" + body, encoding="utf-8")
+    sh.chmod(sh.stat().st_mode | stat.S_IEXEC)
+    return str(bindir)
+
+
+def test_run_lint_pass_via_shim(tmp_path, monkeypatch):
+    """run_lint reports "pass" for the clean table and writes lint.txt into the artifacts dir."""
+    _mkrecipe(tmp_path)
+    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
+    # each CRLF becomes a literal backslash-n inside the shim's printf format string
+    printable = TABLE_OK.replace("\r\n", "\\n")
+    shim_dir = _shim(tmp_path, f'printf "{printable}"\nexit 0\n')
+    monkeypatch.setenv("PATH", shim_dir + os.pathsep + os.environ["PATH"])
+    artifacts = tmp_path / "artifacts"
+    res = L.run_lint("fakerec", None, str(artifacts))
+    assert res["status"] == "pass"
+    written = (artifacts / "lint.txt").read_text()
+    assert "abra recipe lint -n fakerec" in written
+    assert "R001" in written
+
+
+def test_run_lint_fail_via_shim(tmp_path, monkeypatch):
+    """run_lint reports "fail" and lists R014 when the shim prints the R014-failing table."""
+    _mkrecipe(tmp_path)
+    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
+    printable = TABLE_R014_FAIL.replace("\r\n", "\\n")
+    shim_dir = _shim(tmp_path, f'printf "{printable}"\nexit 0\n')
+    monkeypatch.setenv("PATH", shim_dir + os.pathsep + os.environ["PATH"])
+    artifacts = tmp_path / "artifacts"
+    res = L.run_lint("fakerec", None, str(artifacts))
+    assert res["status"] == "fail"
+    assert res["rules_failed"] == ["R014"]
+
+
+def test_run_lint_missing_recipe_is_unver_not_raise(tmp_path, monkeypatch):
+    """With ABRA_DIR pointing at a never-created dir, run_lint returns "unver" plus a detail."""
+    artifacts = tmp_path / "artifacts"
+    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra-none"))
+    res = L.run_lint("no-such-recipe", None, str(artifacts))
+    assert res["status"] == "unver"
+    assert res["detail"]
+    # lint.txt still written with the failure context (loud, never silent)
+    assert (artifacts / "lint.txt").exists()
+
+
+def test_run_lint_abra_blowup_is_unver(tmp_path, monkeypatch):
+    """A shim that prints a FATA ioctl error and exits 1 makes run_lint report "unver"."""
+    _mkrecipe(tmp_path)
+    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
+    shim_dir = _shim(tmp_path, 'echo "FATA inappropriate ioctl for device"\nexit 1\n')
+    monkeypatch.setenv("PATH", shim_dir + os.pathsep + os.environ["PATH"])
+    res = L.run_lint("fakerec", None, None)
+    assert res["status"] == "unver"
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@ -1,7 +1,8 @@
-"""Unit tests for Phase-3 results assembly (harness.results), plan-phase3-results-ux.md §4.2 / R1/R3.
+"""Unit tests for results assembly (harness.results) — phase lvl5 semantics.

-Covers JUnit parsing, stage roll-up, the tier→rung derivation (the documented mapping the level
-depends on), and full results.json assembly incl. the U0 gate cases. Pure / tmp-file only. Run cold:
+Covers JUnit parsing, stage roll-up, the tier→rung derivation (the SINGLE place every N/A source
+is classified intentional-skip vs unverified — the table in DECISIONS.md phase lvl5), the L5 lint
+rung wiring, and full results.json assembly. Pure / tmp-file only. Run cold:
  cc-ci-run -m pytest tests/unit/test_results.py -q
 """

@ -27,6 +28,8 @@ JUNIT_MIXED = """<?xml version="1.0"?>
 <testcase classname="tests.y" name="test_skipped" time="0"><skipped message="no deps"/></testcase>
 </testsuite></testsuites>"""

+LINT_PASS = {"status": "pass", "detail": "", "rules_failed": []}
+

 def _write(tmp_path, name, content):
    p = tmp_path / name
@ -90,7 +93,7 @@ def test_collect_stages_synthesizes_when_no_junit():
    assert len(stages[0]["tests"]) == 1


-# ---- derive_rungs: the documented mapping ----
+# ---- derive_rungs: the documented N/A-classification mapping (DECISIONS.md phase lvl5) ----


 def _results(**kw):
@ -105,34 +108,113 @@ def _results(**kw):
    return base


-def test_derive_rungs_full_climb_four_essential():
-    rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
-    # only the four essential rungs — integration/recipe-local are optional, not produced here.
+def test_derive_rungs_full_climb_five_rungs():
+    rungs = R.derive_rungs(
+        _results(), backup_capable=True, has_upgrade_target=True, lint_status="pass"
+    )
+    # the five essential rungs — integration/recipe-local are optional, not produced here.
    assert rungs == {
        "install": "pass",
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
+        "lint": "pass",
    }


-def test_derive_rungs_stateless_backup_and_functional_na():
+def test_derive_rungs_structural_skips_are_intentional():
+    # single published version (tier skipped, no upgrade target) + not backup-capable →
+    # both rungs are INTENTIONAL skips, not unverified.
    rungs = R.derive_rungs(
-        _results(backup="skip", restore="skip", custom="skip"),
+        _results(upgrade="skip", backup="skip", restore="skip"),
        backup_capable=False,
-        has_custom=False,
+        has_upgrade_target=False,
+        lint_status="pass",
    )
-    assert rungs["backup_restore"] == "na"
-    assert rungs["functional"] == "na"
+    assert rungs["upgrade"] == "skip"
+    assert rungs["backup_restore"] == "skip"
    assert "integration" not in rungs and "recipe_local" not in rungs


-def test_derive_rungs_functional_fail():
-    rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
+def test_derive_rungs_upgrade_skip_with_target_is_unverified():
+    """Tiers skipped although an upgrade target exists derive as unver, not skip.
+
+    Shape under test: install failed and every downstream tier was skipped — that is
+    NOT a structural skip.
+    """
+    tier_results = _results(
+        install="fail", upgrade="skip", backup="skip", restore="skip", custom="skip"
+    )
+    rungs = R.derive_rungs(
+        tier_results, backup_capable=True, has_upgrade_target=True, lint_status="pass"
+    )
+    assert rungs["install"] == "fail"
+    for blocked in ("upgrade", "backup_restore", "functional"):
+        assert rungs[blocked] == "unver"
+
+
+def test_derive_rungs_missing_tier_is_unverified():
+    """Tiers with no result key at all derive as unver, never as an intentional skip.
+
+    Example: a tier excluded from the run entirely (dev CCCI_STAGES escape) — the recipe
+    didn't declare anything.
+    """
+    only_install = {"install": "pass"}
+    rungs = R.derive_rungs(
+        only_install, backup_capable=True, has_upgrade_target=True, lint_status="pass"
+    )
+    for absent in ("upgrade", "backup_restore", "functional"):
+        assert rungs[absent] == "unver"
+
+
+def test_derive_rungs_expected_na_declares_intentional():
+    """EXPECTED_NA turns a non-run rung into an intentional skip (declared source)."""
+    declaration = {"functional": "no functional surface"}
+    tier_results = _results(custom="skip")
+    rungs = R.derive_rungs(
+        tier_results,
+        backup_capable=True,
+        has_upgrade_target=True,
+        expected_na=declaration,
+        lint_status="pass",
+    )
+    assert rungs["functional"] == "skip"
+
+
+def test_derive_rungs_no_custom_tests_defaults_unverified():
+    """A skipped custom tier with NO declaration is a gap → unver (conservative default)."""
+    tier_results = _results(custom="skip")
+    rungs = R.derive_rungs(
+        tier_results, backup_capable=True, has_upgrade_target=True, lint_status="pass"
+    )
+    assert rungs["functional"] == "unver"
+
+
+def test_derive_rungs_expected_na_never_overrides_a_real_result():
+    # a declaration cannot soften an exercised rung: fail stays fail.
+    rungs = R.derive_rungs(
+        _results(custom="fail"),
+        backup_capable=True,
+        has_upgrade_target=True,
+        expected_na={"functional": "declared"},
+        lint_status="pass",
+    )
    assert rungs["functional"] == "fail"


-# ---- build_results: end-to-end incl level + flags ----
+def test_derive_rungs_lint_never_skips():
+    """Lint has NO intentional-skip escape hatch.
+
+    "pass"/"fail" from the executor pass through; "unver" and None both derive as
+    "unver" — even though every call here declares lint in expected_na.
+    """
+    expectations = {"pass": "pass", "fail": "fail", "unver": "unver", None: "unver"}
+    for status, want in expectations.items():
+        rungs = R.derive_rungs(
+            _results(),
+            backup_capable=True,
+            has_upgrade_target=True,
+            expected_na={"lint": "nope"},
+            lint_status=status,
+        )
+        assert rungs["lint"] == want, status
+
+
+def test_derive_rungs_functional_fail():
+    """A failing custom tier derives the functional rung as "fail"."""
+    tier_results = _results(custom="fail")
+    rungs = R.derive_rungs(
+        tier_results, backup_capable=True, has_upgrade_target=True, lint_status="pass"
+    )
+    assert rungs["functional"] == "fail"
+
+
+# ---- build_results: end-to-end incl level + lint + flags ----


 def test_build_results_level_and_flags(tmp_path):
@ -163,17 +245,75 @@ def test_build_results_level_and_flags(tmp_path):
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=1234.0,
+        lint=LINT_PASS,
    )
-    # all four essential rungs pass → full climb to L4 (the top), no cap
-    assert data["level"] == 4
-    assert data["level_cap_reason"] == ""
+    # all five essential rungs pass → full climb to L5; no cap concept anywhere.
+    assert data["schema"] == 2
+    assert data["level"] == 5
+    assert "level_cap_reason" not in data and "level_cap_rung" not in data
    assert data["recipe"] == "hedgedoc"
    assert data["ref"] == "deadbeefcafe"
    assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
-    assert [s["name"] for s in data["stages"]] == ["install", "custom"]
+    # lint appears as a synthetic stage so the card's table carries all five rungs.
+    assert [s["name"] for s in data["stages"]] == ["install", "custom", "lint"]
+    assert data["lint"] == {"status": "pass", "detail": "", "rules_failed": []}


-def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
+def test_build_results_lint_fail_blocks_at_4(tmp_path):
+    """A failing lint result leaves the level at 4 and names the rule in the lint stage."""
+    install_record = {
+        "tier": "install",
+        "source": "generic",
+        "file": "g/test_install.py",
+        "rc": 0,
+        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+    }
+    lint_failure = {
+        "status": "fail",
+        "detail": "error rule(s) unsatisfied: R014",
+        "rules_failed": ["R014"],
+    }
+    data = R.build_results(
+        recipe="x",
+        version=None,
+        pr="0",
+        ref=None,
+        records=[install_record],
+        results=_results(),
+        backup_capable=True,
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=0.0,
+        lint=lint_failure,
+    )
+    assert data["level"] == 4
+    assert data["rungs"]["lint"] == "fail"
+    assert data["lint"]["rules_failed"] == ["R014"]
+    lint_stages = [stage for stage in data["stages"] if stage["name"] == "lint"]
+    lint_stage = lint_stages[0]
+    assert lint_stage["status"] == "fail"
+    first_test = lint_stage["tests"][0]
+    assert "R014" in first_test["message"]
+
+
+def test_build_results_no_lint_given_is_unverified_never_pass(tmp_path):
+    """An old/lint-less caller must NEVER get a free L5.
+
+    With no `lint` argument the rung derives as unver → level 4 at most, and lint is
+    listed among the unintentional skips.
+    """
+    call_without_lint = {
+        "recipe": "x",
+        "version": None,
+        "pr": "0",
+        "ref": None,
+        "records": [],
+        "results": _results(),
+        "backup_capable": True,
+        "clean_teardown": True,
+        "no_secret_leak": True,
+        "finished_ts": 0.0,
+    }
+    data = R.build_results(**call_without_lint)
+    assert data["rungs"]["lint"] == "unver"
+    assert data["level"] == 4
+    assert "lint" in data["skips"]["unintentional"]
+
+
+def test_build_results_level1_on_upgrade_fail(tmp_path):
    recs = [
        {
            "tier": "install",
@ -194,12 +334,13 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
+        lint=LINT_PASS,
    )
    assert data["level"] == 1
-    assert "L2" in data["level_cap_reason"]
+    assert data["rungs"]["upgrade"] == "fail"


-# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
+# ---- skips: intentional (declared/structural, with reason) vs unintentional (= unver) ----


 def _rungs(**kw):
@ -208,24 +349,26 @@ def _rungs(**kw):
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
+        "lint": "pass",
    }
    base.update(kw)
    return base


-def test_skips_intentional_vs_unintentional():
-    rungs = _rungs(backup_restore="na", functional="na")
+def test_skips_declared_reason_and_unverified_split():
+    rungs = _rungs(backup_restore="skip", functional="unver")
    sk = R.skips(rungs, {"backup_restore": "stateless static server"})
-    # backup_restore is declared (intentional, with reason); functional skipped but not declared.
    assert sk["intentional"] == {"backup_restore": "stateless static server"}
    assert sk["unintentional"] == ["functional"]


-def test_skips_none_declared_all_unintentional():
-    rungs = _rungs(backup_restore="na")
+def test_skips_structural_reason_when_undeclared():
+    # a structural skip (derive_rungs) carries its structural reason even without EXPECTED_NA.
+    rungs = _rungs(upgrade="skip", backup_restore="skip")
    sk = R.skips(rungs, None)
-    assert sk["intentional"] == {}
-    assert sk["unintentional"] == ["backup_restore"]
+    assert "only one published version" in sk["intentional"]["upgrade"]
+    assert "not backup-capable" in sk["intentional"]["backup_restore"]
+    assert sk["unintentional"] == []


 def test_skips_declaration_only_counts_when_actually_skipped():
@ -236,9 +379,9 @@ def test_skips_declaration_only_counts_when_actually_skipped():
    assert "backup_restore" not in sk["unintentional"]


-def test_build_results_threads_expected_na(tmp_path):
-    # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
-    # backup surface (backup_restore declared intentionally skipped).
+def test_build_results_stateless_recipe_climbs(tmp_path):
+    # custom-html-tiny shape: no backup surface (declared), single published version, passing
+    # functional — formerly capped at L2 by the N/A; now climbs to L5 (the de-cap, mission §2).
    recs = [
        {
            "tier": "install",
@ -261,23 +404,47 @@ def test_build_results_threads_expected_na(tmp_path):
        pr="0",
        ref=None,
        records=recs,
-        results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
-        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
+        results=_results(upgrade="skip", backup="skip", restore="skip"),
+        backup_capable=False,  # no backupbot label → structural intentional skip
+        has_upgrade_target=False,  # single published version → structural intentional skip
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
+        lint=LINT_PASS,
        expected_na={"backup_restore": "stateless static file server"},
    )
-    # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
-    # the skip caps the climb — but it's the declared (intentional) rung that capped.
-    assert data["level"] == 2
-    assert "L3" in data["level_cap_reason"]
-    assert data["level_cap_rung"] == "backup_restore"
-    assert data["rungs"]["functional"] == "pass"
+    assert data["level"] == 5  # skips are climbed past; nothing was inflated to get here
+    assert data["rungs"] == {
+        "install": "pass",
+        "upgrade": "skip",
+        "backup_restore": "skip",
+        "functional": "pass",
+        "lint": "pass",
+    }
    assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
-    assert (
-        data["skips"]["unintentional"] == []
-    )  # backup_restore declared; functional passed → clean
+    assert "only one published version" in data["skips"]["intentional"]["upgrade"]
+    assert data["skips"]["unintentional"] == []
+
+
+def test_build_results_unverified_backup_blocks(tmp_path):
+    # synthesized tier abort: backup-capable, but the backup/restore tiers never produced a result
+    # → rung is "unver" → the level stays below the unverified rung (mission worked example #3).
+    data = R.build_results(
+        recipe="x",
+        version=None,
+        pr="0",
+        ref=None,
+        records=[],
+        results=_results(backup="skip", restore="skip"),
+        backup_capable=True,
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=0.0,
+        lint=LINT_PASS,
+    )
+    assert data["rungs"]["backup_restore"] == "unver"
+    assert data["level"] == 2
+    assert data["skips"]["unintentional"] == ["backup_restore"]


 def test_build_results_threads_customization(tmp_path):
@ -310,6 +477,7 @@ def test_build_results_threads_customization(tmp_path):
        "clean_teardown": True,
        "no_secret_leak": True,
        "finished_ts": 0.0,
+        "lint": LINT_PASS,
    }
    assert R.build_results(**kwargs, customization=cust)["customization"] == cust
    assert R.build_results(**kwargs)["customization"] is None