Revert "feat(lvl5): P1 — 5-rung ladder (L5=abra recipe lint) + de-capped level semantics"

This reverts commit e219a7891d.
2026-06-11 07:46:57 +00:00
parent 589943f46e
commit cd62743055
12 changed files with 336 additions and 1065 deletions
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@@ -1,11 +1,8 @@
-"""Unit tests for the pure card/badge renderers (harness.card) — phase lvl5 semantics.
+"""Unit tests for the pure card/badge renderers (harness.card), Phase 3 U2 (R3/R6).

-Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised
-live). The cardinal check: the card REPORTS the data verbatim — level/marks come straight from the
-dict, never recomputed — the badge is NUMBER + COLOUR ONLY, and the per-rung table rows (incl.
-intentional-skip / unverified) are the sole carrier of "why isn't the level higher". Old schema-1
-artifacts (4-rung ladder, cap fields present) must render without error and without relabeling.
-Run cold:  cc-ci-run -m pytest tests/unit/test_card.py -q
+Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised in
+the U2 live demo). The cardinal check: the card REPORTS the data verbatim — level/marks come straight
+from the dict, never recomputed. Run cold:  cc-ci-run -m pytest tests/unit/test_card.py -q
 """

 from __future__ import annotations
@@ -17,19 +14,12 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
 from harness import card as C  # noqa: E402


-def _data(level=5, **kw):
-    d = {
-        "schema": 2,
+def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
+    return {
        "recipe": "uptime-kuma",
        "version": "1.23.0",
        "level": level,
-        "rungs": {
-            "install": "pass",
-            "upgrade": "pass",
-            "backup_restore": "pass",
-            "functional": "pass",
-            "lint": "pass",
-        },
+        "level_cap_reason": cap,
        "flags": {"clean_teardown": True, "no_secret_leak": True},
        "screenshot": "screenshot.png",
        "stages": [
@@ -46,54 +36,46 @@ def _data(level=5, **kw):
                    {"name": "test_broken", "status": "fail", "ms": 5},
                ],
            },
-            {
-                "name": "lint",
-                "status": "pass",
-                "tests": [{"name": "abra recipe lint", "status": "pass", "ms": 0}],
-            },
        ],
    }
-    d.update(kw)
-    return d


 def test_level_color_ramp():
-    # 0 (red) … 5 (bright green — full 5-rung climb); unknown → grey.
-    assert C.level_color(0) != C.level_color(5)
-    assert C.level_color(5) == "#3fb950"
-    assert C.level_color(99) == "#8b949e"
+    assert C.level_color(0) != C.level_color(6)
+    assert C.level_color(6) == "#3fb950"
+    assert C.level_color(99) == "#8b949e"  # unknown → grey


-def test_badge_svg_is_number_and_color_only():
+def test_badge_svg_wellformed():
    svg = C.level_badge_svg(4)
    assert svg.startswith("<svg") and svg.endswith("</svg>")
    assert "level 4" in svg
    assert C.level_color(4) in svg
-    # operator-specified (phase lvl5): NOTHING but the level on the badge — no annotation
-    # segment of any kind, whatever the rung situation.
-    assert "expected" not in svg and "gap?" not in svg and "skip" not in svg
+    # plain cap (no intent) → two-box badge, no third segment
+    assert "expected" not in svg and "gap?" not in svg


-def test_badge_svg_level5():
-    svg = C.level_badge_svg(5)
-    assert "level 5" in svg and "#3fb950" in svg
+def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
+    # an intentional (declared) skip capped the climb → muted "expected" third segment
+    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
+    assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
+    assert "gap?" not in exp
+    # an unintentional skip (not declared) → amber "gap?" third segment
+    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
+    assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
+    assert "expected" not in gap


-def test_skip_rows_intentional_and_unverified():
+def test_skip_rows_intentional_and_unintentional():
    html_out = C._skip_rows(
        {"intentional": {"backup_restore": "no persistent data"}, "unintentional": ["functional"]}
    )
    # intentional skip: labelled row (muted green) + the reason on its own line
    assert "intentional skip" in html_out and C.SKIP_GREEN in html_out
    assert "backup/restore" in html_out and "no persistent data" in html_out
-    # unverified rung: amber row + the blocks-the-level explanation
-    assert "unverified" in html_out and C.GAP_COLOR in html_out
-    assert "functional" in html_out and "cannot rise above" in html_out
-
-
-def test_skip_rows_lint_label_known():
-    html_out = C._skip_rows({"intentional": {}, "unintentional": ["lint"]})
-    assert ">lint<" in html_out.replace("</b>", "<")  # rung label renders, not a KeyError
+    # unintentional skip: amber row + prompt to declare/add coverage
+    assert "unintentional skip" in html_out and C.GAP_COLOR in html_out
+    assert "functional" in html_out and "EXPECTED_NA" in html_out


 def test_skip_rows_empty_when_no_skips():
@@ -101,68 +83,22 @@ def test_skip_rows_empty_when_no_skips():


 def test_card_html_reports_level_verbatim():
-    html = C.render_card_html(_data(level=2))
+    html = C.render_card_html(_data(level=2, cap="L3 backup/restore (data integrity) N/A"))
    assert "uptime-kuma" in html
    assert "1.23.0" in html
    # the level shown is exactly what was passed (no recompute)
    assert ">2<" in html
-    assert "level 2 of 5" in html
+    assert "L3 backup/restore" in html
    assert C.level_color(2) in html


-def test_card_html_no_cap_language():
-    html = C.render_card_html(_data())
-    assert "capped" not in html and "cap_reason" not in html
-    assert "level 5 of 5" in html
-
-
-def test_card_html_old_schema1_artifact_renders():
-    # history compatibility: a pre-lvl5 results.json (4-rung ladder, cap fields, "na" statuses)
-    # renders without KeyError and shows ITS OWN ladder height (no retroactive relabeling).
-    old = {
-        "schema": 1,
-        "recipe": "legacy",
-        "version": "0.9",
-        "level": 4,
-        "level_cap_reason": "",
-        "level_cap_rung": None,
-        "rungs": {
-            "install": "pass",
-            "upgrade": "pass",
-            "backup_restore": "pass",
-            "functional": "pass",
-        },
-        "skips": {"intentional": {}, "unintentional": []},
-        "flags": {"clean_teardown": True, "no_secret_leak": True},
-        "screenshot": None,
-        "stages": [],
-    }
-    html = C.render_card_html(old)
-    assert "legacy" in html
-    assert "level 4 of 4" in html  # the old top, not 5
-    assert "capped" not in html
-
-
-def test_card_html_shows_stage_and_test_marks_incl_lint():
+def test_card_html_shows_stage_and_test_marks():
    html = C.render_card_html(_data())
    assert "install" in html and "custom" in html
-    assert "abra recipe lint" in html
    assert "test_serving" in html and "test_broken" in html
    assert C.STATUS_MARK["pass"] in html and C.STATUS_MARK["fail"] in html


-def test_card_html_unver_stage_mark_renders():
-    d = _data()
-    d["stages"][2] = {
-        "name": "lint",
-        "status": "unver",
-        "tests": [{"name": "abra recipe lint", "status": "unver", "ms": 0, "message": "timed out"}],
-    }
-    html = C.render_card_html(d)
-    assert C.STATUS_MARK["unver"] in html
-    assert C.STATUS_COLOR["unver"] in html
-
-
 def test_card_html_flags_rendered():
    html = C.render_card_html(_data())
    assert "clean teardown" in html and "no secret leak" in html
--- a/tests/unit/test_dashboard.py
+++ b/tests/unit/test_dashboard.py
@@ -28,6 +28,7 @@ def _row(**kw):
        "ref": "db9a9502",
        "version": "db9a95024e9d",
        "level": 4,
+        "level_cap_reason": "",
        "has_screenshot": True,
        "flags": {"clean_teardown": True, "no_secret_leak": True},
        "finished": 0,
@@ -39,7 +40,7 @@ def _row(**kw):

 def test_level_color_ramp_and_fallback():
    assert dashboard.level_color(0) == "#e5534b"
-    assert dashboard.level_color(5) == "#3fb950"  # full 5-rung climb (phase lvl5)
+    assert dashboard.level_color(6) == "#3fb950"
    assert dashboard.level_color(4) == "#a0b93f"
    assert dashboard.level_color(99) == "#8b949e"
    assert dashboard.level_color(None) == "#8b949e"
@@ -60,12 +61,20 @@ def test_overview_grid_mirrors_results():
 def test_overview_never_greener_than_data():
    # A failed run at level 0 must show level 0 + the failure pill — never a green/high level.
    out = dashboard.render_overview(
-        [_row(status="failure", level=0, has_screenshot=False, flags={})]
+        [
+            _row(
+                status="failure",
+                level=0,
+                has_screenshot=False,
+                flags={},
+                level_cap_reason="L1 install FAILED",
+            )
+        ]
    )
    assert "level 0" in out
    assert dashboard.level_color(0) in out  # red
    assert dashboard._COLORS["failure"] in out
-    assert "level 4" not in out and "level 5" not in out
+    assert "level 4" not in out and "level 5" not in out and "level 6" not in out
    assert "no screenshot" in out  # placeholder, no broken image


@@ -95,6 +104,7 @@ def test_build_row_projects_results(monkeypatch):
        lambda n: {
            "version": "1.2.3",
            "level": 2,
+            "level_cap_reason": "cap",
            "screenshot": "screenshot.png",
            "flags": {"clean_teardown": True},
        },
@@ -113,38 +123,6 @@ def test_build_row_projects_results(monkeypatch):
    assert r["url"].endswith("/cc-ci/7")


-def test_build_row_old_schema1_artifact_renders(monkeypatch):
-    # History compatibility (phase lvl5): pre-lvl5 results.json still carries cap fields and a
-    # 4-rung ladder — it must project + render without KeyError, level shown VERBATIM (no
-    # retroactive relabeling), and the old cap text simply isn't resurfaced anywhere.
-    monkeypatch.setattr(
-        dashboard,
-        "_results_for",
-        lambda n: {
-            "schema": 1,
-            "version": "0.9.1",
-            "level": 2,
-            "level_cap_reason": "L3 backup/restore (data integrity) N/A",
-            "level_cap_rung": "backup_restore",
-            "screenshot": "screenshot.png",
-            "flags": {"clean_teardown": True, "no_secret_leak": True},
-        },
-    )
-    b = {
-        "number": 11,
-        "status": "success",
-        "event": "custom",
-        "params": {"RECIPE": "legacy", "REF": "abc123"},
-        "finished": 5,
-    }
-    r = dashboard._build_row(b)
-    out = dashboard.render_overview([r])
-    assert "level 2" in out and dashboard.level_color(2) in out
-    assert "N/A" not in out and "capped" not in out  # cap language gone from the surface
-    hist = dashboard.render_history("legacy", [r])
-    assert "L2" in hist
-
-
 def test_build_row_degrades_without_results(monkeypatch):
    # No results.json (e.g. an old run): grid still renders from Drone fields, level absent.
    monkeypatch.setattr(dashboard, "_results_for", lambda n: {})
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@@ -1,14 +1,8 @@
-"""Unit tests for the level ladder (harness.level) — phase lvl5 semantics.
+"""Unit tests for the Phase-3 level ladder (harness.level), plan-phase3-results-ux.md §4.1 / R1.

-Pure function — no I/O. Proves the operator-decided rule (plan-phase-lvl5-lint-rung.md,
-DECISIONS.md phase lvl5):
-
-    level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"
-
-— a real FAIL blocks, an UNVERIFIED rung blocks exactly like a fail, an INTENTIONAL skip is
-climbed past. Includes the mission's four worked examples verbatim, and the old N/A cases
-(single-published-version recipe, non-backup-capable recipe) now climbing past their former
-caps. Run cold with:  cc-ci-run -m pytest tests/unit/test_level.py -q
+Pure function — no I/O. Proves the YunoHost gap-caps-the-level semantics, including the U0 gate
+acceptance: a recipe that climbs through L4 reports 4, and one that fails at L2 is capped at 1
+(the level just below the failed rung). Run cold with:  cc-ci-run -m pytest tests/unit/test_level.py -q
 """

 from __future__ import annotations
@@ -25,115 +19,69 @@ def _rungs(
    upgrade="pass",
    backup_restore="pass",
    functional="pass",
-    lint="pass",
 ):
    return {
        "install": install,
        "upgrade": upgrade,
        "backup_restore": backup_restore,
        "functional": functional,
-        "lint": lint,
    }


-# ---- the ladder: five essential rungs, top is L5 (lint) ----
+# ---- the ladder: four essential rungs, top is L4 (functional) ----


-def test_full_clean_climb_is_L5():
-    assert L.compute_level(_rungs()) == 5
+def test_full_clean_climb_to_L4():
+    # All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
+    lvl, reason = L.compute_level(_rungs())
+    assert lvl == 4
+    assert reason == ""


-def test_ladder_is_five_rungs_lint_on_top():
-    assert L.RUNGS == ("install", "upgrade", "backup_restore", "functional", "lint")
-    assert "lint" in L.RUNG_LABEL[5]
+def test_fails_at_L2_capped_at_L1():
+    # GATE: upgrade fails → capped at L1 even though higher rungs would pass.
+    lvl, reason = L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass"))
+    assert lvl == 1
+    assert "L2" in reason and "FAILED" in reason


-# ---- mission worked examples (operator Q&A 2026-06-11, verbatim) ----
-
-
-def test_mission_example_fail_blocks():
-    # install ✔, upgrade ✘, backup ✔, functional ✔, lint ✔ → level 1 (fail blocks).
-    assert L.compute_level(_rungs(upgrade="fail")) == 1
-
-
-def test_mission_example_intentional_skip_climbs():
-    # install ✔, upgrade ✔, backup skip (not capable), functional ✔, lint ✔ → level 5
-    # (previously capped at 2 — the confusing part the operator removed).
-    assert L.compute_level(_rungs(backup_restore="skip")) == 5
-
-
-def test_mission_example_unverified_blocks():
-    # install ✔, upgrade ✔, backup UNVER (harness error), functional ✔, lint ✔ → level 2
-    # (we cannot claim what we didn't check).
-    assert L.compute_level(_rungs(backup_restore="unver")) == 2
-
-
-def test_mission_example_unverified_top_rung_not_earned():
-    # all four ✔, lint unver (abra missing) → level 4.
-    assert L.compute_level(_rungs(lint="unver")) == 4
-
-
-# ---- blocking semantics ----
+# ---- L0 / install ----


 def test_install_fail_is_L0():
-    assert L.compute_level(_rungs(install="fail")) == 0
+    lvl, reason = L.compute_level(_rungs(install="fail"))
+    assert lvl == 0
+    assert "L1" in reason and "FAILED" in reason


-def test_install_unver_is_L0():
-    assert L.compute_level(_rungs(install="unver")) == 0
+# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----


-def test_higher_pass_never_rescues_a_fail():
-    # everything above a failed rung is dead, however green.
-    assert L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass")) == 1
+def test_higher_pass_does_not_rescue_lower_na():
+    # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
+    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
+    assert lvl == 2
+    assert "L3" in reason and "N/A" in reason


-def test_lint_fail_blocks_at_4():
-    assert L.compute_level(_rungs(lint="fail")) == 4
+def test_upgrade_na_caps_at_L1():
+    # only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
+    lvl, reason = L.compute_level(_rungs(upgrade="na"))
+    assert lvl == 1
+    assert "L2" in reason and "N/A" in reason


-def test_unver_blocks_even_after_a_skip():
-    # skip at L2 is climbed past, but the unver at L3 still blocks → level 1.
-    assert L.compute_level(_rungs(upgrade="skip", backup_restore="unver")) == 1
+def test_functional_na_caps_at_L3():
+    # no recipe-specific functional tests → functional N/A caps at L3.
+    lvl, reason = L.compute_level(_rungs(functional="na"))
+    assert lvl == 3
+    assert "L4" in reason and "N/A" in reason


-# ---- intentional-skip climbing (the de-cap) ----
-
-
-def test_single_version_recipe_climbs_past_upgrade_skip():
-    # old rule: upgrade N/A capped at L1. New rule: skip is climbed past → full climb 5.
-    assert L.compute_level(_rungs(upgrade="skip")) == 5
-
-
-def test_stateless_recipe_climbs_past_backup_skip_to_lint():
-    assert L.compute_level(_rungs(upgrade="skip", backup_restore="skip")) == 5
-
-
-def test_skip_does_not_count_as_pass():
-    # ALL skips → nothing passed → level 0 (a skip climbs, but never earns).
-    assert (
-        L.compute_level(
-            _rungs(
-                install="skip",
-                upgrade="skip",
-                backup_restore="skip",
-                functional="skip",
-                lint="skip",
-            )
-        )
-        == 0
-    )
-
-
-def test_skip_then_pass_earns_the_higher_rung():
-    # skip at L4, pass at L5 → level 5 (the skip below doesn't stop the climb).
-    assert L.compute_level(_rungs(functional="skip")) == 5
-
-
-def test_trailing_skip_keeps_last_pass():
-    # passes up to L3, skips above → level stays 3 (skips never raise).
-    assert L.compute_level(_rungs(functional="skip", lint="skip")) == 3
+def test_functional_fail_caps_at_L3():
+    lvl, reason = L.compute_level(_rungs(functional="fail"))
+    assert lvl == 3
+    assert "L4" in reason and "FAILED" in reason


 # ---- input validation ----
@@ -141,7 +89,7 @@ def test_trailing_skip_keeps_last_pass():

 def test_invalid_status_raises():
    bad = _rungs()
-    bad["functional"] = "na"  # the OLD vocabulary is no longer valid — every N/A is classified
+    bad["functional"] = "passed"  # not in the vocabulary
    try:
        L.compute_level(bad)
    except ValueError:
@@ -149,16 +97,6 @@ def test_invalid_status_raises():
    raise AssertionError("expected ValueError on invalid rung status")


-def test_missing_rung_raises():
-    bad = _rungs()
-    del bad["lint"]
-    try:
-        L.compute_level(bad)
-    except ValueError:
-        return
-    raise AssertionError("expected ValueError on missing rung")
-
-
 # ---- helpers: backup_restore_status ----


@@ -166,8 +104,8 @@ def test_backup_restore_status_pass():
    assert L.backup_restore_status("pass", "pass", True) == "pass"


-def test_backup_restore_status_not_capable_is_intentional_skip():
-    assert L.backup_restore_status("skip", "skip", False) == "skip"
+def test_backup_restore_status_not_capable_is_na():
+    assert L.backup_restore_status("skip", "skip", False) == "na"


 def test_backup_restore_status_fail_on_either():
@@ -175,20 +113,16 @@ def test_backup_restore_status_fail_on_either():
    assert L.backup_restore_status("fail", "pass", True) == "fail"


-def test_backup_restore_partial_is_unverified():
-    # backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3,
-    # and the non-run is NOT intentional → unver (blocks the level above it).
-    assert L.backup_restore_status("pass", "skip", True) == "unver"
-    assert L.backup_restore_status(None, None, True) == "unver"
+def test_backup_restore_partial_is_na():
+    # backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3
+    assert L.backup_restore_status("pass", "skip", True) == "na"


 # ---- helpers: tier_to_rung ----


-def test_tier_to_rung_mapping_defaults_unverified():
+def test_tier_to_rung_mapping():
    assert L.tier_to_rung("pass") == "pass"
    assert L.tier_to_rung("fail") == "fail"
-    # no intentionality information here — a non-run is unver; derive_rungs upgrades to "skip"
-    # only on a declared/structural fact, never the other way.
-    assert L.tier_to_rung("skip") == "unver"
-    assert L.tier_to_rung(None) == "unver"
+    assert L.tier_to_rung("skip") == "na"
+    assert L.tier_to_rung(None) == "na"
--- a/tests/unit/test_lint.py
+++ b/tests/unit/test_lint.py
@@ -1,188 +0,0 @@
-"""Unit tests for the L5 lint executor (harness.lint) — phase lvl5.
-
-Covers the table parser + classifier against real abra-0.13 output shapes (probed on the CI
-host 2026-06-11, JOURNAL-lvl5), and run_lint's never-raise / never-silent-pass guarantees via
-a fake-PATH `script` shim (no real abra needed). Run cold:
-  cc-ci-run -m pytest tests/unit/test_lint.py -q
-"""
-
-from __future__ import annotations
-
-import os
-import stat
-import subprocess
-import sys
-
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
-from harness import lint as L  # noqa: E402
-
-# Realistic abra lint table rows (unicode box drawing, ✅/❌ marks), as captured on cc-ci.
-TABLE_OK = (
-    "┏━━━━━━┳━━━━━━┓\r\n"
-    "│ R001 │ compose config has expected version         │ warn     │ ✅        │ -       │ ensure │\r\n"
-    "│ R015 │ long secret names                           │ warn     │ ❌        │ -       │ reduce │\r\n"
-    "│ R008 │ .env.sample provided                        │ error    │ ✅        │ -       │ create │\r\n"
-    "│ R014 │ only annotated tags used for recipe version │ error    │ ✅        │ -       │ retag  │\r\n"
-    "┗━━━━━━┻━━━━━━┛\r\n"
-    "WARN secret session_secret is longer than 12 characters\r\n"
-)
-
-TABLE_R014_FAIL = (
-    TABLE_OK.replace(
-        "│ R014 │ only annotated tags used for recipe version │ error    │ ✅",
-        "│ R014 │ only annotated tags used for recipe version │ error    │ ❌",
-    )
-    + "WARN critical errors present in hedgedoc config\r\n"
-)
-
-TABLE_SKIPPED_ERROR = TABLE_OK.replace(
-    "│ R014 │ only annotated tags used for recipe version │ error    │ ✅        │ -       │",
-    "│ R014 │ only annotated tags used for recipe version │ error    │ ❌        │ skipped │",
-)
-
-
-# ---- parse_table ----
-
-
-def test_parse_table_rows_and_marks():
-    rows = L.parse_table(TABLE_OK)
-    by = {r["rule"]: r for r in rows}
-    assert set(by) == {"R001", "R015", "R008", "R014"}
-    assert by["R001"]["severity"] == "warn" and by["R001"]["satisfied"]
-    assert by["R015"]["severity"] == "warn" and not by["R015"]["satisfied"]
-    assert by["R014"]["severity"] == "error" and by["R014"]["satisfied"]
-    assert not any(r["skipped"] for r in rows)
-
-
-def test_parse_table_strips_ansi():
-    rows = L.parse_table("\x1b[1m" + TABLE_OK + "\x1b[0m")
-    assert len(rows) == 4
-
-
-def test_parse_table_garbage_is_empty():
-    assert L.parse_table("FATA something exploded\r\n") == []
-    assert L.parse_table("") == []
-
-
-# ---- classify ----
-
-
-def test_classify_pass_with_warn_misses_only():
-    # warn-severity ❌ (R015) does NOT fail the rung — only error-severity rules do.
-    assert L.classify(0, TABLE_OK) == ("pass", "", [])
-
-
-def test_classify_error_rule_fails():
-    status, detail, failed = L.classify(0, TABLE_R014_FAIL)
-    assert status == "fail"
-    assert failed == ["R014"]
-    assert "R014" in detail
-
-
-def test_classify_skipped_error_rule_does_not_fail_but_sentinel_guards():
-    # a skipped error rule isn't counted as failed by the parser, but abra's own sentinel line
-    # (if present) still forces fail — the classifier never out-greens abra.
-    status, _, failed = L.classify(0, TABLE_SKIPPED_ERROR)
-    assert failed == []
-    assert status == "pass"
-    status2, detail2, _ = L.classify(
-        0, TABLE_SKIPPED_ERROR + "WARN critical errors present in x config\r\n"
-    )
-    assert status2 == "fail"
-    assert "critical errors" in detail2
-
-
-def test_classify_rc0_without_table_is_unver():
-    # rc=0 but nothing parseable → cannot claim pass.
-    assert L.classify(0, "weird output")[0] == "unver"
-
-
-def test_classify_content_fata_is_fail():
-    out = "FATA unable to validate recipe: .env.sample for x couldn't be read\r\n"
-    status, detail, _ = L.classify(1, out)
-    assert status == "fail"
-    assert "unable to validate recipe" in detail
-
-
-def test_classify_environment_fata_is_unver():
-    out = "FATA unable to fetch tags in /x: repository not found: Not found.\r\n"
-    status, detail, _ = L.classify(1, out)
-    assert status == "unver"
-    assert "fetch tags" in detail
-
-
-def test_classify_did_not_run_is_unver():
-    assert L.classify(None, "")[0] == "unver"
-
-
-# ---- run_lint: never raises, never silently passes ----
-
-
-def _mkrecipe(tmp_path):
-    repo = tmp_path / "abra" / "recipes" / "fakerec"
-    repo.mkdir(parents=True)
-    (repo / "compose.yml").write_text("version: '3.8'\n")
-    for cmd in (
-        ["git", "init", "-q"],
-        ["git", "add", "."],
-        ["git", "-c", "user.email=t@t", "-c", "user.name=t", "commit", "-qm", "x"],
-    ):
-        subprocess.run(cmd, cwd=repo, check=True)
-    return repo
-
-
-def _shim(tmp_path, body):
-    """Drop a fake `script` executable on PATH (run_lint invokes `script -qec "abra ..."`)."""
-    bindir = tmp_path / "bin"
-    bindir.mkdir(exist_ok=True)
-    sh = bindir / "script"
-    sh.write_text("#!/bin/sh\n" + body)
-    sh.chmod(sh.stat().st_mode | stat.S_IEXEC)
-    return str(bindir)
-
-
-def test_run_lint_pass_via_shim(tmp_path, monkeypatch):
-    _mkrecipe(tmp_path)
-    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
-    out = TABLE_OK.replace("\r\n", "\\n")
-    monkeypatch.setenv(
-        "PATH", _shim(tmp_path, f'printf "{out}"\nexit 0\n') + os.pathsep + os.environ["PATH"]
-    )
-    res = L.run_lint("fakerec", None, str(tmp_path / "artifacts"))
-    assert res["status"] == "pass"
-    txt = (tmp_path / "artifacts" / "lint.txt").read_text()
-    assert "abra recipe lint -n fakerec" in txt and "R001" in txt
-
-
-def test_run_lint_fail_via_shim(tmp_path, monkeypatch):
-    _mkrecipe(tmp_path)
-    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
-    out = TABLE_R014_FAIL.replace("\r\n", "\\n")
-    monkeypatch.setenv(
-        "PATH", _shim(tmp_path, f'printf "{out}"\nexit 0\n') + os.pathsep + os.environ["PATH"]
-    )
-    res = L.run_lint("fakerec", None, str(tmp_path / "artifacts"))
-    assert res["status"] == "fail"
-    assert res["rules_failed"] == ["R014"]
-
-
-def test_run_lint_missing_recipe_is_unver_not_raise(tmp_path, monkeypatch):
-    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra-none"))
-    res = L.run_lint("no-such-recipe", None, str(tmp_path / "artifacts"))
-    assert res["status"] == "unver"
-    assert res["detail"]
-    # lint.txt still written with the failure context (loud, never silent)
-    assert (tmp_path / "artifacts" / "lint.txt").exists()
-
-
-def test_run_lint_abra_blowup_is_unver(tmp_path, monkeypatch):
-    _mkrecipe(tmp_path)
-    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
-    monkeypatch.setenv(
-        "PATH",
-        _shim(tmp_path, 'echo "FATA inappropriate ioctl for device"\nexit 1\n')
-        + os.pathsep
-        + os.environ["PATH"],
-    )
-    res = L.run_lint("fakerec", None, None)
-    assert res["status"] == "unver"
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -1,8 +1,7 @@
-"""Unit tests for results assembly (harness.results) — phase lvl5 semantics.
+"""Unit tests for Phase-3 results assembly (harness.results), plan-phase3-results-ux.md §4.2 / R1/R3.

-Covers JUnit parsing, stage roll-up, the tier→rung derivation (the SINGLE place every N/A source
-is classified intentional-skip vs unverified — the table in DECISIONS.md phase lvl5), the L5 lint
-rung wiring, and full results.json assembly. Pure / tmp-file only. Run cold:
+Covers JUnit parsing, stage roll-up, the tier→rung derivation (the documented mapping the level
+depends on), and full results.json assembly incl. the U0 gate cases. Pure / tmp-file only. Run cold:
  cc-ci-run -m pytest tests/unit/test_results.py -q
 """

@@ -28,8 +27,6 @@ JUNIT_MIXED = """<?xml version="1.0"?>
 <testcase classname="tests.y" name="test_skipped" time="0"><skipped message="no deps"/></testcase>
 </testsuite></testsuites>"""

-LINT_PASS = {"status": "pass", "detail": "", "rules_failed": []}
-

 def _write(tmp_path, name, content):
    p = tmp_path / name
@@ -93,7 +90,7 @@ def test_collect_stages_synthesizes_when_no_junit():
    assert len(stages[0]["tests"]) == 1


-# ---- derive_rungs: the documented N/A-classification mapping (DECISIONS.md phase lvl5) ----
+# ---- derive_rungs: the documented mapping ----


 def _results(**kw):
@@ -108,113 +105,34 @@ def _results(**kw):
    return base


-def test_derive_rungs_full_climb_five_rungs():
-    rungs = R.derive_rungs(
-        _results(), backup_capable=True, has_upgrade_target=True, lint_status="pass"
-    )
-    # the five essential rungs — integration/recipe-local are optional, not produced here.
+def test_derive_rungs_full_climb_four_essential():
+    rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
+    # only the four essential rungs — integration/recipe-local are optional, not produced here.
    assert rungs == {
        "install": "pass",
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
-        "lint": "pass",
    }


-def test_derive_rungs_structural_skips_are_intentional():
-    # single published version (tier skipped, no upgrade target) + not backup-capable →
-    # both rungs are INTENTIONAL skips, not unverified.
+def test_derive_rungs_stateless_backup_and_functional_na():
    rungs = R.derive_rungs(
-        _results(upgrade="skip", backup="skip", restore="skip"),
+        _results(backup="skip", restore="skip", custom="skip"),
        backup_capable=False,
-        has_upgrade_target=False,
-        lint_status="pass",
+        has_custom=False,
    )
-    assert rungs["upgrade"] == "skip"
-    assert rungs["backup_restore"] == "skip"
+    assert rungs["backup_restore"] == "na"
+    assert rungs["functional"] == "na"
    assert "integration" not in rungs and "recipe_local" not in rungs


-def test_derive_rungs_upgrade_skip_with_target_is_unverified():
-    # the tier skipped although an upgrade target exists (e.g. install failed → downstream
-    # skipped): NOT structural → unver.
-    rungs = R.derive_rungs(
-        _results(install="fail", upgrade="skip", backup="skip", restore="skip", custom="skip"),
-        backup_capable=True,
-        has_upgrade_target=True,
-        lint_status="pass",
-    )
-    assert rungs["install"] == "fail"
-    assert rungs["upgrade"] == "unver"
-    assert rungs["backup_restore"] == "unver"
-    assert rungs["functional"] == "unver"
-
-
-def test_derive_rungs_missing_tier_is_unverified():
-    # a tier excluded from the run entirely (dev CCCI_STAGES escape) → no result key → unver,
-    # never an intentional skip (the recipe didn't declare anything).
-    res = {"install": "pass"}
-    rungs = R.derive_rungs(res, backup_capable=True, has_upgrade_target=True, lint_status="pass")
-    assert rungs["upgrade"] == "unver"
-    assert rungs["backup_restore"] == "unver"
-    assert rungs["functional"] == "unver"
-
-
-def test_derive_rungs_expected_na_declares_intentional():
-    # EXPECTED_NA turns a non-run rung into an intentional skip (declared source).
-    rungs = R.derive_rungs(
-        _results(custom="skip"),
-        backup_capable=True,
-        has_upgrade_target=True,
-        expected_na={"functional": "no functional surface"},
-        lint_status="pass",
-    )
-    assert rungs["functional"] == "skip"
-
-
-def test_derive_rungs_no_custom_tests_defaults_unverified():
-    # absent functional coverage with NO declaration is a gap → unver (conservative default).
-    rungs = R.derive_rungs(
-        _results(custom="skip"), backup_capable=True, has_upgrade_target=True, lint_status="pass"
-    )
-    assert rungs["functional"] == "unver"
-
-
-def test_derive_rungs_expected_na_never_overrides_a_real_result():
-    # a declaration cannot soften an exercised rung: fail stays fail.
-    rungs = R.derive_rungs(
-        _results(custom="fail"),
-        backup_capable=True,
-        has_upgrade_target=True,
-        expected_na={"functional": "declared"},
-        lint_status="pass",
-    )
-    assert rungs["functional"] == "fail"
-
-
-def test_derive_rungs_lint_never_skips():
-    # lint has NO intentional-skip escape hatch: pass/fail from the executor, anything else
-    # (None, "unver", junk) → unver — even if a recipe tries to declare it away.
-    for status, want in (("pass", "pass"), ("fail", "fail"), ("unver", "unver"), (None, "unver")):
-        rungs = R.derive_rungs(
-            _results(),
-            backup_capable=True,
-            has_upgrade_target=True,
-            expected_na={"lint": "nope"},
-            lint_status=status,
-        )
-        assert rungs["lint"] == want, status
-
-
 def test_derive_rungs_functional_fail():
-    rungs = R.derive_rungs(
-        _results(custom="fail"), backup_capable=True, has_upgrade_target=True, lint_status="pass"
-    )
+    rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
    assert rungs["functional"] == "fail"


-# ---- build_results: end-to-end incl level + lint + flags ----
+# ---- build_results: end-to-end incl level + flags ----


 def test_build_results_level_and_flags(tmp_path):
@ -245,75 +163,17 @@ def test_build_results_level_and_flags(tmp_path):
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=1234.0,
-        lint=LINT_PASS,
    )
-    # all five essential rungs pass → full climb to L5; no cap concept anywhere.
-    assert data["schema"] == 2
-    assert data["level"] == 5
-    assert "level_cap_reason" not in data and "level_cap_rung" not in data
+    # all four essential rungs pass → full climb to L4 (the top), no cap
+    assert data["level"] == 4
+    assert data["level_cap_reason"] == ""
    assert data["recipe"] == "hedgedoc"
    assert data["ref"] == "deadbeefcafe"
    assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
-    # lint appears as a synthetic stage so the card's table carries all five rungs.
-    assert [s["name"] for s in data["stages"]] == ["install", "custom", "lint"]
-    assert data["lint"] == {"status": "pass", "detail": "", "rules_failed": []}
+    assert [s["name"] for s in data["stages"]] == ["install", "custom"]


-def test_build_results_lint_fail_blocks_at_4(tmp_path):
-    recs = [
-        {
-            "tier": "install",
-            "source": "generic",
-            "file": "g/test_install.py",
-            "rc": 0,
-            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
-        }
-    ]
-    data = R.build_results(
-        recipe="x",
-        version=None,
-        pr="0",
-        ref=None,
-        records=recs,
-        results=_results(),
-        backup_capable=True,
-        clean_teardown=True,
-        no_secret_leak=True,
-        finished_ts=0.0,
-        lint={
-            "status": "fail",
-            "detail": "error rule(s) unsatisfied: R014",
-            "rules_failed": ["R014"],
-        },
-    )
-    assert data["level"] == 4
-    assert data["rungs"]["lint"] == "fail"
-    assert data["lint"]["rules_failed"] == ["R014"]
-    lint_stage = [s for s in data["stages"] if s["name"] == "lint"][0]
-    assert lint_stage["status"] == "fail"
-    assert "R014" in lint_stage["tests"][0]["message"]
-
-
-def test_build_results_no_lint_given_is_unverified_never_pass(tmp_path):
-    # an old/lint-less caller must NEVER get a free L5: the rung derives as unver → level 4 max.
-    data = R.build_results(
-        recipe="x",
-        version=None,
-        pr="0",
-        ref=None,
-        records=[],
-        results=_results(),
-        backup_capable=True,
-        clean_teardown=True,
-        no_secret_leak=True,
-        finished_ts=0.0,
-    )
-    assert data["rungs"]["lint"] == "unver"
-    assert data["level"] == 4
-    assert "lint" in data["skips"]["unintentional"]
-
-
-def test_build_results_level1_on_upgrade_fail(tmp_path):
+def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
    recs = [
        {
            "tier": "install",
@ -334,13 +194,12 @@ def test_build_results_level1_on_upgrade_fail(tmp_path):
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
-        lint=LINT_PASS,
    )
    assert data["level"] == 1
-    assert data["rungs"]["upgrade"] == "fail"
+    assert "L2" in data["level_cap_reason"]


-# ---- skips: intentional (declared/structural, with reason) vs unintentional (= unver) ----
+# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----


 def _rungs(**kw):
@ -349,26 +208,24 @@ def _rungs(**kw):
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
-        "lint": "pass",
    }
    base.update(kw)
    return base


-def test_skips_declared_reason_and_unverified_split():
-    rungs = _rungs(backup_restore="skip", functional="unver")
+def test_skips_intentional_vs_unintentional():
+    rungs = _rungs(backup_restore="na", functional="na")
    sk = R.skips(rungs, {"backup_restore": "stateless static server"})
+    # backup_restore is declared (intentional, with reason); functional skipped but not declared.
    assert sk["intentional"] == {"backup_restore": "stateless static server"}
    assert sk["unintentional"] == ["functional"]


-def test_skips_structural_reason_when_undeclared():
-    # a structural skip (derive_rungs) carries its structural reason even without EXPECTED_NA.
-    rungs = _rungs(upgrade="skip", backup_restore="skip")
+def test_skips_none_declared_all_unintentional():
+    rungs = _rungs(backup_restore="na")
    sk = R.skips(rungs, None)
-    assert "only one published version" in sk["intentional"]["upgrade"]
-    assert "not backup-capable" in sk["intentional"]["backup_restore"]
-    assert sk["unintentional"] == []
+    assert sk["intentional"] == {}
+    assert sk["unintentional"] == ["backup_restore"]


 def test_skips_declaration_only_counts_when_actually_skipped():
@ -379,9 +236,9 @@ def test_skips_declaration_only_counts_when_actually_skipped():
    assert "backup_restore" not in sk["unintentional"]


-def test_build_results_stateless_recipe_climbs(tmp_path):
-    # custom-html-tiny shape: no backup surface (declared), single published version, passing
-    # functional — formerly capped at L2 by the N/A; now climbs to L5 (the de-cap, mission §2).
+def test_build_results_threads_expected_na(tmp_path):
+    # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
+    # backup surface (backup_restore declared intentionally skipped).
    recs = [
        {
            "tier": "install",
@ -404,47 +261,23 @@ def test_build_results_stateless_recipe_climbs(tmp_path):
        pr="0",
        ref=None,
        records=recs,
-        results=_results(upgrade="skip", backup="skip", restore="skip"),
-        backup_capable=False,  # no backupbot label → structural intentional skip
-        has_upgrade_target=False,  # single published version → structural intentional skip
+        results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
+        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
-        lint=LINT_PASS,
        expected_na={"backup_restore": "stateless static file server"},
    )
-    assert data["level"] == 5  # skips are climbed past; nothing was inflated to get here
-    assert data["rungs"] == {
-        "install": "pass",
-        "upgrade": "skip",
-        "backup_restore": "skip",
-        "functional": "pass",
-        "lint": "pass",
-    }
-    assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
-    assert "only one published version" in data["skips"]["intentional"]["upgrade"]
-    assert data["skips"]["unintentional"] == []
-
-
-def test_build_results_unverified_backup_blocks(tmp_path):
-    # synthesized tier abort: backup-capable but the tiers never produced a result → unver → the
-    # level stays below the unverified rung (mission worked example #3).
-    data = R.build_results(
-        recipe="x",
-        version=None,
-        pr="0",
-        ref=None,
-        records=[],
-        results=_results(backup="skip", restore="skip"),
-        backup_capable=True,
-        clean_teardown=True,
-        no_secret_leak=True,
-        finished_ts=0.0,
-        lint=LINT_PASS,
-    )
-    assert data["rungs"]["backup_restore"] == "unver"
+    # A skipped backup_restore still caps the level at L2 (a skip never inflates it), even though
+    # functional passes above it; the capping rung is the declared (intentional) skip.
    assert data["level"] == 2
-    assert data["skips"]["unintentional"] == ["backup_restore"]
+    assert "L3" in data["level_cap_reason"]
+    assert data["level_cap_rung"] == "backup_restore"
+    assert data["rungs"]["functional"] == "pass"
+    assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
+    assert (
+        data["skips"]["unintentional"] == []
+    )  # backup_restore declared; functional passed → clean


 def test_build_results_threads_customization(tmp_path):
@ -477,7 +310,6 @@ def test_build_results_threads_customization(tmp_path):
        "clean_teardown": True,
        "no_secret_leak": True,
        "finished_ts": 0.0,
-        "lint": LINT_PASS,
    }
    assert R.build_results(**kwargs, customization=cust)["customization"] == cust
    assert R.build_results(**kwargs)["customization"] is None