From 3b0a3d14ea0afd23133d9e7bc94d2773de2f7ff5 Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 01:59:28 +0000
Subject: [PATCH 1/7] feat(harness): declare intentional N/A tiers +
 custom-html-tiny functional test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two changes the operator asked for after noticing custom-html-tiny PR #6 has no
backup/restore or functional coverage:

1) Intentional-vs-accidental N/A. A recipe can now declare
   recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not
   applicable (e.g. a stateless static server has no backup surface). N/A still
   caps the level — the harness never claims a rung it did not verify — but the
   run is now annotated 'intentional · <reason>' instead of being indistinguishable
   from a forgotten test. An *undeclared* N/A on a gap-sensitive rung
   (backup_restore, functional) is surfaced as a 'possible coverage gap', and a
   stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All
   non-blocking (R7): results.json gains level_cap_intent + an 'na' block, the
   summary card shows the clause, and the CI log prints the gap/stale warnings.
   (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.)

   custom-html-tiny declares backup_restore intentionally N/A.

2) custom-html-tiny functional test: writes a random file into the served content
   volume (via the volume mountpoint, like install_steps.sh, since the SWS image
   is shell-less), asserts exact-byte round-trip + a real 404 on a missing path —
   proving the static-web-server actually serves the volume, not a 200-everything
   fallback.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 runner/harness/card.py                        |  4 +-
 runner/harness/results.py                     | 59 ++++++++++-
 runner/run_recipe_ci.py                       | 22 ++++-
 .../functional/test_serves_content.py         | 87 ++++++++++++++++
 tests/custom-html-tiny/recipe_meta.py         | 12 +++
 tests/unit/test_results.py                    | 98 +++++++++++++++++++
 6 files changed, 279 insertions(+), 3 deletions(-)
 create mode 100644 tests/custom-html-tiny/functional/test_serves_content.py
diff --git a/runner/harness/card.py b/runner/harness/card.py
index 44cf4ae..36b717f 100644
--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
     recipe = html.escape(str(data.get("recipe", "?")))
     version = html.escape(str(data.get("version") or data.get("ref") or ""))
     level = int(data.get("level", 0))
-    cap = html.escape(str(data.get("level_cap_reason") or ""))
+    cap_reason = str(data.get("level_cap_reason") or "")
+    cap_intent = str(data.get("level_cap_intent") or "")
+    cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
     color = level_color(level)
     flags = data.get("flags", {}) or {}
     flag_bits = []
diff --git a/runner/harness/results.py b/runner/harness/results.py
index c3fae07..3e6fed7 100644
--- a/runner/harness/results.py
+++ b/runner/harness/results.py
@@ -200,6 +200,56 @@ def derive_rungs(
     return rungs
 
 
+# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
+# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
+# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
+# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
+# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
+# *structurally* optional: an N/A there is the normal case and is not flagged.
+GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
+
+
+def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
+    """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
+
+    A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
+    the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
+    this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
+    from "someone forgot to add the backup test". Returns:
+      { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}},  # one per N/A rung
+        "gaps": [rung, ...],            # gap-sensitive rungs that are N/A and NOT declared
+        "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
+    """
+    expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
+    na: dict[str, dict] = {}
+    for rung, st in rungs.items():
+        if st != "na":
+            continue
+        if rung in expected:
+            na[rung] = {"intent": "declared", "reason": expected[rung]}
+        else:
+            na[rung] = {"intent": "undeclared", "reason": ""}
+    gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
+    stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
+    return {"rungs": na, "gaps": gaps, "stale_declared": stale}
+
+
+def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
+    """A short clause explaining the level cap when the capping rung is N/A: the declared reason if
+    intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''."""
+    if not cap_reason:
+        return ""
+    capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None
+    if not capped or rungs.get(capped) != "na":
+        return ""
+    entry = na_info["rungs"].get(capped, {})
+    if entry.get("intent") == "declared":
+        return f"intentional · {entry['reason']}"
+    if capped in GAP_SENSITIVE_RUNGS:
+        return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
+    return ""
+
+
 def build_results(
     *,
     recipe: str,
@@ -217,9 +267,12 @@ def build_results(
     finished_ts: float | None,
     screenshot: str | None = None,
     summary_card: str | None = None,
+    expected_na: dict | None = None,
 ) -> dict:
     """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
-    stamps it) so this stays pure and deterministic for unit tests."""
+    stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
+    declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
+    accidentally-missing coverage."""
     stages = collect_stages(records)
     has_custom = any(r["tier"] == "custom" for r in records)
     rungs = derive_rungs(
@@ -233,6 +286,8 @@ def build_results(
         repo_local_passed=_repo_local_passed(records),
     )
     lvl, cap_reason = level_mod.compute_level(rungs)
+    na_info = classify_na(rungs, expected_na)
+    intent = cap_intent(rungs, lvl, cap_reason, na_info)
     return {
         "schema": 1,
         "run_id": run_id(),
@@ -243,7 +298,9 @@ def build_results(
         "finished": finished_ts,
         "level": lvl,
         "level_cap_reason": cap_reason,
+        "level_cap_intent": intent,
         "rungs": rungs,
+        "na": na_info,
         "stages": stages,
         "results": results,
         "flags": {
diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py
index 11fadd0..becc9c4 100644
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
         for k in list(meta) + [
             "BACKUP_CAPABLE",
             "SKIP_GENERIC",
+            "EXPECTED_NA",
             "OIDC_AT_INSTALL",
             "READY_PROBE",
             "UPGRADE_BASE_VERSION",
@@ -1241,6 +1242,7 @@ def main() -> int:
             no_secret_leak=True,  # narrowed below by an actual scan of the serialised artifact
             screenshot=screenshot_rel,  # Phase 3 U1 (R4): relative PNG name iff capture succeeded
             finished_ts=time.time(),
+            expected_na=meta.get("EXPECTED_NA"),  # declared intentional-N/A map (recipe_meta)
         )
         # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
         blob = json.dumps(data)
@@ -1252,11 +1254,29 @@ def main() -> int:
                 file=sys.stderr,
             )
         path = results_mod.write_results(data)
+        intent = data.get("level_cap_intent") or ""
         print(
             f"results.json written: {path} (level={data['level']}"
-            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
+            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}"
+            f"{' [' + intent + ']' if intent else ''})",
             flush=True,
         )
+        # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a
+        # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale
+        # EXPECTED_NA declares a tier N/A that actually ran.
+        na = data.get("na", {})
+        for rung in na.get("gaps", []):
+            print(
+                f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or "
+                f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.",
+                flush=True,
+            )
+        for rung in na.get("stale_declared", []):
+            print(
+                f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
+                f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
+                flush=True,
+            )
     except Exception as e:  # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
         print(
             f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
diff --git a/tests/custom-html-tiny/functional/test_serves_content.py b/tests/custom-html-tiny/functional/test_serves_content.py
new file mode 100644
index 0000000..cb30917
--- /dev/null
+++ b/tests/custom-html-tiny/functional/test_serves_content.py
@@ -0,0 +1,87 @@
+"""custom-html-tiny — recipe-specific functional test (static-web-server).
+
+Proves the deployed static-web-server is *actually serving files from its `content` volume* with real
+file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub:
+
+  1. exact-byte round-trip — write a uniquely-named file with random content into the served volume,
+     fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random
+     per run, so only a server that reads this file off the volume can pass.
+  2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a
+     200-everything stub or mis-routed host would not 404).
+
+The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume
+is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the
+same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather
+than `docker exec`-ing in a container that has no shell.
+
+Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its
+per-run domain). Mirrors install_steps.sh: the app's content volume is named `<stack>_content`, where
+`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is
+served at `/`.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import ssl
+import subprocess
+import urllib.error
+import urllib.request
+import uuid
+
+
+def _served_dir(domain: str) -> str:
+    """Host mountpoint of the app's served `content` volume (same naming as install_steps.sh)."""
+    vol = f"{domain.replace('.', '_')}_content"
+    out = subprocess.run(
+        ["docker", "volume", "inspect", vol, "--format", "{{.Mountpoint}}"],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    mountpoint = out.stdout.strip()
+    assert mountpoint, f"could not resolve mountpoint for volume {vol!r}"
+    return mountpoint
+
+
+def _get(url: str) -> tuple[int, bytes]:
+    """GET the URL; return (status, body). A 4xx/5xx is returned, not raised (we assert on the code).
+    TLS verification is relaxed: the served wildcard cert is validated separately by the infra check;
+    here we care only about the app's response."""
+    ctx = ssl.create_default_context()
+    ctx.check_hostname = False
+    ctx.verify_mode = ssl.CERT_NONE
+    try:
+        with urllib.request.urlopen(url, timeout=20, context=ctx) as resp:
+            return resp.status, resp.read()
+    except urllib.error.HTTPError as e:
+        return e.code, e.read()
+
+
+def test_static_file_roundtrip_and_404(live_app):
+    """Write a random file into the served volume → fetch it → bytes match; and a missing path 404s."""
+    served = _served_dir(live_app)
+    token = uuid.uuid4().hex
+    name = f"ccci-probe-{token}.txt"
+    body = f"cc-ci-functional-{token}\n".encode()
+    path = os.path.join(served, name)
+    with open(path, "wb") as fh:
+        fh.write(body)
+    try:
+        status, got = _get(f"https://{live_app}/{name}")
+        assert status == 200, f"served probe file returned {status} (expected 200)"
+        assert got == body, (
+            f"content round-trip mismatch: served {got!r}, wrote {body!r} "
+            "(static-web-server not serving the content volume?)"
+        )
+
+        # A random non-existent path must 404 — proves real static-file semantics, distinguishing a
+        # working server from a 200-everything stub or a mis-routed Traefik fallback.
+        miss_status, _ = _get(f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt")
+        assert miss_status == 404, (
+            f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)"
+        )
+    finally:
+        with contextlib.suppress(OSError):
+            os.remove(path)
diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py
index 44603a9..25aac26 100644
--- a/tests/custom-html-tiny/recipe_meta.py
+++ b/tests/custom-html-tiny/recipe_meta.py
@@ -3,3 +3,15 @@
 # (DG5) is detected quickly instead of waiting the default 300s HTTP timeout.
 DEPLOY_TIMEOUT = 120
 HTTP_TIMEOUT = 90
+
+# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless
+# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time
+# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore.
+# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A.
+# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of
+# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never
+# claims a rung it did not verify; this only explains *why* the cap is expected.)
+EXPECTED_NA = {
+    "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
+    "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
+}
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index d8bdd51..b52cd82 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -257,6 +257,104 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
     assert "L2" in data["level_cap_reason"]
 
 
+# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ----
+
+
+def _rungs(**kw):
+    base = {
+        "install": "pass",
+        "upgrade": "pass",
+        "backup_restore": "pass",
+        "functional": "pass",
+        "integration": "na",
+        "recipe_local": "na",
+    }
+    base.update(kw)
+    return base
+
+
+def test_classify_na_declared_vs_undeclared():
+    rungs = _rungs(backup_restore="na", functional="na")
+    info = R.classify_na(rungs, {"backup_restore": "stateless static server"})
+    # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A.
+    assert info["rungs"]["backup_restore"] == {
+        "intent": "declared",
+        "reason": "stateless static server",
+    }
+    assert info["rungs"]["functional"]["intent"] == "undeclared"
+    assert info["gaps"] == ["functional"]  # backup_restore declared → not a gap
+    assert info["stale_declared"] == []
+    # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps.
+    assert info["rungs"]["integration"]["intent"] == "undeclared"
+    assert "integration" not in info["gaps"]
+
+
+def test_classify_na_stale_declaration():
+    # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced.
+    rungs = _rungs(backup_restore="pass")
+    info = R.classify_na(rungs, {"backup_restore": "stale reason"})
+    assert info["stale_declared"] == ["backup_restore"]
+    assert "backup_restore" not in info["rungs"]  # not N/A, so not in the per-rung N/A map
+
+
+def test_cap_intent_declared_explains_cap():
+    # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause.
+    rungs = _rungs(backup_restore="na")
+    info = R.classify_na(rungs, {"backup_restore": "no persistent data"})
+    intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
+    assert intent == "intentional · no persistent data"
+
+
+def test_cap_intent_undeclared_gap():
+    rungs = _rungs(backup_restore="na")
+    info = R.classify_na(rungs, None)
+    intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
+    assert "possible coverage gap" in intent
+
+
+def test_cap_intent_blank_when_not_capped_on_na():
+    rungs = _rungs()  # full clean climb, capped only at integration (na, structurally optional)
+    info = R.classify_na(rungs, None)
+    # capping rung is integration (level 4) — structurally optional, so no intent clause.
+    assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == ""
+    # and no cap at all → blank.
+    assert R.cap_intent(rungs, 6, "", info) == ""
+
+
+def test_build_results_threads_expected_na(tmp_path):
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+        }
+    ]
+    data = R.build_results(
+        recipe="custom-html-tiny",
+        version="1.1.0",
+        pr="0",
+        ref=None,
+        records=recs,
+        results=_results(backup="skip", restore="skip", custom="skip"),
+        backup_capable=False,  # no backupbot label → backup_restore N/A
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=0.0,
+        expected_na={"backup_restore": "stateless static file server"},
+    )
+    # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged.
+    assert data["level"] == 2
+    assert "L3" in data["level_cap_reason"]
+    assert data["level_cap_intent"] == "intentional · stateless static file server"
+    assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared"
+    assert data["na"]["gaps"] == []
+
+
 def test_write_results_roundtrip(tmp_path):
     data = {"run_id": "42", "level": 3, "stages": []}
     path = R.write_results(data, runs_dir_override=str(tmp_path))
-- 
2.49.0


From f3a1ad5388c89cfb251ed7545650658661e0aed7 Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:00:16 +0000
Subject: [PATCH 2/7] test: representative expected_na scenario (functional
 covered, backup declared-N/A)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/unit/test_results.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index b52cd82..9da1aff 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -322,6 +322,8 @@ def test_cap_intent_blank_when_not_capped_on_na():
 
 
 def test_build_results_threads_expected_na(tmp_path):
+    # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
+    # backup surface (backup_restore declared intentionally N/A).
     recs = [
         {
             "tier": "install",
@@ -329,7 +331,14 @@ def test_build_results_threads_expected_na(tmp_path):
             "file": "g/test_install.py",
             "rc": 0,
             "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
-        }
+        },
+        {
+            "tier": "custom",
+            "source": "cc-ci",
+            "file": "c/test_serves_content.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
+        },
     ]
     data = R.build_results(
         recipe="custom-html-tiny",
@@ -337,7 +346,7 @@ def test_build_results_threads_expected_na(tmp_path):
         pr="0",
         ref=None,
         records=recs,
-        results=_results(backup="skip", restore="skip", custom="skip"),
+        results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
         backup_capable=False,  # no backupbot label → backup_restore N/A
         declared=[],
         deps_ready=True,
@@ -347,12 +356,14 @@ def test_build_results_threads_expected_na(tmp_path):
         finished_ts=0.0,
         expected_na={"backup_restore": "stateless static file server"},
     )
-    # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged.
+    # backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it,
+    # the gap caps the climb — but the cap is now annotated intentional rather than flagged.
     assert data["level"] == 2
     assert "L3" in data["level_cap_reason"]
     assert data["level_cap_intent"] == "intentional · stateless static file server"
     assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared"
-    assert data["na"]["gaps"] == []
+    assert data["rungs"]["functional"] == "pass"
+    assert data["na"]["gaps"] == []  # functional now covered; backup_restore declared → no gaps
 
 
 def test_write_results_roundtrip(tmp_path):
-- 
2.49.0


From d733e2c4ca3210fb09d458d0aaff8e5c89d2cfdc Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:26:44 +0000
Subject: [PATCH 3/7] feat(card): badge differentiates expected vs unexpected
 skip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The level badge gains a third segment derived from level_cap_intent:
- amber 'gap?'   when the climb was capped by an UNDECLARED gap-sensitive N/A
                 (backup_restore / functional) — a likely-missing test (unexpected skip)
- muted 'expected' when capped by a DECLARED intentional N/A (reviewed, nothing to fix)
- nothing extra for a clean cap, a full climb, or a real failure.

Font-safe text labels (no emoji) so the SVG renders headless/anywhere. Badge
never inflates — it only annotates the cap the level already reflects.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 runner/harness/card.py  | 42 +++++++++++++++++++++++++++++++++++++----
 runner/run_recipe_ci.py |  8 +++++++-
 tests/unit/test_card.py | 13 +++++++++++++
 3 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/runner/harness/card.py b/runner/harness/card.py
index 36b717f..1cee8a6 100644
--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@@ -79,10 +79,44 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
     )
 
 
-def level_badge_svg(level: int, cap_reason: str = "") -> str:
-    """Per-recipe/-run LEVEL badge: 'cc-ci | level N'. Colour by level (R6)."""
-    msg = f"level {int(level)}"
-    return render_badge_svg("cc-ci", msg, level_color(level))
+# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive
+# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional
+# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere.
+GAP_COLOR = "#d29922"
+EXPECT_COLOR = "#6e7681"
+
+
+def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str:
+    """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment
+    that differentiates *why* the climb stopped when an N/A capped it:
+      - undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'.
+      - declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'.
+      - clean cap / full climb / a real failure: no third segment (the level + card carry it).
+    Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only
+    annotates the cap the level already reflects."""
+    label, msg = "cc-ci", f"level {int(level)}"
+    lw, mw = _text_width(label), _text_width(msg)
+    third: tuple[str, str] | None = None
+    if cap_intent.startswith("undeclared"):
+        third = ("gap?", GAP_COLOR)
+    elif cap_intent.startswith("intentional"):
+        third = ("expected", EXPECT_COLOR)
+    if third is None:
+        return render_badge_svg(label, msg, level_color(level))
+    txt, tcolor = third
+    tw = _text_width(txt)
+    w = lw + mw + tw
+    return (
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{w}" height="20" role="img" '
+        f'aria-label="{html.escape(label)}: {html.escape(msg)} ({html.escape(txt)})">'
+        f'<rect width="{lw}" height="20" fill="#555"/>'
+        f'<rect x="{lw}" width="{mw}" height="20" fill="{level_color(level)}"/>'
+        f'<rect x="{lw + mw}" width="{tw}" height="20" fill="{tcolor}"/>'
+        f'<g fill="#fff" font-family="Verdana,Geneva,sans-serif" font-size="11">'
+        f'<text x="6" y="14">{html.escape(label)}</text>'
+        f'<text x="{lw + 6}" y="14">{html.escape(msg)}</text>'
+        f'<text x="{lw + mw + 6}" y="14">{html.escape(txt)}</text></g></svg>'
+    )
 
 
 def _stage_rows(stages: list[dict]) -> str:
diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py
index becc9c4..b10966a 100644
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@@ -1296,7 +1296,13 @@ def main() -> int:
                 f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
             png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
             with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
-                f.write(card_mod.level_badge_svg(data["level"], data.get("level_cap_reason", "")))
+                f.write(
+                    card_mod.level_badge_svg(
+                        data["level"],
+                        data.get("level_cap_reason", ""),
+                        data.get("level_cap_intent", ""),
+                    )
+                )
             print(
                 f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
                 f"badge.svg written into {run_artifact_dir}",
diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py
index cdafc02..1abce04 100644
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@@ -51,6 +51,19 @@ def test_badge_svg_wellformed():
     assert svg.startswith("<svg") and svg.endswith("</svg>")
     assert "level 4" in svg
     assert C.level_color(4) in svg
+    # plain cap (no intent) → two-box badge, no third segment
+    assert "expected" not in svg and "gap?" not in svg
+
+
+def test_badge_svg_differentiates_expected_vs_unexpected_skip():
+    # declared intentional N/A capped the climb → muted "expected" third segment
+    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data")
+    assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
+    assert "gap?" not in exp
+    # undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip)
+    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap")
+    assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
+    assert "expected" not in gap
 
 
 def test_card_html_reports_level_verbatim():
-- 
2.49.0


From b3ab68a9dd43d46ea47e8071dd2b8d0a6e29db69 Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:36:53 +0000
Subject: [PATCH 4/7] refactor: simplify to a list of intentionally-skipped
 rungs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per operator: drop the gap-sensitivity / cap-intent-clause / stale-detection
machinery. Model is now dead simple — recipe_meta.EXPECTED_NA = {rung: reason}
lists the rungs a recipe intentionally skips; ANY rung skipped (N/A) and not in
that list is unintentional.

results.json: replace the 'na' block + level_cap_intent with
  skips: { intentional: {rung: reason}, unintentional: [rung] }
plus level_cap_rung (which rung capped). Badge/card derive intentional-vs-
unintentional from whether the capping rung is in the intentional list. Skips
still cap the level (never inflate). custom-html-tiny lists all three rungs it
intentionally skips (backup_restore, integration, recipe_local).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 runner/harness/card.py                | 35 +++++++-----
 runner/harness/results.py             | 77 +++++++++----------------
 runner/run_recipe_ci.py               | 34 +++++------
 tests/custom-html-tiny/recipe_meta.py | 14 ++---
 tests/unit/test_card.py               | 10 ++--
 tests/unit/test_results.py            | 81 ++++++++++-----------------
 6 files changed, 105 insertions(+), 146 deletions(-)

diff --git a/runner/harness/card.py b/runner/harness/card.py
index 1cee8a6..e57d517 100644
--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@@ -79,27 +79,27 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
     )
 
 
-# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive
-# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional
-# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere.
+# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not
+# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an
+# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels
+# (no emoji) so the SVG renders anywhere.
 GAP_COLOR = "#d29922"
 EXPECT_COLOR = "#6e7681"
 
 
-def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str:
+def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str:
     """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment
-    that differentiates *why* the climb stopped when an N/A capped it:
-      - undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'.
-      - declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'.
-      - clean cap / full climb / a real failure: no third segment (the level + card carry it).
-    Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only
-    annotates the cap the level already reflects."""
+    that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`):
+      - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'.
+      - "intentional"   (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'.
+      - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it).
+    The badge never inflates — it only annotates the cap the level already reflects."""
     label, msg = "cc-ci", f"level {int(level)}"
     lw, mw = _text_width(label), _text_width(msg)
     third: tuple[str, str] | None = None
-    if cap_intent.startswith("undeclared"):
+    if cap_skip == "unintentional":
         third = ("gap?", GAP_COLOR)
-    elif cap_intent.startswith("intentional"):
+    elif cap_skip == "intentional":
         third = ("expected", EXPECT_COLOR)
     if third is None:
         return render_badge_svg(label, msg, level_color(level))
@@ -151,8 +151,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
     version = html.escape(str(data.get("version") or data.get("ref") or ""))
     level = int(data.get("level", 0))
     cap_reason = str(data.get("level_cap_reason") or "")
-    cap_intent = str(data.get("level_cap_intent") or "")
-    cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
+    # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its
+    # reason) or an unintentional one (skipped but not declared).
+    capped = data.get("level_cap_rung")
+    sk = data.get("skips", {}) or {}
+    if capped and capped in (sk.get("intentional") or {}):
+        cap_reason += f" · intentional: {sk['intentional'][capped]}"
+    elif capped and capped in (sk.get("unintentional") or []):
+        cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)"
+    cap = html.escape(cap_reason)
     color = level_color(level)
     flags = data.get("flags", {}) or {}
     flag_bits = []
diff --git a/runner/harness/results.py b/runner/harness/results.py
index 3e6fed7..ec7ca7b 100644
--- a/runner/harness/results.py
+++ b/runner/harness/results.py
@@ -2,7 +2,14 @@
 
 Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
   { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
-    level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
+    level, level_cap_reason, level_cap_rung, rungs,
+    skips:{intentional:{rung:reason}, unintentional:[rung]},
+    flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
+
+`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
+it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
+UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
+never claims a rung it did not verify; this only labels *why* a skip happened.
 
 The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
 parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
@@ -200,54 +207,23 @@ def derive_rungs(
     return rungs
 
 
-# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
-# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
-# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
-# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
-# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
-# *structurally* optional: an N/A there is the normal case and is not flagged.
-GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
+def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
+    """Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model).
 
-
-def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
-    """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
-
-    A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
-    the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
-    this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
-    from "someone forgot to add the backup test". Returns:
-      { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}},  # one per N/A rung
-        "gaps": [rung, ...],            # gap-sensitive rungs that are N/A and NOT declared
-        "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
+    A recipe lists the rungs it intentionally skips, each with a reason, in
+    `recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is
+    **intentional** iff it is in that list; any rung that is skipped and NOT in the list is
+    **unintentional** (a coverage gap someone should either fill or declare). N/A still caps the
+    level either way — the harness never claims a rung it did not verify — this only labels *why* a
+    skip happened. Returns:
+      { "intentional": {rung: reason, ...},   # skipped AND declared in EXPECTED_NA
+        "unintentional": [rung, ...] }         # skipped but NOT declared
     """
     expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
-    na: dict[str, dict] = {}
-    for rung, st in rungs.items():
-        if st != "na":
-            continue
-        if rung in expected:
-            na[rung] = {"intent": "declared", "reason": expected[rung]}
-        else:
-            na[rung] = {"intent": "undeclared", "reason": ""}
-    gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
-    stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
-    return {"rungs": na, "gaps": gaps, "stale_declared": stale}
-
-
-def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
-    """A short clause explaining the level cap when the capping rung is N/A: the declared reason if
-    intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''."""
-    if not cap_reason:
-        return ""
-    capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None
-    if not capped or rungs.get(capped) != "na":
-        return ""
-    entry = na_info["rungs"].get(capped, {})
-    if entry.get("intent") == "declared":
-        return f"intentional · {entry['reason']}"
-    if capped in GAP_SENSITIVE_RUNGS:
-        return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
-    return ""
+    na = [r for r, st in rungs.items() if st == "na"]
+    intentional = {r: expected[r] for r in na if r in expected}
+    unintentional = sorted(r for r in na if r not in expected)
+    return {"intentional": intentional, "unintentional": unintentional}
 
 
 def build_results(
@@ -286,8 +262,9 @@ def build_results(
         repo_local_passed=_repo_local_passed(records),
     )
     lvl, cap_reason = level_mod.compute_level(rungs)
-    na_info = classify_na(rungs, expected_na)
-    intent = cap_intent(rungs, lvl, cap_reason, na_info)
+    # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
+    # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
+    capped = level_mod.RUNGS[lvl] if cap_reason else None
     return {
         "schema": 1,
         "run_id": run_id(),
@@ -298,9 +275,9 @@ def build_results(
         "finished": finished_ts,
         "level": lvl,
         "level_cap_reason": cap_reason,
-        "level_cap_intent": intent,
+        "level_cap_rung": capped,
         "rungs": rungs,
-        "na": na_info,
+        "skips": skips(rungs, expected_na),
         "stages": stages,
         "results": results,
         "flags": {
diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py
index b10966a..d187cae 100644
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@@ -1254,27 +1254,18 @@ def main() -> int:
                 file=sys.stderr,
             )
         path = results_mod.write_results(data)
-        intent = data.get("level_cap_intent") or ""
         print(
             f"results.json written: {path} (level={data['level']}"
-            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}"
-            f"{' [' + intent + ']' if intent else ''})",
+            f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
             flush=True,
         )
-        # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a
-        # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale
-        # EXPECTED_NA declares a tier N/A that actually ran.
-        na = data.get("na", {})
-        for rung in na.get("gaps", []):
+        # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
+        # but is not in the recipe's intentional list — either add the missing coverage or declare it.
+        for rung in data.get("skips", {}).get("unintentional", []):
             print(
-                f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or "
-                f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.",
-                flush=True,
-            )
-        for rung in na.get("stale_declared", []):
-            print(
-                f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
-                f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
+                f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
+                f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
+                f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
                 flush=True,
             )
     except Exception as e:  # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
@@ -1295,12 +1286,17 @@ def main() -> int:
             with open(html_path, "w", encoding="utf-8") as f:
                 f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
             png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
+            capped = data.get("level_cap_rung")
+            sk = data.get("skips", {})
+            cap_skip = (
+                "intentional" if capped in (sk.get("intentional") or {})
+                else "unintentional" if capped in (sk.get("unintentional") or [])
+                else ""
+            )
             with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
                 f.write(
                     card_mod.level_badge_svg(
-                        data["level"],
-                        data.get("level_cap_reason", ""),
-                        data.get("level_cap_intent", ""),
+                        data["level"], data.get("level_cap_reason", ""), cap_skip
                     )
                 )
             print(
diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py
index 25aac26..509317e 100644
--- a/tests/custom-html-tiny/recipe_meta.py
+++ b/tests/custom-html-tiny/recipe_meta.py
@@ -4,14 +4,14 @@
 DEPLOY_TIMEOUT = 120
 HTTP_TIMEOUT = 90
 
-# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless
-# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time
-# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore.
-# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A.
-# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of
-# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never
-# claims a rung it did not verify; this only explains *why* the cap is expected.)
+# Rungs this recipe INTENTIONALLY skips, each with a reason. Any rung that is skipped (N/A) and is
+# NOT listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip
+# still caps the level either way — the harness never claims a rung it did not verify; this only
+# records that the skip is deliberate. custom-html-tiny is a stateless static-web-server, so:
 EXPECTED_NA = {
     "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
     "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
+    "integration": "no SSO/OIDC or cross-app surface — a static file server has no auth integration",
+    "recipe_local": "the upstream recipe ships no tests/ of its own; coverage is the cc-ci generic "
+    "install tier + the functional serve test",
 }
diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py
index 1abce04..7862038 100644
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@@ -55,13 +55,13 @@ def test_badge_svg_wellformed():
     assert "expected" not in svg and "gap?" not in svg
 
 
-def test_badge_svg_differentiates_expected_vs_unexpected_skip():
-    # declared intentional N/A capped the climb → muted "expected" third segment
-    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data")
+def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
+    # an intentional (declared) skip capped the climb → muted "expected" third segment
+    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
     assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
     assert "gap?" not in exp
-    # undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip)
-    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap")
+    # an unintentional skip (not declared) → amber "gap?" third segment
+    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
     assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
     assert "expected" not in gap
 
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index 9da1aff..f034911 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -257,7 +257,7 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
     assert "L2" in data["level_cap_reason"]
 
 
-# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ----
+# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
 
 
 def _rungs(**kw):
@@ -273,57 +273,32 @@ def _rungs(**kw):
     return base
 
 
-def test_classify_na_declared_vs_undeclared():
+def test_skips_intentional_vs_unintentional():
     rungs = _rungs(backup_restore="na", functional="na")
-    info = R.classify_na(rungs, {"backup_restore": "stateless static server"})
-    # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A.
-    assert info["rungs"]["backup_restore"] == {
-        "intent": "declared",
-        "reason": "stateless static server",
-    }
-    assert info["rungs"]["functional"]["intent"] == "undeclared"
-    assert info["gaps"] == ["functional"]  # backup_restore declared → not a gap
-    assert info["stale_declared"] == []
-    # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps.
-    assert info["rungs"]["integration"]["intent"] == "undeclared"
-    assert "integration" not in info["gaps"]
+    sk = R.skips(rungs, {"backup_restore": "stateless static server"})
+    # backup_restore is declared (intentional, with reason); everything else skipped is unintentional.
+    assert sk["intentional"] == {"backup_restore": "stateless static server"}
+    assert sk["unintentional"] == ["functional", "integration", "recipe_local"]
 
 
-def test_classify_na_stale_declaration():
-    # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced.
+def test_skips_none_declared_all_unintentional():
+    rungs = _rungs(backup_restore="na")
+    sk = R.skips(rungs, None)
+    assert sk["intentional"] == {}
+    assert sk["unintentional"] == ["backup_restore", "integration", "recipe_local"]
+
+
+def test_skips_declaration_only_counts_when_actually_skipped():
+    # backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert.
     rungs = _rungs(backup_restore="pass")
-    info = R.classify_na(rungs, {"backup_restore": "stale reason"})
-    assert info["stale_declared"] == ["backup_restore"]
-    assert "backup_restore" not in info["rungs"]  # not N/A, so not in the per-rung N/A map
-
-
-def test_cap_intent_declared_explains_cap():
-    # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause.
-    rungs = _rungs(backup_restore="na")
-    info = R.classify_na(rungs, {"backup_restore": "no persistent data"})
-    intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
-    assert intent == "intentional · no persistent data"
-
-
-def test_cap_intent_undeclared_gap():
-    rungs = _rungs(backup_restore="na")
-    info = R.classify_na(rungs, None)
-    intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
-    assert "possible coverage gap" in intent
-
-
-def test_cap_intent_blank_when_not_capped_on_na():
-    rungs = _rungs()  # full clean climb, capped only at integration (na, structurally optional)
-    info = R.classify_na(rungs, None)
-    # capping rung is integration (level 4) — structurally optional, so no intent clause.
-    assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == ""
-    # and no cap at all → blank.
-    assert R.cap_intent(rungs, 6, "", info) == ""
+    sk = R.skips(rungs, {"backup_restore": "reason"})
+    assert "backup_restore" not in sk["intentional"]
+    assert "backup_restore" not in sk["unintentional"]
 
 
 def test_build_results_threads_expected_na(tmp_path):
     # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
-    # backup surface (backup_restore declared intentionally N/A).
+    # backup surface (backup_restore declared intentionally skipped).
     recs = [
         {
             "tier": "install",
@@ -347,23 +322,27 @@ def test_build_results_threads_expected_na(tmp_path):
         ref=None,
         records=recs,
         results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
-        backup_capable=False,  # no backupbot label → backup_restore N/A
+        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
         declared=[],
         deps_ready=True,
         sso_unverified=False,
         clean_teardown=True,
         no_secret_leak=True,
         finished_ts=0.0,
-        expected_na={"backup_restore": "stateless static file server"},
+        expected_na={
+            "backup_restore": "stateless static file server",
+            "integration": "no SSO surface",
+            "recipe_local": "no upstream tests/",
+        },
     )
-    # backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it,
-    # the gap caps the climb — but the cap is now annotated intentional rather than flagged.
+    # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
+    # the skip caps the climb — but it's the declared (intentional) rung that capped.
     assert data["level"] == 2
     assert "L3" in data["level_cap_reason"]
-    assert data["level_cap_intent"] == "intentional · stateless static file server"
-    assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared"
+    assert data["level_cap_rung"] == "backup_restore"
     assert data["rungs"]["functional"] == "pass"
-    assert data["na"]["gaps"] == []  # functional now covered; backup_restore declared → no gaps
+    assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
+    assert data["skips"]["unintentional"] == []  # every skip accounted for → fully clean
 
 
 def test_write_results_roundtrip(tmp_path):
-- 
2.49.0


From d20ad1e989b8c59ca51bd67fa2a3bf193afed4e1 Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:42:05 +0000
Subject: [PATCH 5/7] =?UTF-8?q?feat(card):=20show=20skipped=20rungs=20as?=
 =?UTF-8?q?=20rows=20=E2=80=94=20INTENTIONAL=20SKIP=20(green)=20with=20rea?=
 =?UTF-8?q?son=20below?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per operator: intentional skips now render like a pass row but labelled
'INTENTIONAL SKIP' (muted green) with the declared reason on the line beneath;
unintentional skips render amber 'UNINTENTIONAL SKIP' with a prompt to add a test
or declare them. The cap line is back to just the level-cap reason (the per-rung
reason now lives in the rows). Every skip row carries an explicit label, so an
intentional skip never reads as a PASS.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 runner/harness/card.py | 49 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/runner/harness/card.py b/runner/harness/card.py
index e57d517..21418dc 100644
--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@@ -141,6 +141,43 @@ def _stage_rows(stages: list[dict]) -> str:
     return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'
 
 
+# Friendly rung labels for the skip rows.
+RUNG_LABEL = {
+    "install": "install",
+    "upgrade": "upgrade",
+    "backup_restore": "backup/restore",
+    "functional": "functional",
+    "integration": "integration",
+    "recipe_local": "recipe-local",
+}
+SKIP_GREEN = "#57ab5a"  # muted green — an intentional skip reads like a pass (but labelled, never inflating)
+
+
+def _skip_rows(skips: dict) -> str:
+    """Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row
+    but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below;
+    an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it."""
+    rows = []
+    for rung, reason in (skips.get("intentional") or {}).items():
+        rows.append(
+            f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{SKIP_GREEN}">⊘</span>'
+            f'<b>{html.escape(RUNG_LABEL.get(rung, rung))}</b></td>'
+            f'<td class="st" style="color:{SKIP_GREEN}">intentional skip</td></tr>'
+        )
+        rows.append(f'<tr class="skipreason"><td></td><td colspan="2">{html.escape(reason)}</td></tr>')
+    for rung in skips.get("unintentional") or []:
+        rows.append(
+            f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{GAP_COLOR}">⊘</span>'
+            f'<b>{html.escape(RUNG_LABEL.get(rung, rung))}</b></td>'
+            f'<td class="st" style="color:{GAP_COLOR}">unintentional skip</td></tr>'
+        )
+        rows.append(
+            '<tr class="skipreason"><td></td><td colspan="2">not declared in EXPECTED_NA — add the '
+            "missing test/label, or declare the skip with a reason</td></tr>"
+        )
+    return "\n".join(rows)
+
+
 def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str:
     """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to
     the screenshot PNG (same dir as the card) — omitted from the card if None / absent.
@@ -151,15 +188,8 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
     version = html.escape(str(data.get("version") or data.get("ref") or ""))
     level = int(data.get("level", 0))
     cap_reason = str(data.get("level_cap_reason") or "")
-    # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its
-    # reason) or an unintentional one (skipped but not declared).
-    capped = data.get("level_cap_rung")
-    sk = data.get("skips", {}) or {}
-    if capped and capped in (sk.get("intentional") or {}):
-        cap_reason += f" · intentional: {sk['intentional'][capped]}"
-    elif capped and capped in (sk.get("unintentional") or []):
-        cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)"
     cap = html.escape(cap_reason)
+    sk = data.get("skips", {}) or {}
     color = level_color(level)
     flags = data.get("flags", {}) or {}
     flag_bits = []
@@ -175,7 +205,7 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
         if show_shot
         else '<div class="shot noshot">no screenshot</div>'
     )
-    rows = _stage_rows(data.get("stages", []))
+    rows = _stage_rows(data.get("stages", [])) + "\n" + _skip_rows(sk)
     return f"""<!doctype html><html><head><meta charset="utf-8"><style>
 *{{box-sizing:border-box}}
 body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;background:#0d1117;color:#c9d1d9}}
@@ -200,6 +230,7 @@ tr.stage td{{padding-top:.5rem;border-bottom:1px solid #30363d}}
 .test .tmark{{width:1.4rem;text-align:center}}
 .test .tname{{color:#c9d1d9;font-family:ui-monospace,monospace;font-size:.8rem}}
 .test .tms{{text-align:right;color:#8b949e;font-size:.74rem;width:5rem}}
+tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0;padding-bottom:.45rem;border-bottom:1px solid #21262d}}
 .shot{{width:360px;flex:none;border:1px solid #30363d;border-radius:8px;overflow:hidden;background:#0d1117}}
 .shot img{{width:100%;display:block}}
 .shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}}
-- 
2.49.0


From 39803407275585ae679e62e0e5a95d2ef5570c56 Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:42:57 +0000
Subject: [PATCH 6/7] test(card): cover _skip_rows (intentional green /
 unintentional amber)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 tests/unit/test_card.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py
index 7862038..f6ea3ac 100644
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@@ -66,6 +66,22 @@ def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
     assert "expected" not in gap
 
 
+def test_skip_rows_intentional_and_unintentional():
+    html_out = C._skip_rows(
+        {"intentional": {"backup_restore": "no persistent data"}, "unintentional": ["functional"]}
+    )
+    # intentional skip: labelled row (muted green) + the reason on its own line
+    assert "intentional skip" in html_out and C.SKIP_GREEN in html_out
+    assert "backup/restore" in html_out and "no persistent data" in html_out
+    # unintentional skip: amber row + prompt to declare/add coverage
+    assert "unintentional skip" in html_out and C.GAP_COLOR in html_out
+    assert "functional" in html_out and "EXPECTED_NA" in html_out
+
+
+def test_skip_rows_empty_when_no_skips():
+    assert C._skip_rows({"intentional": {}, "unintentional": []}) == ""
+
+
 def test_card_html_reports_level_verbatim():
     html = C.render_card_html(_data(level=2, cap="L3 backup/restore (data integrity) N/A"))
     assert "uptime-kuma" in html
-- 
2.49.0


From 46e2cdb93e8036a625a8693adc4389dc3882114a Mon Sep 17 00:00:00 2001
From: autonomic-bot <autonomic-bot@git.autonomic.zone>
Date: Tue, 9 Jun 2026 02:55:47 +0000
Subject: [PATCH 7/7] =?UTF-8?q?refactor(level):=20four=20essential=20rungs?=
 =?UTF-8?q?=20only=20=E2=80=94=20integration=20&=20recipe-local=20are=20op?=
 =?UTF-8?q?tional?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per operator: the level ladder is now the FOUR essential rungs every recipe is
held to — install, upgrade (essential), backup/restore, functional (top = L4).
Integration (SSO/OIDC) and recipe-local are OPTIONAL capabilities: they no longer
appear as level rungs or skip rows and never cap the level. SSO is still enforced
for the run VERDICT (unchanged in run_recipe_ci.py); it just doesn't affect the
level. derive_rungs simplified accordingly (drops declared/deps/sso/repo-local
inputs). custom-html-tiny's EXPECTED_NA is back to just backup_restore.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 runner/harness/card.py                |  6 +-
 runner/harness/level.py               | 32 ++++-----
 runner/harness/results.py             | 68 +++----------------
 runner/run_recipe_ci.py               |  6 +-
 tests/custom-html-tiny/recipe_meta.py | 13 ++--
 tests/unit/test_card.py               |  2 +-
 tests/unit/test_dashboard.py          |  2 +-
 tests/unit/test_level.py              | 41 ++++-------
 tests/unit/test_results.py            | 97 +++++----------------------
 9 files changed, 63 insertions(+), 204 deletions(-)

diff --git a/runner/harness/card.py b/runner/harness/card.py
index 21418dc..6f44d2a 100644
--- a/runner/harness/card.py
+++ b/runner/harness/card.py
@@ -141,14 +141,12 @@ def _stage_rows(stages: list[dict]) -> str:
     return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'
 
 
-# Friendly rung labels for the skip rows.
+# Friendly rung labels for the skip rows (the four essential rungs).
 RUNG_LABEL = {
     "install": "install",
     "upgrade": "upgrade",
     "backup_restore": "backup/restore",
     "functional": "functional",
-    "integration": "integration",
-    "recipe_local": "recipe-local",
 }
 SKIP_GREEN = "#57ab5a"  # muted green — an intentional skip reads like a pass (but labelled, never inflating)
 
@@ -241,7 +239,7 @@ tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0
 <div class="hd">{FLOWER_SVG}
 <div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div>
 <div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div>
-<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (6)"}</div>
+<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (4)"}</div>
 <div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div>
 <div class="flags">{"".join(flag_bits)}</div>
 </div></body></html>"""
diff --git a/runner/harness/level.py b/runner/harness/level.py
index f88d802..46f5ba6 100644
--- a/runner/harness/level.py
+++ b/runner/harness/level.py
@@ -5,37 +5,39 @@ YunoHost semantics: **a gap caps the level** — you only earn level L if every
 PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
 the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
 a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
-(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly
-this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
+— with a recorded reason so the level is *fair*, not inflated.
 
-The ladder (§4.1):
+The ladder is the FOUR essential rungs every recipe is held to:
   L0 — install failed / app never became healthy.
   L1 — Installs: deploys + passes health/readiness.
   L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
   L3 — Backup/restore: seeded data survives backup → wipe → restore.
   L4 — Functional: recipe-specific functional tests pass.
-  L5 — Integration: SSO/OIDC + cross-app integration tests pass.
-  L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged.
+
+Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL**
+capabilities — they are NOT part of the level ladder and never cap it. They still run when present
+(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a
+recipe without an SSO surface or without repo-local tests is simply not penalised on the level.
 
 This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
 test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
-(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into
-the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
+(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status
+dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
 
 Rung status vocabulary (each rung ∈ these three):
   "pass" — the rung was exercised and passed.
   "fail" — the rung was exercised and failed.
   "na"   — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
-           not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a
-           failure, but it DOES cap the climb (with a distinct cap_reason) so the level never
-           overstates what was actually verified.
+           not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct
+           cap_reason) so the level never overstates what was actually verified.
 """
 
 from __future__ import annotations
 
 # The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
-# did not pass. Each later rung requires every earlier rung to be a clean PASS.
-RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local")
+# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the
+# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple.
+RUNGS = ("install", "upgrade", "backup_restore", "functional")
 
 # Human-readable label per rung level, for cap_reason + the summary card.
 RUNG_LABEL = {
@@ -43,22 +45,20 @@ RUNG_LABEL = {
     2: "upgrade (prev published → PR)",
     3: "backup/restore (data integrity)",
     4: "functional (recipe-specific tests)",
-    5: "integration (SSO/OIDC + cross-app)",
-    6: "recipe-local (recipe repo tests/)",
 }
 
 VALID = {"pass", "fail", "na"}
 
 
 def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
-    """Map a rung-status dict → (level 0..6, cap_reason).
+    """Map a rung-status dict → (level 0..4, cap_reason).
 
     `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
     highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
     returned when the install rung itself is not "pass" (install failed / never healthy).
 
     cap_reason explains where the climb stopped:
-      - "" (empty) when the recipe earned the top rung (L6, full clean climb).
+      - "" (empty) when the recipe earned the top rung (L4, full clean climb).
       - "L<k> <label> FAILED" when a rung was exercised and failed.
       - "L<k> <label> N/A" when a rung does not apply to this recipe.
     Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
diff --git a/runner/harness/results.py b/runner/harness/results.py
index ec7ca7b..910e827 100644
--- a/runner/harness/results.py
+++ b/runner/harness/results.py
@@ -134,41 +134,24 @@ def collect_stages(records: list[dict]) -> list[dict]:
     return stages
 
 
-def _has_repo_local(records: list[dict]) -> bool:
-    return any(r.get("source") == "repo-local" for r in records)
-
-
-def _repo_local_passed(records: list[dict]) -> bool:
-    repo = [r for r in records if r.get("source") == "repo-local"]
-    return bool(repo) and all(r.get("rc", 1) == 0 for r in repo)
-
-
 def derive_rungs(
     results: dict[str, str],
     *,
     backup_capable: bool,
-    declared: list[str] | None,
-    deps_ready: bool,
-    sso_unverified: bool,
     has_custom: bool,
-    has_repo_local: bool,
-    repo_local_passed: bool,
 ) -> dict[str, str]:
-    """Translate the orchestrator's tier results + deps/SSO signals into the rung-status dict
-    harness.level consumes. Documented in DECISIONS.md (Phase 3). Conservative by design — never
-    reports a rung 'pass' it can't substantiate (cardinal guardrail: presentation never inflates).
+    """Translate the orchestrator's tier results into the rung-status dict harness.level consumes —
+    the FOUR essential rungs only. Conservative by design — never reports a rung 'pass' it can't
+    substantiate (cardinal guardrail: presentation never inflates).
 
       L1 install    : install tier pass.
       L2 upgrade    : upgrade tier (skip → N/A: only one published version).
       L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable).
-      L4 functional : the recipe-specific functional (non-deps) tests pass — the custom tier, minus
-                      its SSO/integration tests. N/A if the recipe has no custom tests at all.
-      L5 integration: SSO/OIDC + cross-app. Applies ONLY if the recipe declares deps (else N/A — the
-                      "no integration surface caps at L4" rule, §4.1). pass iff deps wired
-                      (deps_ready) and not sso_unverified and the custom tier didn't fail.
-      L6 recipe-loc : the recipe repo's own tests/ (repo-local source) ran and passed (N/A if none).
+      L4 functional : recipe-specific functional tests pass — the custom tier. N/A if none ran.
+
+    Integration (SSO/OIDC) and recipe-local are OPTIONAL and intentionally NOT rungs here — they
+    never cap the level (SSO is still enforced for the run VERDICT in run_recipe_ci.py).
     """
-    declared = declared or []
     rungs: dict[str, str] = {}
     rungs["install"] = level_mod.tier_to_rung(results.get("install"))
     rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade"))
@@ -177,33 +160,12 @@ def derive_rungs(
     )
 
     custom = results.get("custom")
-    # Functional rung (L4): the non-deps custom tests.
     if not has_custom or custom == "skip" or custom is None:
         rungs["functional"] = "na"
     elif custom == "fail":
-        # A custom test failed. With declared deps we cannot cheaply tell functional-vs-SSO apart, so
-        # conservatively fail the functional rung (caps at L3) — never inflate.
         rungs["functional"] = "fail"
     else:  # custom == "pass"
         rungs["functional"] = "pass"
-
-    # Integration rung (L5): only recipes with an SSO/integration surface (declared deps) can climb.
-    if not declared:
-        rungs["integration"] = "na"
-    elif sso_unverified or not deps_ready or custom == "fail":
-        # SSO not wired/verified, or a custom test failed → integration not verified.
-        rungs["integration"] = "fail"
-    elif custom == "pass":
-        rungs["integration"] = "pass"
-    else:
-        # declared deps but no custom tests ran — can't claim integration verified
-        rungs["integration"] = "na"
-
-    # Recipe-local rung (L6).
-    if not has_repo_local:
-        rungs["recipe_local"] = "na"
-    else:
-        rungs["recipe_local"] = "pass" if repo_local_passed else "fail"
     return rungs
 
 
@@ -235,9 +197,6 @@ def build_results(
     records: list[dict],
     results: dict[str, str],
     backup_capable: bool,
-    declared: list[str] | None,
-    deps_ready: bool,
-    sso_unverified: bool,
     clean_teardown: bool,
     no_secret_leak: bool,
     finished_ts: float | None,
@@ -247,20 +206,11 @@ def build_results(
 ) -> dict:
     """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
     stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
-    declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
+    declared intentional-skip map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
     accidentally-missing coverage."""
     stages = collect_stages(records)
     has_custom = any(r["tier"] == "custom" for r in records)
-    rungs = derive_rungs(
-        results,
-        backup_capable=backup_capable,
-        declared=declared,
-        deps_ready=deps_ready,
-        sso_unverified=sso_unverified,
-        has_custom=has_custom,
-        has_repo_local=_has_repo_local(records),
-        repo_local_passed=_repo_local_passed(records),
-    )
+    rungs = derive_rungs(results, backup_capable=backup_capable, has_custom=has_custom)
     lvl, cap_reason = level_mod.compute_level(rungs)
     # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
     # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py
index d187cae..b948087 100644
--- a/runner/run_recipe_ci.py
+++ b/runner/run_recipe_ci.py
@@ -1225,7 +1225,6 @@ def main() -> int:
     # a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ----
     data: dict | None = None
     try:
-        sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
         clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error
         data = results_mod.build_results(
             recipe=recipe,
@@ -1235,14 +1234,11 @@ def main() -> int:
             records=records,
             results=results,
             backup_capable=backup_cap,
-            declared=declared,
-            deps_ready=deps_ready,
-            sso_unverified=sso_unverified,
             clean_teardown=clean_teardown,
             no_secret_leak=True,  # narrowed below by an actual scan of the serialised artifact
             screenshot=screenshot_rel,  # Phase 3 U1 (R4): relative PNG name iff capture succeeded
             finished_ts=time.time(),
-            expected_na=meta.get("EXPECTED_NA"),  # declared intentional-N/A map (recipe_meta)
+            expected_na=meta.get("EXPECTED_NA"),  # declared intentional-skip map (recipe_meta)
         )
         # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
         blob = json.dumps(data)
diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py
index 509317e..0c61bd8 100644
--- a/tests/custom-html-tiny/recipe_meta.py
+++ b/tests/custom-html-tiny/recipe_meta.py
@@ -4,14 +4,13 @@
 DEPLOY_TIMEOUT = 120
 HTTP_TIMEOUT = 90
 
-# Rungs this recipe INTENTIONALLY skips, each with a reason. Any rung that is skipped (N/A) and is
-# NOT listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip
-# still caps the level either way — the harness never claims a rung it did not verify; this only
-# records that the skip is deliberate. custom-html-tiny is a stateless static-web-server, so:
+# Rungs this recipe INTENTIONALLY skips, each with a reason. Any essential rung skipped (N/A) and NOT
+# listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip still
+# caps the level either way — the harness never claims a rung it did not verify; this only records
+# that the skip is deliberate. (The level ladder is the four essential rungs install/upgrade/
+# backup_restore/functional; integration + recipe-local are optional and not leveled.)
+# custom-html-tiny is a stateless static-web-server, so it has no backup surface:
 EXPECTED_NA = {
     "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
     "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
-    "integration": "no SSO/OIDC or cross-app surface — a static file server has no auth integration",
-    "recipe_local": "the upstream recipe ships no tests/ of its own; coverage is the cc-ci generic "
-    "install tier + the functional serve test",
 }
diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py
index f6ea3ac..3cf4e4d 100644
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@@ -14,7 +14,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
 from harness import card as C  # noqa: E402
 
 
-def _data(level=4, cap="L5 integration (SSO/OIDC + cross-app) N/A"):
+def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
     return {
         "recipe": "uptime-kuma",
         "version": "1.23.0",
diff --git a/tests/unit/test_dashboard.py b/tests/unit/test_dashboard.py
index 8d50d73..944f3e7 100644
--- a/tests/unit/test_dashboard.py
+++ b/tests/unit/test_dashboard.py
@@ -24,7 +24,7 @@ import dashboard  # noqa: E402
 def _row(**kw):
     base = {
         "recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
-        "version": "db9a95024e9d", "level": 4, "level_cap_reason": "L5 integration N/A",
+        "version": "db9a95024e9d", "level": 4, "level_cap_reason": "",
         "has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
         "finished": 0, "url": "https://drone.x/cc-ci/4",
     }
diff --git a/tests/unit/test_level.py b/tests/unit/test_level.py
index bb8994a..529e373 100644
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@@ -19,33 +19,23 @@ def _rungs(
     upgrade="pass",
     backup_restore="pass",
     functional="pass",
-    integration="pass",
-    recipe_local="pass",
 ):
     return {
         "install": install,
         "upgrade": upgrade,
         "backup_restore": backup_restore,
         "functional": functional,
-        "integration": integration,
-        "recipe_local": recipe_local,
     }
 
 
-# ---- the U0 gate: L4-pass and L2-cap ----
+# ---- the ladder: four essential rungs, top is L4 (functional) ----
 
 
-def test_full_clean_climb_to_L6():
+def test_full_clean_climb_to_L4():
+    # All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
     lvl, reason = L.compute_level(_rungs())
-    assert lvl == 6
-    assert reason == ""
-
-
-def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
-    # GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
-    lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
     assert lvl == 4
-    assert "L5" in reason and "N/A" in reason
+    assert reason == ""
 
 
 def test_fails_at_L2_capped_at_L1():
@@ -69,34 +59,27 @@ def test_install_fail_is_L0():
 
 def test_higher_pass_does_not_rescue_lower_na():
     # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
-    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
+    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
     assert lvl == 2
     assert "L3" in reason and "N/A" in reason
 
 
 def test_upgrade_na_caps_at_L1():
-    # only one published version → no upgrade possible → N/A caps at L1.
+    # only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
     lvl, reason = L.compute_level(_rungs(upgrade="na"))
     assert lvl == 1
     assert "L2" in reason and "N/A" in reason
 
 
-def test_integration_fail_caps_at_L4():
-    # SSO declared but unverified (failed) → integration rung fails → cap at L4.
-    lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na"))
-    assert lvl == 4
-    assert "L5" in reason and "FAILED" in reason
-
-
-def test_recipe_local_na_caps_at_L5():
-    # SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
-    lvl, reason = L.compute_level(_rungs(recipe_local="na"))
-    assert lvl == 5
-    assert "L6" in reason and "N/A" in reason
+def test_functional_na_caps_at_L3():
+    # no recipe-specific functional tests → functional N/A caps at L3.
+    lvl, reason = L.compute_level(_rungs(functional="na"))
+    assert lvl == 3
+    assert "L4" in reason and "N/A" in reason
 
 
 def test_functional_fail_caps_at_L3():
-    lvl, reason = L.compute_level(_rungs(functional="fail", integration="na"))
+    lvl, reason = L.compute_level(_rungs(functional="fail"))
     assert lvl == 3
     assert "L4" in reason and "FAILED" in reason
 
diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py
index f034911..f4059d6 100644
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@@ -105,83 +105,31 @@ def _results(**kw):
     return base
 
 
-def test_derive_rungs_full_stateful_sso():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=["keycloak"],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
+def test_derive_rungs_full_climb_four_essential():
+    rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
+    # only the four essential rungs — integration/recipe-local are optional, not produced here.
     assert rungs == {
         "install": "pass",
         "upgrade": "pass",
         "backup_restore": "pass",
         "functional": "pass",
-        "integration": "pass",
-        "recipe_local": "na",
     }
 
 
-def test_derive_rungs_no_sso_surface_is_integration_na():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
-    assert rungs["integration"] == "na"
-    assert rungs["functional"] == "pass"
-
-
-def test_derive_rungs_stateless_backup_na():
+def test_derive_rungs_stateless_backup_and_functional_na():
     rungs = R.derive_rungs(
         _results(backup="skip", restore="skip", custom="skip"),
         backup_capable=False,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
         has_custom=False,
-        has_repo_local=False,
-        repo_local_passed=False,
     )
     assert rungs["backup_restore"] == "na"
     assert rungs["functional"] == "na"
+    assert "integration" not in rungs and "recipe_local" not in rungs
 
 
-def test_derive_rungs_sso_unverified_is_integration_fail():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=["keycloak"],
-        deps_ready=False,
-        sso_unverified=True,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
-    assert rungs["integration"] == "fail"
-
-
-def test_derive_rungs_repo_local_pass():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=True,
-        repo_local_passed=True,
-    )
-    assert rungs["recipe_local"] == "pass"
+def test_derive_rungs_functional_fail():
+    rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
+    assert rungs["functional"] == "fail"
 
 
 # ---- build_results: end-to-end incl level + flags ----
@@ -212,16 +160,13 @@ def test_build_results_level_and_flags(tmp_path):
         records=recs,
         results=_results(),
         backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
         clean_teardown=True,
         no_secret_leak=True,
         finished_ts=1234.0,
     )
-    # stateful, functional pass, no SSO surface, no repo-local → caps at L4
+    # all four essential rungs pass → full climb to L4 (the top), no cap
     assert data["level"] == 4
-    assert "L5" in data["level_cap_reason"]
+    assert data["level_cap_reason"] == ""
     assert data["recipe"] == "hedgedoc"
     assert data["ref"] == "deadbeefcafe"
     assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
@@ -246,9 +191,6 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
         records=recs,
         results=_results(upgrade="fail"),
         backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
         clean_teardown=True,
         no_secret_leak=True,
         finished_ts=0.0,
@@ -266,8 +208,6 @@ def _rungs(**kw):
         "upgrade": "pass",
         "backup_restore": "pass",
         "functional": "pass",
-        "integration": "na",
-        "recipe_local": "na",
     }
     base.update(kw)
     return base
@@ -276,16 +216,16 @@ def _rungs(**kw):
 def test_skips_intentional_vs_unintentional():
     rungs = _rungs(backup_restore="na", functional="na")
     sk = R.skips(rungs, {"backup_restore": "stateless static server"})
-    # backup_restore is declared (intentional, with reason); everything else skipped is unintentional.
+    # backup_restore is declared (intentional, with reason); functional is skipped but undeclared.
     assert sk["intentional"] == {"backup_restore": "stateless static server"}
-    assert sk["unintentional"] == ["functional", "integration", "recipe_local"]
+    assert sk["unintentional"] == ["functional"]
 
 
 def test_skips_none_declared_all_unintentional():
     rungs = _rungs(backup_restore="na")
     sk = R.skips(rungs, None)
     assert sk["intentional"] == {}
-    assert sk["unintentional"] == ["backup_restore", "integration", "recipe_local"]
+    assert sk["unintentional"] == ["backup_restore"]
 
 
 def test_skips_declaration_only_counts_when_actually_skipped():
@@ -323,17 +263,10 @@ def test_build_results_threads_expected_na(tmp_path):
         records=recs,
         results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
         backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
         clean_teardown=True,
         no_secret_leak=True,
         finished_ts=0.0,
-        expected_na={
-            "backup_restore": "stateless static file server",
-            "integration": "no SSO surface",
-            "recipe_local": "no upstream tests/",
-        },
+        expected_na={"backup_restore": "stateless static file server"},
     )
     # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
     # the skip caps the climb — but it's the declared (intentional) rung that capped.
@@ -342,7 +275,7 @@ def test_build_results_threads_expected_na(tmp_path):
     assert data["level_cap_rung"] == "backup_restore"
     assert data["rungs"]["functional"] == "pass"
     assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
-    assert data["skips"]["unintentional"] == []  # every skip accounted for → fully clean
+    assert data["skips"]["unintentional"] == []  # backup_restore declared; functional passed → clean
 
 
 def test_write_results_roundtrip(tmp_path):
-- 
2.49.0