feat(harness): intentional skips + custom-html-tiny functional test; 4-rung ladder (#6)

Declare intentional skips + custom-html-tiny functional test; 4-rung level ladder.

- recipe_meta.EXPECTED_NA = {rung: reason} lists the intentionally skipped rungs; any essential rung that is skipped but not listed is unintentional. Skips still cap the level (they never inflate it). results.json gains skips: {intentional, unintentional} and level_cap_rung.
- The level ladder is the four essential rungs (install, upgrade, backup/restore, functional; top = L4). integration and recipe-local are optional and not leveled (SSO is still enforced for the run verdict, unchanged).
- The card shows skipped rungs as INTENTIONAL SKIP (green, reason below) / UNINTENTIONAL SKIP (amber); the level badge gains a third segment reading "expected" or "gap?".
- custom-html-tiny: functional serve test (exact-byte round-trip + 404); declares backup_restore intentionally skipped (stateless static server).

Independently verified by the adversary: 138 unit tests pass cold; live full-stage run on custom-html-tiny is green (upgrade tier ran; level 2; correct skips/badge); clean teardown.
2026-06-09 03:12:11 +00:00
parent f5a6f7196f
commit c51cd84159
10 changed files with 392 additions and 187 deletions
--- a/tests/unit/test_card.py
+++ b/tests/unit/test_card.py
@ -14,7 +14,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
 from harness import card as C  # noqa: E402


-def _data(level=4, cap="L5 integration (SSO/OIDC + cross-app) N/A"):
+def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
    return {
        "recipe": "uptime-kuma",
        "version": "1.23.0",
@ -51,6 +51,35 @@ def test_badge_svg_wellformed():
    assert svg.startswith("<svg") and svg.endswith("</svg>")
    assert "level 4" in svg
    assert C.level_color(4) in svg
+    # plain cap (no intent) → two-box badge, no third segment
+    assert "expected" not in svg and "gap?" not in svg
+
+
+def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
+    # an intentional (declared) skip capped the climb → muted "expected" third segment
+    exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
+    assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
+    assert "gap?" not in exp
+    # an unintentional skip (not declared) → amber "gap?" third segment
+    gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
+    assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
+    assert "expected" not in gap
+
+
+def test_skip_rows_intentional_and_unintentional():
+    html_out = C._skip_rows(
+        {"intentional": {"backup_restore": "no persistent data"}, "unintentional": ["functional"]}
+    )
+    # intentional skip: labelled row (muted green) + the reason on its own line
+    assert "intentional skip" in html_out and C.SKIP_GREEN in html_out
+    assert "backup/restore" in html_out and "no persistent data" in html_out
+    # unintentional skip: amber row + prompt to declare/add coverage
+    assert "unintentional skip" in html_out and C.GAP_COLOR in html_out
+    assert "functional" in html_out and "EXPECTED_NA" in html_out
+
+
+def test_skip_rows_empty_when_no_skips():
+    assert C._skip_rows({"intentional": {}, "unintentional": []}) == ""


 def test_card_html_reports_level_verbatim():
--- a/tests/unit/test_dashboard.py
+++ b/tests/unit/test_dashboard.py
@ -24,7 +24,7 @@ import dashboard  # noqa: E402
 def _row(**kw):
    base = {
        "recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
-        "version": "db9a95024e9d", "level": 4, "level_cap_reason": "L5 integration N/A",
+        "version": "db9a95024e9d", "level": 4, "level_cap_reason": "",
        "has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
        "finished": 0, "url": "https://drone.x/cc-ci/4",
    }
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@ -19,33 +19,23 @@ def _rungs(
    upgrade="pass",
    backup_restore="pass",
    functional="pass",
-    integration="pass",
-    recipe_local="pass",
 ):
    return {
        "install": install,
        "upgrade": upgrade,
        "backup_restore": backup_restore,
        "functional": functional,
-        "integration": integration,
-        "recipe_local": recipe_local,
    }


-# ---- the U0 gate: L4-pass and L2-cap ----
+# ---- the ladder: four essential rungs, top is L4 (functional) ----


-def test_full_clean_climb_to_L6():
+def test_full_clean_climb_to_L4():
+    # All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
    lvl, reason = L.compute_level(_rungs())
-    assert lvl == 6
-    assert reason == ""
-
-
-def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
-    # GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
-    lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
    assert lvl == 4
-    assert "L5" in reason and "N/A" in reason
+    assert reason == ""


 def test_fails_at_L2_capped_at_L1():
@ -69,34 +59,27 @@ def test_install_fail_is_L0():

 def test_higher_pass_does_not_rescue_lower_na():
    # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
-    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
+    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
    assert lvl == 2
    assert "L3" in reason and "N/A" in reason


 def test_upgrade_na_caps_at_L1():
-    # only one published version → no upgrade possible → N/A caps at L1.
+    # only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
    lvl, reason = L.compute_level(_rungs(upgrade="na"))
    assert lvl == 1
    assert "L2" in reason and "N/A" in reason


-def test_integration_fail_caps_at_L4():
-    # SSO declared but unverified (failed) → integration rung fails → cap at L4.
-    lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na"))
-    assert lvl == 4
-    assert "L5" in reason and "FAILED" in reason
-
-
-def test_recipe_local_na_caps_at_L5():
-    # SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
-    lvl, reason = L.compute_level(_rungs(recipe_local="na"))
-    assert lvl == 5
-    assert "L6" in reason and "N/A" in reason
+def test_functional_na_caps_at_L3():
+    # no recipe-specific functional tests → functional N/A caps at L3.
+    lvl, reason = L.compute_level(_rungs(functional="na"))
+    assert lvl == 3
+    assert "L4" in reason and "N/A" in reason


 def test_functional_fail_caps_at_L3():
-    lvl, reason = L.compute_level(_rungs(functional="fail", integration="na"))
+    lvl, reason = L.compute_level(_rungs(functional="fail"))
    assert lvl == 3
    assert "L4" in reason and "FAILED" in reason

--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@ -105,83 +105,31 @@ def _results(**kw):
    return base


-def test_derive_rungs_full_stateful_sso():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=["keycloak"],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
+def test_derive_rungs_full_climb_four_essential():
+    rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
+    # only the four essential rungs — integration/recipe-local are optional, not produced here.
    assert rungs == {
        "install": "pass",
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
-        "integration": "pass",
-        "recipe_local": "na",
    }


-def test_derive_rungs_no_sso_surface_is_integration_na():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
-    assert rungs["integration"] == "na"
-    assert rungs["functional"] == "pass"
-
-
-def test_derive_rungs_stateless_backup_na():
+def test_derive_rungs_stateless_backup_and_functional_na():
    rungs = R.derive_rungs(
        _results(backup="skip", restore="skip", custom="skip"),
        backup_capable=False,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
        has_custom=False,
-        has_repo_local=False,
-        repo_local_passed=False,
    )
    assert rungs["backup_restore"] == "na"
    assert rungs["functional"] == "na"
+    assert "integration" not in rungs and "recipe_local" not in rungs


-def test_derive_rungs_sso_unverified_is_integration_fail():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=["keycloak"],
-        deps_ready=False,
-        sso_unverified=True,
-        has_custom=True,
-        has_repo_local=False,
-        repo_local_passed=False,
-    )
-    assert rungs["integration"] == "fail"
-
-
-def test_derive_rungs_repo_local_pass():
-    rungs = R.derive_rungs(
-        _results(),
-        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
-        has_custom=True,
-        has_repo_local=True,
-        repo_local_passed=True,
-    )
-    assert rungs["recipe_local"] == "pass"
+def test_derive_rungs_functional_fail():
+    rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
+    assert rungs["functional"] == "fail"


 # ---- build_results: end-to-end incl level + flags ----
@ -212,16 +160,13 @@ def test_build_results_level_and_flags(tmp_path):
        records=recs,
        results=_results(),
        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=1234.0,
    )
-    # stateful, functional pass, no SSO surface, no repo-local → caps at L4
+    # all four essential rungs pass → full climb to L4 (the top), no cap
    assert data["level"] == 4
-    assert "L5" in data["level_cap_reason"]
+    assert data["level_cap_reason"] == ""
    assert data["recipe"] == "hedgedoc"
    assert data["ref"] == "deadbeefcafe"
    assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
@ -246,9 +191,6 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
        records=recs,
        results=_results(upgrade="fail"),
        backup_capable=True,
-        declared=[],
-        deps_ready=True,
-        sso_unverified=False,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
@ -257,6 +199,85 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
    assert "L2" in data["level_cap_reason"]


+# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
+
+
+def _rungs(**kw):
+    base = {
+        "install": "pass",
+        "upgrade": "pass",
+        "backup_restore": "pass",
+        "functional": "pass",
+    }
+    base.update(kw)
+    return base
+
+
+def test_skips_intentional_vs_unintentional():
+    rungs = _rungs(backup_restore="na", functional="na")
+    sk = R.skips(rungs, {"backup_restore": "stateless static server"})
+    # backup_restore is declared (intentional, with reason); functional skipped but not declared.
+    assert sk["intentional"] == {"backup_restore": "stateless static server"}
+    assert sk["unintentional"] == ["functional"]
+
+
+def test_skips_none_declared_all_unintentional():
+    rungs = _rungs(backup_restore="na")
+    sk = R.skips(rungs, None)
+    assert sk["intentional"] == {}
+    assert sk["unintentional"] == ["backup_restore"]
+
+
+def test_skips_declaration_only_counts_when_actually_skipped():
+    # backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert.
+    rungs = _rungs(backup_restore="pass")
+    sk = R.skips(rungs, {"backup_restore": "reason"})
+    assert "backup_restore" not in sk["intentional"]
+    assert "backup_restore" not in sk["unintentional"]
+
+
+def test_build_results_threads_expected_na(tmp_path):
+    # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
+    # backup surface (backup_restore declared intentionally skipped).
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+        },
+        {
+            "tier": "custom",
+            "source": "cc-ci",
+            "file": "c/test_serves_content.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
+        },
+    ]
+    data = R.build_results(
+        recipe="custom-html-tiny",
+        version="1.1.0",
+        pr="0",
+        ref=None,
+        records=recs,
+        results=_results(backup="skip", restore="skip"),  # custom=pass (default) → functional pass
+        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=0.0,
+        expected_na={"backup_restore": "stateless static file server"},
+    )
+    # The backup_restore skip still caps the level at L2 (a skip never inflates it), even though
+    # functional passes above it; the capping rung is the declared (intentional) one.
+    assert data["level"] == 2
+    assert "L3" in data["level_cap_reason"]
+    assert data["level_cap_rung"] == "backup_restore"
+    assert data["rungs"]["functional"] == "pass"
+    assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
+    assert data["skips"]["unintentional"] == []  # backup_restore declared; functional passed → clean
+
+
 def test_write_results_roundtrip(tmp_path):
    data = {"run_id": "42", "level": 3, "stages": []}
    path = R.write_results(data, runs_dir_override=str(tmp_path))