feat(harness): intentional skips + custom-html-tiny functional test; 4-rung ladder (#6)
Some checks failed
continuous-integration/drone/push Build is failing

Declare intentional skips + custom-html-tiny functional test; 4-rung level ladder

- recipe_meta.EXPECTED_NA = {rung: reason} lists intentionally-skipped rungs; any
  essential rung skipped and not listed is unintentional. Skips still cap the level
  (never inflate). results.json: skips:{intentional,unintentional} + level_cap_rung.
- Level ladder = the four essential rungs (install, upgrade, backup/restore,
  functional; top = L4). integration & recipe-local are optional, not leveled
  (SSO still enforced for the run verdict, unchanged).
- Card shows skipped rungs as INTENTIONAL SKIP (green, with the reason below) or
  UNINTENTIONAL SKIP (amber); the level badge gains a third segment reading
  "expected" (intentional skip) or "gap?" (unintentional skip).
- custom-html-tiny: functional serve test (exact-byte round-trip + 404); declares
  backup_restore intentionally skipped (stateless static server).

Independently verified by the adversary: 138 unit tests pass cold; live full-stage
run on custom-html-tiny green (upgrade tier ran; level 2; correct skips/badge);
clean teardown.
This commit is contained in:
2026-06-09 03:12:11 +00:00
parent f5a6f7196f
commit c51cd84159
10 changed files with 392 additions and 187 deletions

View File

@ -14,7 +14,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
from harness import card as C # noqa: E402
def _data(level=4, cap="L5 integration (SSO/OIDC + cross-app) N/A"):
def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
return {
"recipe": "uptime-kuma",
"version": "1.23.0",
@ -51,6 +51,35 @@ def test_badge_svg_wellformed():
assert svg.startswith("<svg") and svg.endswith("</svg>")
assert "level 4" in svg
assert C.level_color(4) in svg
# plain cap (no intent) → two-box badge, no third segment
assert "expected" not in svg and "gap?" not in svg
def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
    """The badge's third segment tells a declared skip apart from an undeclared one."""
    # A declared (intentional) skip capped the climb: muted "expected" segment only.
    intentional_svg = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
    assert "level 2" in intentional_svg
    assert "expected" in intentional_svg
    assert C.EXPECT_COLOR in intentional_svg
    assert "gap?" not in intentional_svg

    # An undeclared (unintentional) skip: amber "gap?" segment only.
    unintentional_svg = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
    assert "level 2" in unintentional_svg
    assert "gap?" in unintentional_svg
    assert C.GAP_COLOR in unintentional_svg
    assert "expected" not in unintentional_svg
def test_skip_rows_intentional_and_unintentional():
    """Both kinds of skip are rendered, each with its own label and colour."""
    skip_summary = {
        "intentional": {"backup_restore": "no persistent data"},
        "unintentional": ["functional"],
    }
    rendered = C._skip_rows(skip_summary)

    # Intentional skip: labelled row (muted green) plus the declared reason.
    assert "intentional skip" in rendered
    assert C.SKIP_GREEN in rendered
    assert "backup/restore" in rendered
    assert "no persistent data" in rendered

    # Unintentional skip: amber row plus the prompt to declare it / add coverage.
    assert "unintentional skip" in rendered
    assert C.GAP_COLOR in rendered
    assert "functional" in rendered
    assert "EXPECTED_NA" in rendered
def test_skip_rows_empty_when_no_skips():
    """With nothing skipped, no skip rows are emitted at all."""
    no_skips = {"intentional": {}, "unintentional": []}
    rendered = C._skip_rows(no_skips)
    assert rendered == ""
def test_card_html_reports_level_verbatim():

View File

@ -24,7 +24,7 @@ import dashboard # noqa: E402
def _row(**kw):
base = {
"recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
"version": "db9a95024e9d", "level": 4, "level_cap_reason": "L5 integration N/A",
"version": "db9a95024e9d", "level": 4, "level_cap_reason": "",
"has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
"finished": 0, "url": "https://drone.x/cc-ci/4",
}

View File

@ -19,33 +19,23 @@ def _rungs(
upgrade="pass",
backup_restore="pass",
functional="pass",
integration="pass",
recipe_local="pass",
):
return {
"install": install,
"upgrade": upgrade,
"backup_restore": backup_restore,
"functional": functional,
"integration": integration,
"recipe_local": recipe_local,
}
# ---- the U0 gate: L4-pass and L2-cap ----
# ---- the ladder: four essential rungs, top is L4 (functional) ----
def test_full_clean_climb_to_L6():
def test_full_clean_climb_to_L4():
# All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
lvl, reason = L.compute_level(_rungs())
assert lvl == 6
assert reason == ""
def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
# GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
assert lvl == 4
assert "L5" in reason and "N/A" in reason
assert reason == ""
def test_fails_at_L2_capped_at_L1():
@ -69,34 +59,27 @@ def test_install_fail_is_L0():
def test_higher_pass_does_not_rescue_lower_na():
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
assert lvl == 2
assert "L3" in reason and "N/A" in reason
def test_upgrade_na_caps_at_L1():
    """Upgrade N/A (only one published version, so no upgrade is possible) caps at L1.

    Upgrade is an essential rung, so its N/A stops the climb.
    """
    level, cap_reason = L.compute_level(_rungs(upgrade="na"))
    assert level == 1
    assert "L2" in cap_reason
    assert "N/A" in cap_reason
def test_integration_fail_caps_at_L4():
# SSO declared but unverified (failed) → integration rung fails → cap at L4.
lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na"))
assert lvl == 4
assert "L5" in reason and "FAILED" in reason
def test_recipe_local_na_caps_at_L5():
# SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
lvl, reason = L.compute_level(_rungs(recipe_local="na"))
assert lvl == 5
assert "L6" in reason and "N/A" in reason
def test_functional_na_caps_at_L3():
    """No recipe-specific functional tests: functional N/A caps the level at L3."""
    level, cap_reason = L.compute_level(_rungs(functional="na"))
    assert level == 3
    assert "L4" in cap_reason
    assert "N/A" in cap_reason
def test_functional_fail_caps_at_L3():
lvl, reason = L.compute_level(_rungs(functional="fail", integration="na"))
lvl, reason = L.compute_level(_rungs(functional="fail"))
assert lvl == 3
assert "L4" in reason and "FAILED" in reason

View File

@ -105,83 +105,31 @@ def _results(**kw):
return base
def test_derive_rungs_full_stateful_sso():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=["keycloak"],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
def test_derive_rungs_full_climb_four_essential():
rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
# only the four essential rungs — integration/recipe-local are optional, not produced here.
assert rungs == {
"install": "pass",
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
"integration": "pass",
"recipe_local": "na",
}
def test_derive_rungs_no_sso_surface_is_integration_na():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs["integration"] == "na"
assert rungs["functional"] == "pass"
def test_derive_rungs_stateless_backup_na():
def test_derive_rungs_stateless_backup_and_functional_na():
rungs = R.derive_rungs(
_results(backup="skip", restore="skip", custom="skip"),
backup_capable=False,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=False,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs["backup_restore"] == "na"
assert rungs["functional"] == "na"
assert "integration" not in rungs and "recipe_local" not in rungs
def test_derive_rungs_sso_unverified_is_integration_fail():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=["keycloak"],
deps_ready=False,
sso_unverified=True,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs["integration"] == "fail"
def test_derive_rungs_repo_local_pass():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=True,
repo_local_passed=True,
)
assert rungs["recipe_local"] == "pass"
def test_derive_rungs_functional_fail():
    """A failing custom result is derived as a failed functional rung."""
    failing_results = _results(custom="fail")
    derived = R.derive_rungs(failing_results, backup_capable=True, has_custom=True)
    assert derived["functional"] == "fail"
# ---- build_results: end-to-end incl level + flags ----
@ -212,16 +160,13 @@ def test_build_results_level_and_flags(tmp_path):
records=recs,
results=_results(),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
clean_teardown=True,
no_secret_leak=True,
finished_ts=1234.0,
)
# stateful, functional pass, no SSO surface, no repo-local → caps at L4
# all four essential rungs pass → full climb to L4 (the top), no cap
assert data["level"] == 4
assert "L5" in data["level_cap_reason"]
assert data["level_cap_reason"] == ""
assert data["recipe"] == "hedgedoc"
assert data["ref"] == "deadbeefcafe"
assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
@ -246,9 +191,6 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
records=recs,
results=_results(upgrade="fail"),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
@ -257,6 +199,85 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
assert "L2" in data["level_cap_reason"]
# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
def _rungs(**kw):
base = {
"install": "pass",
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
}
base.update(kw)
return base
def test_skips_intentional_vs_unintentional():
    """A declared skipped rung is intentional; an undeclared skipped rung is not."""
    skipped_rungs = _rungs(backup_restore="na", functional="na")
    declared = {"backup_restore": "stateless static server"}
    result = R.skips(skipped_rungs, declared)

    # backup_restore is declared (intentional, with its reason).
    assert result["intentional"] == {"backup_restore": "stateless static server"}
    # functional was skipped but never declared.
    assert result["unintentional"] == ["functional"]
def test_skips_none_declared_all_unintentional():
    """With no declaration (None), every skipped rung is unintentional."""
    result = R.skips(_rungs(backup_restore="na"), None)
    assert result["intentional"] == {}
    assert result["unintentional"] == ["backup_restore"]
def test_skips_declaration_only_counts_when_actually_skipped():
    """A declaration for a rung that actually ran is inert."""
    # backup_restore ran and passed, so it is not a skip of either kind.
    ran_rungs = _rungs(backup_restore="pass")
    result = R.skips(ran_rungs, {"backup_restore": "reason"})
    for bucket in ("intentional", "unintentional"):
        assert "backup_restore" not in result[bucket]
def test_build_results_threads_expected_na(tmp_path):
    """build_results carries the declared skip into level, cap rung and skips.

    Mirrors custom-html-tiny post-change: install plus a passing functional
    (custom) test, but no backup surface (backup_restore declared intentionally
    skipped).
    """
    install_record = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    custom_record = {
        "tier": "custom",
        "source": "cc-ci",
        "file": "c/test_serves_content.py",
        "rc": 0,
        "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
    }
    # custom=pass (default) → functional pass
    stage_results = _results(backup="skip", restore="skip")

    data = R.build_results(
        recipe="custom-html-tiny",
        version="1.1.0",
        pr="0",
        ref=None,
        records=[install_record, custom_record],
        results=stage_results,
        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        expected_na={"backup_restore": "stateless static file server"},
    )

    # The backup_restore skip still caps at L2 (never inflates), even though
    # functional passes above it; the capping rung is the declared one.
    assert data["level"] == 2
    assert "L3" in data["level_cap_reason"]
    assert data["level_cap_rung"] == "backup_restore"
    assert data["rungs"]["functional"] == "pass"

    skip_summary = data["skips"]
    assert skip_summary["intentional"]["backup_restore"] == "stateless static file server"
    # backup_restore declared; functional passed → nothing unintentional
    assert skip_summary["unintentional"] == []
def test_write_results_roundtrip(tmp_path):
data = {"run_id": "42", "level": 3, "stages": []}
path = R.write_results(data, runs_dir_override=str(tmp_path))