Revert "feat(lvl5): P1 — 5-rung ladder (L5=abra recipe lint) + de-capped level semantics"
All checks were successful
continuous-integration/drone/push Build is passing

This reverts commit e219a7891d.
This commit is contained in:
autonomic-bot
2026-06-11 07:46:57 +00:00
parent 589943f46e
commit cd62743055
12 changed files with 336 additions and 1065 deletions

View File

@ -1,11 +1,8 @@
"""Unit tests for the pure card/badge renderers (harness.card) — phase lvl5 semantics.
"""Unit tests for the pure card/badge renderers (harness.card), Phase 3 U2 (R3/R6).
Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised
live). The cardinal check: the card REPORTS the data verbatim — level/marks come straight from the
dict, never recomputed — the badge is NUMBER + COLOUR ONLY, and the per-rung table rows (incl.
intentional-skip / unverified) are the sole carrier of "why isn't the level higher". Old schema-1
artifacts (4-rung ladder, cap fields present) must render without error and without relabeling.
Run cold: cc-ci-run -m pytest tests/unit/test_card.py -q
Covers the deterministic HTML + SVG string builders (the PNG step needs Playwright + is exercised in
the U2 live demo). The cardinal check: the card REPORTS the data verbatim — level/marks come straight
from the dict, never recomputed. Run cold: cc-ci-run -m pytest tests/unit/test_card.py -q
"""
from __future__ import annotations
@ -17,19 +14,12 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
from harness import card as C # noqa: E402
def _data(level=5, **kw):
d = {
"schema": 2,
def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
return {
"recipe": "uptime-kuma",
"version": "1.23.0",
"level": level,
"rungs": {
"install": "pass",
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
"lint": "pass",
},
"level_cap_reason": cap,
"flags": {"clean_teardown": True, "no_secret_leak": True},
"screenshot": "screenshot.png",
"stages": [
@ -46,54 +36,46 @@ def _data(level=5, **kw):
{"name": "test_broken", "status": "fail", "ms": 5},
],
},
{
"name": "lint",
"status": "pass",
"tests": [{"name": "abra recipe lint", "status": "pass", "ms": 0}],
},
],
}
d.update(kw)
return d
def test_level_color_ramp():
# 0 (red) … 5 (bright green — full 5-rung climb); unknown → grey.
assert C.level_color(0) != C.level_color(5)
assert C.level_color(5) == "#3fb950"
assert C.level_color(99) == "#8b949e"
assert C.level_color(0) != C.level_color(6)
assert C.level_color(6) == "#3fb950"
assert C.level_color(99) == "#8b949e" # unknown → grey
def test_badge_svg_is_number_and_color_only():
def test_badge_svg_wellformed():
svg = C.level_badge_svg(4)
assert svg.startswith("<svg") and svg.endswith("</svg>")
assert "level 4" in svg
assert C.level_color(4) in svg
# operator-specified (phase lvl5): NOTHING but the level on the badge — no annotation
# segment of any kind, whatever the rung situation.
assert "expected" not in svg and "gap?" not in svg and "skip" not in svg
# plain cap (no intent) → two-box badge, no third segment
assert "expected" not in svg and "gap?" not in svg
def test_badge_svg_level5():
svg = C.level_badge_svg(5)
assert "level 5" in svg and "#3fb950" in svg
def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
# an intentional (declared) skip capped the climb → muted "expected" third segment
exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
assert "gap?" not in exp
# an unintentional skip (not declared) → amber "gap?" third segment
gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
assert "expected" not in gap
def test_skip_rows_intentional_and_unverified():
def test_skip_rows_intentional_and_unintentional():
html_out = C._skip_rows(
{"intentional": {"backup_restore": "no persistent data"}, "unintentional": ["functional"]}
)
# intentional skip: labelled row (muted green) + the reason on its own line
assert "intentional skip" in html_out and C.SKIP_GREEN in html_out
assert "backup/restore" in html_out and "no persistent data" in html_out
# unverified rung: amber row + the blocks-the-level explanation
assert "unverified" in html_out and C.GAP_COLOR in html_out
assert "functional" in html_out and "cannot rise above" in html_out
def test_skip_rows_lint_label_known():
html_out = C._skip_rows({"intentional": {}, "unintentional": ["lint"]})
assert ">lint<" in html_out.replace("</b>", "<") # rung label renders, not a KeyError
# unintentional skip: amber row + prompt to declare/add coverage
assert "unintentional skip" in html_out and C.GAP_COLOR in html_out
assert "functional" in html_out and "EXPECTED_NA" in html_out
def test_skip_rows_empty_when_no_skips():
@ -101,68 +83,22 @@ def test_skip_rows_empty_when_no_skips():
def test_card_html_reports_level_verbatim():
html = C.render_card_html(_data(level=2))
html = C.render_card_html(_data(level=2, cap="L3 backup/restore (data integrity) N/A"))
assert "uptime-kuma" in html
assert "1.23.0" in html
# the level shown is exactly what was passed (no recompute)
assert ">2<" in html
assert "level 2 of 5" in html
assert "L3 backup/restore" in html
assert C.level_color(2) in html
def test_card_html_no_cap_language():
html = C.render_card_html(_data())
assert "capped" not in html and "cap_reason" not in html
assert "level 5 of 5" in html
def test_card_html_old_schema1_artifact_renders():
# history compatibility: a pre-lvl5 results.json (4-rung ladder, cap fields, "na" statuses)
# renders without KeyError and shows ITS OWN ladder height (no retroactive relabeling).
old = {
"schema": 1,
"recipe": "legacy",
"version": "0.9",
"level": 4,
"level_cap_reason": "",
"level_cap_rung": None,
"rungs": {
"install": "pass",
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
},
"skips": {"intentional": {}, "unintentional": []},
"flags": {"clean_teardown": True, "no_secret_leak": True},
"screenshot": None,
"stages": [],
}
html = C.render_card_html(old)
assert "legacy" in html
assert "level 4 of 4" in html # the old top, not 5
assert "capped" not in html
def test_card_html_shows_stage_and_test_marks_incl_lint():
def test_card_html_shows_stage_and_test_marks():
html = C.render_card_html(_data())
assert "install" in html and "custom" in html
assert "abra recipe lint" in html
assert "test_serving" in html and "test_broken" in html
assert C.STATUS_MARK["pass"] in html and C.STATUS_MARK["fail"] in html
def test_card_html_unver_stage_mark_renders():
d = _data()
d["stages"][2] = {
"name": "lint",
"status": "unver",
"tests": [{"name": "abra recipe lint", "status": "unver", "ms": 0, "message": "timed out"}],
}
html = C.render_card_html(d)
assert C.STATUS_MARK["unver"] in html
assert C.STATUS_COLOR["unver"] in html
def test_card_html_flags_rendered():
html = C.render_card_html(_data())
assert "clean teardown" in html and "no secret leak" in html

View File

@ -28,6 +28,7 @@ def _row(**kw):
"ref": "db9a9502",
"version": "db9a95024e9d",
"level": 4,
"level_cap_reason": "",
"has_screenshot": True,
"flags": {"clean_teardown": True, "no_secret_leak": True},
"finished": 0,
@ -39,7 +40,7 @@ def _row(**kw):
def test_level_color_ramp_and_fallback():
assert dashboard.level_color(0) == "#e5534b"
assert dashboard.level_color(5) == "#3fb950" # full 5-rung climb (phase lvl5)
assert dashboard.level_color(6) == "#3fb950"
assert dashboard.level_color(4) == "#a0b93f"
assert dashboard.level_color(99) == "#8b949e"
assert dashboard.level_color(None) == "#8b949e"
@ -60,12 +61,20 @@ def test_overview_grid_mirrors_results():
def test_overview_never_greener_than_data():
# A failed run at level 0 must show level 0 + the failure pill — never a green/high level.
out = dashboard.render_overview(
[_row(status="failure", level=0, has_screenshot=False, flags={})]
[
_row(
status="failure",
level=0,
has_screenshot=False,
flags={},
level_cap_reason="L1 install FAILED",
)
]
)
assert "level 0" in out
assert dashboard.level_color(0) in out # red
assert dashboard._COLORS["failure"] in out
assert "level 4" not in out and "level 5" not in out
assert "level 4" not in out and "level 5" not in out and "level 6" not in out
assert "no screenshot" in out # placeholder, no broken image
@ -95,6 +104,7 @@ def test_build_row_projects_results(monkeypatch):
lambda n: {
"version": "1.2.3",
"level": 2,
"level_cap_reason": "cap",
"screenshot": "screenshot.png",
"flags": {"clean_teardown": True},
},
@ -113,38 +123,6 @@ def test_build_row_projects_results(monkeypatch):
assert r["url"].endswith("/cc-ci/7")
def test_build_row_old_schema1_artifact_renders(monkeypatch):
# History compatibility (phase lvl5): pre-lvl5 results.json still carries cap fields and a
# 4-rung ladder — it must project + render without KeyError, level shown VERBATIM (no
# retroactive relabeling), and the old cap text simply isn't resurfaced anywhere.
monkeypatch.setattr(
dashboard,
"_results_for",
lambda n: {
"schema": 1,
"version": "0.9.1",
"level": 2,
"level_cap_reason": "L3 backup/restore (data integrity) N/A",
"level_cap_rung": "backup_restore",
"screenshot": "screenshot.png",
"flags": {"clean_teardown": True, "no_secret_leak": True},
},
)
b = {
"number": 11,
"status": "success",
"event": "custom",
"params": {"RECIPE": "legacy", "REF": "abc123"},
"finished": 5,
}
r = dashboard._build_row(b)
out = dashboard.render_overview([r])
assert "level 2" in out and dashboard.level_color(2) in out
assert "N/A" not in out and "capped" not in out # cap language gone from the surface
hist = dashboard.render_history("legacy", [r])
assert "L2" in hist
def test_build_row_degrades_without_results(monkeypatch):
# No results.json (e.g. an old run): grid still renders from Drone fields, level absent.
monkeypatch.setattr(dashboard, "_results_for", lambda n: {})

View File

@ -1,14 +1,8 @@
"""Unit tests for the level ladder (harness.level) — phase lvl5 semantics.
"""Unit tests for the Phase-3 level ladder (harness.level), plan-phase3-results-ux.md §4.1 / R1.
Pure function — no I/O. Proves the operator-decided rule (plan-phase-lvl5-lint-rung.md,
DECISIONS.md phase lvl5):
level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"
— a real FAIL blocks, an UNVERIFIED rung blocks exactly like a fail, an INTENTIONAL skip is
climbed past. Includes the mission's four worked examples verbatim, and the old N/A cases
(single-published-version recipe, non-backup-capable recipe) now climbing past their former
caps. Run cold with: cc-ci-run -m pytest tests/unit/test_level.py -q
Pure function — no I/O. Proves the YunoHost gap-caps-the-level semantics, including the U0 gate
acceptance: a recipe that climbs through L4 reports 4, and one that fails at L2 is capped at 1
(the level just below the failed rung). Run cold with: cc-ci-run -m pytest tests/unit/test_level.py -q
"""
from __future__ import annotations
@ -25,115 +19,69 @@ def _rungs(
upgrade="pass",
backup_restore="pass",
functional="pass",
lint="pass",
):
return {
"install": install,
"upgrade": upgrade,
"backup_restore": backup_restore,
"functional": functional,
"lint": lint,
}
# ---- the ladder: five essential rungs, top is L5 (lint) ----
# ---- the ladder: four essential rungs, top is L4 (functional) ----
def test_full_clean_climb_is_L5():
assert L.compute_level(_rungs()) == 5
def test_full_clean_climb_to_L4():
# All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
lvl, reason = L.compute_level(_rungs())
assert lvl == 4
assert reason == ""
def test_ladder_is_five_rungs_lint_on_top():
assert L.RUNGS == ("install", "upgrade", "backup_restore", "functional", "lint")
assert "lint" in L.RUNG_LABEL[5]
def test_fails_at_L2_capped_at_L1():
# GATE: upgrade fails → capped at L1 even though higher rungs would pass.
lvl, reason = L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass"))
assert lvl == 1
assert "L2" in reason and "FAILED" in reason
# ---- mission worked examples (operator Q&A 2026-06-11, verbatim) ----
def test_mission_example_fail_blocks():
# install ✔, upgrade ✘, backup ✔, functional ✔, lint ✔ → level 1 (fail blocks).
assert L.compute_level(_rungs(upgrade="fail")) == 1
def test_mission_example_intentional_skip_climbs():
# install ✔, upgrade ✔, backup skip (not capable), functional ✔, lint ✔ → level 5
# (previously capped at 2 — the confusing part the operator removed).
assert L.compute_level(_rungs(backup_restore="skip")) == 5
def test_mission_example_unverified_blocks():
# install ✔, upgrade ✔, backup UNVER (harness error), functional ✔, lint ✔ → level 2
# (we cannot claim what we didn't check).
assert L.compute_level(_rungs(backup_restore="unver")) == 2
def test_mission_example_unverified_top_rung_not_earned():
# all four ✔, lint unver (abra missing) → level 4.
assert L.compute_level(_rungs(lint="unver")) == 4
# ---- blocking semantics ----
# ---- L0 / install ----
def test_install_fail_is_L0():
assert L.compute_level(_rungs(install="fail")) == 0
lvl, reason = L.compute_level(_rungs(install="fail"))
assert lvl == 0
assert "L1" in reason and "FAILED" in reason
def test_install_unver_is_L0():
assert L.compute_level(_rungs(install="unver")) == 0
# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
def test_higher_pass_never_rescues_a_fail():
# everything above a failed rung is dead, however green.
assert L.compute_level(_rungs(upgrade="fail", backup_restore="pass", functional="pass")) == 1
def test_higher_pass_does_not_rescue_lower_na():
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
assert lvl == 2
assert "L3" in reason and "N/A" in reason
def test_lint_fail_blocks_at_4():
assert L.compute_level(_rungs(lint="fail")) == 4
def test_upgrade_na_caps_at_L1():
# only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
lvl, reason = L.compute_level(_rungs(upgrade="na"))
assert lvl == 1
assert "L2" in reason and "N/A" in reason
def test_unver_blocks_even_after_a_skip():
# skip at L2 is climbed past, but the unver at L3 still blocks → level 1.
assert L.compute_level(_rungs(upgrade="skip", backup_restore="unver")) == 1
def test_functional_na_caps_at_L3():
# no recipe-specific functional tests → functional N/A caps at L3.
lvl, reason = L.compute_level(_rungs(functional="na"))
assert lvl == 3
assert "L4" in reason and "N/A" in reason
# ---- intentional-skip climbing (the de-cap) ----
def test_single_version_recipe_climbs_past_upgrade_skip():
# old rule: upgrade N/A capped at L1. New rule: skip is climbed past → full climb 5.
assert L.compute_level(_rungs(upgrade="skip")) == 5
def test_stateless_recipe_climbs_past_backup_skip_to_lint():
assert L.compute_level(_rungs(upgrade="skip", backup_restore="skip")) == 5
def test_skip_does_not_count_as_pass():
# ALL skips → nothing passed → level 0 (a skip climbs, but never earns).
assert (
L.compute_level(
_rungs(
install="skip",
upgrade="skip",
backup_restore="skip",
functional="skip",
lint="skip",
)
)
== 0
)
def test_skip_then_pass_earns_the_higher_rung():
# skip at L4, pass at L5 → level 5 (the skip below doesn't stop the climb).
assert L.compute_level(_rungs(functional="skip")) == 5
def test_trailing_skip_keeps_last_pass():
# passes up to L3, skips above → level stays 3 (skips never raise).
assert L.compute_level(_rungs(functional="skip", lint="skip")) == 3
def test_functional_fail_caps_at_L3():
lvl, reason = L.compute_level(_rungs(functional="fail"))
assert lvl == 3
assert "L4" in reason and "FAILED" in reason
# ---- input validation ----
@ -141,7 +89,7 @@ def test_trailing_skip_keeps_last_pass():
def test_invalid_status_raises():
bad = _rungs()
bad["functional"] = "na" # the OLD vocabulary is no longer valid — every N/A is classified
bad["functional"] = "passed" # not in the vocabulary
try:
L.compute_level(bad)
except ValueError:
@ -149,16 +97,6 @@ def test_invalid_status_raises():
raise AssertionError("expected ValueError on invalid rung status")
def test_missing_rung_raises():
bad = _rungs()
del bad["lint"]
try:
L.compute_level(bad)
except ValueError:
return
raise AssertionError("expected ValueError on missing rung")
# ---- helpers: backup_restore_status ----
@ -166,8 +104,8 @@ def test_backup_restore_status_pass():
assert L.backup_restore_status("pass", "pass", True) == "pass"
def test_backup_restore_status_not_capable_is_intentional_skip():
assert L.backup_restore_status("skip", "skip", False) == "skip"
def test_backup_restore_status_not_capable_is_na():
assert L.backup_restore_status("skip", "skip", False) == "na"
def test_backup_restore_status_fail_on_either():
@ -175,20 +113,16 @@ def test_backup_restore_status_fail_on_either():
assert L.backup_restore_status("fail", "pass", True) == "fail"
def test_backup_restore_partial_is_unverified():
# backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3,
# and the non-run is NOT intentional → unver (blocks the level above it).
assert L.backup_restore_status("pass", "skip", True) == "unver"
assert L.backup_restore_status(None, None, True) == "unver"
def test_backup_restore_partial_is_na():
# backup-capable but restore didn't run cleanly (not pass, not fail) → cannot claim L3
assert L.backup_restore_status("pass", "skip", True) == "na"
# ---- helpers: tier_to_rung ----
def test_tier_to_rung_mapping_defaults_unverified():
def test_tier_to_rung_mapping():
assert L.tier_to_rung("pass") == "pass"
assert L.tier_to_rung("fail") == "fail"
# no intentionality information here — a non-run is unver; derive_rungs upgrades to "skip"
# only on a declared/structural fact, never the other way.
assert L.tier_to_rung("skip") == "unver"
assert L.tier_to_rung(None) == "unver"
assert L.tier_to_rung("skip") == "na"
assert L.tier_to_rung(None) == "na"

View File

@ -1,188 +0,0 @@
"""Unit tests for the L5 lint executor (harness.lint) — phase lvl5.
Covers the table parser + classifier against real abra-0.13 output shapes (probed on the CI
host 2026-06-11, JOURNAL-lvl5), and run_lint's never-raise / never-silent-pass guarantees via
a fake-PATH `script` shim (no real abra needed). Run cold:
cc-ci-run -m pytest tests/unit/test_lint.py -q
"""
from __future__ import annotations
import os
import stat
import subprocess
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lint as L # noqa: E402
# Realistic abra lint table rows (unicode box drawing, ✅/❌ marks), as captured on cc-ci.
# Output lines are CRLF-terminated.
#
# TABLE_OK: every error-severity rule (R008, R014) is marked ✅; the only ❌ is the
# warn-severity rule R015. A trailing WARN log line follows the table.
TABLE_OK = (
"┏━━━━━━┳━━━━━━┓\r\n"
"│ R001 │ compose config has expected version │ warn │ ✅ │ - │ ensure │\r\n"
"│ R015 │ long secret names │ warn │ ❌ │ - │ reduce │\r\n"
"│ R008 │ .env.sample provided │ error │ ✅ │ - │ create │\r\n"
"│ R014 │ only annotated tags used for recipe version │ error │ ✅ │ - │ retag │\r\n"
"┗━━━━━━┻━━━━━━┛\r\n"
"WARN secret session_secret is longer than 12 characters\r\n"
)
# TABLE_R014_FAIL: TABLE_OK with the error-severity rule R014 flipped from ✅ to ❌, plus the
# "critical errors present" WARN line appended after the table.
TABLE_R014_FAIL = (
TABLE_OK.replace(
"│ R014 │ only annotated tags used for recipe version │ error │ ✅",
"│ R014 │ only annotated tags used for recipe version │ error │ ❌",
)
+ "WARN critical errors present in hedgedoc config\r\n"
)
# TABLE_SKIPPED_ERROR: TABLE_OK with R014 flipped to ❌ AND the column after the mark changed
# from "-" to "skipped"; no "critical errors" line is appended here.
TABLE_SKIPPED_ERROR = TABLE_OK.replace(
"│ R014 │ only annotated tags used for recipe version │ error │ ✅ │ - │",
"│ R014 │ only annotated tags used for recipe version │ error │ ❌ │ skipped │",
)
# ---- parse_table ----
def test_parse_table_rows_and_marks():
    """parse_table returns one row per rule of TABLE_OK, with severity / satisfied / skipped."""
    parsed = L.parse_table(TABLE_OK)
    index = {row["rule"]: row for row in parsed}
    # exactly the four rule rows — border and log lines produce no rows
    assert set(index) == {"R001", "R015", "R008", "R014"}
    r001 = index["R001"]
    assert r001["severity"] == "warn" and r001["satisfied"]
    r015 = index["R015"]
    assert r015["severity"] == "warn" and not r015["satisfied"]
    r014 = index["R014"]
    assert r014["severity"] == "error" and r014["satisfied"]
    # none of the fixture rows is marked skipped
    assert all(not row["skipped"] for row in parsed)
def test_parse_table_strips_ansi():
    """Wrapping the table in ANSI bold/reset escapes still yields all four rule rows."""
    wrapped = f"\x1b[1m{TABLE_OK}\x1b[0m"
    parsed = L.parse_table(wrapped)
    assert len(parsed) == 4
def test_parse_table_garbage_is_empty():
    """Output containing no table rows (a FATA line, or nothing at all) parses to []."""
    for unparseable in ("FATA something exploded\r\n", ""):
        assert L.parse_table(unparseable) == []
# ---- classify ----
def test_classify_pass_with_warn_misses_only():
    """An unsatisfied warn-severity rule alone leaves the verdict at pass."""
    # warn-severity ❌ (R015) does NOT fail the rung — only error-severity rules do.
    verdict = L.classify(0, TABLE_OK)
    assert verdict == ("pass", "", [])
def test_classify_error_rule_fails():
    """An unsatisfied error-severity rule (R014) fails the rung and is named in the detail."""
    verdict, explanation, failed_rules = L.classify(0, TABLE_R014_FAIL)
    assert verdict == "fail"
    assert failed_rules == ["R014"]
    assert "R014" in explanation
def test_classify_skipped_error_rule_does_not_fail_but_sentinel_guards():
    """A skipped error rule is not counted as failed, yet abra's sentinel line forces fail."""
    # a skipped error rule isn't counted as failed by the parser, but abra's own sentinel line
    # (if present) still forces fail — the classifier never out-greens abra.
    verdict, _, failed_rules = L.classify(0, TABLE_SKIPPED_ERROR)
    assert failed_rules == []
    assert verdict == "pass"

    with_sentinel = TABLE_SKIPPED_ERROR + "WARN critical errors present in x config\r\n"
    sentinel_verdict, sentinel_detail, _ = L.classify(0, with_sentinel)
    assert sentinel_verdict == "fail"
    assert "critical errors" in sentinel_detail
def test_classify_rc0_without_table_is_unver():
    """Exit code 0 with no parseable table is unverified, not a pass."""
    # rc=0 but nothing parseable → cannot claim pass.
    verdict = L.classify(0, "weird output")[0]
    assert verdict == "unver"
def test_classify_content_fata_is_fail():
    """A FATA "unable to validate recipe" line with rc=1 is classified as a fail."""
    fatal_output = "FATA unable to validate recipe: .env.sample for x couldn't be read\r\n"
    verdict, explanation, _ = L.classify(1, fatal_output)
    assert verdict == "fail"
    assert "unable to validate recipe" in explanation
def test_classify_environment_fata_is_unver():
    """A FATA "unable to fetch tags" line with rc=1 is classified as unverified."""
    fatal_output = "FATA unable to fetch tags in /x: repository not found: Not found.\r\n"
    verdict, explanation, _ = L.classify(1, fatal_output)
    assert verdict == "unver"
    assert "fetch tags" in explanation
def test_classify_did_not_run_is_unver():
    """No return code and no output at all is classified as unverified."""
    verdict = L.classify(None, "")[0]
    assert verdict == "unver"
# ---- run_lint: never raises, never silently passes ----
def _mkrecipe(tmp_path):
    """Create a one-commit git repo at ``tmp_path/abra/recipes/fakerec`` and return its path.

    The repo holds a single ``compose.yml``; the committer identity is supplied inline with
    ``git -c`` options. Any failing git step raises (``check=True``).
    """
    recipe_dir = tmp_path / "abra" / "recipes" / "fakerec"
    recipe_dir.mkdir(parents=True)
    (recipe_dir / "compose.yml").write_text("version: '3.8'\n")
    identity = ["-c", "user.email=t@t", "-c", "user.name=t"]
    git_steps = [
        ["git", "init", "-q"],
        ["git", "add", "."],
        ["git", *identity, "commit", "-qm", "x"],
    ]
    for argv in git_steps:
        subprocess.run(argv, cwd=recipe_dir, check=True)
    return recipe_dir
def _shim(tmp_path, body):
    """Write a fake `script` executable into ``tmp_path/bin`` and return that directory.

    run_lint invokes `script -qec "abra ..."`, so a caller that prepends the returned
    directory to PATH makes run_lint execute this shim. ``body`` is the shell text placed
    after the ``#!/bin/sh`` line. The directory is returned as a ``str``.
    """
    shim_dir = tmp_path / "bin"
    shim_dir.mkdir(exist_ok=True)  # several shims may be dropped under the same tmp_path
    fake_script = shim_dir / "script"
    fake_script.write_text(f"#!/bin/sh\n{body}")
    current_mode = fake_script.stat().st_mode
    fake_script.chmod(current_mode | stat.S_IEXEC)
    return str(shim_dir)
def test_run_lint_pass_via_shim(tmp_path, monkeypatch):
    """With a shim printing TABLE_OK and exiting 0, run_lint passes and writes lint.txt."""
    _mkrecipe(tmp_path)
    artifacts = tmp_path / "artifacts"
    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
    # swap each real CRLF for a literal backslash-n sequence inside the printf argument
    printable = TABLE_OK.replace("\r\n", "\\n")
    shim_dir = _shim(tmp_path, f'printf "{printable}"\nexit 0\n')
    monkeypatch.setenv("PATH", os.pathsep.join([shim_dir, os.environ["PATH"]]))

    result = L.run_lint("fakerec", None, str(artifacts))

    assert result["status"] == "pass"
    transcript = (artifacts / "lint.txt").read_text()
    assert "abra recipe lint -n fakerec" in transcript
    assert "R001" in transcript
def test_run_lint_fail_via_shim(tmp_path, monkeypatch):
    """With a shim printing TABLE_R014_FAIL, run_lint fails and reports R014 as failed."""
    _mkrecipe(tmp_path)
    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
    # swap each real CRLF for a literal backslash-n sequence inside the printf argument
    printable = TABLE_R014_FAIL.replace("\r\n", "\\n")
    shim_dir = _shim(tmp_path, f'printf "{printable}"\nexit 0\n')
    monkeypatch.setenv("PATH", os.pathsep.join([shim_dir, os.environ["PATH"]]))

    result = L.run_lint("fakerec", None, str(tmp_path / "artifacts"))

    assert result["status"] == "fail"
    assert result["rules_failed"] == ["R014"]
def test_run_lint_missing_recipe_is_unver_not_raise(tmp_path, monkeypatch):
    """A recipe absent from ABRA_DIR yields unver with a detail, and lint.txt is still written."""
    artifacts = tmp_path / "artifacts"
    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra-none"))

    result = L.run_lint("no-such-recipe", None, str(artifacts))

    assert result["status"] == "unver"
    assert result["detail"]
    # lint.txt still written with the failure context (loud, never silent)
    assert (artifacts / "lint.txt").exists()
def test_run_lint_abra_blowup_is_unver(tmp_path, monkeypatch):
    """A shim that prints an ioctl FATA line and exits 1 makes run_lint report unver."""
    _mkrecipe(tmp_path)
    monkeypatch.setenv("ABRA_DIR", str(tmp_path / "abra"))
    shim_dir = _shim(tmp_path, 'echo "FATA inappropriate ioctl for device"\nexit 1\n')
    monkeypatch.setenv("PATH", os.pathsep.join([shim_dir, os.environ["PATH"]]))

    # no artifacts directory is passed in this case
    result = L.run_lint("fakerec", None, None)

    assert result["status"] == "unver"

View File

@ -1,8 +1,7 @@
"""Unit tests for results assembly (harness.results) — phase lvl5 semantics.
"""Unit tests for Phase-3 results assembly (harness.results), plan-phase3-results-ux.md §4.2 / R1/R3.
Covers JUnit parsing, stage roll-up, the tier→rung derivation (the SINGLE place every N/A source
is classified intentional-skip vs unverified — the table in DECISIONS.md phase lvl5), the L5 lint
rung wiring, and full results.json assembly. Pure / tmp-file only. Run cold:
Covers JUnit parsing, stage roll-up, the tier→rung derivation (the documented mapping the level
depends on), and full results.json assembly incl. the U0 gate cases. Pure / tmp-file only. Run cold:
cc-ci-run -m pytest tests/unit/test_results.py -q
"""
@ -28,8 +27,6 @@ JUNIT_MIXED = """<?xml version="1.0"?>
<testcase classname="tests.y" name="test_skipped" time="0"><skipped message="no deps"/></testcase>
</testsuite></testsuites>"""
LINT_PASS = {"status": "pass", "detail": "", "rules_failed": []}
def _write(tmp_path, name, content):
p = tmp_path / name
@ -93,7 +90,7 @@ def test_collect_stages_synthesizes_when_no_junit():
assert len(stages[0]["tests"]) == 1
# ---- derive_rungs: the documented N/A-classification mapping (DECISIONS.md phase lvl5) ----
# ---- derive_rungs: the documented mapping ----
def _results(**kw):
@ -108,113 +105,34 @@ def _results(**kw):
return base
def test_derive_rungs_full_climb_five_rungs():
rungs = R.derive_rungs(
_results(), backup_capable=True, has_upgrade_target=True, lint_status="pass"
)
# the five essential rungs — integration/recipe-local are optional, not produced here.
def test_derive_rungs_full_climb_four_essential():
rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
# only the four essential rungs — integration/recipe-local are optional, not produced here.
assert rungs == {
"install": "pass",
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
"lint": "pass",
}
def test_derive_rungs_structural_skips_are_intentional():
# single published version (tier skipped, no upgrade target) + not backup-capable →
# both rungs are INTENTIONAL skips, not unverified.
def test_derive_rungs_stateless_backup_and_functional_na():
rungs = R.derive_rungs(
_results(upgrade="skip", backup="skip", restore="skip"),
_results(backup="skip", restore="skip", custom="skip"),
backup_capable=False,
has_upgrade_target=False,
lint_status="pass",
has_custom=False,
)
assert rungs["upgrade"] == "skip"
assert rungs["backup_restore"] == "skip"
assert rungs["backup_restore"] == "na"
assert rungs["functional"] == "na"
assert "integration" not in rungs and "recipe_local" not in rungs
def test_derive_rungs_upgrade_skip_with_target_is_unverified():
# the tier skipped although an upgrade target exists (e.g. install failed → downstream
# skipped): NOT structural → unver.
rungs = R.derive_rungs(
_results(install="fail", upgrade="skip", backup="skip", restore="skip", custom="skip"),
backup_capable=True,
has_upgrade_target=True,
lint_status="pass",
)
assert rungs["install"] == "fail"
assert rungs["upgrade"] == "unver"
assert rungs["backup_restore"] == "unver"
assert rungs["functional"] == "unver"
def test_derive_rungs_missing_tier_is_unverified():
# a tier excluded from the run entirely (dev CCCI_STAGES escape) → no result key → unver,
# never an intentional skip (the recipe didn't declare anything).
res = {"install": "pass"}
rungs = R.derive_rungs(res, backup_capable=True, has_upgrade_target=True, lint_status="pass")
assert rungs["upgrade"] == "unver"
assert rungs["backup_restore"] == "unver"
assert rungs["functional"] == "unver"
def test_derive_rungs_expected_na_declares_intentional():
# EXPECTED_NA turns a non-run rung into an intentional skip (declared source).
rungs = R.derive_rungs(
_results(custom="skip"),
backup_capable=True,
has_upgrade_target=True,
expected_na={"functional": "no functional surface"},
lint_status="pass",
)
assert rungs["functional"] == "skip"
def test_derive_rungs_no_custom_tests_defaults_unverified():
# absent functional coverage with NO declaration is a gap → unver (conservative default).
rungs = R.derive_rungs(
_results(custom="skip"), backup_capable=True, has_upgrade_target=True, lint_status="pass"
)
assert rungs["functional"] == "unver"
def test_derive_rungs_expected_na_never_overrides_a_real_result():
# a declaration cannot soften an exercised rung: fail stays fail.
rungs = R.derive_rungs(
_results(custom="fail"),
backup_capable=True,
has_upgrade_target=True,
expected_na={"functional": "declared"},
lint_status="pass",
)
assert rungs["functional"] == "fail"
def test_derive_rungs_lint_never_skips():
# lint has NO intentional-skip escape hatch: pass/fail from the executor, anything else
# (None, "unver", junk) → unver — even if a recipe tries to declare it away.
for status, want in (("pass", "pass"), ("fail", "fail"), ("unver", "unver"), (None, "unver")):
rungs = R.derive_rungs(
_results(),
backup_capable=True,
has_upgrade_target=True,
expected_na={"lint": "nope"},
lint_status=status,
)
assert rungs["lint"] == want, status
def test_derive_rungs_functional_fail():
rungs = R.derive_rungs(
_results(custom="fail"), backup_capable=True, has_upgrade_target=True, lint_status="pass"
)
rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
assert rungs["functional"] == "fail"
# ---- build_results: end-to-end incl level + lint + flags ----
# ---- build_results: end-to-end incl level + flags ----
def test_build_results_level_and_flags(tmp_path):
@ -245,75 +163,17 @@ def test_build_results_level_and_flags(tmp_path):
clean_teardown=True,
no_secret_leak=True,
finished_ts=1234.0,
lint=LINT_PASS,
)
# all five essential rungs pass → full climb to L5; no cap concept anywhere.
assert data["schema"] == 2
assert data["level"] == 5
assert "level_cap_reason" not in data and "level_cap_rung" not in data
# all four essential rungs pass → full climb to L4 (the top), no cap
assert data["level"] == 4
assert data["level_cap_reason"] == ""
assert data["recipe"] == "hedgedoc"
assert data["ref"] == "deadbeefcafe"
assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
# lint appears as a synthetic stage so the card's table carries all five rungs.
assert [s["name"] for s in data["stages"]] == ["install", "custom", "lint"]
assert data["lint"] == {"status": "pass", "detail": "", "rules_failed": []}
assert [s["name"] for s in data["stages"]] == ["install", "custom"]
def test_build_results_lint_fail_blocks_at_4(tmp_path):
recs = [
{
"tier": "install",
"source": "generic",
"file": "g/test_install.py",
"rc": 0,
"junit": _write(tmp_path, "i.xml", JUNIT_PASS),
}
]
data = R.build_results(
recipe="x",
version=None,
pr="0",
ref=None,
records=recs,
results=_results(),
backup_capable=True,
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
lint={
"status": "fail",
"detail": "error rule(s) unsatisfied: R014",
"rules_failed": ["R014"],
},
)
assert data["level"] == 4
assert data["rungs"]["lint"] == "fail"
assert data["lint"]["rules_failed"] == ["R014"]
lint_stage = [s for s in data["stages"] if s["name"] == "lint"][0]
assert lint_stage["status"] == "fail"
assert "R014" in lint_stage["tests"][0]["message"]
def test_build_results_no_lint_given_is_unverified_never_pass(tmp_path):
# an old/lint-less caller must NEVER get a free L5: the rung derives as unver → level 4 max.
data = R.build_results(
recipe="x",
version=None,
pr="0",
ref=None,
records=[],
results=_results(),
backup_capable=True,
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
)
assert data["rungs"]["lint"] == "unver"
assert data["level"] == 4
assert "lint" in data["skips"]["unintentional"]
def test_build_results_level1_on_upgrade_fail(tmp_path):
def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
recs = [
{
"tier": "install",
@ -334,13 +194,12 @@ def test_build_results_level1_on_upgrade_fail(tmp_path):
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
lint=LINT_PASS,
)
assert data["level"] == 1
assert data["rungs"]["upgrade"] == "fail"
assert "L2" in data["level_cap_reason"]
# ---- skips: intentional (declared/structural, with reason) vs unintentional (= unver) ----
# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
def _rungs(**kw):
@ -349,26 +208,24 @@ def _rungs(**kw):
"upgrade": "pass",
"backup_restore": "pass",
"functional": "pass",
"lint": "pass",
}
base.update(kw)
return base
def test_skips_declared_reason_and_unverified_split():
rungs = _rungs(backup_restore="skip", functional="unver")
def test_skips_intentional_vs_unintentional():
rungs = _rungs(backup_restore="na", functional="na")
sk = R.skips(rungs, {"backup_restore": "stateless static server"})
# backup_restore is declared (intentional, with reason); functional skipped but not declared.
assert sk["intentional"] == {"backup_restore": "stateless static server"}
assert sk["unintentional"] == ["functional"]
def test_skips_structural_reason_when_undeclared():
# a structural skip (derive_rungs) carries its structural reason even without EXPECTED_NA.
rungs = _rungs(upgrade="skip", backup_restore="skip")
def test_skips_none_declared_all_unintentional():
rungs = _rungs(backup_restore="na")
sk = R.skips(rungs, None)
assert "only one published version" in sk["intentional"]["upgrade"]
assert "not backup-capable" in sk["intentional"]["backup_restore"]
assert sk["unintentional"] == []
assert sk["intentional"] == {}
assert sk["unintentional"] == ["backup_restore"]
def test_skips_declaration_only_counts_when_actually_skipped():
@ -379,9 +236,9 @@ def test_skips_declaration_only_counts_when_actually_skipped():
assert "backup_restore" not in sk["unintentional"]
def test_build_results_stateless_recipe_climbs(tmp_path):
# custom-html-tiny shape: no backup surface (declared), single published version, passing
# functional — formerly capped at L2 by the N/A; now climbs to L5 (the de-cap, mission §2).
def test_build_results_threads_expected_na(tmp_path):
# Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
# backup surface (backup_restore declared intentionally skipped).
recs = [
{
"tier": "install",
@ -404,47 +261,23 @@ def test_build_results_stateless_recipe_climbs(tmp_path):
pr="0",
ref=None,
records=recs,
results=_results(upgrade="skip", backup="skip", restore="skip"),
backup_capable=False, # no backupbot label → structural intentional skip
has_upgrade_target=False, # single published version → structural intentional skip
results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass
backup_capable=False, # no backupbot label → backup_restore skipped (N/A)
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
lint=LINT_PASS,
expected_na={"backup_restore": "stateless static file server"},
)
assert data["level"] == 5 # skips are climbed past; nothing was inflated to get here
assert data["rungs"] == {
"install": "pass",
"upgrade": "skip",
"backup_restore": "skip",
"functional": "pass",
"lint": "pass",
}
assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
assert "only one published version" in data["skips"]["intentional"]["upgrade"]
assert data["skips"]["unintentional"] == []
def test_build_results_unverified_backup_blocks(tmp_path):
# synthesized tier abort: backup-capable but the tiers never produced a result → unver → the
# level stays below the unverified rung (mission worked example #3).
data = R.build_results(
recipe="x",
version=None,
pr="0",
ref=None,
records=[],
results=_results(backup="skip", restore="skip"),
backup_capable=True,
clean_teardown=True,
no_secret_leak=True,
finished_ts=0.0,
lint=LINT_PASS,
)
assert data["rungs"]["backup_restore"] == "unver"
# backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
# the skip caps the climb — but it's the declared (intentional) rung that capped.
assert data["level"] == 2
assert data["skips"]["unintentional"] == ["backup_restore"]
assert "L3" in data["level_cap_reason"]
assert data["level_cap_rung"] == "backup_restore"
assert data["rungs"]["functional"] == "pass"
assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
assert (
data["skips"]["unintentional"] == []
) # backup_restore declared; functional passed → clean
def test_build_results_threads_customization(tmp_path):
@ -477,7 +310,6 @@ def test_build_results_threads_customization(tmp_path):
"clean_teardown": True,
"no_secret_leak": True,
"finished_ts": 0.0,
"lint": LINT_PASS,
}
assert R.build_results(**kwargs, customization=cust)["customization"] == cust
assert R.build_results(**kwargs)["customization"] is None