feat(3 U0.2+U0.3): per-test results + results.json with computed level

harness/results.py: JUnit-XML parsing (stdlib) → per-stage/per-test rows; derive_rungs (documented
tier+deps/SSO → rung mapping); build_results assembles results.json {recipe,version,pr,ref,run_id,
stages[],level,level_cap_reason,rungs,flags{clean_teardown,no_secret_leak},screenshot,summary_card};
write_results (atomic). run_recipe_ci.py: tiers emit --junitxml and append {tier,source,file,rc,junit}
records; main() assembles and writes results.json inside a wrapper, so that a failure there NEVER changes
the verdict (R7); the wrapped step includes a narrow leak-scan of the serialised artifact. 17 new unit
tests (test_results.py).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-05-31 05:55:52 +00:00
parent df54693449
commit 52e5d210d8
5 changed files with 819 additions and 63 deletions

View File

@ -14,8 +14,14 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
from harness import level as L # noqa: E402
def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pass",
integration="pass", recipe_local="pass"):
def _rungs(
install="pass",
upgrade="pass",
backup_restore="pass",
functional="pass",
integration="pass",
recipe_local="pass",
):
return {
"install": install,
"upgrade": upgrade,
@ -28,6 +34,7 @@ def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pa
# ---- the U0 gate: L4-pass and L2-cap ----
def test_full_clean_climb_to_L6():
lvl, reason = L.compute_level(_rungs())
assert lvl == 6
@ -50,6 +57,7 @@ def test_fails_at_L2_capped_at_L1():
# ---- L0 / install ----
def test_install_fail_is_L0():
lvl, reason = L.compute_level(_rungs(install="fail"))
assert lvl == 0
@ -58,6 +66,7 @@ def test_install_fail_is_L0():
# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
def test_higher_pass_does_not_rescue_lower_na():
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
@ -94,6 +103,7 @@ def test_functional_fail_caps_at_L3():
# ---- input validation ----
def test_invalid_status_raises():
bad = _rungs()
bad["functional"] = "passed" # not in the vocabulary
@ -106,6 +116,7 @@ def test_invalid_status_raises():
# ---- helpers: backup_restore_status ----
def test_backup_restore_status_pass():
assert L.backup_restore_status("pass", "pass", True) == "pass"
@ -126,6 +137,7 @@ def test_backup_restore_partial_is_na():
# ---- helpers: tier_to_rung ----
def test_tier_to_rung_mapping():
assert L.tier_to_rung("pass") == "pass"
assert L.tier_to_rung("fail") == "fail"