feat(3 U0.2+U0.3): per-test results + results.json with computed level
harness/results.py: JUnit-XML parsing (stdlib) → per-stage/per-test rows; derive_rungs (documented
tier+deps/SSO → rung mapping); build_results assembles results.json {recipe,version,pr,ref,run_id,
stages[],level,level_cap_reason,rungs,flags{clean_teardown,no_secret_leak},screenshot,summary_card};
write_results (atomic). run_recipe_ci.py: tiers emit --junitxml + append {tier,source,file,rc,junit}
records; main() assembles+writes results.json, wrapped so that a failure in this step NEVER changes the
verdict (R7); the step includes a narrow leak-scan of the serialised artifact. 17 new unit tests (test_results.py).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -14,8 +14,14 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
|
||||
from harness import level as L # noqa: E402
|
||||
|
||||
|
||||
def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pass",
|
||||
integration="pass", recipe_local="pass"):
|
||||
def _rungs(
|
||||
install="pass",
|
||||
upgrade="pass",
|
||||
backup_restore="pass",
|
||||
functional="pass",
|
||||
integration="pass",
|
||||
recipe_local="pass",
|
||||
):
|
||||
return {
|
||||
"install": install,
|
||||
"upgrade": upgrade,
|
||||
@@ -28,6 +34,7 @@ def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pa
|
||||
|
||||
# ---- the U0 gate: L4-pass and L2-cap ----
|
||||
|
||||
|
||||
def test_full_clean_climb_to_L6():
|
||||
lvl, reason = L.compute_level(_rungs())
|
||||
assert lvl == 6
|
||||
@@ -50,6 +57,7 @@ def test_fails_at_L2_capped_at_L1():
|
||||
|
||||
# ---- L0 / install ----
|
||||
|
||||
|
||||
def test_install_fail_is_L0():
|
||||
lvl, reason = L.compute_level(_rungs(install="fail"))
|
||||
assert lvl == 0
|
||||
@@ -58,6 +66,7 @@ def test_install_fail_is_L0():
|
||||
|
||||
# ---- gap-caps semantics: a higher pass can't rescue a lower gap ----
|
||||
|
||||
|
||||
def test_higher_pass_does_not_rescue_lower_na():
|
||||
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
|
||||
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
|
||||
@@ -94,6 +103,7 @@ def test_functional_fail_caps_at_L3():
|
||||
|
||||
# ---- input validation ----
|
||||
|
||||
|
||||
def test_invalid_status_raises():
|
||||
bad = _rungs()
|
||||
bad["functional"] = "passed" # not in the vocabulary
|
||||
@@ -106,6 +116,7 @@ def test_invalid_status_raises():
|
||||
|
||||
# ---- helpers: backup_restore_status ----
|
||||
|
||||
|
||||
def test_backup_restore_status_pass():
|
||||
assert L.backup_restore_status("pass", "pass", True) == "pass"
|
||||
|
||||
@@ -126,6 +137,7 @@ def test_backup_restore_partial_is_na():
|
||||
|
||||
# ---- helpers: tier_to_rung ----
|
||||
|
||||
|
||||
def test_tier_to_rung_mapping():
|
||||
assert L.tier_to_rung("pass") == "pass"
|
||||
assert L.tier_to_rung("fail") == "fail"
|
||||
|
||||
Reference in New Issue
Block a user