feat(3 U0.2+U0.3): per-test results + results.json with computed level

harness/results.py: JUnit-XML parsing (stdlib) → per-stage/per-test rows; derive_rungs (documented tier+deps/SSO → rung mapping); build_results assembles results.json {recipe,version,pr,ref,run_id, stages[],level,level_cap_reason,rungs,flags{clean_teardown,no_secret_leak},screenshot,summary_card}; write_results (atomic). run_recipe_ci.py: tiers emit --junitxml + append {tier,source,file,rc,junit} records; main() assembles+writes results.json, wrapped so that a failure in that step NEVER changes the verdict (R7), incl. a narrow leak-scan of the serialised artifact. 13 new unit tests (test_results.py). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 05:55:52 +00:00
parent df54693449
commit 52e5d210d8
5 changed files with 819 additions and 63 deletions
--- a/tests/unit/test_level.py
+++ b/tests/unit/test_level.py
@ -14,8 +14,14 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
 from harness import level as L  # noqa: E402


-def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pass",
-           integration="pass", recipe_local="pass"):
+def _rungs(
+    install="pass",
+    upgrade="pass",
+    backup_restore="pass",
+    functional="pass",
+    integration="pass",
+    recipe_local="pass",
+):
    return {
        "install": install,
        "upgrade": upgrade,
@ -28,6 +34,7 @@ def _rungs(install="pass", upgrade="pass", backup_restore="pass", functional="pa

 # ---- the U0 gate: L4-pass and L2-cap ----

+
 def test_full_clean_climb_to_L6():
    lvl, reason = L.compute_level(_rungs())
    assert lvl == 6
@ -50,6 +57,7 @@ def test_fails_at_L2_capped_at_L1():

 # ---- L0 / install ----

+
 def test_install_fail_is_L0():
    lvl, reason = L.compute_level(_rungs(install="fail"))
    assert lvl == 0
@ -58,6 +66,7 @@ def test_install_fail_is_L0():

 # ---- gap-caps semantics: a higher pass can't rescue a lower gap ----

+
 def test_higher_pass_does_not_rescue_lower_na():
    # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
    lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
@ -94,6 +103,7 @@ def test_functional_fail_caps_at_L3():

 # ---- input validation ----

+
 def test_invalid_status_raises():
    bad = _rungs()
    bad["functional"] = "passed"  # not in the vocabulary
@ -106,6 +116,7 @@ def test_invalid_status_raises():

 # ---- helpers: backup_restore_status ----

+
 def test_backup_restore_status_pass():
    assert L.backup_restore_status("pass", "pass", True) == "pass"

@ -126,6 +137,7 @@ def test_backup_restore_partial_is_na():

 # ---- helpers: tier_to_rung ----

+
 def test_tier_to_rung_mapping():
    assert L.tier_to_rung("pass") == "pass"
    assert L.tier_to_rung("fail") == "fail"
--- a/tests/unit/test_results.py
+++ b/tests/unit/test_results.py
@ -0,0 +1,265 @@
+"""Unit tests for Phase-3 results assembly (harness.results), plan-phase3-results-ux.md §4.2 / R1/R3.
+
+Covers JUnit parsing, stage roll-up, the tier→rung derivation (the documented mapping the level
+depends on), and full results.json assembly incl. the U0 gate cases. Pure / tmp-file only. Run cold:
+  cc-ci-run -m pytest tests/unit/test_results.py -q
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
+from harness import results as R  # noqa: E402
+
+JUNIT_PASS = """<?xml version="1.0"?>
+<testsuites><testsuite name="pytest" tests="2">
+<testcase classname="tests.x" name="test_a" time="0.012"/>
+<testcase classname="tests.x" name="test_b" time="1.5"/>
+</testsuite></testsuites>"""
+
+JUNIT_MIXED = """<?xml version="1.0"?>
+<testsuites><testsuite name="pytest" tests="3">
+<testcase classname="tests.y" name="test_ok" time="0.1"/>
+<testcase classname="tests.y" name="test_bad" time="0.2"><failure message="boom">trace</failure></testcase>
+<testcase classname="tests.y" name="test_skipped" time="0"><skipped message="no deps"/></testcase>
+</testsuite></testsuites>"""
+
+
+def _write(tmp_path, name, content):
+    p = tmp_path / name
+    p.write_text(content)
+    return str(p)
+
+
+def test_parse_junit_pass(tmp_path):
+    rows = R.parse_junit(_write(tmp_path, "p.xml", JUNIT_PASS))
+    assert len(rows) == 2
+    assert {r["status"] for r in rows} == {"pass"}
+    assert rows[1]["ms"] == 1500
+
+
+def test_parse_junit_mixed(tmp_path):
+    rows = R.parse_junit(_write(tmp_path, "m.xml", JUNIT_MIXED))
+    by = {r["name"]: r["status"] for r in rows}
+    assert by == {"test_ok": "pass", "test_bad": "fail", "test_skipped": "skip"}
+
+
+def test_parse_junit_missing_file_is_empty():
+    assert R.parse_junit("/nonexistent/x.xml") == []
+
+
+def test_collect_stages_orders_and_rolls_up(tmp_path):
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+        },
+        {
+            "tier": "custom",
+            "source": "cc-ci",
+            "file": "c/test_x.py",
+            "rc": 1,
+            "junit": _write(tmp_path, "c.xml", JUNIT_MIXED),
+        },
+    ]
+    stages = R.collect_stages(recs)
+    assert [s["name"] for s in stages] == ["install", "custom"]  # install before custom
+    assert stages[0]["status"] == "pass"
+    assert stages[1]["status"] == "fail"  # the failure in JUNIT_MIXED
+    assert len(stages[1]["tests"]) == 3
+
+
+def test_collect_stages_synthesizes_when_no_junit():
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 1,
+            "junit": None,
+        }
+    ]
+    stages = R.collect_stages(recs)
+    assert stages[0]["status"] == "fail"
+    assert len(stages[0]["tests"]) == 1
+
+
+# ---- derive_rungs: the documented mapping ----
+
+
+def _results(**kw):
+    base = {
+        "install": "pass",
+        "upgrade": "pass",
+        "backup": "pass",
+        "restore": "pass",
+        "custom": "pass",
+    }
+    base.update(kw)
+    return base
+
+
+def test_derive_rungs_full_stateful_sso():
+    rungs = R.derive_rungs(
+        _results(),
+        backup_capable=True,
+        declared=["keycloak"],
+        deps_ready=True,
+        sso_unverified=False,
+        has_custom=True,
+        has_repo_local=False,
+        repo_local_passed=False,
+    )
+    assert rungs == {
+        "install": "pass",
+        "upgrade": "pass",
+        "backup_restore": "pass",
+        "functional": "pass",
+        "integration": "pass",
+        "recipe_local": "na",
+    }
+
+
+def test_derive_rungs_no_sso_surface_is_integration_na():
+    rungs = R.derive_rungs(
+        _results(),
+        backup_capable=True,
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        has_custom=True,
+        has_repo_local=False,
+        repo_local_passed=False,
+    )
+    assert rungs["integration"] == "na"
+    assert rungs["functional"] == "pass"
+
+
+def test_derive_rungs_stateless_backup_na():
+    rungs = R.derive_rungs(
+        _results(backup="skip", restore="skip", custom="skip"),
+        backup_capable=False,
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        has_custom=False,
+        has_repo_local=False,
+        repo_local_passed=False,
+    )
+    assert rungs["backup_restore"] == "na"
+    assert rungs["functional"] == "na"
+
+
+def test_derive_rungs_sso_unverified_is_integration_fail():
+    rungs = R.derive_rungs(
+        _results(),
+        backup_capable=True,
+        declared=["keycloak"],
+        deps_ready=False,
+        sso_unverified=True,
+        has_custom=True,
+        has_repo_local=False,
+        repo_local_passed=False,
+    )
+    assert rungs["integration"] == "fail"
+
+
+def test_derive_rungs_repo_local_pass():
+    rungs = R.derive_rungs(
+        _results(),
+        backup_capable=True,
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        has_custom=True,
+        has_repo_local=True,
+        repo_local_passed=True,
+    )
+    assert rungs["recipe_local"] == "pass"
+
+
+# ---- build_results: end-to-end incl level + flags ----
+
+
+def test_build_results_level_and_flags(tmp_path):
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+        },
+        {
+            "tier": "custom",
+            "source": "cc-ci",
+            "file": "c/test_func.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
+        },
+    ]
+    data = R.build_results(
+        recipe="hedgedoc",
+        version="1.2.3",
+        pr="7",
+        ref="deadbeefcafe0000",
+        records=recs,
+        results=_results(),
+        backup_capable=True,
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=1234.0,
+    )
+    # stateful, functional pass, no SSO surface, no repo-local → caps at L4
+    assert data["level"] == 4
+    assert "L5" in data["level_cap_reason"]
+    assert data["recipe"] == "hedgedoc"
+    assert data["ref"] == "deadbeefcafe"
+    assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
+    assert [s["name"] for s in data["stages"]] == ["install", "custom"]
+
+
+def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
+    recs = [
+        {
+            "tier": "install",
+            "source": "generic",
+            "file": "g/test_install.py",
+            "rc": 0,
+            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
+        }
+    ]
+    data = R.build_results(
+        recipe="x",
+        version=None,
+        pr="0",
+        ref=None,
+        records=recs,
+        results=_results(upgrade="fail"),
+        backup_capable=True,
+        declared=[],
+        deps_ready=True,
+        sso_unverified=False,
+        clean_teardown=True,
+        no_secret_leak=True,
+        finished_ts=0.0,
+    )
+    assert data["level"] == 1
+    assert "L2" in data["level_cap_reason"]
+
+
+def test_write_results_roundtrip(tmp_path):
+    data = {"run_id": "42", "level": 3, "stages": []}
+    path = R.write_results(data, runs_dir_override=str(tmp_path))
+    assert path.endswith("/42/results.json")
+    with open(path) as f:
+        assert json.load(f)["level"] == 3