# cc-ci/tests/unit/test_results.py

"""Unit tests for results assembly (harness.results) — phase lvl5 semantics.

Covers JUnit parsing, stage roll-up, the tier→rung derivation (the SINGLE place every N/A source
is classified intentional-skip vs unverified — the table in DECISIONS.md phase lvl5), the L5 lint
rung wiring, and full results.json assembly. Pure / tmp-file only. Run cold:
  cc-ci-run -m pytest tests/unit/test_results.py -q
"""

from __future__ import annotations

import json
import os
import sys

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import results as R  # noqa: E402

# JUnit fixture: one suite with two testcases, neither carrying a <failure> or <skipped> child.
# The second testcase declares time="1.5"; test_parse_junit_pass expects that as 1500 in "ms".
JUNIT_PASS = """<?xml version="1.0"?>
<testsuites><testsuite name="pytest" tests="2">
<testcase classname="tests.x" name="test_a" time="0.012"/>
<testcase classname="tests.x" name="test_b" time="1.5"/>
</testsuite></testsuites>"""

# JUnit fixture: one suite with three testcases — a plain one, one with a <failure> child and one
# with a <skipped> child (expected by test_parse_junit_mixed as pass / fail / skip).
JUNIT_MIXED = """<?xml version="1.0"?>
<testsuites><testsuite name="pytest" tests="3">
<testcase classname="tests.y" name="test_ok" time="0.1"/>
<testcase classname="tests.y" name="test_bad" time="0.2"><failure message="boom">trace</failure></testcase>
<testcase classname="tests.y" name="test_skipped" time="0"><skipped message="no deps"/></testcase>
</testsuite></testsuites>"""

# Lint payload with status "pass" and no failed rules; handed to build_results as ``lint=``.
LINT_PASS = {"status": "pass", "detail": "", "rules_failed": []}


def _write(tmp_path, name, content):
    p = tmp_path / name
    p.write_text(content)
    return str(p)


def test_parse_junit_pass(tmp_path):
    """An all-green JUnit file parses to two rows, every one of them with status "pass"."""
    junit_file = _write(tmp_path, "p.xml", JUNIT_PASS)
    parsed = R.parse_junit(junit_file)
    assert len(parsed) == 2
    seen_statuses = {row["status"] for row in parsed}
    assert seen_statuses == {"pass"}
    # the second testcase declares time="1.5"; the row reports it as 1500 under "ms"
    second_row = parsed[1]
    assert second_row["ms"] == 1500


def test_parse_junit_mixed(tmp_path):
    """Plain, <failure> and <skipped> testcases come back as pass, fail and skip rows."""
    parsed = R.parse_junit(_write(tmp_path, "m.xml", JUNIT_MIXED))
    status_by_name = {}
    for row in parsed:
        status_by_name[row["name"]] = row["status"]
    expected = {"test_ok": "pass", "test_bad": "fail", "test_skipped": "skip"}
    assert status_by_name == expected


def test_parse_junit_missing_file_is_empty():
    """Parsing a path that does not exist yields an empty list."""
    parsed = R.parse_junit("/nonexistent/x.xml")
    assert parsed == []


def test_collect_stages_orders_and_rolls_up(tmp_path):
    """Install and custom records come back as stages in that order, each with a status."""
    install_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    custom_rec = {
        "tier": "custom",
        "source": "cc-ci",
        "file": "c/test_x.py",
        "rc": 1,
        "junit": _write(tmp_path, "c.xml", JUNIT_MIXED),
    }
    stages = R.collect_stages([install_rec, custom_rec])
    stage_names = [stage["name"] for stage in stages]
    assert stage_names == ["install", "custom"]  # install ahead of custom
    install_stage, custom_stage = stages[0], stages[1]
    assert install_stage["status"] == "pass"
    # the <failure> testcase in JUNIT_MIXED
    assert custom_stage["status"] == "fail"
    assert len(custom_stage["tests"]) == 3


def test_collect_stages_synthesizes_when_no_junit():
    """A record with ``junit=None`` and rc=1 still yields a failed stage holding one test row."""
    junitless_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 1,
        "junit": None,
    }
    first_stage = R.collect_stages([junitless_rec])[0]
    assert first_stage["status"] == "fail"
    assert len(first_stage["tests"]) == 1


# ---- derive_rungs: the documented N/A-classification mapping (DECISIONS.md phase lvl5) ----


def _results(**kw):
    base = {
        "install": "pass",
        "upgrade": "pass",
        "backup": "pass",
        "restore": "pass",
        "custom": "pass",
    }
    base.update(kw)
    return base


def test_derive_rungs_full_climb_five_rungs():
    """All tiers passing plus a passing lint derive exactly the five essential rungs as "pass"."""
    derived = R.derive_rungs(
        _results(),
        backup_capable=True,
        has_upgrade_target=True,
        lint_status="pass",
    )
    # integration / recipe-local are optional rungs and are not produced here.
    essential = ("install", "upgrade", "backup_restore", "functional", "lint")
    assert derived == dict.fromkeys(essential, "pass")


def test_derive_rungs_structural_skips_are_intentional():
    """Structural N/A sources make upgrade and backup_restore intentional skips, not unverified."""
    # single published version (tier skipped, no upgrade target) and not backup-capable
    tier_results = _results(upgrade="skip", backup="skip", restore="skip")
    derived = R.derive_rungs(
        tier_results,
        backup_capable=False,
        has_upgrade_target=False,
        lint_status="pass",
    )
    assert derived["upgrade"] == "skip"
    assert derived["backup_restore"] == "skip"
    assert "integration" not in derived
    assert "recipe_local" not in derived


def test_derive_rungs_upgrade_skip_with_target_is_unverified():
    """Tiers skipped downstream of a failed install are unverified, not structural skips."""
    # an upgrade target exists, yet the tier was skipped (install failed → downstream skipped)
    tier_results = _results(
        install="fail", upgrade="skip", backup="skip", restore="skip", custom="skip"
    )
    derived = R.derive_rungs(
        tier_results,
        backup_capable=True,
        has_upgrade_target=True,
        lint_status="pass",
    )
    assert derived["install"] == "fail"
    for rung in ("upgrade", "backup_restore", "functional"):
        assert derived[rung] == "unver", rung


def test_derive_rungs_missing_tier_is_unverified():
    """Tiers with no result key at all derive as unverified, never as an intentional skip."""
    # a tier excluded from the run entirely (dev CCCI_STAGES escape) leaves no result key; the
    # recipe declared nothing, so this cannot count as intentional.
    install_only = {"install": "pass"}
    derived = R.derive_rungs(
        install_only,
        backup_capable=True,
        has_upgrade_target=True,
        lint_status="pass",
    )
    for rung in ("upgrade", "backup_restore", "functional"):
        assert derived[rung] == "unver", rung


def test_derive_rungs_expected_na_declares_intentional():
    """An EXPECTED_NA declaration turns a non-run functional rung into an intentional skip."""
    declaration = {"functional": "no functional surface"}
    derived = R.derive_rungs(
        _results(custom="skip"),
        backup_capable=True,
        has_upgrade_target=True,
        expected_na=declaration,
        lint_status="pass",
    )
    functional = derived["functional"]
    assert functional == "skip"


def test_derive_rungs_no_custom_tests_defaults_unverified():
    """Without a declaration, a skipped custom tier leaves the functional rung unverified."""
    # absent functional coverage and NO declaration is a gap: the conservative default is unver.
    derived = R.derive_rungs(
        _results(custom="skip"),
        backup_capable=True,
        has_upgrade_target=True,
        lint_status="pass",
    )
    functional = derived["functional"]
    assert functional == "unver"


def test_derive_rungs_expected_na_never_overrides_a_real_result():
    """A declaration cannot soften a rung that actually ran: a failing custom tier stays fail."""
    declaration = {"functional": "declared"}
    derived = R.derive_rungs(
        _results(custom="fail"),
        backup_capable=True,
        has_upgrade_target=True,
        expected_na=declaration,
        lint_status="pass",
    )
    functional = derived["functional"]
    assert functional == "fail"


def test_derive_rungs_lint_never_skips():
    """The lint rung is only ever pass, fail or unver, whatever the recipe declares.

    lint has NO intentional-skip escape hatch: "pass"/"fail" come straight from the executor and
    anything else (None, "unver", junk) derives as "unver", even when EXPECTED_NA tries to declare
    the rung away. The table therefore also covers a literal "skip" status and an arbitrary junk
    string, so every status named in that contract is exercised.
    """
    cases = (
        ("pass", "pass"),
        ("fail", "fail"),
        ("unver", "unver"),
        (None, "unver"),
        ("skip", "unver"),  # a skip status must not become an intentional skip
        ("bogus", "unver"),  # junk status
    )
    for status, want in cases:
        rungs = R.derive_rungs(
            _results(),
            backup_capable=True,
            has_upgrade_target=True,
            expected_na={"lint": "nope"},
            lint_status=status,
        )
        assert rungs["lint"] == want, f"lint_status={status!r}"


def test_derive_rungs_functional_fail():
    """A failing custom tier derives a failing functional rung."""
    derived = R.derive_rungs(
        _results(custom="fail"),
        backup_capable=True,
        has_upgrade_target=True,
        lint_status="pass",
    )
    functional = derived["functional"]
    assert functional == "fail"


# ---- build_results: end-to-end incl level + lint + flags ----


def test_build_results_level_and_flags(tmp_path):
    """A fully passing run assembles a schema-2 document at level 5 with flags and lint wired in."""
    install_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    custom_rec = {
        "tier": "custom",
        "source": "cc-ci",
        "file": "c/test_func.py",
        "rc": 0,
        "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
    }
    data = R.build_results(
        recipe="hedgedoc",
        version="1.2.3",
        pr="7",
        ref="deadbeefcafe0000",
        records=[install_rec, custom_rec],
        results=_results(),
        backup_capable=True,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=1234.0,
        lint=LINT_PASS,
    )
    # every essential rung passes → full climb to L5; there is no cap concept anywhere.
    assert data["schema"] == 2
    assert data["level"] == 5
    for cap_key in ("level_cap_reason", "level_cap_rung"):
        assert cap_key not in data
    assert data["recipe"] == "hedgedoc"
    # the 16-character ref passed in comes back as its first 12 characters
    assert data["ref"] == "deadbeefcafe"
    expected_flags = {"clean_teardown": True, "no_secret_leak": True}
    assert data["flags"] == expected_flags
    # lint is present as a synthetic stage, so the card's table carries all five rungs.
    stage_names = [stage["name"] for stage in data["stages"]]
    assert stage_names == ["install", "custom", "lint"]
    assert data["lint"] == {"status": "pass", "detail": "", "rules_failed": []}


def test_build_results_lint_fail_blocks_at_4(tmp_path):
    """A failing lint holds the level at 4 and surfaces the failed rule in the lint stage."""
    install_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    failing_lint = {
        "status": "fail",
        "detail": "error rule(s) unsatisfied: R014",
        "rules_failed": ["R014"],
    }
    data = R.build_results(
        recipe="x",
        version=None,
        pr="0",
        ref=None,
        records=[install_rec],
        results=_results(),
        backup_capable=True,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        lint=failing_lint,
    )
    assert data["level"] == 4
    assert data["rungs"]["lint"] == "fail"
    assert data["lint"]["rules_failed"] == ["R014"]
    # the synthetic lint stage mirrors the failure and names the rule in its first test row
    lint_stages = [stage for stage in data["stages"] if stage["name"] == "lint"]
    lint_stage = lint_stages[0]
    assert lint_stage["status"] == "fail"
    first_message = lint_stage["tests"][0]["message"]
    assert "R014" in first_message


def test_build_results_no_lint_given_is_unverified_never_pass(tmp_path):
    """Omitting ``lint`` derives the lint rung as unverified, which holds the level at 4."""
    # an old / lint-less caller must NEVER be handed a free L5.
    data = R.build_results(
        recipe="x",
        version=None,
        pr="0",
        ref=None,
        records=[],
        results=_results(),
        backup_capable=True,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
    )
    lint_rung = data["rungs"]["lint"]
    assert lint_rung == "unver"
    assert data["level"] == 4
    unintentional = data["skips"]["unintentional"]
    assert "lint" in unintentional


def test_build_results_level1_on_upgrade_fail(tmp_path):
    """A failing upgrade tier yields level 1 with the upgrade rung marked fail."""
    install_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    tier_results = _results(upgrade="fail")
    data = R.build_results(
        recipe="x",
        version=None,
        pr="0",
        ref=None,
        records=[install_rec],
        results=tier_results,
        backup_capable=True,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        lint=LINT_PASS,
    )
    assert data["level"] == 1
    upgrade_rung = data["rungs"]["upgrade"]
    assert upgrade_rung == "fail"


# ---- skips: intentional (declared/structural, with reason) vs unintentional (= unver) ----


def _rungs(**kw):
    base = {
        "install": "pass",
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
        "lint": "pass",
    }
    base.update(kw)
    return base


def test_skips_declared_reason_and_unverified_split():
    """A declared skip lands under "intentional" with its reason; an unver rung is unintentional."""
    reason = "stateless static server"
    rung_states = _rungs(backup_restore="skip", functional="unver")
    report = R.skips(rung_states, {"backup_restore": reason})
    assert report["intentional"] == {"backup_restore": reason}
    assert report["unintentional"] == ["functional"]


def test_skips_structural_reason_when_undeclared():
    """Skipped rungs with no EXPECTED_NA still report a structural reason as intentional."""
    rung_states = _rungs(upgrade="skip", backup_restore="skip")
    report = R.skips(rung_states, None)
    intentional = report["intentional"]
    assert "only one published version" in intentional["upgrade"]
    assert "not backup-capable" in intentional["backup_restore"]
    assert report["unintentional"] == []


def test_skips_declaration_only_counts_when_actually_skipped():
    """A declaration for a rung that actually ran (pass) is inert: it is listed nowhere."""
    rung_states = _rungs(backup_restore="pass")
    report = R.skips(rung_states, {"backup_restore": "reason"})
    for bucket in ("intentional", "unintentional"):
        assert "backup_restore" not in report[bucket], bucket


def test_build_results_stateless_recipe_climbs(tmp_path):
    """A stateless single-version recipe climbs to level 5 past its two intentional skips."""
    # custom-html-tiny shape: no backup surface (declared), a single published version and a
    # passing functional tier. Formerly capped at L2 by the N/A; now it climbs to L5 (the de-cap,
    # mission §2).
    install_rec = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    custom_rec = {
        "tier": "custom",
        "source": "cc-ci",
        "file": "c/test_serves_content.py",
        "rc": 0,
        "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
    }
    declared_reason = "stateless static file server"
    data = R.build_results(
        recipe="custom-html-tiny",
        version="1.1.0",
        pr="0",
        ref=None,
        records=[install_rec, custom_rec],
        results=_results(upgrade="skip", backup="skip", restore="skip"),
        backup_capable=False,  # no backupbot label → structural intentional skip
        has_upgrade_target=False,  # single published version → structural intentional skip
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        lint=LINT_PASS,
        expected_na={"backup_restore": declared_reason},
    )
    # the skips are climbed past; nothing was inflated to get here
    assert data["level"] == 5
    expected_rungs = {
        "install": "pass",
        "upgrade": "skip",
        "backup_restore": "skip",
        "functional": "pass",
        "lint": "pass",
    }
    assert data["rungs"] == expected_rungs
    assert data["skips"]["intentional"]["backup_restore"] == declared_reason
    assert "only one published version" in data["skips"]["intentional"]["upgrade"]
    assert data["skips"]["unintentional"] == []


def test_build_results_unverified_backup_blocks(tmp_path):
    """A backup-capable run whose backup/restore tiers were skipped is held at level 2."""
    # synthesized tier abort: backup-capable, but the tiers never produced a result → unver, so
    # the level stays below the unverified rung (mission worked example #3).
    tier_results = _results(backup="skip", restore="skip")
    data = R.build_results(
        recipe="x",
        version=None,
        pr="0",
        ref=None,
        records=[],
        results=tier_results,
        backup_capable=True,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        lint=LINT_PASS,
    )
    backup_rung = data["rungs"]["backup_restore"]
    assert backup_rung == "unver"
    assert data["level"] == 2
    unintentional = data["skips"]["unintentional"]
    assert unintentional == ["backup_restore"]


def test_build_results_threads_customization(tmp_path):
    """The customization manifest is passed through unchanged; when omitted the key is None."""
    # rcust P5: the run-start customization manifest lands verbatim under "customization";
    # leaving it out gives an explicit None (the key is always present in the schema).
    recs = [
        {
            "tier": "install",
            "source": "generic",
            "file": "g/test_install.py",
            "rc": 0,
            "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
        },
    ]
    manifest = {
        "meta_non_default": {"HTTP_TIMEOUT": 600},
        "hooks": {"install_steps.sh": "cc-ci"},
        "overlays": {},
        "custom_tests": {"cc-ci": {"functional": 2}},
        "env_overrides": [],
    }
    tier_results = _results()

    def build(**extra):
        # Both calls share the same records list and results dict; only ``extra`` varies.
        return R.build_results(
            recipe="hedgedoc",
            version="1.2.3",
            pr="7",
            ref=None,
            records=recs,
            results=tier_results,
            backup_capable=True,
            clean_teardown=True,
            no_secret_leak=True,
            finished_ts=0.0,
            lint=LINT_PASS,
            **extra,
        )

    with_manifest = build(customization=manifest)
    assert with_manifest["customization"] == manifest
    without_manifest = build()
    assert without_manifest["customization"] is None


def test_write_results_roundtrip(tmp_path):
    """Written results land at ``<runs dir>/<run_id>/results.json`` and load back as JSON."""
    payload = {"run_id": "42", "level": 3, "stages": []}
    written = R.write_results(payload, runs_dir_override=str(tmp_path))
    assert written.endswith("/42/results.json")
    with open(written) as handle:
        loaded = json.load(handle)
    assert loaded["level"] == 3