"""Structured run results + results.json (Phase 3 §4.2 R1/R3; level semantics: phase lvl5). Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying: { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}], level, rungs, lint:{status,detail,rules_failed}, skips:{intentional:{rung:reason}, unintentional:[rung]}, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } Rung statuses (phase lvl5, operator-decided — see harness.level + DECISIONS.md): every rung is "pass" | "fail" | "skip" (INTENTIONAL — a declared/structural fact says the rung does not apply) | "unver" (UNINTENTIONAL — the rung should have run and wasn't verified; blocks the level like a fail). `derive_rungs` is the single place every N/A source is classified; anything it cannot attribute to a declared/structural fact defaults to "unver" (conservative). `skips` mirrors that split into results.json: intentional {rung: reason} / unintentional [rung] (= the unver rungs). The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`), parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a rung-status dict derived here (`derive_rungs`) from the tier results + structural signals the orchestrator holds; the classification table is in DECISIONS.md (phase lvl5). This module is import-pure (no side effects at import). `write_results` is the only writer; the orchestrator calls the build/write path inside a try/except so a results failure NEVER changes the run's exit code (R7 — cosmetics never block the pipeline). """ from __future__ import annotations import json import os import xml.etree.ElementTree as ET from . import level as level_mod # Where per-run artifacts (results.json, screenshot, summary card) are written on the runner host. # The dashboard serves these read-only at /runs//... (U0.4). Overridable for tests. 
RUNS_DIR_DEFAULT = "/var/lib/cc-ci-runs"


def runs_dir() -> str:
    """Return the directory per-run artifact dirs are created under.

    The `CCCI_RUNS_DIR` environment variable, when set, overrides `RUNS_DIR_DEFAULT`.
    """
    return os.environ.get("CCCI_RUNS_DIR", RUNS_DIR_DEFAULT)


def run_id() -> str:
    """Stable id for this run.

    Prefer the Drone build number (what the PR comment + dashboard link to); fall back to the
    unique run domain, then `CCCI_RUN_ID`, so a hand-run still gets a distinct artifact dir.
    Returns "manual" when none of them is set to a non-blank value.
    """
    # Every candidate is stripped: the id becomes a directory name in write_results, so a
    # padded or whitespace-only value must never leak through.
    for var in ("DRONE_BUILD_NUMBER", "CCCI_APP_DOMAIN", "CCCI_RUN_ID"):
        value = (os.environ.get(var) or "").strip()
        if value:
            return value
    return "manual"


def junit_file(junit_dir: str, tier: str, source: str, path: str) -> str:
    """Deterministic per-(tier,source,file) JUnit XML path under junit_dir.

    Only the extension-less basename of `path` is used; path separators that appear in the
    combined name are replaced by "_" so the result is always a direct child of `junit_dir`.
    """
    base = os.path.splitext(os.path.basename(path))[0]
    safe = f"{tier}__{source}__{base}".replace("/", "_").replace(os.sep, "_")
    return os.path.join(junit_dir, safe + ".xml")


def _case_status(case: ET.Element) -> tuple[str, str]:
    """(status, message) for one <testcase>.

    JUnit: child <error>/<failure>/<skipped>, else passed. When several children are present
    the first match in that order wins. The message is the child's stripped `message`
    attribute ("" when absent).
    """
    for tag, st in (("error", "error"), ("failure", "fail"), ("skipped", "skip")):
        el = case.find(tag)
        if el is not None:
            return st, (el.get("message") or "").strip()
    return "pass", ""


def parse_junit(xml_path: str) -> list[dict]:
    """Parse one JUnit XML file → list of per-test rows {name, classname, status, ms, message}.

    Tolerant: a missing/corrupt file yields [], and an unusable `time` attribute yields ms=0.
    """
    try:
        tree = ET.parse(xml_path)
    except (OSError, ET.ParseError):
        return []
    rows: list[dict] = []
    for case in tree.iter("testcase"):
        status, message = _case_status(case)
        try:
            ms = int(round(float(case.get("time", "0")) * 1000))
        except (TypeError, ValueError, OverflowError):
            # ValueError: non-numeric text or NaN; OverflowError: an infinite duration.
            ms = 0
        rows.append(
            {
                "name": case.get("name", "?"),
                "classname": case.get("classname", ""),
                "status": status,
                "ms": ms,
                "message": message,
            }
        )
    return rows


def _stage_status(tests: list[dict]) -> str:
    """Roll per-test rows up to a stage status.

    Any error/fail → fail; else if any pass → pass; else (all skipped / empty) → skip.
    """
    sts = {t["status"] for t in tests}
    if "fail" in sts or "error" in sts:
        return "fail"
    if "pass" in sts:
        return "pass"
    return "skip"


def collect_stages(records: list[dict]) -> list[dict]:
    """Group per-file run records into ordered stage dicts with their per-test breakdown.

    `records` items: {tier, source, file, rc, junit}. Tests are read from each file's JUnit XML;
    if a file produced no JUnit (e.g. pytest crashed before writing), fall back to a single
    synthetic row derived from its exit code so the stage still reflects reality (rc!=0 → fail;
    a missing rc counts as non-zero).

    Stages are emitted in the fixed order install, upgrade, backup, restore, custom. Records
    whose tier is not one of those are grouped but never emitted.
    """
    order = ("install", "upgrade", "backup", "restore", "custom")
    by_tier: dict[str, list[dict]] = {}
    for rec in records:
        tests = parse_junit(rec.get("junit", "")) if rec.get("junit") else []
        if not tests:
            # No JUnit rows — synthesize from the exit code so a crash isn't shown as "no tests".
            ok = rec.get("rc", 1) == 0
            tests = [
                {
                    "name": os.path.basename(rec.get("file", "?")),
                    "classname": rec.get("source", ""),
                    "status": "pass" if ok else "fail",
                    "ms": 0,
                    "message": "" if ok else "tier produced no JUnit; exit!=0",
                }
            ]
        for t in tests:
            t["source"] = rec.get("source", "")
        by_tier.setdefault(rec["tier"], []).extend(tests)
    stages = []
    for tier in order:
        if tier in by_tier:
            tests = by_tier[tier]
            stages.append({"name": tier, "status": _stage_status(tests), "tests": tests})
    return stages


def derive_rungs(
    results: dict[str, str],
    *,
    backup_capable: bool,
    has_upgrade_target: bool,
    expected_na: dict | None = None,
    lint_status: str | None = None,
) -> dict[str, str]:
    """Translate the orchestrator's tier results + structural signals into the rung-status
    dict harness.level consumes — the FIVE essential rungs.

    This is the SINGLE place every N/A source is classified intentional ("skip") vs
    unintentional ("unver"); the table lives in DECISIONS.md (phase lvl5). Conservative by
    design: never reports "pass" it can't substantiate, and any rung that did not produce a
    pass/fail and has NO declared/structural reason is "unver".

      L1 install    : install tier pass. Always applies — never "skip" (non-run → unver).
      L2 upgrade    : upgrade tier. Tier skipped + no upgrade target (only one published
                      version, structural) → "skip"; declared in EXPECTED_NA → "skip";
                      anything else non-pass/fail (prior-stage abort, tier excluded) → "unver".
      L3 backup/res : backup AND restore tiers pass. Not backup-capable (declared/structural)
                      → "skip"; EXPECTED_NA → "skip"; unverified-while-capable → "unver".
      L4 functional : the custom tier. No custom tests / tier skipped → EXPECTED_NA-declared
                      "skip", else "unver" (absent functional coverage is a gap, not an
                      intentional property of the recipe).
      L5 lint       : from the lint executor (harness.lint). pass/fail only — every recipe can
                      be linted, so there is NO intentional-skip escape hatch: a lint that
                      could not run (timeout, abra missing, executor error) is "unver".

    Integration (SSO/OIDC) and recipe-local are OPTIONAL and intentionally NOT rungs here —
    they never affect the level (SSO is still enforced for the run VERDICT in
    run_recipe_ci.py).
    """
    expected = set((expected_na or {}).keys())
    rungs: dict[str, str] = {}

    rungs["install"] = level_mod.tier_to_rung(results.get("install"))

    up = results.get("upgrade")
    if up in ("pass", "fail"):
        rungs["upgrade"] = up
    elif up == "skip" and not has_upgrade_target:
        # The orchestrator skipped the tier for the structural reason: nothing to upgrade from.
        rungs["upgrade"] = "skip"
    elif "upgrade" in expected:
        rungs["upgrade"] = "skip"
    else:
        rungs["upgrade"] = "unver"

    br = level_mod.backup_restore_status(
        results.get("backup"), results.get("restore"), backup_capable
    )
    if br == "unver" and "backup_restore" in expected:
        br = "skip"
    rungs["backup_restore"] = br

    custom = results.get("custom")
    if custom in ("pass", "fail"):
        rungs["functional"] = custom
    elif "functional" in expected:
        rungs["functional"] = "skip"
    else:
        rungs["functional"] = "unver"

    rungs["lint"] = lint_status if lint_status in ("pass", "fail") else "unver"
    return rungs


# Reasons attached to STRUCTURAL intentional skips (no EXPECTED_NA declaration needed — the
# fact is read off the recipe itself).
_STRUCTURAL_REASON = {
    "upgrade": "only one published version — no upgrade target",
    "backup_restore": "not backup-capable (no backupbot labels / declared)",
}


def skips(
    rungs: dict[str, str],
    expected_na: dict | None,
) -> dict:
    """Mirror the rung classification into results.json's `skips` block:

        { "intentional":   {rung: reason, ...},   # status "skip" — declared/structural, with why
          "unintentional": [rung, ...] }          # status "unver" — should have run, wasn't verified

    The reason is the recipe's EXPECTED_NA declaration when present, else the structural fact
    derive_rungs skipped on, else "declared intentional". A declaration whose reason is None
    or empty counts as "no reason given" and takes the fallback. Purely descriptive — the
    level math lives in harness.level.
    """
    # Drop None reasons before stringifying, otherwise str(None) would surface as the
    # literal reason "None".
    expected = {str(k): str(v) for k, v in (expected_na or {}).items() if v is not None}
    intentional = {
        r: expected.get(r) or _STRUCTURAL_REASON.get(r, "declared intentional")
        for r, st in rungs.items()
        if st == "skip"
    }
    unintentional = sorted(r for r, st in rungs.items() if st == "unver")
    return {"intentional": intentional, "unintentional": unintentional}


def build_results(
    *,
    recipe: str,
    version: str | None,
    pr: str,
    ref: str | None,
    records: list[dict],
    results: dict[str, str],
    backup_capable: bool,
    clean_teardown: bool,
    no_secret_leak: bool,
    finished_ts: float | None,
    has_upgrade_target: bool = True,
    lint: dict | None = None,
    screenshot: str | None = None,
    summary_card: str | None = None,
    expected_na: dict | None = None,
    customization: dict | None = None,
) -> dict:
    """Assemble the full results.json dict (no I/O).

    `finished_ts` is passed in (the orchestrator stamps it) so this stays pure and
    deterministic for unit tests. `expected_na` is the recipe's declared intentional-skip map
    (recipe_meta.EXPECTED_NA); `has_upgrade_target` is the structural "a previous published
    version exists" fact; `lint` is harness.lint.run_lint's result dict (None — e.g. an old
    caller — derives the lint rung as "unver": never a silent pass).

    `ref` is truncated to its first 12 characters and `pr` is stringified. The `results`
    dict is embedded as-is (not copied).
    """
    stages = collect_stages(records)
    lint = lint or {}
    lint_status = lint.get("status")
    rungs = derive_rungs(
        results,
        backup_capable=backup_capable,
        has_upgrade_target=has_upgrade_target,
        expected_na=expected_na,
        lint_status=lint_status,
    )
    # Surface lint in the per-stage table too (it has no pytest/JUnit tier), so the card's
    # stage breakdown carries all five rungs.
    if rungs["lint"] != "skip":  # lint is never "skip", but stay defensive
        stages.append(
            {
                "name": "lint",
                "status": rungs["lint"],
                "tests": [
                    {
                        "name": "abra recipe lint",
                        "classname": "lint",
                        "source": "harness",
                        "status": rungs["lint"],
                        "ms": 0,
                        "message": str(lint.get("detail") or ""),
                    }
                ],
            }
        )
    lvl = level_mod.compute_level(rungs)
    return {
        "schema": 2,
        "run_id": run_id(),
        "recipe": recipe,
        "version": version,
        "pr": str(pr),
        "ref": (ref or "")[:12],
        "finished": finished_ts,
        "level": lvl,
        "rungs": rungs,
        "lint": {
            "status": rungs["lint"],
            "detail": str(lint.get("detail") or ""),
            "rules_failed": list(lint.get("rules_failed") or []),
        },
        "skips": skips(rungs, expected_na),
        "stages": stages,
        "results": results,
        "flags": {
            "clean_teardown": bool(clean_teardown),
            "no_secret_leak": bool(no_secret_leak),
        },
        "screenshot": screenshot,
        "summary_card": summary_card,
        # rcust P5: the run's resolved customization manifest (pure presentation — consumers must
        # never derive a verdict from it).
        "customization": customization,
    }


def write_results(data: dict, runs_dir_override: str | None = None) -> str:
    """Write results.json into the run's artifact dir; return its path. Creates the dir.

    The JSON is written to a sibling `results.json.tmp` and moved over the final name with
    `os.replace`, so the final name is only ever replaced by a completely written file. If
    serialization or the write fails, the temp file is removed and the error propagates
    (the caller decides whether to swallow it).

    `data["run_id"]` names the artifact dir (stringified); `runs_dir_override`, when truthy,
    replaces `runs_dir()` as the parent.
    """
    rd = runs_dir_override or runs_dir()
    out_dir = os.path.join(rd, str(data["run_id"]))
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "results.json")
    tmp = path + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)
        os.replace(tmp, path)
    except BaseException:
        # Don't leave a half-written temp file next to the (possibly older) results.json.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
    return path