All checks were successful
continuous-integration/drone/push Build is passing
level.py: RUNGS += lint; statuses {pass,fail,skip,unver}; compute_level = max passed
rung with all below pass-or-skip (fail/unver block); cap_reason/capped DELETED.
harness/lint.py: lint executor — pristine scratch clone of the per-run tree at the
exact tested ref (mirror-origin + untracked-overlay pollution solved by context, no
rule filtered), PTY via script -qec, 60s hard budget, lint.txt artifact, table-parse
classifier (rc only signals FATA), unver on any non-run (never silent pass).
results.py: derive_rungs classifies every N/A source (structural/declared → skip,
else unver), lint rung + synthetic lint stage + lint block in results.json, schema 2,
cap fields removed. run_recipe_ci.py: lint call before tiers (double-wrapped,
verdict-neutral), badge = level only. card/dashboard: 0-5 ramp, cap line → 'level N
of {4|5}', unverified rows, badge number+colour only, lint.txt servable, old schema-1
artifacts render untouched. Unit suite rewritten: 245 passed on cc-ci venv.
325 lines
13 KiB
Python
325 lines
13 KiB
Python
"""Structured run results + results.json (Phase 3 §4.2 R1/R3; level semantics: phase lvl5).
|
|
|
|
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying:
|
|
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
|
|
level, rungs, lint:{status,detail,rules_failed},
|
|
skips:{intentional:{rung:reason}, unintentional:[rung]},
|
|
flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
|
|
|
|
Rung statuses (phase lvl5, operator-decided — see harness.level + DECISIONS.md): every rung is
|
|
"pass" | "fail" | "skip" (INTENTIONAL — a declared/structural fact says the rung does not apply)
|
|
| "unver" (UNINTENTIONAL — the rung should have run and wasn't verified; blocks the level like a
|
|
fail). `derive_rungs` is the single place every N/A source is classified; anything it cannot
|
|
attribute to a declared/structural fact defaults to "unver" (conservative). `skips` mirrors that
|
|
split into results.json: intentional {rung: reason} / unintentional [rung] (= the unver rungs).
|
|
|
|
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
|
|
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
|
|
rung-status dict derived here (`derive_rungs`) from the tier results + structural signals the
|
|
orchestrator holds; the classification table is in DECISIONS.md (phase lvl5).
|
|
|
|
This module is import-pure (no side effects at import). `write_results` is the only writer; the
|
|
orchestrator calls the build/write path inside a try/except so a results failure NEVER changes the
|
|
run's exit code (R7 — cosmetics never block the pipeline).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import xml.etree.ElementTree as ET
|
|
|
|
from . import level as level_mod
|
|
|
|
# Root directory on the runner host that receives each run's artifacts (results.json,
# screenshot, summary card). The dashboard serves it read-only at /runs/<run_id>/... (U0.4).
# Tests redirect it through the CCCI_RUNS_DIR environment variable.
RUNS_DIR_DEFAULT = "/var/lib/cc-ci-runs"


def runs_dir() -> str:
    """Return the artifact root: $CCCI_RUNS_DIR when that variable is set, else RUNS_DIR_DEFAULT."""
    override = os.environ.get("CCCI_RUNS_DIR")
    if override is None:
        return RUNS_DIR_DEFAULT
    # A variable that is set (even to an empty string) is returned verbatim.
    return override
|
|
|
|
|
|
def run_id() -> str:
    """Return the identifier used for this run's artifact directory.

    Lookup order: a non-blank $DRONE_BUILD_NUMBER (whitespace-stripped), then
    $CCCI_APP_DOMAIN, then $CCCI_RUN_ID, and finally the literal "manual" when none of
    them holds a non-empty value.
    """
    build_number = (os.environ.get("DRONE_BUILD_NUMBER") or "").strip()
    if build_number:
        return build_number
    # Fallbacks are returned as-is (not stripped); an empty value moves on to the next one.
    for variable in ("CCCI_APP_DOMAIN", "CCCI_RUN_ID"):
        value = os.environ.get(variable)
        if value:
            return value
    return "manual"
|
|
|
|
|
|
def junit_file(junit_dir: str, tier: str, source: str, path: str) -> str:
    """Build the JUnit XML path for one (tier, source, test file) triple.

    The file name is "<tier>__<source>__<stem>.xml", where <stem> is the basename of
    `path` without its extension; any path separator inside the name is flattened to "_"
    so the result always sits directly under `junit_dir`.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    flat_name = f"{tier}__{source}__{stem}"
    for separator in ("/", os.sep):
        flat_name = flat_name.replace(separator, "_")
    return os.path.join(junit_dir, f"{flat_name}.xml")
|
|
|
|
|
|
def _case_status(case: ET.Element) -> tuple[str, str]:
    """Return (status, message) for one JUnit <testcase> element.

    The first matching child decides the status, checked in this order:
    <error> → "error", <failure> → "fail", <skipped> → "skip". With none of them present
    the case is a "pass" with an empty message. The message is the child's `message`
    attribute, whitespace-stripped ("" when the attribute is absent).
    """
    outcome_by_tag = {"error": "error", "failure": "fail", "skipped": "skip"}
    for tag, status in outcome_by_tag.items():
        node = case.find(tag)
        if node is not None:
            return status, (node.get("message") or "").strip()
    return "pass", ""


def parse_junit(xml_path: str) -> list[dict]:
    """Parse one JUnit XML file into per-test rows {name, classname, status, ms, message}.

    Tolerant by design: a missing, unreadable or malformed file yields [], and a test
    case whose `time` attribute cannot be turned into a finite number of milliseconds
    gets ms=0 instead of aborting the whole parse.
    """
    try:
        tree = ET.parse(xml_path)
    except (OSError, ET.ParseError):
        return []
    rows: list[dict] = []
    for case in tree.iter("testcase"):
        status, message = _case_status(case)
        try:
            ms = int(round(float(case.get("time", "0")) * 1000))
        except (TypeError, ValueError, OverflowError):
            # ValueError: non-numeric text or NaN. OverflowError: "inf" / "1e999" parse as
            # floats but cannot be rounded to an int — treat both as "duration unknown".
            ms = 0
        rows.append(
            {
                "name": case.get("name", "?"),
                "classname": case.get("classname", ""),
                "status": status,
                "ms": ms,
                "message": message,
            }
        )
    return rows
|
|
|
|
|
|
def _stage_status(tests: list[dict]) -> str:
    """Collapse a stage's per-test rows into one status.

    "fail" as soon as any row is "fail" or "error"; otherwise "pass" when at least one
    row passed; otherwise "skip" (every row skipped, or no rows at all).
    """
    seen = {row["status"] for row in tests}
    if seen & {"fail", "error"}:
        return "fail"
    return "pass" if "pass" in seen else "skip"
|
|
|
|
|
|
def collect_stages(records: list[dict]) -> list[dict]:
    """Turn per-file run records into ordered stage dicts {name, status, tests}.

    Each record is {tier, source, file, rc, junit}. A record's test rows come from its
    JUnit XML; when that yields nothing (no path, unreadable file, zero test cases) a
    single synthetic row is built from the exit code instead — rc == 0 → "pass", anything
    else (including a missing rc) → "fail" — so a crashed tier never looks like "no tests".
    Every row is tagged with the record's `source`. Stages are emitted in the fixed order
    install, upgrade, backup, restore, custom; tiers outside that list are not emitted.
    """
    tier_order = ("install", "upgrade", "backup", "restore", "custom")
    grouped: dict[str, list[dict]] = {}
    for record in records:
        source = record.get("source", "")
        junit_path = record.get("junit")
        rows = parse_junit(junit_path) if junit_path else []
        if not rows:
            # Nothing parsed — fall back to the process exit code for this file.
            succeeded = record.get("rc", 1) == 0
            rows = [
                {
                    "name": os.path.basename(record.get("file", "?")),
                    "classname": source,
                    "status": "pass" if succeeded else "fail",
                    "ms": 0,
                    "message": "" if succeeded else "tier produced no JUnit; exit!=0",
                }
            ]
        for row in rows:
            row["source"] = source
        grouped.setdefault(record["tier"], []).extend(rows)
    return [
        {"name": tier, "status": _stage_status(grouped[tier]), "tests": grouped[tier]}
        for tier in tier_order
        if tier in grouped
    ]
|
|
|
|
|
|
def derive_rungs(
    results: dict[str, str],
    *,
    backup_capable: bool,
    has_upgrade_target: bool,
    expected_na: dict | None = None,
    lint_status: str | None = None,
) -> dict[str, str]:
    """Map the orchestrator's tier results + structural facts onto the five rung statuses.

    Returns {install, upgrade, backup_restore, functional, lint}, the dict harness.level
    consumes. This is the one place a rung without a pass/fail outcome is classified as
    intentional ("skip") or unintentional ("unver"); whenever no declared or structural
    reason applies the rung is "unver", never a silent pass.

    install        : whatever level_mod.tier_to_rung makes of the install tier result.
    upgrade        : the tier's own pass/fail when it has one. Otherwise "skip" when the
                     tier was skipped and there is no upgrade target, or when "upgrade" is
                     a key of `expected_na`; everything else is "unver".
    backup_restore : level_mod.backup_restore_status(backup, restore, backup_capable); an
                     "unver" outcome becomes "skip" only when "backup_restore" is declared
                     in `expected_na`.
    functional     : the custom tier's pass/fail; otherwise "skip" when "functional" is
                     declared in `expected_na`, else "unver".
    lint           : `lint_status` when it is "pass" or "fail"; any other value (including
                     None) is "unver" — lint has no intentional-skip path.

    Only the keys of `expected_na` are consulted here. Tiers other than install, upgrade,
    backup, restore and custom are not read and therefore never influence a rung.
    """
    declared_na = frozenset((expected_na or {}).keys())
    decided = ("pass", "fail")

    install_rung = level_mod.tier_to_rung(results.get("install"))

    upgrade = results.get("upgrade")
    if upgrade in decided:
        upgrade_rung = upgrade
    elif (upgrade == "skip" and not has_upgrade_target) or "upgrade" in declared_na:
        # Structural (nothing to upgrade from) or explicitly declared not-applicable.
        upgrade_rung = "skip"
    else:
        upgrade_rung = "unver"

    backup_restore_rung = level_mod.backup_restore_status(
        results.get("backup"), results.get("restore"), backup_capable
    )
    if backup_restore_rung == "unver" and "backup_restore" in declared_na:
        backup_restore_rung = "skip"

    custom = results.get("custom")
    if custom in decided:
        functional_rung = custom
    else:
        functional_rung = "skip" if "functional" in declared_na else "unver"

    lint_rung = lint_status if lint_status in decided else "unver"

    return {
        "install": install_rung,
        "upgrade": upgrade_rung,
        "backup_restore": backup_restore_rung,
        "functional": functional_rung,
        "lint": lint_rung,
    }
|
|
|
|
|
|
# Human-readable reasons for STRUCTURAL intentional skips — the fact is read off the recipe
# itself, so no EXPECTED_NA declaration is required for these rungs.
_STRUCTURAL_REASON = {
    "upgrade": "only one published version — no upgrade target",
    "backup_restore": "not backup-capable (no backupbot labels / declared)",
}


def skips(
    rungs: dict[str, str],
    expected_na: dict | None,
) -> dict:
    """Build the `skips` block of results.json from the rung classification.

    Returns {"intentional": {rung: reason}, "unintentional": [rung, ...]}:
    rungs with status "skip" go to `intentional` with a reason, rungs with status "unver"
    go to `unintentional` (sorted by name). The reason is the rung's non-empty
    `expected_na` entry when there is one, else the structural reason for that rung, else
    the generic "declared intentional". Descriptive only — no level math happens here.
    """
    declared = {str(key): str(value) for key, value in (expected_na or {}).items()}
    intentional: dict[str, str] = {}
    unverified: list[str] = []
    for rung, status in rungs.items():
        if status == "skip":
            intentional[rung] = declared.get(rung) or _STRUCTURAL_REASON.get(
                rung, "declared intentional"
            )
        elif status == "unver":
            unverified.append(rung)
    unverified.sort()
    return {"intentional": intentional, "unintentional": unverified}
|
|
|
|
|
|
def build_results(
    *,
    recipe: str,
    version: str | None,
    pr: str,
    ref: str | None,
    records: list[dict],
    results: dict[str, str],
    backup_capable: bool,
    clean_teardown: bool,
    no_secret_leak: bool,
    finished_ts: float | None,
    has_upgrade_target: bool = True,
    lint: dict | None = None,
    screenshot: str | None = None,
    summary_card: str | None = None,
    expected_na: dict | None = None,
    customization: dict | None = None,
) -> dict:
    """Assemble the complete results.json dict (schema 2) without touching the filesystem.

    `finished_ts` is supplied by the caller rather than read from a clock, which keeps the
    output reproducible in unit tests. `expected_na` is the recipe's declared
    intentional-skip map, `has_upgrade_target` the "a previous published version exists"
    fact, and `lint` the lint executor's result dict. A missing/empty `lint` has no status,
    so the lint rung derives as "unver" — never a silent pass.

    Besides the pytest-derived stages, a synthetic "lint" stage with one row is appended so
    the stage table carries the lint rung as well. `ref` is truncated to 12 characters and
    `pr` is stringified; `results` and `customization` are embedded as passed in.
    """
    stages = collect_stages(records)
    lint_info = lint or {}
    rungs = derive_rungs(
        results,
        backup_capable=backup_capable,
        has_upgrade_target=has_upgrade_target,
        expected_na=expected_na,
        lint_status=lint_info.get("status"),
    )
    lint_rung = rungs["lint"]
    lint_detail = str(lint_info.get("detail") or "")

    # Lint has no pytest/JUnit tier, so give it a stage row by hand. derive_rungs never
    # yields "skip" for lint; the guard is purely defensive.
    if lint_rung != "skip":
        lint_row = {
            "name": "abra recipe lint",
            "classname": "lint",
            "source": "harness",
            "status": lint_rung,
            "ms": 0,
            "message": lint_detail,
        }
        stages.append({"name": "lint", "status": lint_rung, "tests": [lint_row]})

    level = level_mod.compute_level(rungs)
    lint_block = {
        "status": lint_rung,
        "detail": lint_detail,
        "rules_failed": list(lint_info.get("rules_failed") or []),
    }
    flags = {
        "clean_teardown": bool(clean_teardown),
        "no_secret_leak": bool(no_secret_leak),
    }
    return {
        "schema": 2,
        "run_id": run_id(),
        "recipe": recipe,
        "version": version,
        "pr": str(pr),
        "ref": (ref or "")[:12],
        "finished": finished_ts,
        "level": level,
        "rungs": rungs,
        "lint": lint_block,
        "skips": skips(rungs, expected_na),
        "stages": stages,
        "results": results,
        "flags": flags,
        "screenshot": screenshot,
        "summary_card": summary_card,
        # rcust P5: the run's resolved customization manifest (pure presentation — consumers
        # must never derive a verdict from it).
        "customization": customization,
    }
|
|
|
|
|
|
def write_results(data: dict, runs_dir_override: str | None = None) -> str:
    """Write `data` as results.json into the run's artifact directory and return its path.

    The directory is <root>/<data["run_id"]>, where <root> is `runs_dir_override` when
    given (truthy) and runs_dir() otherwise; it is created if missing. The JSON is written
    to "results.json.tmp" first and then moved over the final name with os.replace, so a
    reader never sees a half-written results.json.

    Raises whatever the write raises (KeyError for a missing "run_id", OSError, TypeError
    for non-serializable data, ...). On any such failure the temporary file is removed
    before the exception propagates, so no stale ".tmp" artifact is left behind.
    """
    rd = runs_dir_override or runs_dir()
    out_dir = os.path.join(rd, data["run_id"])
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, "results.json")
    tmp = path + ".tmp"
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, sort_keys=True)
        os.replace(tmp, path)
    except BaseException:
        # Best-effort cleanup of the partial temp file; the original error is what matters.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
    return path
|