refactor: simplify to a list of intentionally-skipped rungs
Some checks failed
continuous-integration/drone/push Build is failing
Per operator: drop the gap-sensitivity / cap-intent-clause / stale-detection
machinery. Model is now dead simple — recipe_meta.EXPECTED_NA = {rung: reason}
lists the rungs a recipe intentionally skips; ANY rung skipped (N/A) and not in
that list is unintentional.
results.json: replace the 'na' block + level_cap_intent with
skips: { intentional: {rung: reason}, unintentional: [rung] }
plus level_cap_rung (which rung capped). Badge/card derive
intentional-vs-unintentional from whether the capping rung is in the intentional list. Skips
still cap the level (never inflate). custom-html-tiny lists all three rungs it
intentionally skips (backup_restore, integration, recipe_local).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@ -2,7 +2,14 @@
|
||||
|
||||
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
|
||||
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
|
||||
level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
|
||||
level, level_cap_reason, level_cap_rung, rungs,
|
||||
skips:{intentional:{rung:reason}, unintentional:[rung]},
|
||||
flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
|
||||
|
||||
`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
|
||||
it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
|
||||
UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
|
||||
never claims a rung it did not verify; this only labels *why* a skip happened.
|
||||
|
||||
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
|
||||
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
|
||||
@ -200,54 +207,23 @@ def derive_rungs(
|
||||
return rungs
|
||||
|
||||
|
||||
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
|
||||
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
|
||||
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
|
||||
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
|
||||
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
|
||||
# *structurally* optional: an N/A there is the normal case and is not flagged.
|
||||
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
|
||||
def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||
"""Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model).
|
||||
|
||||
|
||||
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
|
||||
|
||||
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
|
||||
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
|
||||
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
|
||||
from "someone forgot to add the backup test". Returns:
|
||||
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
|
||||
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
|
||||
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
|
||||
A recipe lists the rungs it intentionally skips, each with a reason, in
|
||||
`recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is
|
||||
**intentional** iff it is in that list; any rung that is skipped and NOT in the list is
|
||||
**unintentional** (a coverage gap someone should either fill or declare). N/A still caps the
|
||||
level either way — the harness never claims a rung it did not verify — this only labels *why* a
|
||||
skip happened. Returns:
|
||||
{ "intentional": {rung: reason, ...}, # skipped AND declared in EXPECTED_NA
|
||||
"unintentional": [rung, ...] } # skipped but NOT declared
|
||||
"""
|
||||
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
|
||||
na: dict[str, dict] = {}
|
||||
for rung, st in rungs.items():
|
||||
if st != "na":
|
||||
continue
|
||||
if rung in expected:
|
||||
na[rung] = {"intent": "declared", "reason": expected[rung]}
|
||||
else:
|
||||
na[rung] = {"intent": "undeclared", "reason": ""}
|
||||
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
|
||||
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
|
||||
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
|
||||
|
||||
|
||||
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
|
||||
"""A short clause explaining the level cap when the capping rung is N/A: the declared reason if
|
||||
intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''."""
|
||||
if not cap_reason:
|
||||
return ""
|
||||
capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None
|
||||
if not capped or rungs.get(capped) != "na":
|
||||
return ""
|
||||
entry = na_info["rungs"].get(capped, {})
|
||||
if entry.get("intent") == "declared":
|
||||
return f"intentional · {entry['reason']}"
|
||||
if capped in GAP_SENSITIVE_RUNGS:
|
||||
return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
|
||||
return ""
|
||||
na = [r for r, st in rungs.items() if st == "na"]
|
||||
intentional = {r: expected[r] for r in na if r in expected}
|
||||
unintentional = sorted(r for r in na if r not in expected)
|
||||
return {"intentional": intentional, "unintentional": unintentional}
|
||||
|
||||
|
||||
def build_results(
|
||||
@ -286,8 +262,9 @@ def build_results(
|
||||
repo_local_passed=_repo_local_passed(records),
|
||||
)
|
||||
lvl, cap_reason = level_mod.compute_level(rungs)
|
||||
na_info = classify_na(rungs, expected_na)
|
||||
intent = cap_intent(rungs, lvl, cap_reason, na_info)
|
||||
# The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
|
||||
# (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
|
||||
capped = level_mod.RUNGS[lvl] if cap_reason else None
|
||||
return {
|
||||
"schema": 1,
|
||||
"run_id": run_id(),
|
||||
@ -298,9 +275,9 @@ def build_results(
|
||||
"finished": finished_ts,
|
||||
"level": lvl,
|
||||
"level_cap_reason": cap_reason,
|
||||
"level_cap_intent": intent,
|
||||
"level_cap_rung": capped,
|
||||
"rungs": rungs,
|
||||
"na": na_info,
|
||||
"skips": skips(rungs, expected_na),
|
||||
"stages": stages,
|
||||
"results": results,
|
||||
"flags": {
|
||||
|
||||
Reference in New Issue
Block a user