refactor: simplify to a list of intentionally-skipped rungs
Some checks failed
continuous-integration/drone/push Build is failing

Per operator: drop the gap-sensitivity / cap-intent-clause / stale-detection
machinery. Model is now dead simple — recipe_meta.EXPECTED_NA = {rung: reason}
lists the rungs a recipe intentionally skips; ANY rung skipped (N/A) and not in
that list is unintentional.

results.json: replace the 'na' block + level_cap_intent with
  skips: { intentional: {rung: reason}, unintentional: [rung] }
plus level_cap_rung (which rung capped). Badge/card derive intentional-vs-
unintentional from whether the capping rung is in the intentional list. Skips
still cap the level (never inflate). custom-html-tiny lists all three rungs it
intentionally skips (backup_restore, integration, recipe_local).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-06-09 02:36:53 +00:00
parent d733e2c4ca
commit b3ab68a9dd
6 changed files with 105 additions and 146 deletions

View File

@ -79,27 +79,27 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
) )
# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive # Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not
# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional # in the recipe's intentional list — likely missing coverage) capped the climb; muted = an
# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere. # INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels
# (no emoji) so the SVG renders anywhere.
GAP_COLOR = "#d29922" GAP_COLOR = "#d29922"
EXPECT_COLOR = "#6e7681" EXPECT_COLOR = "#6e7681"
def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str: def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str:
"""Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment
that differentiates *why* the climb stopped when an N/A capped it: that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`):
- undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'. - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'.
- declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'. - "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'.
- clean cap / full climb / a real failure: no third segment (the level + card carry it). - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it).
Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only The badge never inflates — it only annotates the cap the level already reflects."""
annotates the cap the level already reflects."""
label, msg = "cc-ci", f"level {int(level)}" label, msg = "cc-ci", f"level {int(level)}"
lw, mw = _text_width(label), _text_width(msg) lw, mw = _text_width(label), _text_width(msg)
third: tuple[str, str] | None = None third: tuple[str, str] | None = None
if cap_intent.startswith("undeclared"): if cap_skip == "unintentional":
third = ("gap?", GAP_COLOR) third = ("gap?", GAP_COLOR)
elif cap_intent.startswith("intentional"): elif cap_skip == "intentional":
third = ("expected", EXPECT_COLOR) third = ("expected", EXPECT_COLOR)
if third is None: if third is None:
return render_badge_svg(label, msg, level_color(level)) return render_badge_svg(label, msg, level_color(level))
@ -151,8 +151,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
version = html.escape(str(data.get("version") or data.get("ref") or "")) version = html.escape(str(data.get("version") or data.get("ref") or ""))
level = int(data.get("level", 0)) level = int(data.get("level", 0))
cap_reason = str(data.get("level_cap_reason") or "") cap_reason = str(data.get("level_cap_reason") or "")
cap_intent = str(data.get("level_cap_intent") or "") # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its
cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else "")) # reason) or an unintentional one (skipped but not declared).
capped = data.get("level_cap_rung")
sk = data.get("skips", {}) or {}
if capped and capped in (sk.get("intentional") or {}):
cap_reason += f" · intentional: {sk['intentional'][capped]}"
elif capped and capped in (sk.get("unintentional") or []):
cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)"
cap = html.escape(cap_reason)
color = level_color(level) color = level_color(level)
flags = data.get("flags", {}) or {} flags = data.get("flags", {}) or {}
flag_bits = [] flag_bits = []

View File

@ -2,7 +2,14 @@
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan: Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}], { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } level, level_cap_reason, level_cap_rung, rungs,
skips:{intentional:{rung:reason}, unintentional:[rung]},
flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
never claims a rung it did not verify; this only labels *why* a skip happened.
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`), The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
@ -200,54 +207,23 @@ def derive_rungs(
return rungs return rungs
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real """Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model).
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
# *structurally* optional: an N/A there is the normal case and is not flagged.
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
A recipe lists the rungs it intentionally skips, each with a reason, in
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict: `recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request). **intentional** iff it is in that list; any rung that is skipped and NOT in the list is
**unintentional** (a coverage gap someone should either fill or declare). N/A still caps the
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps level either way — the harness never claims a rung it did not verify — this only labels *why* a
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified); skip happened. Returns:
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface" { "intentional": {rung: reason, ...}, # skipped AND declared in EXPECTED_NA
from "someone forgot to add the backup test". Returns: "unintentional": [rung, ...] } # skipped but NOT declared
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
""" """
expected = {str(k): str(v) for k, v in (expected_na or {}).items()} expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
na: dict[str, dict] = {} na = [r for r, st in rungs.items() if st == "na"]
for rung, st in rungs.items(): intentional = {r: expected[r] for r in na if r in expected}
if st != "na": unintentional = sorted(r for r in na if r not in expected)
continue return {"intentional": intentional, "unintentional": unintentional}
if rung in expected:
na[rung] = {"intent": "declared", "reason": expected[rung]}
else:
na[rung] = {"intent": "undeclared", "reason": ""}
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
"""A short clause explaining the level cap when the capping rung is N/A: the declared reason if
intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''."""
if not cap_reason:
return ""
capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None
if not capped or rungs.get(capped) != "na":
return ""
entry = na_info["rungs"].get(capped, {})
if entry.get("intent") == "declared":
return f"intentional · {entry['reason']}"
if capped in GAP_SENSITIVE_RUNGS:
return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
return ""
def build_results( def build_results(
@ -286,8 +262,9 @@ def build_results(
repo_local_passed=_repo_local_passed(records), repo_local_passed=_repo_local_passed(records),
) )
lvl, cap_reason = level_mod.compute_level(rungs) lvl, cap_reason = level_mod.compute_level(rungs)
na_info = classify_na(rungs, expected_na) # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
intent = cap_intent(rungs, lvl, cap_reason, na_info) # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
capped = level_mod.RUNGS[lvl] if cap_reason else None
return { return {
"schema": 1, "schema": 1,
"run_id": run_id(), "run_id": run_id(),
@ -298,9 +275,9 @@ def build_results(
"finished": finished_ts, "finished": finished_ts,
"level": lvl, "level": lvl,
"level_cap_reason": cap_reason, "level_cap_reason": cap_reason,
"level_cap_intent": intent, "level_cap_rung": capped,
"rungs": rungs, "rungs": rungs,
"na": na_info, "skips": skips(rungs, expected_na),
"stages": stages, "stages": stages,
"results": results, "results": results,
"flags": { "flags": {

View File

@ -1254,27 +1254,18 @@ def main() -> int:
file=sys.stderr, file=sys.stderr,
) )
path = results_mod.write_results(data) path = results_mod.write_results(data)
intent = data.get("level_cap_intent") or ""
print( print(
f"results.json written: {path} (level={data['level']}" f"results.json written: {path} (level={data['level']}"
f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''}" f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
f"{' [' + intent + ']' if intent else ''})",
flush=True, flush=True,
) )
# Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
# gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale # but is not in the recipe's intentional list — either add the missing coverage or declare it.
# EXPECTED_NA declares a tier N/A that actually ran. for rung in data.get("skips", {}).get("unintentional", []):
na = data.get("na", {})
for rung in na.get("gaps", []):
print( print(
f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or " f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.", f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
flush=True, f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
)
for rung in na.get("stale_declared", []):
print(
f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
flush=True, flush=True,
) )
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
@ -1295,12 +1286,17 @@ def main() -> int:
with open(html_path, "w", encoding="utf-8") as f: with open(html_path, "w", encoding="utf-8") as f:
f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot"))) f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png")) png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
capped = data.get("level_cap_rung")
sk = data.get("skips", {})
cap_skip = (
"intentional" if capped in (sk.get("intentional") or {})
else "unintentional" if capped in (sk.get("unintentional") or [])
else ""
)
with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f: with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
f.write( f.write(
card_mod.level_badge_svg( card_mod.level_badge_svg(
data["level"], data["level"], data.get("level_cap_reason", ""), cap_skip
data.get("level_cap_reason", ""),
data.get("level_cap_intent", ""),
) )
) )
print( print(

View File

@ -4,14 +4,14 @@
DEPLOY_TIMEOUT = 120 DEPLOY_TIMEOUT = 120
HTTP_TIMEOUT = 90 HTTP_TIMEOUT = 90
# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless # Rungs this recipe INTENTIONALLY skips, each with a reason. Any rung that is skipped (N/A) and is
# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time # NOT listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip
# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore. # still caps the level either way — the harness never claims a rung it did not verify; this only
# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A. # records that the skip is deliberate. custom-html-tiny is a stateless static-web-server, so:
# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of
# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never
# claims a rung it did not verify; this only explains *why* the cap is expected.)
EXPECTED_NA = { EXPECTED_NA = {
"backup_restore": "stateless static file server: serves an ephemeral content volume seeded at " "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
"deploy, with no persistent/user data to back up or restore (no backupbot.backup label)", "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
"integration": "no SSO/OIDC or cross-app surface — a static file server has no auth integration",
"recipe_local": "the upstream recipe ships no tests/ of its own; coverage is the cc-ci generic "
"install tier + the functional serve test",
} }

View File

@ -55,13 +55,13 @@ def test_badge_svg_wellformed():
assert "expected" not in svg and "gap?" not in svg assert "expected" not in svg and "gap?" not in svg
def test_badge_svg_differentiates_expected_vs_unexpected_skip(): def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
# declared intentional N/A capped the climb → muted "expected" third segment # an intentional (declared) skip capped the climb → muted "expected" third segment
exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data") exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp
assert "gap?" not in exp assert "gap?" not in exp
# undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip) # an unintentional skip (not declared) → amber "gap?" third segment
gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap") gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap
assert "expected" not in gap assert "expected" not in gap

View File

@ -257,7 +257,7 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
assert "L2" in data["level_cap_reason"] assert "L2" in data["level_cap_reason"]
# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ---- # ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
def _rungs(**kw): def _rungs(**kw):
@ -273,57 +273,32 @@ def _rungs(**kw):
return base return base
def test_classify_na_declared_vs_undeclared(): def test_skips_intentional_vs_unintentional():
rungs = _rungs(backup_restore="na", functional="na") rungs = _rungs(backup_restore="na", functional="na")
info = R.classify_na(rungs, {"backup_restore": "stateless static server"}) sk = R.skips(rungs, {"backup_restore": "stateless static server"})
# backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A. # backup_restore is declared (intentional, with reason); everything else skipped is unintentional.
assert info["rungs"]["backup_restore"] == { assert sk["intentional"] == {"backup_restore": "stateless static server"}
"intent": "declared", assert sk["unintentional"] == ["functional", "integration", "recipe_local"]
"reason": "stateless static server",
}
assert info["rungs"]["functional"]["intent"] == "undeclared"
assert info["gaps"] == ["functional"] # backup_restore declared → not a gap
assert info["stale_declared"] == []
# structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps.
assert info["rungs"]["integration"]["intent"] == "undeclared"
assert "integration" not in info["gaps"]
def test_classify_na_stale_declaration(): def test_skips_none_declared_all_unintentional():
# backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced. rungs = _rungs(backup_restore="na")
sk = R.skips(rungs, None)
assert sk["intentional"] == {}
assert sk["unintentional"] == ["backup_restore", "integration", "recipe_local"]
def test_skips_declaration_only_counts_when_actually_skipped():
# backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert.
rungs = _rungs(backup_restore="pass") rungs = _rungs(backup_restore="pass")
info = R.classify_na(rungs, {"backup_restore": "stale reason"}) sk = R.skips(rungs, {"backup_restore": "reason"})
assert info["stale_declared"] == ["backup_restore"] assert "backup_restore" not in sk["intentional"]
assert "backup_restore" not in info["rungs"] # not N/A, so not in the per-rung N/A map assert "backup_restore" not in sk["unintentional"]
def test_cap_intent_declared_explains_cap():
# install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause.
rungs = _rungs(backup_restore="na")
info = R.classify_na(rungs, {"backup_restore": "no persistent data"})
intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
assert intent == "intentional · no persistent data"
def test_cap_intent_undeclared_gap():
rungs = _rungs(backup_restore="na")
info = R.classify_na(rungs, None)
intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info)
assert "possible coverage gap" in intent
def test_cap_intent_blank_when_not_capped_on_na():
rungs = _rungs() # full clean climb, capped only at integration (na, structurally optional)
info = R.classify_na(rungs, None)
# capping rung is integration (level 4) — structurally optional, so no intent clause.
assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == ""
# and no cap at all → blank.
assert R.cap_intent(rungs, 6, "", info) == ""
def test_build_results_threads_expected_na(tmp_path): def test_build_results_threads_expected_na(tmp_path):
# Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
# backup surface (backup_restore declared intentionally N/A). # backup surface (backup_restore declared intentionally skipped).
recs = [ recs = [
{ {
"tier": "install", "tier": "install",
@ -347,23 +322,27 @@ def test_build_results_threads_expected_na(tmp_path):
ref=None, ref=None,
records=recs, records=recs,
results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass
backup_capable=False, # no backupbot label → backup_restore N/A backup_capable=False, # no backupbot label → backup_restore skipped (N/A)
declared=[], declared=[],
deps_ready=True, deps_ready=True,
sso_unverified=False, sso_unverified=False,
clean_teardown=True, clean_teardown=True,
no_secret_leak=True, no_secret_leak=True,
finished_ts=0.0, finished_ts=0.0,
expected_na={"backup_restore": "stateless static file server"}, expected_na={
"backup_restore": "stateless static file server",
"integration": "no SSO surface",
"recipe_local": "no upstream tests/",
},
) )
# backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it, # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
# the gap caps the climb — but the cap is now annotated intentional rather than flagged. # the skip caps the climb — but it's the declared (intentional) rung that capped.
assert data["level"] == 2 assert data["level"] == 2
assert "L3" in data["level_cap_reason"] assert "L3" in data["level_cap_reason"]
assert data["level_cap_intent"] == "intentional · stateless static file server" assert data["level_cap_rung"] == "backup_restore"
assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared"
assert data["rungs"]["functional"] == "pass" assert data["rungs"]["functional"] == "pass"
assert data["na"]["gaps"] == [] # functional now covered; backup_restore declared → no gaps assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
assert data["skips"]["unintentional"] == [] # every skip accounted for → fully clean
def test_write_results_roundtrip(tmp_path): def test_write_results_roundtrip(tmp_path):