From b3ab68a9dd43d46ea47e8071dd2b8d0a6e29db69 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 02:36:53 +0000 Subject: [PATCH] refactor: simplify to a list of intentionally-skipped rungs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per operator: drop the gap-sensitivity / cap-intent-clause / stale-detection machinery. Model is now dead simple — recipe_meta.EXPECTED_NA = {rung: reason} lists the rungs a recipe intentionally skips; ANY rung skipped (N/A) and not in that list is unintentional. results.json: replace the 'na' block + level_cap_intent with skips: { intentional: {rung: reason}, unintentional: [rung] } plus level_cap_rung (which rung capped). Badge/card derive intentional-vs- unintentional from whether the capping rung is in the intentional list. Skips still cap the level (never inflate). custom-html-tiny lists all three rungs it intentionally skips (backup_restore, integration, recipe_local). Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 35 +++++++----- runner/harness/results.py | 77 +++++++++---------------- runner/run_recipe_ci.py | 34 +++++------ tests/custom-html-tiny/recipe_meta.py | 14 ++--- tests/unit/test_card.py | 10 ++-- tests/unit/test_results.py | 81 ++++++++++----------------- 6 files changed, 105 insertions(+), 146 deletions(-) diff --git a/runner/harness/card.py b/runner/harness/card.py index 1cee8a6..e57d517 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -79,27 +79,27 @@ def render_badge_svg(label: str, message: str, color: str) -> str: ) -# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive -# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional -# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere. +# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not +# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an +# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels +# (no emoji) so the SVG renders anywhere. GAP_COLOR = "#d29922" EXPECT_COLOR = "#6e7681" -def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str: +def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str: """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment - that differentiates *why* the climb stopped when an N/A capped it: - - undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'. - - declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'. - - clean cap / full climb / a real failure: no third segment (the level + card carry it). - Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only - annotates the cap the level already reflects.""" + that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`): + - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'. + - "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'. + - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it). + The badge never inflates — it only annotates the cap the level already reflects.""" label, msg = "cc-ci", f"level {int(level)}" lw, mw = _text_width(label), _text_width(msg) third: tuple[str, str] | None = None - if cap_intent.startswith("undeclared"): + if cap_skip == "unintentional": third = ("gap?", GAP_COLOR) - elif cap_intent.startswith("intentional"): + elif cap_skip == "intentional": third = ("expected", EXPECT_COLOR) if third is None: return render_badge_svg(label, msg, level_color(level)) @@ -151,8 +151,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) cap_reason = str(data.get("level_cap_reason") or "") - cap_intent = str(data.get("level_cap_intent") or "") - cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else "")) + # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its + # reason) or an unintentional one (skipped but not declared). + capped = data.get("level_cap_rung") + sk = data.get("skips", {}) or {} + if capped and capped in (sk.get("intentional") or {}): + cap_reason += f" · intentional: {sk['intentional'][capped]}" + elif capped and capped in (sk.get("unintentional") or []): + cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)" + cap = html.escape(cap_reason) color = level_color(level) flags = data.get("flags", {}) or {} flag_bits = [] diff --git a/runner/harness/results.py b/runner/harness/results.py index 3e6fed7..ec7ca7b 100644 --- a/runner/harness/results.py +++ b/runner/harness/results.py @@ -2,7 +2,14 @@ Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan: { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}], - level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } + level, level_cap_reason, level_cap_rung, rungs, + skips:{intentional:{rung:reason}, unintentional:[rung]}, + flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } + +`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists +it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is +UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness +never claims a rung it did not verify; this only labels *why* a skip happened. The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`), parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a @@ -200,54 +207,23 @@ def derive_rungs( return rungs -# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage -# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real -# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless -# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one -# published version; integration — no SSO surface; recipe_local — no repo-local tests) are -# *structurally* optional: an N/A there is the normal case and is not flagged. -GAP_SENSITIVE_RUNGS = ("backup_restore", "functional") +def skips(rungs: dict[str, str], expected_na: dict | None) -> dict: + """Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model). - -def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict: - """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request). - - A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps - the level either way (the harness never inflates — a rung that wasn't verified wasn't verified); - this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface" - from "someone forgot to add the backup test". Returns: - { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung - "gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared - "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out) + A recipe lists the rungs it intentionally skips, each with a reason, in + `recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is + **intentional** iff it is in that list; any rung that is skipped and NOT in the list is + **unintentional** (a coverage gap someone should either fill or declare). N/A still caps the + level either way — the harness never claims a rung it did not verify — this only labels *why* a + skip happened. Returns: + { "intentional": {rung: reason, ...}, # skipped AND declared in EXPECTED_NA + "unintentional": [rung, ...] } # skipped but NOT declared """ expected = {str(k): str(v) for k, v in (expected_na or {}).items()} - na: dict[str, dict] = {} - for rung, st in rungs.items(): - if st != "na": - continue - if rung in expected: - na[rung] = {"intent": "declared", "reason": expected[rung]} - else: - na[rung] = {"intent": "undeclared", "reason": ""} - gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"] - stale = sorted(r for r in expected if rungs.get(r) not in (None, "na")) - return {"rungs": na, "gaps": gaps, "stale_declared": stale} - - -def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str: - """A short clause explaining the level cap when the capping rung is N/A: the declared reason if - intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''.""" - if not cap_reason: - return "" - capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None - if not capped or rungs.get(capped) != "na": - return "" - entry = na_info["rungs"].get(capped, {}) - if entry.get("intent") == "declared": - return f"intentional · {entry['reason']}" - if capped in GAP_SENSITIVE_RUNGS: - return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)" - return "" + na = [r for r, st in rungs.items() if st == "na"] + intentional = {r: expected[r] for r in na if r in expected} + unintentional = sorted(r for r in na if r not in expected) + return {"intentional": intentional, "unintentional": unintentional} def build_results( @@ -286,8 +262,9 @@ def build_results( repo_local_passed=_repo_local_passed(records), ) lvl, cap_reason = level_mod.compute_level(rungs) - na_info = classify_na(rungs, expected_na) - intent = cap_intent(rungs, lvl, cap_reason, na_info) + # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer + # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure. + capped = level_mod.RUNGS[lvl] if cap_reason else None return { "schema": 1, "run_id": run_id(), @@ -298,9 +275,9 @@ def build_results( "finished": finished_ts, "level": lvl, "level_cap_reason": cap_reason, - "level_cap_intent": intent, + "level_cap_rung": capped, "rungs": rungs, - "na": na_info, + "skips": skips(rungs, expected_na), "stages": stages, "results": results, "flags": { diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index b10966a..d187cae 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -1254,27 +1254,18 @@ def main() -> int: file=sys.stderr, ) path = results_mod.write_results(data) - intent = data.get("level_cap_intent") or "" print( f"results.json written: {path} (level={data['level']}" - f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}" - f"{' [' + intent + ']' if intent else ''})", + f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})", flush=True, ) - # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a - # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale - # EXPECTED_NA declares a tier N/A that actually ran. - na = data.get("na", {}) - for rung in na.get("gaps", []): + # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A) + # but is not in the recipe's intentional list — either add the missing coverage or declare it. + for rung in data.get("skips", {}).get("unintentional", []): print( - f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or " - f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': ''}}.", - flush=True, - ) - for rung in na.get("stale_declared", []): - print( - f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised " - f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.", + f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add " + f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py " + f"EXPECTED_NA = {{'{rung}': ''}}.", flush=True, ) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) @@ -1295,12 +1286,17 @@ def main() -> int: with open(html_path, "w", encoding="utf-8") as f: f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot"))) png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png")) + capped = data.get("level_cap_rung") + sk = data.get("skips", {}) + cap_skip = ( + "intentional" if capped in (sk.get("intentional") or {}) + else "unintentional" if capped in (sk.get("unintentional") or []) + else "" + ) with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f: f.write( card_mod.level_badge_svg( - data["level"], - data.get("level_cap_reason", ""), - data.get("level_cap_intent", ""), + data["level"], data.get("level_cap_reason", ""), cap_skip ) ) print( diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py index 25aac26..509317e 100644 --- a/tests/custom-html-tiny/recipe_meta.py +++ b/tests/custom-html-tiny/recipe_meta.py @@ -4,14 +4,14 @@ DEPLOY_TIMEOUT = 120 HTTP_TIMEOUT = 90 -# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless -# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time -# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore. -# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A. -# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of -# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never -# claims a rung it did not verify; this only explains *why* the cap is expected.) +# Rungs this recipe INTENTIONALLY skips, each with a reason. Any rung that is skipped (N/A) and is +# NOT listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip +# still caps the level either way — the harness never claims a rung it did not verify; this only +# records that the skip is deliberate. custom-html-tiny is a stateless static-web-server, so: EXPECTED_NA = { "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at " "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)", + "integration": "no SSO/OIDC or cross-app surface — a static file server has no auth integration", + "recipe_local": "the upstream recipe ships no tests/ of its own; coverage is the cc-ci generic " + "install tier + the functional serve test", } diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py index 1abce04..7862038 100644 --- a/tests/unit/test_card.py +++ b/tests/unit/test_card.py @@ -55,13 +55,13 @@ def test_badge_svg_wellformed(): assert "expected" not in svg and "gap?" not in svg -def test_badge_svg_differentiates_expected_vs_unexpected_skip(): - # declared intentional N/A capped the climb → muted "expected" third segment - exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data") +def test_badge_svg_differentiates_intentional_vs_unintentional_skip(): + # an intentional (declared) skip capped the climb → muted "expected" third segment + exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional") assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp assert "gap?" not in exp - # undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip) - gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap") + # an unintentional skip (not declared) → amber "gap?" third segment + gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional") assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap assert "expected" not in gap diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 9da1aff..f034911 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -257,7 +257,7 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path): assert "L2" in data["level_cap_reason"] -# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ---- +# ---- skips: intentional (declared) vs unintentional (everything else skipped) ---- def _rungs(**kw): @@ -273,57 +273,32 @@ def _rungs(**kw): return base -def test_classify_na_declared_vs_undeclared(): +def test_skips_intentional_vs_unintentional(): rungs = _rungs(backup_restore="na", functional="na") - info = R.classify_na(rungs, {"backup_restore": "stateless static server"}) - # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A. - assert info["rungs"]["backup_restore"] == { - "intent": "declared", - "reason": "stateless static server", - } - assert info["rungs"]["functional"]["intent"] == "undeclared" - assert info["gaps"] == ["functional"] # backup_restore declared → not a gap - assert info["stale_declared"] == [] - # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps. - assert info["rungs"]["integration"]["intent"] == "undeclared" - assert "integration" not in info["gaps"] + sk = R.skips(rungs, {"backup_restore": "stateless static server"}) + # backup_restore is declared (intentional, with reason); everything else skipped is unintentional. + assert sk["intentional"] == {"backup_restore": "stateless static server"} + assert sk["unintentional"] == ["functional", "integration", "recipe_local"] -def test_classify_na_stale_declaration(): - # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced. +def test_skips_none_declared_all_unintentional(): + rungs = _rungs(backup_restore="na") + sk = R.skips(rungs, None) + assert sk["intentional"] == {} + assert sk["unintentional"] == ["backup_restore", "integration", "recipe_local"] + + +def test_skips_declaration_only_counts_when_actually_skipped(): + # backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert. rungs = _rungs(backup_restore="pass") - info = R.classify_na(rungs, {"backup_restore": "stale reason"}) - assert info["stale_declared"] == ["backup_restore"] - assert "backup_restore" not in info["rungs"] # not N/A, so not in the per-rung N/A map - - -def test_cap_intent_declared_explains_cap(): - # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause. - rungs = _rungs(backup_restore="na") - info = R.classify_na(rungs, {"backup_restore": "no persistent data"}) - intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) - assert intent == "intentional · no persistent data" - - -def test_cap_intent_undeclared_gap(): - rungs = _rungs(backup_restore="na") - info = R.classify_na(rungs, None) - intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) - assert "possible coverage gap" in intent - - -def test_cap_intent_blank_when_not_capped_on_na(): - rungs = _rungs() # full clean climb, capped only at integration (na, structurally optional) - info = R.classify_na(rungs, None) - # capping rung is integration (level 4) — structurally optional, so no intent clause. - assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == "" - # and no cap at all → blank. - assert R.cap_intent(rungs, 6, "", info) == "" + sk = R.skips(rungs, {"backup_restore": "reason"}) + assert "backup_restore" not in sk["intentional"] + assert "backup_restore" not in sk["unintentional"] def test_build_results_threads_expected_na(tmp_path): # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no - # backup surface (backup_restore declared intentionally N/A). + # backup surface (backup_restore declared intentionally skipped). recs = [ { "tier": "install", @@ -347,23 +322,27 @@ def test_build_results_threads_expected_na(tmp_path): ref=None, records=recs, results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass - backup_capable=False, # no backupbot label → backup_restore N/A + backup_capable=False, # no backupbot label → backup_restore skipped (N/A) declared=[], deps_ready=True, sso_unverified=False, clean_teardown=True, no_secret_leak=True, finished_ts=0.0, - expected_na={"backup_restore": "stateless static file server"}, + expected_na={ + "backup_restore": "stateless static file server", + "integration": "no SSO surface", + "recipe_local": "no upstream tests/", + }, ) - # backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it, - # the gap caps the climb — but the cap is now annotated intentional rather than flagged. + # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it, + # the skip caps the climb — but it's the declared (intentional) rung that capped. assert data["level"] == 2 assert "L3" in data["level_cap_reason"] - assert data["level_cap_intent"] == "intentional · stateless static file server" - assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared" + assert data["level_cap_rung"] == "backup_restore" assert data["rungs"]["functional"] == "pass" - assert data["na"]["gaps"] == [] # functional now covered; backup_restore declared → no gaps + assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server" + assert data["skips"]["unintentional"] == [] # every skip accounted for → fully clean def test_write_results_roundtrip(tmp_path):