From 3b0a3d14ea0afd23133d9e7bc94d2773de2f7ff5 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 01:59:28 +0000 Subject: [PATCH 1/7] feat(harness): declare intentional N/A tiers + custom-html-tiny functional test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes the operator asked for after noticing custom-html-tiny PR #6 has no backup/restore or functional coverage: 1) Intentional-vs-accidental N/A. A recipe can now declare recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not applicable (e.g. a stateless static server has no backup surface). N/A still caps the level — the harness never claims a rung it did not verify — but the run is now annotated 'intentional · ' instead of being indistinguishable from a forgotten test. An *undeclared* N/A on a gap-sensitive rung (backup_restore, functional) is surfaced as a 'possible coverage gap', and a stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All non-blocking (R7): results.json gains level_cap_intent + an block, the summary card shows the clause, and the CI log prints the gap/stale warnings. (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.) custom-html-tiny declares backup_restore intentionally N/A. 2) custom-html-tiny functional test: writes a random file into the served content volume (via the volume mountpoint, like install_steps.sh, since the SWS image is shell-less), asserts exact-byte round-trip + a real 404 on a missing path — proving the static-web-server actually serves the volume, not a 200-everything fallback. Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 4 +- runner/harness/results.py | 59 ++++++++++- runner/run_recipe_ci.py | 22 ++++- .../functional/test_serves_content.py | 87 ++++++++++++++++ tests/custom-html-tiny/recipe_meta.py | 12 +++ tests/unit/test_results.py | 98 +++++++++++++++++++ 6 files changed, 279 insertions(+), 3 deletions(-) create mode 100644 tests/custom-html-tiny/functional/test_serves_content.py diff --git a/runner/harness/card.py b/runner/harness/card.py index 44cf4ae..36b717f 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") recipe = html.escape(str(data.get("recipe", "?"))) version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) - cap = html.escape(str(data.get("level_cap_reason") or "")) + cap_reason = str(data.get("level_cap_reason") or "") + cap_intent = str(data.get("level_cap_intent") or "") + cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else "")) color = level_color(level) flags = data.get("flags", {}) or {} flag_bits = [] diff --git a/runner/harness/results.py b/runner/harness/results.py index c3fae07..3e6fed7 100644 --- a/runner/harness/results.py +++ b/runner/harness/results.py @@ -200,6 +200,56 @@ def derive_rungs( return rungs +# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage +# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real +# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless +# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one +# published version; integration — no SSO surface; recipe_local — no repo-local tests) are +# *structurally* optional: an N/A there is the normal case and is not flagged. +GAP_SENSITIVE_RUNGS = ("backup_restore", "functional") + + +def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict: + """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request). + + A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps + the level either way (the harness never inflates — a rung that wasn't verified wasn't verified); + this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface" + from "someone forgot to add the backup test". Returns: + { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung + "gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared + "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out) + """ + expected = {str(k): str(v) for k, v in (expected_na or {}).items()} + na: dict[str, dict] = {} + for rung, st in rungs.items(): + if st != "na": + continue + if rung in expected: + na[rung] = {"intent": "declared", "reason": expected[rung]} + else: + na[rung] = {"intent": "undeclared", "reason": ""} + gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"] + stale = sorted(r for r in expected if rungs.get(r) not in (None, "na")) + return {"rungs": na, "gaps": gaps, "stale_declared": stale} + + +def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str: + """A short clause explaining the level cap when the capping rung is N/A: the declared reason if + intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''.""" + if not cap_reason: + return "" + capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None + if not capped or rungs.get(capped) != "na": + return "" + entry = na_info["rungs"].get(capped, {}) + if entry.get("intent") == "declared": + return f"intentional · {entry['reason']}" + if capped in GAP_SENSITIVE_RUNGS: + return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)" + return "" + + def build_results( *, recipe: str, @@ -217,9 +267,12 @@ def build_results( finished_ts: float | None, screenshot: str | None = None, summary_card: str | None = None, + expected_na: dict | None = None, ) -> dict: """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator - stamps it) so this stays pure and deterministic for unit tests.""" + stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's + declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from + accidentally-missing coverage.""" stages = collect_stages(records) has_custom = any(r["tier"] == "custom" for r in records) rungs = derive_rungs( @@ -233,6 +286,8 @@ def build_results( repo_local_passed=_repo_local_passed(records), ) lvl, cap_reason = level_mod.compute_level(rungs) + na_info = classify_na(rungs, expected_na) + intent = cap_intent(rungs, lvl, cap_reason, na_info) return { "schema": 1, "run_id": run_id(), @@ -243,7 +298,9 @@ def build_results( "finished": finished_ts, "level": lvl, "level_cap_reason": cap_reason, + "level_cap_intent": intent, "rungs": rungs, + "na": na_info, "stages": stages, "results": results, "flags": { diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index 11fadd0..becc9c4 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict: for k in list(meta) + [ "BACKUP_CAPABLE", "SKIP_GENERIC", + "EXPECTED_NA", "OIDC_AT_INSTALL", "READY_PROBE", "UPGRADE_BASE_VERSION", @@ -1241,6 +1242,7 @@ def main() -> int: no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded finished_ts=time.time(), + expected_na=meta.get("EXPECTED_NA"), # declared intentional-N/A map (recipe_meta) ) # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7). blob = json.dumps(data) @@ -1252,11 +1254,29 @@ def main() -> int: file=sys.stderr, ) path = results_mod.write_results(data) + intent = data.get("level_cap_intent") or "" print( f"results.json written: {path} (level={data['level']}" - f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})", + f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}" + f"{' [' + intent + ']' if intent else ''})", flush=True, ) + # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a + # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale + # EXPECTED_NA declares a tier N/A that actually ran. + na = data.get("na", {}) + for rung in na.get("gaps", []): + print( + f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or " + f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': ''}}.", + flush=True, + ) + for rung in na.get("stale_declared", []): + print( + f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised " + f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.", + flush=True, + ) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) print( f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}", diff --git a/tests/custom-html-tiny/functional/test_serves_content.py b/tests/custom-html-tiny/functional/test_serves_content.py new file mode 100644 index 0000000..cb30917 --- /dev/null +++ b/tests/custom-html-tiny/functional/test_serves_content.py @@ -0,0 +1,87 @@ +"""custom-html-tiny — recipe-specific functional test (static-web-server). + +Proves the deployed static-web-server is *actually serving files from its `content` volume* with real +file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub: + + 1. exact-byte round-trip — write a uniquely-named file with random content into the served volume, + fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random + per run, so only a server that reads this file off the volume can pass. + 2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a + 200-everything stub or mis-routed host would not 404). + +The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume +is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the +same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather +than `docker exec`-ing in a container that has no shell. + +Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its +per-run domain). Mirrors install_steps.sh: the app's content volume is named `_content`, where +`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is +served at `/`. +""" + +from __future__ import annotations + +import contextlib +import os +import ssl +import subprocess +import urllib.error +import urllib.request +import uuid + + +def _served_dir(domain: str) -> str: + """Host mountpoint of the app's served `content` volume (same naming as install_steps.sh).""" + vol = f"{domain.replace('.', '_')}_content" + out = subprocess.run( + ["docker", "volume", "inspect", vol, "--format", "{{.Mountpoint}}"], + capture_output=True, + text=True, + check=True, + ) + mountpoint = out.stdout.strip() + assert mountpoint, f"could not resolve mountpoint for volume {vol!r}" + return mountpoint + + +def _get(url: str) -> tuple[int, bytes]: + """GET the URL; return (status, body). A 4xx/5xx is returned, not raised (we assert on the code). + TLS verification is relaxed: the served wildcard cert is validated separately by the infra check; + here we care only about the app's response.""" + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + try: + with urllib.request.urlopen(url, timeout=20, context=ctx) as resp: + return resp.status, resp.read() + except urllib.error.HTTPError as e: + return e.code, e.read() + + +def test_static_file_roundtrip_and_404(live_app): + """Write a random file into the served volume → fetch it → bytes match; and a missing path 404s.""" + served = _served_dir(live_app) + token = uuid.uuid4().hex + name = f"ccci-probe-{token}.txt" + body = f"cc-ci-functional-{token}\n".encode() + path = os.path.join(served, name) + with open(path, "wb") as fh: + fh.write(body) + try: + status, got = _get(f"https://{live_app}/{name}") + assert status == 200, f"served probe file returned {status} (expected 200)" + assert got == body, ( + f"content round-trip mismatch: served {got!r}, wrote {body!r} " + "(static-web-server not serving the content volume?)" + ) + + # A random non-existent path must 404 — proves real static-file semantics, distinguishing a + # working server from a 200-everything stub or a mis-routed Traefik fallback. + miss_status, _ = _get(f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt") + assert miss_status == 404, ( + f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)" + ) + finally: + with contextlib.suppress(OSError): + os.remove(path) diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py index 44603a9..25aac26 100644 --- a/tests/custom-html-tiny/recipe_meta.py +++ b/tests/custom-html-tiny/recipe_meta.py @@ -3,3 +3,15 @@ # (DG5) is detected quickly instead of waiting the default 300s HTTP timeout. DEPLOY_TIMEOUT = 120 HTTP_TIMEOUT = 90 + +# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless +# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time +# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore. +# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A. +# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of +# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never +# claims a rung it did not verify; this only explains *why* the cap is expected.) +EXPECTED_NA = { + "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at " + "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)", +} diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index d8bdd51..b52cd82 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -257,6 +257,104 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path): assert "L2" in data["level_cap_reason"] +# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ---- + + +def _rungs(**kw): + base = { + "install": "pass", + "upgrade": "pass", + "backup_restore": "pass", + "functional": "pass", + "integration": "na", + "recipe_local": "na", + } + base.update(kw) + return base + + +def test_classify_na_declared_vs_undeclared(): + rungs = _rungs(backup_restore="na", functional="na") + info = R.classify_na(rungs, {"backup_restore": "stateless static server"}) + # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A. + assert info["rungs"]["backup_restore"] == { + "intent": "declared", + "reason": "stateless static server", + } + assert info["rungs"]["functional"]["intent"] == "undeclared" + assert info["gaps"] == ["functional"] # backup_restore declared → not a gap + assert info["stale_declared"] == [] + # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps. + assert info["rungs"]["integration"]["intent"] == "undeclared" + assert "integration" not in info["gaps"] + + +def test_classify_na_stale_declaration(): + # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced. + rungs = _rungs(backup_restore="pass") + info = R.classify_na(rungs, {"backup_restore": "stale reason"}) + assert info["stale_declared"] == ["backup_restore"] + assert "backup_restore" not in info["rungs"] # not N/A, so not in the per-rung N/A map + + +def test_cap_intent_declared_explains_cap(): + # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause. + rungs = _rungs(backup_restore="na") + info = R.classify_na(rungs, {"backup_restore": "no persistent data"}) + intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) + assert intent == "intentional · no persistent data" + + +def test_cap_intent_undeclared_gap(): + rungs = _rungs(backup_restore="na") + info = R.classify_na(rungs, None) + intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) + assert "possible coverage gap" in intent + + +def test_cap_intent_blank_when_not_capped_on_na(): + rungs = _rungs() # full clean climb, capped only at integration (na, structurally optional) + info = R.classify_na(rungs, None) + # capping rung is integration (level 4) — structurally optional, so no intent clause. + assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == "" + # and no cap at all → blank. + assert R.cap_intent(rungs, 6, "", info) == "" + + +def test_build_results_threads_expected_na(tmp_path): + recs = [ + { + "tier": "install", + "source": "generic", + "file": "g/test_install.py", + "rc": 0, + "junit": _write(tmp_path, "i.xml", JUNIT_PASS), + } + ] + data = R.build_results( + recipe="custom-html-tiny", + version="1.1.0", + pr="0", + ref=None, + records=recs, + results=_results(backup="skip", restore="skip", custom="skip"), + backup_capable=False, # no backupbot label → backup_restore N/A + declared=[], + deps_ready=True, + sso_unverified=False, + clean_teardown=True, + no_secret_leak=True, + finished_ts=0.0, + expected_na={"backup_restore": "stateless static file server"}, + ) + # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged. + assert data["level"] == 2 + assert "L3" in data["level_cap_reason"] + assert data["level_cap_intent"] == "intentional · stateless static file server" + assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared" + assert data["na"]["gaps"] == [] + + def test_write_results_roundtrip(tmp_path): data = {"run_id": "42", "level": 3, "stages": []} path = R.write_results(data, runs_dir_override=str(tmp_path)) -- 2.49.0 From f3a1ad5388c89cfb251ed7545650658661e0aed7 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 02:00:16 +0000 Subject: [PATCH 2/7] test: representative expected_na scenario (functional covered, backup declared-N/A) Co-Authored-By: Claude Opus 4.8 --- tests/unit/test_results.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index b52cd82..9da1aff 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -322,6 +322,8 @@ def test_cap_intent_blank_when_not_capped_on_na(): def test_build_results_threads_expected_na(tmp_path): + # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no + # backup surface (backup_restore declared intentionally N/A). recs = [ { "tier": "install", @@ -329,7 +331,14 @@ def test_build_results_threads_expected_na(tmp_path): "file": "g/test_install.py", "rc": 0, "junit": _write(tmp_path, "i.xml", JUNIT_PASS), - } + }, + { + "tier": "custom", + "source": "cc-ci", + "file": "c/test_serves_content.py", + "rc": 0, + "junit": _write(tmp_path, "c.xml", JUNIT_PASS), + }, ] data = R.build_results( recipe="custom-html-tiny", @@ -337,7 +346,7 @@ def test_build_results_threads_expected_na(tmp_path): pr="0", ref=None, records=recs, - results=_results(backup="skip", restore="skip", custom="skip"), + results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass backup_capable=False, # no backupbot label → backup_restore N/A declared=[], deps_ready=True, @@ -347,12 +356,14 @@ def test_build_results_threads_expected_na(tmp_path): finished_ts=0.0, expected_na={"backup_restore": "stateless static file server"}, ) - # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged. + # backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it, + # the gap caps the climb — but the cap is now annotated intentional rather than flagged. assert data["level"] == 2 assert "L3" in data["level_cap_reason"] assert data["level_cap_intent"] == "intentional · stateless static file server" assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared" - assert data["na"]["gaps"] == [] + assert data["rungs"]["functional"] == "pass" + assert data["na"]["gaps"] == [] # functional now covered; backup_restore declared → no gaps def test_write_results_roundtrip(tmp_path): -- 2.49.0 From d733e2c4ca3210fb09d458d0aaff8e5c89d2cfdc Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 02:26:44 +0000 Subject: [PATCH 3/7] feat(card): badge differentiates expected vs unexpected skip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The level badge gains a third segment derived from level_cap_intent: - amber 'gap?' when the climb was capped by an UNDECLARED gap-sensitive N/A (backup_restore / functional) — a likely-missing test (unexpected skip) - muted 'expected' when capped by a DECLARED intentional N/A (reviewed, nothing to fix) - nothing extra for a clean cap, a full climb, or a real failure. Font-safe text labels (no emoji) so the SVG renders headless/anywhere. Badge never inflates — it only annotates the cap the level already reflects. Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 42 +++++++++++++++++++++++++++++++++++++---- runner/run_recipe_ci.py | 8 +++++++- tests/unit/test_card.py | 13 +++++++++++++ 3 files changed, 58 insertions(+), 5 deletions(-) diff --git a/runner/harness/card.py b/runner/harness/card.py index 36b717f..1cee8a6 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -79,10 +79,44 @@ def render_badge_svg(label: str, message: str, color: str) -> str: ) -def level_badge_svg(level: int, cap_reason: str = "") -> str: - """Per-recipe/-run LEVEL badge: 'cc-ci | level N'. Colour by level (R6).""" - msg = f"level {int(level)}" - return render_badge_svg("cc-ci", msg, level_color(level)) +# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive +# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional +# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere. +GAP_COLOR = "#d29922" +EXPECT_COLOR = "#6e7681" + + +def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str: + """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment + that differentiates *why* the climb stopped when an N/A capped it: + - undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'. + - declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'. + - clean cap / full climb / a real failure: no third segment (the level + card carry it). + Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only + annotates the cap the level already reflects.""" + label, msg = "cc-ci", f"level {int(level)}" + lw, mw = _text_width(label), _text_width(msg) + third: tuple[str, str] | None = None + if cap_intent.startswith("undeclared"): + third = ("gap?", GAP_COLOR) + elif cap_intent.startswith("intentional"): + third = ("expected", EXPECT_COLOR) + if third is None: + return render_badge_svg(label, msg, level_color(level)) + txt, tcolor = third + tw = _text_width(txt) + w = lw + mw + tw + return ( + f'' + f'' + f'' + f'' + f'' + f'{html.escape(label)}' + f'{html.escape(msg)}' + f'{html.escape(txt)}' + ) def _stage_rows(stages: list[dict]) -> str: diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index becc9c4..b10966a 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -1296,7 +1296,13 @@ def main() -> int: f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot"))) png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png")) with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f: - f.write(card_mod.level_badge_svg(data["level"], data.get("level_cap_reason", ""))) + f.write( + card_mod.level_badge_svg( + data["level"], + data.get("level_cap_reason", ""), + data.get("level_cap_intent", ""), + ) + ) print( f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + " f"badge.svg written into {run_artifact_dir}", diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py index cdafc02..1abce04 100644 --- a/tests/unit/test_card.py +++ b/tests/unit/test_card.py @@ -51,6 +51,19 @@ def test_badge_svg_wellformed(): assert svg.startswith("") assert "level 4" in svg assert C.level_color(4) in svg + # plain cap (no intent) → two-box badge, no third segment + assert "expected" not in svg and "gap?" not in svg + + +def test_badge_svg_differentiates_expected_vs_unexpected_skip(): + # declared intentional N/A capped the climb → muted "expected" third segment + exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data") + assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp + assert "gap?" not in exp + # undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip) + gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap") + assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap + assert "expected" not in gap def test_card_html_reports_level_verbatim(): -- 2.49.0 From b3ab68a9dd43d46ea47e8071dd2b8d0a6e29db69 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 02:36:53 +0000 Subject: [PATCH 4/7] refactor: simplify to a list of intentionally-skipped rungs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per operator: drop the gap-sensitivity / cap-intent-clause / stale-detection machinery. Model is now dead simple — recipe_meta.EXPECTED_NA = {rung: reason} lists the rungs a recipe intentionally skips; ANY rung skipped (N/A) and not in that list is unintentional. results.json: replace the 'na' block + level_cap_intent with skips: { intentional: {rung: reason}, unintentional: [rung] } plus level_cap_rung (which rung capped). Badge/card derive intentional-vs- unintentional from whether the capping rung is in the intentional list. Skips still cap the level (never inflate). custom-html-tiny lists all three rungs it intentionally skips (backup_restore, integration, recipe_local). Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 35 +++++++----- runner/harness/results.py | 77 +++++++++---------------- runner/run_recipe_ci.py | 34 +++++------ tests/custom-html-tiny/recipe_meta.py | 14 ++--- tests/unit/test_card.py | 10 ++-- tests/unit/test_results.py | 81 ++++++++++----------------- 6 files changed, 105 insertions(+), 146 deletions(-) diff --git a/runner/harness/card.py b/runner/harness/card.py index 1cee8a6..e57d517 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -79,27 +79,27 @@ def render_badge_svg(label: str, message: str, color: str) -> str: ) -# Third-segment colours for the level badge: amber = an UNEXPECTED skip (undeclared gap-sensitive -# N/A — likely missing coverage) capped the climb; muted = an EXPECTED skip (declared intentional -# N/A — reviewed, nothing to fix). Font-safe text labels (no emoji) so the SVG renders anywhere. +# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not +# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an +# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels +# (no emoji) so the SVG renders anywhere. GAP_COLOR = "#d29922" EXPECT_COLOR = "#6e7681" -def level_badge_svg(level: int, cap_reason: str = "", cap_intent: str = "") -> str: +def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str: """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment - that differentiates *why* the climb stopped when an N/A capped it: - - undeclared gap-sensitive N/A (an UNEXPECTED skip — likely missing coverage): amber 'gap?'. - - declared intentional N/A (an EXPECTED skip — reviewed, nothing to fix): muted 'expected'. - - clean cap / full climb / a real failure: no third segment (the level + card carry it). - Derived from `cap_intent` (results.level_cap_intent) so the badge never inflates — it only - annotates the cap the level already reflects.""" + that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`): + - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'. + - "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'. + - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it). + The badge never inflates — it only annotates the cap the level already reflects.""" label, msg = "cc-ci", f"level {int(level)}" lw, mw = _text_width(label), _text_width(msg) third: tuple[str, str] | None = None - if cap_intent.startswith("undeclared"): + if cap_skip == "unintentional": third = ("gap?", GAP_COLOR) - elif cap_intent.startswith("intentional"): + elif cap_skip == "intentional": third = ("expected", EXPECT_COLOR) if third is None: return render_badge_svg(label, msg, level_color(level)) @@ -151,8 +151,15 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) cap_reason = str(data.get("level_cap_reason") or "") - cap_intent = str(data.get("level_cap_intent") or "") - cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else "")) + # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its + # reason) or an unintentional one (skipped but not declared). + capped = data.get("level_cap_rung") + sk = data.get("skips", {}) or {} + if capped and capped in (sk.get("intentional") or {}): + cap_reason += f" · intentional: {sk['intentional'][capped]}" + elif capped and capped in (sk.get("unintentional") or []): + cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)" + cap = html.escape(cap_reason) color = level_color(level) flags = data.get("flags", {}) or {} flag_bits = [] diff --git a/runner/harness/results.py b/runner/harness/results.py index 3e6fed7..ec7ca7b 100644 --- a/runner/harness/results.py +++ b/runner/harness/results.py @@ -2,7 +2,14 @@ Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan: { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}], - level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } + level, level_cap_reason, level_cap_rung, rungs, + skips:{intentional:{rung:reason}, unintentional:[rung]}, + flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } + +`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists +it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is +UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness +never claims a rung it did not verify; this only labels *why* a skip happened. The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`), parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a @@ -200,54 +207,23 @@ def derive_rungs( return rungs -# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage -# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real -# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless -# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one -# published version; integration — no SSO surface; recipe_local — no repo-local tests) are -# *structurally* optional: an N/A there is the normal case and is not flagged. -GAP_SENSITIVE_RUNGS = ("backup_restore", "functional") +def skips(rungs: dict[str, str], expected_na: dict | None) -> dict: + """Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model). - -def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict: - """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request). - - A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps - the level either way (the harness never inflates — a rung that wasn't verified wasn't verified); - this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface" - from "someone forgot to add the backup test". Returns: - { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung - "gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared - "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out) + A recipe lists the rungs it intentionally skips, each with a reason, in + `recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is + **intentional** iff it is in that list; any rung that is skipped and NOT in the list is + **unintentional** (a coverage gap someone should either fill or declare). N/A still caps the + level either way — the harness never claims a rung it did not verify — this only labels *why* a + skip happened. Returns: + { "intentional": {rung: reason, ...}, # skipped AND declared in EXPECTED_NA + "unintentional": [rung, ...] } # skipped but NOT declared """ expected = {str(k): str(v) for k, v in (expected_na or {}).items()} - na: dict[str, dict] = {} - for rung, st in rungs.items(): - if st != "na": - continue - if rung in expected: - na[rung] = {"intent": "declared", "reason": expected[rung]} - else: - na[rung] = {"intent": "undeclared", "reason": ""} - gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"] - stale = sorted(r for r in expected if rungs.get(r) not in (None, "na")) - return {"rungs": na, "gaps": gaps, "stale_declared": stale} - - -def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str: - """A short clause explaining the level cap when the capping rung is N/A: the declared reason if - intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''.""" - if not cap_reason: - return "" - capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None - if not capped or rungs.get(capped) != "na": - return "" - entry = na_info["rungs"].get(capped, {}) - if entry.get("intent") == "declared": - return f"intentional · {entry['reason']}" - if capped in GAP_SENSITIVE_RUNGS: - return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)" - return "" + na = [r for r, st in rungs.items() if st == "na"] + intentional = {r: expected[r] for r in na if r in expected} + unintentional = sorted(r for r in na if r not in expected) + return {"intentional": intentional, "unintentional": unintentional} def build_results( @@ -286,8 +262,9 @@ def build_results( repo_local_passed=_repo_local_passed(records), ) lvl, cap_reason = level_mod.compute_level(rungs) - na_info = classify_na(rungs, expected_na) - intent = cap_intent(rungs, lvl, cap_reason, na_info) + # The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer + # (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure. + capped = level_mod.RUNGS[lvl] if cap_reason else None return { "schema": 1, "run_id": run_id(), @@ -298,9 +275,9 @@ def build_results( "finished": finished_ts, "level": lvl, "level_cap_reason": cap_reason, - "level_cap_intent": intent, + "level_cap_rung": capped, "rungs": rungs, - "na": na_info, + "skips": skips(rungs, expected_na), "stages": stages, "results": results, "flags": { diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index b10966a..d187cae 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -1254,27 +1254,18 @@ def main() -> int: file=sys.stderr, ) path = results_mod.write_results(data) - intent = data.get("level_cap_intent") or "" print( f"results.json written: {path} (level={data['level']}" - f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}" - f"{' [' + intent + ']' if intent else ''})", + f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})", flush=True, ) - # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a - # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale - # EXPECTED_NA declares a tier N/A that actually ran. - na = data.get("na", {}) - for rung in na.get("gaps", []): + # Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A) + # but is not in the recipe's intentional list — either add the missing coverage or declare it. + for rung in data.get("skips", {}).get("unintentional", []): print( - f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or " - f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': ''}}.", - flush=True, - ) - for rung in na.get("stale_declared", []): - print( - f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised " - f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.", + f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add " + f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py " + f"EXPECTED_NA = {{'{rung}': ''}}.", flush=True, ) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) @@ -1295,12 +1286,17 @@ def main() -> int: with open(html_path, "w", encoding="utf-8") as f: f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot"))) png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png")) + capped = data.get("level_cap_rung") + sk = data.get("skips", {}) + cap_skip = ( + "intentional" if capped in (sk.get("intentional") or {}) + else "unintentional" if capped in (sk.get("unintentional") or []) + else "" + ) with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f: f.write( card_mod.level_badge_svg( - data["level"], - data.get("level_cap_reason", ""), - data.get("level_cap_intent", ""), + data["level"], data.get("level_cap_reason", ""), cap_skip ) ) print( diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py index 25aac26..509317e 100644 --- a/tests/custom-html-tiny/recipe_meta.py +++ b/tests/custom-html-tiny/recipe_meta.py @@ -4,14 +4,14 @@ DEPLOY_TIMEOUT = 120 HTTP_TIMEOUT = 90 -# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless -# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time -# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore. -# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A. -# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of -# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never -# claims a rung it did not verify; this only explains *why* the cap is expected.) +# Rungs this recipe INTENTIONALLY skips, each with a reason. Any rung that is skipped (N/A) and is +# NOT listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip +# still caps the level either way — the harness never claims a rung it did not verify; this only +# records that the skip is deliberate. custom-html-tiny is a stateless static-web-server, so: EXPECTED_NA = { "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at " "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)", + "integration": "no SSO/OIDC or cross-app surface — a static file server has no auth integration", + "recipe_local": "the upstream recipe ships no tests/ of its own; coverage is the cc-ci generic " + "install tier + the functional serve test", } diff --git a/tests/unit/test_card.py b/tests/unit/test_card.py index 1abce04..7862038 100644 --- a/tests/unit/test_card.py +++ b/tests/unit/test_card.py @@ -55,13 +55,13 @@ def test_badge_svg_wellformed(): assert "expected" not in svg and "gap?" not in svg -def test_badge_svg_differentiates_expected_vs_unexpected_skip(): - # declared intentional N/A capped the climb → muted "expected" third segment - exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional · no persistent data") +def test_badge_svg_differentiates_intentional_vs_unintentional_skip(): + # an intentional (declared) skip capped the climb → muted "expected" third segment + exp = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional") assert "level 2" in exp and "expected" in exp and C.EXPECT_COLOR in exp assert "gap?" not in exp - # undeclared gap-sensitive N/A → amber "gap?" third segment (an UNEXPECTED skip) - gap = C.level_badge_svg(2, "L3 backup/restore N/A", "undeclared N/A — possible coverage gap") + # an unintentional skip (not declared) → amber "gap?" third segment + gap = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional") assert "level 2" in gap and "gap?" in gap and C.GAP_COLOR in gap assert "expected" not in gap diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 9da1aff..f034911 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -257,7 +257,7 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path): assert "L2" in data["level_cap_reason"] -# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ---- +# ---- skips: intentional (declared) vs unintentional (everything else skipped) ---- def _rungs(**kw): @@ -273,57 +273,32 @@ def _rungs(**kw): return base -def test_classify_na_declared_vs_undeclared(): +def test_skips_intentional_vs_unintentional(): rungs = _rungs(backup_restore="na", functional="na") - info = R.classify_na(rungs, {"backup_restore": "stateless static server"}) - # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A. - assert info["rungs"]["backup_restore"] == { - "intent": "declared", - "reason": "stateless static server", - } - assert info["rungs"]["functional"]["intent"] == "undeclared" - assert info["gaps"] == ["functional"] # backup_restore declared → not a gap - assert info["stale_declared"] == [] - # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps. - assert info["rungs"]["integration"]["intent"] == "undeclared" - assert "integration" not in info["gaps"] + sk = R.skips(rungs, {"backup_restore": "stateless static server"}) + # backup_restore is declared (intentional, with reason); everything else skipped is unintentional. + assert sk["intentional"] == {"backup_restore": "stateless static server"} + assert sk["unintentional"] == ["functional", "integration", "recipe_local"] -def test_classify_na_stale_declaration(): - # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced. +def test_skips_none_declared_all_unintentional(): + rungs = _rungs(backup_restore="na") + sk = R.skips(rungs, None) + assert sk["intentional"] == {} + assert sk["unintentional"] == ["backup_restore", "integration", "recipe_local"] + + +def test_skips_declaration_only_counts_when_actually_skipped(): + # backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert. rungs = _rungs(backup_restore="pass") - info = R.classify_na(rungs, {"backup_restore": "stale reason"}) - assert info["stale_declared"] == ["backup_restore"] - assert "backup_restore" not in info["rungs"] # not N/A, so not in the per-rung N/A map - - -def test_cap_intent_declared_explains_cap(): - # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause. - rungs = _rungs(backup_restore="na") - info = R.classify_na(rungs, {"backup_restore": "no persistent data"}) - intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) - assert intent == "intentional · no persistent data" - - -def test_cap_intent_undeclared_gap(): - rungs = _rungs(backup_restore="na") - info = R.classify_na(rungs, None) - intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) - assert "possible coverage gap" in intent - - -def test_cap_intent_blank_when_not_capped_on_na(): - rungs = _rungs() # full clean climb, capped only at integration (na, structurally optional) - info = R.classify_na(rungs, None) - # capping rung is integration (level 4) — structurally optional, so no intent clause. - assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == "" - # and no cap at all → blank. - assert R.cap_intent(rungs, 6, "", info) == "" + sk = R.skips(rungs, {"backup_restore": "reason"}) + assert "backup_restore" not in sk["intentional"] + assert "backup_restore" not in sk["unintentional"] def test_build_results_threads_expected_na(tmp_path): # Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no - # backup surface (backup_restore declared intentionally N/A). + # backup surface (backup_restore declared intentionally skipped). recs = [ { "tier": "install", @@ -347,23 +322,27 @@ def test_build_results_threads_expected_na(tmp_path): ref=None, records=recs, results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass - backup_capable=False, # no backupbot label → backup_restore N/A + backup_capable=False, # no backupbot label → backup_restore skipped (N/A) declared=[], deps_ready=True, sso_unverified=False, clean_teardown=True, no_secret_leak=True, finished_ts=0.0, - expected_na={"backup_restore": "stateless static file server"}, + expected_na={ + "backup_restore": "stateless static file server", + "integration": "no SSO surface", + "recipe_local": "no upstream tests/", + }, ) - # backup_restore N/A still caps at L2 (never inflates) — even though functional passes above it, - # the gap caps the climb — but the cap is now annotated intentional rather than flagged. + # backup_restore skip still caps at L2 (never inflates) — even though functional passes above it, + # the skip caps the climb — but it's the declared (intentional) rung that capped. assert data["level"] == 2 assert "L3" in data["level_cap_reason"] - assert data["level_cap_intent"] == "intentional · stateless static file server" - assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared" + assert data["level_cap_rung"] == "backup_restore" assert data["rungs"]["functional"] == "pass" - assert data["na"]["gaps"] == [] # functional now covered; backup_restore declared → no gaps + assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server" + assert data["skips"]["unintentional"] == [] # every skip accounted for → fully clean def test_write_results_roundtrip(tmp_path): -- 2.49.0 From d20ad1e989b8c59ca51bd67fa2a3bf193afed4e1 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 02:42:05 +0000 Subject: [PATCH 5/7] =?UTF-8?q?feat(card):=20show=20skipped=20rungs=20as?= =?UTF-8?q?=20rows=20=E2=80=94=20INTENTIONAL=20SKIP=20(green)=20with=20rea?= =?UTF-8?q?son=20below?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per operator: intentional skips now render like a pass row but labelled 'INTENTIONAL SKIP' (muted green) with the declared reason on the line beneath; unintentional skips render amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare them. The cap line is back to just the level-cap reason (the per-rung reason now lives in the rows). Labelled, so it never reads as a PASS. Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 49 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/runner/harness/card.py b/runner/harness/card.py index e57d517..21418dc 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -141,6 +141,43 @@ def _stage_rows(stages: list[dict]) -> str: return "\n".join(rows) or 'no stages' +# Friendly rung labels for the skip rows. +RUNG_LABEL = { + "install": "install", + "upgrade": "upgrade", + "backup_restore": "backup/restore", + "functional": "functional", + "integration": "integration", + "recipe_local": "recipe-local", +} +SKIP_GREEN = "#57ab5a" # muted green — an intentional skip reads like a pass (but labelled, never inflating) + + +def _skip_rows(skips: dict) -> str: + """Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row + but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below; + an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it.""" + rows = [] + for rung, reason in (skips.get("intentional") or {}).items(): + rows.append( + f'' + f'{html.escape(RUNG_LABEL.get(rung, rung))}' + f'intentional skip' + ) + rows.append(f'{html.escape(reason)}') + for rung in skips.get("unintentional") or []: + rows.append( + f'' + f'{html.escape(RUNG_LABEL.get(rung, rung))}' + f'unintentional skip' + ) + rows.append( + 'not declared in EXPECTED_NA — add the ' + "missing test/label, or declare the skip with a reason" + ) + return "\n".join(rows) + + def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str: """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to the screenshot PNG (same dir as the card) — omitted from the card if None / absent. @@ -151,15 +188,8 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) cap_reason = str(data.get("level_cap_reason") or "") - # Annotate the cap line by whether the capping rung was an intentional skip (declared, with its - # reason) or an unintentional one (skipped but not declared). - capped = data.get("level_cap_rung") - sk = data.get("skips", {}) or {} - if capped and capped in (sk.get("intentional") or {}): - cap_reason += f" · intentional: {sk['intentional'][capped]}" - elif capped and capped in (sk.get("unintentional") or []): - cap_reason += " · unintentional skip (no EXPECTED_NA — add a test or declare it)" cap = html.escape(cap_reason) + sk = data.get("skips", {}) or {} color = level_color(level) flags = data.get("flags", {}) or {} flag_bits = [] @@ -175,7 +205,7 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") if show_shot else '
no screenshot
' ) - rows = _stage_rows(data.get("stages", [])) + rows = _stage_rows(data.get("stages", [])) + "\n" + _skip_rows(sk) return f"""