From 3b0a3d14ea0afd23133d9e7bc94d2773de2f7ff5 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 01:59:28 +0000 Subject: [PATCH] feat(harness): declare intentional N/A tiers + custom-html-tiny functional test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two changes the operator asked for after noticing custom-html-tiny PR #6 has no backup/restore or functional coverage: 1) Intentional-vs-accidental N/A. A recipe can now declare recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not applicable (e.g. a stateless static server has no backup surface). N/A still caps the level — the harness never claims a rung it did not verify — but the run is now annotated 'intentional · {reason}' instead of being indistinguishable from a forgotten test. An *undeclared* N/A on a gap-sensitive rung (backup_restore, functional) is surfaced as a 'possible coverage gap', and a stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All non-blocking (R7): results.json gains level_cap_intent + an `na` block, the summary card shows the clause, and the CI log prints the gap/stale warnings. (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.) custom-html-tiny declares backup_restore intentionally N/A. 2) custom-html-tiny functional test: writes a random file into the served content volume (via the volume mountpoint, like install_steps.sh, since the SWS image is shell-less), asserts exact-byte round-trip + a real 404 on a missing path — proving the static-web-server actually serves the volume, not a 200-everything fallback. 
Co-Authored-By: Claude Opus 4.8 --- runner/harness/card.py | 4 +- runner/harness/results.py | 59 ++++++++++- runner/run_recipe_ci.py | 22 ++++- .../functional/test_serves_content.py | 87 ++++++++++++++++ tests/custom-html-tiny/recipe_meta.py | 12 +++ tests/unit/test_results.py | 98 +++++++++++++++++++ 6 files changed, 279 insertions(+), 3 deletions(-) create mode 100644 tests/custom-html-tiny/functional/test_serves_content.py diff --git a/runner/harness/card.py b/runner/harness/card.py index 44cf4ae..36b717f 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") recipe = html.escape(str(data.get("recipe", "?"))) version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) - cap = html.escape(str(data.get("level_cap_reason") or "")) + cap_reason = str(data.get("level_cap_reason") or "") + cap_intent = str(data.get("level_cap_intent") or "") + cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else "")) color = level_color(level) flags = data.get("flags", {}) or {} flag_bits = [] diff --git a/runner/harness/results.py b/runner/harness/results.py index c3fae07..3e6fed7 100644 --- a/runner/harness/results.py +++ b/runner/harness/results.py @@ -200,6 +200,56 @@ def derive_rungs( return rungs +# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage +# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real +# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless +# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one +# published version; integration — no SSO surface; recipe_local — no repo-local tests) are +# *structurally* optional: an N/A there is the normal case and is not flagged. 
+GAP_SENSITIVE_RUNGS = ("backup_restore", "functional") + + +def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict: + """Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request). + + A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps + the level either way (the harness never inflates — a rung that wasn't verified wasn't verified); + this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface" + from "someone forgot to add the backup test". Returns: + { "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung + "gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared + "stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out) + """ + expected = {str(k): str(v) for k, v in (expected_na or {}).items()} + na: dict[str, dict] = {} + for rung, st in rungs.items(): + if st != "na": + continue + if rung in expected: + na[rung] = {"intent": "declared", "reason": expected[rung]} + else: + na[rung] = {"intent": "undeclared", "reason": ""} + gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"] + stale = sorted(r for r in expected if rungs.get(r) not in (None, "na")) + return {"rungs": na, "gaps": gaps, "stale_declared": stale} + + +def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str: + """A short clause explaining the level cap when the capping rung is N/A: the declared reason if + intentional, a 'possible coverage gap' note if it's an undeclared gap-sensitive rung, else ''.""" + if not cap_reason: + return "" + capped = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None + if not capped or rungs.get(capped) != "na": + return "" + entry = na_info["rungs"].get(capped, {}) + if entry.get("intent") == "declared": + return f"intentional · {entry['reason']}" + if capped in 
GAP_SENSITIVE_RUNGS: + return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)" + return "" + + def build_results( *, recipe: str, @@ -217,9 +267,12 @@ def build_results( finished_ts: float | None, screenshot: str | None = None, summary_card: str | None = None, + expected_na: dict | None = None, ) -> dict: """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator - stamps it) so this stays pure and deterministic for unit tests.""" + stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's + declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from + accidentally-missing coverage.""" stages = collect_stages(records) has_custom = any(r["tier"] == "custom" for r in records) rungs = derive_rungs( @@ -233,6 +286,8 @@ def build_results( repo_local_passed=_repo_local_passed(records), ) lvl, cap_reason = level_mod.compute_level(rungs) + na_info = classify_na(rungs, expected_na) + intent = cap_intent(rungs, lvl, cap_reason, na_info) return { "schema": 1, "run_id": run_id(), @@ -243,7 +298,9 @@ def build_results( "finished": finished_ts, "level": lvl, "level_cap_reason": cap_reason, + "level_cap_intent": intent, "rungs": rungs, + "na": na_info, "stages": stages, "results": results, "flags": { diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index 11fadd0..becc9c4 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict: for k in list(meta) + [ "BACKUP_CAPABLE", "SKIP_GENERIC", + "EXPECTED_NA", "OIDC_AT_INSTALL", "READY_PROBE", "UPGRADE_BASE_VERSION", @@ -1241,6 +1242,7 @@ def main() -> int: no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded finished_ts=time.time(), + expected_na=meta.get("EXPECTED_NA"), # declared intentional-N/A map 
(recipe_meta) ) # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7). blob = json.dumps(data) @@ -1252,11 +1254,29 @@ def main() -> int: file=sys.stderr, ) path = results_mod.write_results(data) + intent = data.get("level_cap_intent") or "" print( f"results.json written: {path} (level={data['level']}" - f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})", + f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}" + f"{' [' + intent + ']' if intent else ''})", flush=True, ) + # Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a + # gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale + # EXPECTED_NA declares a tier N/A that actually ran. + na = data.get("na", {}) + for rung in na.get("gaps", []): + print( + f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or " + f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': ''}}.", + flush=True, + ) + for rung in na.get("stale_declared", []): + print( + f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised " + f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.", + flush=True, + ) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) print( f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}", diff --git a/tests/custom-html-tiny/functional/test_serves_content.py b/tests/custom-html-tiny/functional/test_serves_content.py new file mode 100644 index 0000000..cb30917 --- /dev/null +++ b/tests/custom-html-tiny/functional/test_serves_content.py @@ -0,0 +1,87 @@ +"""custom-html-tiny — recipe-specific functional test (static-web-server). 
+ +Proves the deployed static-web-server is *actually serving files from its `content` volume* with real +file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub: + + 1. exact-byte round-trip — write a uniquely-named file with random content into the served volume, + fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random + per run, so only a server that reads this file off the volume can pass. + 2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a + 200-everything stub or mis-routed host would not 404). + +The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume +is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the +same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather +than `docker exec`-ing in a container that has no shell. + +Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its +per-run domain). Mirrors install_steps.sh: the app's content volume is named `{stack}_content`, where +`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is +served at `/`. 
+""" + +from __future__ import annotations + +import contextlib +import os +import ssl +import subprocess +import urllib.error +import urllib.request +import uuid + + +def _served_dir(domain: str) -> str: + """Host mountpoint of the app's served `content` volume (same naming as install_steps.sh).""" + vol = f"{domain.replace('.', '_')}_content" + out = subprocess.run( + ["docker", "volume", "inspect", vol, "--format", "{{.Mountpoint}}"], + capture_output=True, + text=True, + check=True, + ) + mountpoint = out.stdout.strip() + assert mountpoint, f"could not resolve mountpoint for volume {vol!r}" + return mountpoint + + +def _get(url: str) -> tuple[int, bytes]: + """GET the URL; return (status, body). A 4xx/5xx is returned, not raised (we assert on the code). + TLS verification is relaxed: the served wildcard cert is validated separately by the infra check; + here we care only about the app's response.""" + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + try: + with urllib.request.urlopen(url, timeout=20, context=ctx) as resp: + return resp.status, resp.read() + except urllib.error.HTTPError as e: + return e.code, e.read() + + +def test_static_file_roundtrip_and_404(live_app): + """Write a random file into the served volume → fetch it → bytes match; and a missing path 404s.""" + served = _served_dir(live_app) + token = uuid.uuid4().hex + name = f"ccci-probe-{token}.txt" + body = f"cc-ci-functional-{token}\n".encode() + path = os.path.join(served, name) + with open(path, "wb") as fh: + fh.write(body) + try: + status, got = _get(f"https://{live_app}/{name}") + assert status == 200, f"served probe file returned {status} (expected 200)" + assert got == body, ( + f"content round-trip mismatch: served {got!r}, wrote {body!r} " + "(static-web-server not serving the content volume?)" + ) + + # A random non-existent path must 404 — proves real static-file semantics, distinguishing a + # working server from a 200-everything 
stub or a mis-routed Traefik fallback. + miss_status, _ = _get(f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt") + assert miss_status == 404, ( + f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)" + ) + finally: + with contextlib.suppress(OSError): + os.remove(path) diff --git a/tests/custom-html-tiny/recipe_meta.py b/tests/custom-html-tiny/recipe_meta.py index 44603a9..25aac26 100644 --- a/tests/custom-html-tiny/recipe_meta.py +++ b/tests/custom-html-tiny/recipe_meta.py @@ -3,3 +3,15 @@ # (DG5) is detected quickly instead of waiting the default 300s HTTP timeout. DEPLOY_TIMEOUT = 120 HTTP_TIMEOUT = 90 + +# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless +# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time +# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore. +# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A. +# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of +# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never +# claims a rung it did not verify; this only explains *why* the cap is expected.) 
+EXPECTED_NA = { + "backup_restore": "stateless static file server: serves an ephemeral content volume seeded at " + "deploy, with no persistent/user data to back up or restore (no backupbot.backup label)", +} diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index d8bdd51..b52cd82 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -257,6 +257,104 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path): assert "L2" in data["level_cap_reason"] +# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ---- + + +def _rungs(**kw): + base = { + "install": "pass", + "upgrade": "pass", + "backup_restore": "pass", + "functional": "pass", + "integration": "na", + "recipe_local": "na", + } + base.update(kw) + return base + + +def test_classify_na_declared_vs_undeclared(): + rungs = _rungs(backup_restore="na", functional="na") + info = R.classify_na(rungs, {"backup_restore": "stateless static server"}) + # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A. + assert info["rungs"]["backup_restore"] == { + "intent": "declared", + "reason": "stateless static server", + } + assert info["rungs"]["functional"]["intent"] == "undeclared" + assert info["gaps"] == ["functional"] # backup_restore declared → not a gap + assert info["stale_declared"] == [] + # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps. + assert info["rungs"]["integration"]["intent"] == "undeclared" + assert "integration" not in info["gaps"] + + +def test_classify_na_stale_declaration(): + # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced. 
+ rungs = _rungs(backup_restore="pass") + info = R.classify_na(rungs, {"backup_restore": "stale reason"}) + assert info["stale_declared"] == ["backup_restore"] + assert "backup_restore" not in info["rungs"] # not N/A, so not in the per-rung N/A map + + +def test_cap_intent_declared_explains_cap(): + # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause. + rungs = _rungs(backup_restore="na") + info = R.classify_na(rungs, {"backup_restore": "no persistent data"}) + intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) + assert intent == "intentional · no persistent data" + + +def test_cap_intent_undeclared_gap(): + rungs = _rungs(backup_restore="na") + info = R.classify_na(rungs, None) + intent = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", info) + assert "possible coverage gap" in intent + + +def test_cap_intent_blank_when_not_capped_on_na(): + rungs = _rungs() # full clean climb, capped only at integration (na, structurally optional) + info = R.classify_na(rungs, None) + # capping rung is integration (level 4) — structurally optional, so no intent clause. + assert R.cap_intent(rungs, 4, "L5 integration N/A", info) == "" + # and no cap at all → blank. 
+ assert R.cap_intent(rungs, 6, "", info) == "" + + +def test_build_results_threads_expected_na(tmp_path): + recs = [ + { + "tier": "install", + "source": "generic", + "file": "g/test_install.py", + "rc": 0, + "junit": _write(tmp_path, "i.xml", JUNIT_PASS), + } + ] + data = R.build_results( + recipe="custom-html-tiny", + version="1.1.0", + pr="0", + ref=None, + records=recs, + results=_results(backup="skip", restore="skip", custom="skip"), + backup_capable=False, # no backupbot label → backup_restore N/A + declared=[], + deps_ready=True, + sso_unverified=False, + clean_teardown=True, + no_secret_leak=True, + finished_ts=0.0, + expected_na={"backup_restore": "stateless static file server"}, + ) + # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged. + assert data["level"] == 2 + assert "L3" in data["level_cap_reason"] + assert data["level_cap_intent"] == "intentional · stateless static file server" + assert data["na"]["rungs"]["backup_restore"]["intent"] == "declared" + assert data["na"]["gaps"] == [] + + def test_write_results_roundtrip(tmp_path): data = {"run_id": "42", "level": 3, "stages": []} path = R.write_results(data, runs_dir_override=str(tmp_path))