feat(harness): declare intentional N/A tiers + custom-html-tiny functional test
Some checks failed
continuous-integration/drone/push Build is failing

Two changes the operator asked for after noticing custom-html-tiny PR #6 has no
backup/restore or functional coverage:

1) Intentional-vs-accidental N/A. A recipe can now declare
   recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not
   applicable (e.g. a stateless static server has no backup surface). N/A still
   caps the level — the harness never claims a rung it did not verify — but the
   run is now annotated 'intentional · <reason>' instead of being indistinguishable
   from a forgotten test. An *undeclared* N/A on a gap-sensitive rung
   (backup_restore, functional) is surfaced as a 'possible coverage gap', and a
   stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All
   non-blocking (R7): results.json gains level_cap_intent + an 'na' block, the
   summary card shows the clause, and the CI log prints the gap/stale warnings.
   (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.)

   custom-html-tiny declares backup_restore intentionally N/A.

2) custom-html-tiny functional test: writes a random file into the served content
   volume (via the volume mountpoint, like install_steps.sh, since the SWS image
   is shell-less), asserts exact-byte round-trip + a real 404 on a missing path —
   proving the static-web-server actually serves the volume, not a 200-everything
   fallback.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-06-09 01:59:28 +00:00
parent f5a6f7196f
commit 3b0a3d14ea
6 changed files with 279 additions and 3 deletions

View File

@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
recipe = html.escape(str(data.get("recipe", "?")))
version = html.escape(str(data.get("version") or data.get("ref") or ""))
level = int(data.get("level", 0))
cap = html.escape(str(data.get("level_cap_reason") or ""))
cap_reason = str(data.get("level_cap_reason") or "")
cap_intent = str(data.get("level_cap_intent") or "")
cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
color = level_color(level)
flags = data.get("flags", {}) or {}
flag_bits = []

View File

@ -200,6 +200,56 @@ def derive_rungs(
return rungs
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
# *structurally* optional: an N/A there is the normal case and is not flagged.
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
from "someone forgot to add the backup test". Returns:
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
"""
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
na: dict[str, dict] = {}
for rung, st in rungs.items():
if st != "na":
continue
if rung in expected:
na[rung] = {"intent": "declared", "reason": expected[rung]}
else:
na[rung] = {"intent": "undeclared", "reason": ""}
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
    """Return a short clause explaining the level cap, or '' when there is nothing to add.

    The rung at index `level` of `level_mod.RUNGS` is taken as the capping rung. A clause is
    produced only when a cap reason exists and that rung's status in `rungs` is "na":
      * the rung's entry in `na_info["rungs"]` has intent "declared" -> 'intentional · <reason>'
      * otherwise, the rung is listed in GAP_SENSITIVE_RUNGS -> a 'possible coverage gap' note
      * anything else -> ''
    """
    # No cap recorded: nothing to explain.
    if not cap_reason:
        return ""

    # Resolve the capping rung; an out-of-range level has no corresponding rung.
    blocking_rung = level_mod.RUNGS[level] if 0 <= level < len(level_mod.RUNGS) else None
    if not blocking_rung:
        return ""

    # Only an N/A capping rung gets an intent clause.
    if rungs.get(blocking_rung) != "na":
        return ""

    declaration = na_info["rungs"].get(blocking_rung, {})
    if declaration.get("intent") == "declared":
        return f"intentional · {declaration['reason']}"

    if blocking_rung in GAP_SENSITIVE_RUNGS:
        return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
    return ""
def build_results(
*,
recipe: str,
@ -217,9 +267,12 @@ def build_results(
finished_ts: float | None,
screenshot: str | None = None,
summary_card: str | None = None,
expected_na: dict | None = None,
) -> dict:
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
stamps it) so this stays pure and deterministic for unit tests."""
stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
accidentally-missing coverage."""
stages = collect_stages(records)
has_custom = any(r["tier"] == "custom" for r in records)
rungs = derive_rungs(
@ -233,6 +286,8 @@ def build_results(
repo_local_passed=_repo_local_passed(records),
)
lvl, cap_reason = level_mod.compute_level(rungs)
na_info = classify_na(rungs, expected_na)
intent = cap_intent(rungs, lvl, cap_reason, na_info)
return {
"schema": 1,
"run_id": run_id(),
@ -243,7 +298,9 @@ def build_results(
"finished": finished_ts,
"level": lvl,
"level_cap_reason": cap_reason,
"level_cap_intent": intent,
"rungs": rungs,
"na": na_info,
"stages": stages,
"results": results,
"flags": {

View File

@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
for k in list(meta) + [
"BACKUP_CAPABLE",
"SKIP_GENERIC",
"EXPECTED_NA",
"OIDC_AT_INSTALL",
"READY_PROBE",
"UPGRADE_BASE_VERSION",
@ -1241,6 +1242,7 @@ def main() -> int:
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
finished_ts=time.time(),
expected_na=meta.get("EXPECTED_NA"), # declared intentional-N/A map (recipe_meta)
)
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
blob = json.dumps(data)
@ -1252,11 +1254,29 @@ def main() -> int:
file=sys.stderr,
)
path = results_mod.write_results(data)
intent = data.get("level_cap_intent") or ""
print(
f"results.json written: {path} (level={data['level']}"
f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''}"
f"{' [' + intent + ']' if intent else ''})",
flush=True,
)
# Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a
# gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale
# EXPECTED_NA declares a tier N/A that actually ran.
na = data.get("na", {})
for rung in na.get("gaps", []):
print(
f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or "
f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.",
flush=True,
)
for rung in na.get("stale_declared", []):
print(
f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
flush=True,
)
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
print(
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",