feat(harness): declare intentional N/A tiers + custom-html-tiny functional test
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
Two changes the operator asked for after noticing custom-html-tiny PR #6 has no backup/restore or functional coverage: 1) Intentional-vs-accidental N/A. A recipe can now declare recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not applicable (e.g. a stateless static server has no backup surface). N/A still caps the level — the harness never claims a rung it did not verify — but the run is now annotated 'intentional · <reason>' instead of being indistinguishable from a forgotten test. An *undeclared* N/A on a gap-sensitive rung (backup_restore, functional) is surfaced as a 'possible coverage gap', and a stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All non-blocking (R7): results.json gains level_cap_intent + an 'na' block, the summary card shows the clause, and the CI log prints the gap/stale warnings. (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.) custom-html-tiny declares backup_restore intentionally N/A. 2) custom-html-tiny functional test: writes a random file into the served content volume (via the volume mountpoint, like install_steps.sh, since the SWS image is shell-less), asserts exact-byte round-trip + a real 404 on a missing path — proving the static-web-server actually serves the volume, not a 200-everything fallback. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
|
||||
recipe = html.escape(str(data.get("recipe", "?")))
|
||||
version = html.escape(str(data.get("version") or data.get("ref") or ""))
|
||||
level = int(data.get("level", 0))
|
||||
cap = html.escape(str(data.get("level_cap_reason") or ""))
|
||||
cap_reason = str(data.get("level_cap_reason") or "")
|
||||
cap_intent = str(data.get("level_cap_intent") or "")
|
||||
cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
|
||||
color = level_color(level)
|
||||
flags = data.get("flags", {}) or {}
|
||||
flag_bits = []
|
||||
|
||||
@ -200,6 +200,56 @@ def derive_rungs(
|
||||
return rungs
|
||||
|
||||
|
||||
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
|
||||
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
|
||||
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
|
||||
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
|
||||
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
|
||||
# *structurally* optional: an N/A there is the normal case and is not flagged.
|
||||
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
|
||||
|
||||
|
||||
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
|
||||
|
||||
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
|
||||
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
|
||||
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
|
||||
from "someone forgot to add the backup test". Returns:
|
||||
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
|
||||
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
|
||||
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
|
||||
"""
|
||||
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
|
||||
na: dict[str, dict] = {}
|
||||
for rung, st in rungs.items():
|
||||
if st != "na":
|
||||
continue
|
||||
if rung in expected:
|
||||
na[rung] = {"intent": "declared", "reason": expected[rung]}
|
||||
else:
|
||||
na[rung] = {"intent": "undeclared", "reason": ""}
|
||||
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
|
||||
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
|
||||
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
|
||||
|
||||
|
||||
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
    """Return a short clause explaining the level cap when the capping rung is N/A.

    The rung at index `level` in `level_mod.RUNGS` is taken to be the one that capped the run
    (presumably the first rung not achieved — confirm against level.compute_level).

    Args:
        rungs: rung name -> status string.
        level: the computed level; out-of-range values yield "".
        cap_reason: the cap reason from level computation; when empty there is no cap to explain.
        na_info: the dict produced by `classify_na` (only its "rungs" entry is read). A missing
            or partial entry is tolerated rather than raising.

    Returns:
        "intentional · <reason>" if the capping rung is a declared N/A (just "intentional" when
        the declared reason is blank, so no dangling separator is emitted), a 'possible coverage
        gap' note if it is an undeclared gap-sensitive rung, else "".
    """
    if not cap_reason:
        return ""
    if not 0 <= level < len(level_mod.RUNGS):
        return ""
    capped = level_mod.RUNGS[level]
    if not capped or rungs.get(capped) != "na":
        return ""

    entry = (na_info.get("rungs") or {}).get(capped) or {}
    if entry.get("intent") == "declared":
        reason = str(entry.get("reason") or "")
        return f"intentional · {reason}" if reason.strip() else "intentional"
    if capped in GAP_SENSITIVE_RUNGS:
        return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
    return ""
|
||||
|
||||
|
||||
def build_results(
|
||||
*,
|
||||
recipe: str,
|
||||
@ -217,9 +267,12 @@ def build_results(
|
||||
finished_ts: float | None,
|
||||
screenshot: str | None = None,
|
||||
summary_card: str | None = None,
|
||||
expected_na: dict | None = None,
|
||||
) -> dict:
|
||||
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
|
||||
stamps it) so this stays pure and deterministic for unit tests."""
|
||||
stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
|
||||
declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
|
||||
accidentally-missing coverage."""
|
||||
stages = collect_stages(records)
|
||||
has_custom = any(r["tier"] == "custom" for r in records)
|
||||
rungs = derive_rungs(
|
||||
@ -233,6 +286,8 @@ def build_results(
|
||||
repo_local_passed=_repo_local_passed(records),
|
||||
)
|
||||
lvl, cap_reason = level_mod.compute_level(rungs)
|
||||
na_info = classify_na(rungs, expected_na)
|
||||
intent = cap_intent(rungs, lvl, cap_reason, na_info)
|
||||
return {
|
||||
"schema": 1,
|
||||
"run_id": run_id(),
|
||||
@ -243,7 +298,9 @@ def build_results(
|
||||
"finished": finished_ts,
|
||||
"level": lvl,
|
||||
"level_cap_reason": cap_reason,
|
||||
"level_cap_intent": intent,
|
||||
"rungs": rungs,
|
||||
"na": na_info,
|
||||
"stages": stages,
|
||||
"results": results,
|
||||
"flags": {
|
||||
|
||||
@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
|
||||
for k in list(meta) + [
|
||||
"BACKUP_CAPABLE",
|
||||
"SKIP_GENERIC",
|
||||
"EXPECTED_NA",
|
||||
"OIDC_AT_INSTALL",
|
||||
"READY_PROBE",
|
||||
"UPGRADE_BASE_VERSION",
|
||||
@ -1241,6 +1242,7 @@ def main() -> int:
|
||||
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
|
||||
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
|
||||
finished_ts=time.time(),
|
||||
expected_na=meta.get("EXPECTED_NA"), # declared intentional-N/A map (recipe_meta)
|
||||
)
|
||||
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
|
||||
blob = json.dumps(data)
|
||||
@ -1252,11 +1254,29 @@ def main() -> int:
|
||||
file=sys.stderr,
|
||||
)
|
||||
path = results_mod.write_results(data)
|
||||
intent = data.get("level_cap_intent") or ""
|
||||
print(
|
||||
f"results.json written: {path} (level={data['level']}"
|
||||
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
|
||||
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}"
|
||||
f"{' [' + intent + ']' if intent else ''})",
|
||||
flush=True,
|
||||
)
|
||||
# Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a
|
||||
# gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale
|
||||
# EXPECTED_NA declares a tier N/A that actually ran.
|
||||
na = data.get("na", {})
|
||||
for rung in na.get("gaps", []):
|
||||
print(
|
||||
f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or "
|
||||
f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.",
|
||||
flush=True,
|
||||
)
|
||||
for rung in na.get("stale_declared", []):
|
||||
print(
|
||||
f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
|
||||
f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
|
||||
flush=True,
|
||||
)
|
||||
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
|
||||
print(
|
||||
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
|
||||
|
||||
Reference in New Issue
Block a user