feat(harness): declare intentional N/A tiers + custom-html-tiny functional test
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
Two changes the operator asked for after noticing custom-html-tiny PR #6 has no backup/restore or functional coverage: 1) Intentional-vs-accidental N/A. A recipe can now declare recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not applicable (e.g. a stateless static server has no backup surface). N/A still caps the level — the harness never claims a rung it did not verify — but the run is now annotated 'intentional · <reason>' instead of being indistinguishable from a forgotten test. An *undeclared* N/A on a gap-sensitive rung (backup_restore, functional) is surfaced as a 'possible coverage gap', and a stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All non-blocking (R7): results.json gains level_cap_intent + an `na` block, the summary card shows the clause, and the CI log prints the gap/stale warnings. (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.) custom-html-tiny declares backup_restore intentionally N/A. 2) custom-html-tiny functional test: writes a random file into the served content volume (via the volume mountpoint, like install_steps.sh, since the SWS image is shell-less), asserts exact-byte round-trip + a real 404 on a missing path — proving the static-web-server actually serves the volume, not a 200-everything fallback. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
|
||||
recipe = html.escape(str(data.get("recipe", "?")))
|
||||
version = html.escape(str(data.get("version") or data.get("ref") or ""))
|
||||
level = int(data.get("level", 0))
|
||||
cap = html.escape(str(data.get("level_cap_reason") or ""))
|
||||
cap_reason = str(data.get("level_cap_reason") or "")
|
||||
cap_intent = str(data.get("level_cap_intent") or "")
|
||||
cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
|
||||
color = level_color(level)
|
||||
flags = data.get("flags", {}) or {}
|
||||
flag_bits = []
|
||||
|
||||
@ -200,6 +200,56 @@ def derive_rungs(
|
||||
return rungs
|
||||
|
||||
|
||||
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
|
||||
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
|
||||
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
|
||||
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
|
||||
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
|
||||
# *structurally* optional: an N/A there is the normal case and is not flagged.
|
||||
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
|
||||
|
||||
|
||||
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
|
||||
|
||||
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
|
||||
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
|
||||
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
|
||||
from "someone forgot to add the backup test". Returns:
|
||||
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
|
||||
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
|
||||
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
|
||||
"""
|
||||
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
|
||||
na: dict[str, dict] = {}
|
||||
for rung, st in rungs.items():
|
||||
if st != "na":
|
||||
continue
|
||||
if rung in expected:
|
||||
na[rung] = {"intent": "declared", "reason": expected[rung]}
|
||||
else:
|
||||
na[rung] = {"intent": "undeclared", "reason": ""}
|
||||
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
|
||||
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
|
||||
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
|
||||
|
||||
|
||||
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
    """Return a short clause explaining the level cap when the capping rung is N/A.

    The capping rung is taken to be `level_mod.RUNGS[level]`, i.e. the entry at the achieved
    level's index (NOTE(review): assumes RUNGS is ordered so that index == first unearned rung —
    confirm against level.py).

    Args:
        rungs: rung name -> status string; the capping rung must have status "na" to get a clause.
        level: the computed level, used as an index into `level_mod.RUNGS`.
        cap_reason: the cap reason from the level computation; when falsy there is no cap to
            explain and "" is returned.
        na_info: a mapping with a "rungs" entry of {rung: {"intent": ..., "reason": ...}}, the
            shape produced by `classify_na`.

    Returns:
        "intentional · <reason>" when the capping rung is declared N/A (just "intentional" when
        the declared reason is empty), a 'possible coverage gap' note when it is an undeclared
        gap-sensitive rung, otherwise "".
    """
    if not cap_reason:
        return ""
    # An out-of-range level has no capping rung to explain.
    if not 0 <= level < len(level_mod.RUNGS):
        return ""
    capped = level_mod.RUNGS[level]
    if not capped or rungs.get(capped) != "na":
        return ""

    entry = na_info["rungs"].get(capped, {})
    if entry.get("intent") == "declared":
        # Avoid a dangling "intentional · " when the recipe declared the rung without a reason.
        reason = str(entry.get("reason") or "").strip()
        return f"intentional · {reason}" if reason else "intentional"
    if capped in GAP_SENSITIVE_RUNGS:
        return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
    return ""
|
||||
|
||||
|
||||
def build_results(
|
||||
*,
|
||||
recipe: str,
|
||||
@ -217,9 +267,12 @@ def build_results(
|
||||
finished_ts: float | None,
|
||||
screenshot: str | None = None,
|
||||
summary_card: str | None = None,
|
||||
expected_na: dict | None = None,
|
||||
) -> dict:
|
||||
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
|
||||
stamps it) so this stays pure and deterministic for unit tests."""
|
||||
stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
|
||||
declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
|
||||
accidentally-missing coverage."""
|
||||
stages = collect_stages(records)
|
||||
has_custom = any(r["tier"] == "custom" for r in records)
|
||||
rungs = derive_rungs(
|
||||
@ -233,6 +286,8 @@ def build_results(
|
||||
repo_local_passed=_repo_local_passed(records),
|
||||
)
|
||||
lvl, cap_reason = level_mod.compute_level(rungs)
|
||||
na_info = classify_na(rungs, expected_na)
|
||||
intent = cap_intent(rungs, lvl, cap_reason, na_info)
|
||||
return {
|
||||
"schema": 1,
|
||||
"run_id": run_id(),
|
||||
@ -243,7 +298,9 @@ def build_results(
|
||||
"finished": finished_ts,
|
||||
"level": lvl,
|
||||
"level_cap_reason": cap_reason,
|
||||
"level_cap_intent": intent,
|
||||
"rungs": rungs,
|
||||
"na": na_info,
|
||||
"stages": stages,
|
||||
"results": results,
|
||||
"flags": {
|
||||
|
||||
Reference in New Issue
Block a user