feat(harness): declare intentional N/A tiers + custom-html-tiny functional test
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
Two changes the operator asked for after noticing custom-html-tiny PR #6 has no backup/restore or functional coverage: 1) Intentional-vs-accidental N/A. A recipe can now declare recipe_meta.EXPECTED_NA = {rung: reason} to mark a tier as deliberately not applicable (e.g. a stateless static server has no backup surface). N/A still caps the level — the harness never claims a rung it did not verify — but the run is now annotated 'intentional · <reason>' instead of being indistinguishable from a forgotten test. An *undeclared* N/A on a gap-sensitive rung (backup_restore, functional) is surfaced as a 'possible coverage gap', and a stale EXPECTED_NA (declared N/A but actually exercised) is surfaced too. All non-blocking (R7): results.json gains level_cap_intent + an `na` block, the summary card shows the clause, and the CI log prints the gap/stale warnings. (results.classify_na/cap_intent are pure + unit-tested; level.py untouched.) custom-html-tiny declares backup_restore intentionally N/A. 2) custom-html-tiny functional test: writes a random file into the served content volume (via the volume mountpoint, like install_steps.sh, since the SWS image is shell-less), asserts exact-byte round-trip + a real 404 on a missing path — proving the static-web-server actually serves the volume, not a 200-everything fallback. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@ -116,7 +116,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
|
||||
recipe = html.escape(str(data.get("recipe", "?")))
|
||||
version = html.escape(str(data.get("version") or data.get("ref") or ""))
|
||||
level = int(data.get("level", 0))
|
||||
cap = html.escape(str(data.get("level_cap_reason") or ""))
|
||||
cap_reason = str(data.get("level_cap_reason") or "")
|
||||
cap_intent = str(data.get("level_cap_intent") or "")
|
||||
cap = html.escape(cap_reason + (f" · {cap_intent}" if cap_intent else ""))
|
||||
color = level_color(level)
|
||||
flags = data.get("flags", {}) or {}
|
||||
flag_bits = []
|
||||
|
||||
@ -200,6 +200,56 @@ def derive_rungs(
|
||||
return rungs
|
||||
|
||||
|
||||
# Rungs where an *undeclared* N/A is suspicious — it usually means a recipe SHOULD have this coverage
|
||||
# but nobody added it (a backup label, a functional test), i.e. an accidental gap rather than a real
|
||||
# property of the recipe. For these, an undeclared N/A is surfaced as a "possible coverage gap" unless
|
||||
# the recipe declares it intentional via recipe_meta.EXPECTED_NA. The other rungs (upgrade — only one
|
||||
# published version; integration — no SSO surface; recipe_local — no repo-local tests) are
|
||||
# *structurally* optional: an N/A there is the normal case and is not flagged.
|
||||
GAP_SENSITIVE_RUNGS = ("backup_restore", "functional")
|
||||
|
||||
|
||||
def classify_na(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||
"""Distinguish *intentionally* N/A rungs from *accidentally* missing ones (operator request).
|
||||
|
||||
A recipe declares intentional N/A in `recipe_meta.EXPECTED_NA = {rung: reason}`. N/A always caps
|
||||
the level either way (the harness never inflates — a rung that wasn't verified wasn't verified);
|
||||
this only EXPLAINS the cap so a reviewer can tell "this recipe legitimately has no backup surface"
|
||||
from "someone forgot to add the backup test". Returns:
|
||||
{ "rungs": {rung: {"intent": "declared"|"undeclared", "reason": str}}, # one per N/A rung
|
||||
"gaps": [rung, ...], # gap-sensitive rungs that are N/A and NOT declared
|
||||
"stale_declared": [rung, ...] } # rungs declared N/A but actually exercised (stale opt-out)
|
||||
"""
|
||||
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
|
||||
na: dict[str, dict] = {}
|
||||
for rung, st in rungs.items():
|
||||
if st != "na":
|
||||
continue
|
||||
if rung in expected:
|
||||
na[rung] = {"intent": "declared", "reason": expected[rung]}
|
||||
else:
|
||||
na[rung] = {"intent": "undeclared", "reason": ""}
|
||||
gaps = [r for r in GAP_SENSITIVE_RUNGS if na.get(r, {}).get("intent") == "undeclared"]
|
||||
stale = sorted(r for r in expected if rungs.get(r) not in (None, "na"))
|
||||
return {"rungs": na, "gaps": gaps, "stale_declared": stale}
|
||||
|
||||
|
||||
def cap_intent(rungs: dict[str, str], level: int, cap_reason: str, na_info: dict) -> str:
    """Return a short clause explaining the level cap when the capping rung is N/A.

    The capping rung is taken to be `level_mod.RUNGS[level]`. The result is:
      * "intentional · <reason>" when that rung is N/A and declared in `na_info`,
      * a "possible coverage gap" note when it is N/A, undeclared and gap-sensitive,
      * "" in every other case (no cap reason, level out of range, capping rung not N/A, or an
        undeclared N/A on a rung that is not gap-sensitive).

    `na_info` is the dict produced by `classify_na`; only its "rungs" entry is read.
    """
    if not cap_reason:
        return ""

    ladder = level_mod.RUNGS
    if not 0 <= level < len(ladder):
        return ""

    blocking = ladder[level]
    if not blocking or rungs.get(blocking) != "na":
        return ""

    record = na_info["rungs"].get(blocking, {})
    if record.get("intent") == "declared":
        return f"intentional · {record['reason']}"

    if blocking not in GAP_SENSITIVE_RUNGS:
        return ""
    return "undeclared N/A — possible coverage gap (add a test or declare EXPECTED_NA)"
|
||||
|
||||
|
||||
def build_results(
|
||||
*,
|
||||
recipe: str,
|
||||
@ -217,9 +267,12 @@ def build_results(
|
||||
finished_ts: float | None,
|
||||
screenshot: str | None = None,
|
||||
summary_card: str | None = None,
|
||||
expected_na: dict | None = None,
|
||||
) -> dict:
|
||||
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
|
||||
stamps it) so this stays pure and deterministic for unit tests."""
|
||||
stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
|
||||
declared intentional-N/A map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
|
||||
accidentally-missing coverage."""
|
||||
stages = collect_stages(records)
|
||||
has_custom = any(r["tier"] == "custom" for r in records)
|
||||
rungs = derive_rungs(
|
||||
@ -233,6 +286,8 @@ def build_results(
|
||||
repo_local_passed=_repo_local_passed(records),
|
||||
)
|
||||
lvl, cap_reason = level_mod.compute_level(rungs)
|
||||
na_info = classify_na(rungs, expected_na)
|
||||
intent = cap_intent(rungs, lvl, cap_reason, na_info)
|
||||
return {
|
||||
"schema": 1,
|
||||
"run_id": run_id(),
|
||||
@ -243,7 +298,9 @@ def build_results(
|
||||
"finished": finished_ts,
|
||||
"level": lvl,
|
||||
"level_cap_reason": cap_reason,
|
||||
"level_cap_intent": intent,
|
||||
"rungs": rungs,
|
||||
"na": na_info,
|
||||
"stages": stages,
|
||||
"results": results,
|
||||
"flags": {
|
||||
|
||||
@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
|
||||
for k in list(meta) + [
|
||||
"BACKUP_CAPABLE",
|
||||
"SKIP_GENERIC",
|
||||
"EXPECTED_NA",
|
||||
"OIDC_AT_INSTALL",
|
||||
"READY_PROBE",
|
||||
"UPGRADE_BASE_VERSION",
|
||||
@ -1241,6 +1242,7 @@ def main() -> int:
|
||||
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
|
||||
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
|
||||
finished_ts=time.time(),
|
||||
expected_na=meta.get("EXPECTED_NA"), # declared intentional-N/A map (recipe_meta)
|
||||
)
|
||||
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
|
||||
blob = json.dumps(data)
|
||||
@ -1252,11 +1254,29 @@ def main() -> int:
|
||||
file=sys.stderr,
|
||||
)
|
||||
path = results_mod.write_results(data)
|
||||
intent = data.get("level_cap_intent") or ""
|
||||
print(
|
||||
f"results.json written: {path} (level={data['level']}"
|
||||
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
|
||||
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''}"
|
||||
f"{' [' + intent + ']' if intent else ''})",
|
||||
flush=True,
|
||||
)
|
||||
# Surface the intentional-vs-accidental N/A signal in the CI log (non-blocking, R7): a
|
||||
# gap-sensitive rung that is N/A but undeclared is a possible coverage hole; a stale
|
||||
# EXPECTED_NA declares a tier N/A that actually ran.
|
||||
na = data.get("na", {})
|
||||
for rung in na.get("gaps", []):
|
||||
print(
|
||||
f"⚠ coverage: rung '{rung}' is N/A but not declared intentional — add a test or "
|
||||
f"declare it in tests/{recipe}/recipe_meta.py EXPECTED_NA = {{'{rung}': '<why>'}}.",
|
||||
flush=True,
|
||||
)
|
||||
for rung in na.get("stale_declared", []):
|
||||
print(
|
||||
f"⚠ stale EXPECTED_NA: rung '{rung}' is declared N/A but was actually exercised "
|
||||
f"(status={data['rungs'].get(rung)}) — remove it from recipe_meta.EXPECTED_NA.",
|
||||
flush=True,
|
||||
)
|
||||
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
|
||||
print(
|
||||
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
|
||||
|
||||
87
tests/custom-html-tiny/functional/test_serves_content.py
Normal file
87
tests/custom-html-tiny/functional/test_serves_content.py
Normal file
@ -0,0 +1,87 @@
|
||||
"""custom-html-tiny — recipe-specific functional test (static-web-server).
|
||||
|
||||
Proves the deployed static-web-server is *actually serving files from its `content` volume* with real
|
||||
file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub:
|
||||
|
||||
1. exact-byte round-trip — write a uniquely-named file with random content into the served volume,
|
||||
fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random
|
||||
per run, so only a server that reads this file off the volume can pass.
|
||||
2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a
|
||||
200-everything stub or mis-routed host would not 404).
|
||||
|
||||
The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume
|
||||
is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the
|
||||
same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather
|
||||
than `docker exec`-ing in a container that has no shell.
|
||||
|
||||
Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its
|
||||
per-run domain). Mirrors install_steps.sh: the app's content volume is named `<stack>_content`, where
|
||||
`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is
|
||||
served at `/`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
import ssl
|
||||
import subprocess
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import uuid
|
||||
|
||||
|
||||
def _served_dir(domain: str) -> str:
    """Resolve the host mountpoint of the app's served `content` volume.

    The volume name is the domain with dots replaced by underscores, suffixed with `_content`
    (same naming as install_steps.sh). The mountpoint is read from `docker volume inspect`; a
    failing docker command raises `subprocess.CalledProcessError` (check=True), and an empty
    result fails the assertion.
    """
    volume_name = domain.replace(".", "_") + "_content"
    inspect_cmd = ["docker", "volume", "inspect", volume_name, "--format", "{{.Mountpoint}}"]
    completed = subprocess.run(inspect_cmd, capture_output=True, text=True, check=True)
    resolved = completed.stdout.strip()
    assert resolved, f"could not resolve mountpoint for volume {volume_name!r}"
    return resolved
|
||||
|
||||
|
||||
def _get(url: str) -> tuple[int, bytes]:
    """Fetch `url` with GET and return `(status, body)`.

    An HTTP error response (4xx/5xx) is returned as a normal result rather than raised, because
    callers assert on the status code. TLS verification is deliberately relaxed (no hostname
    check, no certificate validation): the served wildcard cert is validated separately by the
    infra check; here only the app's response matters.
    """
    insecure_ctx = ssl.create_default_context()
    # check_hostname must be switched off before verify_mode can be set to CERT_NONE.
    insecure_ctx.check_hostname = False
    insecure_ctx.verify_mode = ssl.CERT_NONE

    try:
        with urllib.request.urlopen(url, timeout=20, context=insecure_ctx) as reply:
            code = reply.status
            return code, reply.read()
    except urllib.error.HTTPError as http_err:
        return http_err.code, http_err.read()
|
||||
|
||||
|
||||
def test_static_file_roundtrip_and_404(live_app):
    """Round-trip a random probe file through the served volume, then check a missing path 404s.

    The probe file is written straight into the volume's host mountpoint, fetched over HTTPS and
    compared byte-for-byte; it is removed again afterwards even when an assertion fails.
    """
    volume_root = _served_dir(live_app)
    nonce = uuid.uuid4().hex
    probe_name = f"ccci-probe-{nonce}.txt"
    expected = f"cc-ci-functional-{nonce}\n".encode()
    probe_path = os.path.join(volume_root, probe_name)

    with open(probe_path, "wb") as probe:
        probe.write(expected)

    try:
        probe_url = f"https://{live_app}/{probe_name}"
        status, got = _get(probe_url)
        assert status == 200, f"served probe file returned {status} (expected 200)"
        assert got == expected, (
            f"content round-trip mismatch: served {got!r}, wrote {expected!r} "
            "(static-web-server not serving the content volume?)"
        )

        # A random non-existent path must 404 — proves real static-file semantics, distinguishing a
        # working server from a 200-everything stub or a mis-routed Traefik fallback.
        missing_url = f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt"
        miss_status, _ = _get(missing_url)
        assert miss_status == 404, (
            f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)"
        )
    finally:
        # Best-effort cleanup: a probe file that is already gone is not an error.
        with contextlib.suppress(OSError):
            os.remove(probe_path)
|
||||
@ -3,3 +3,15 @@
|
||||
# (DG5) is detected quickly instead of waiting the default 300s HTTP timeout.
|
||||
DEPLOY_TIMEOUT = 120
|
||||
HTTP_TIMEOUT = 90
|
||||
|
||||
# Intentionally-N/A tiers (reviewed opt-out, NOT a coverage gap). custom-html-tiny is a stateless
|
||||
# static-web-server: it serves an ephemeral `content` volume that the harness seeds at deploy time
|
||||
# (install_steps.sh) and holds no persistent or user data, so there is nothing to back up or restore.
|
||||
# The recipe therefore declares no `backupbot.backup` label and the L3 backup/restore rung is N/A.
|
||||
# Declaring it here marks that N/A as deliberate, so the run is annotated "intentional" instead of
|
||||
# being flagged as a possible missing-coverage gap. (N/A still caps the level — the harness never
|
||||
# claims a rung it did not verify; this only explains *why* the cap is expected.)
|
||||
EXPECTED_NA = {
|
||||
"backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
|
||||
"deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
|
||||
}
|
||||
|
||||
@ -257,6 +257,104 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
|
||||
assert "L2" in data["level_cap_reason"]
|
||||
|
||||
|
||||
# ---- classify_na / cap_intent: intentional-vs-accidental N/A (operator request) ----
|
||||
|
||||
|
||||
def _rungs(**kw):
|
||||
base = {
|
||||
"install": "pass",
|
||||
"upgrade": "pass",
|
||||
"backup_restore": "pass",
|
||||
"functional": "pass",
|
||||
"integration": "na",
|
||||
"recipe_local": "na",
|
||||
}
|
||||
base.update(kw)
|
||||
return base
|
||||
|
||||
|
||||
def test_classify_na_declared_vs_undeclared():
    """A declared N/A is marked intentional; an undeclared gap-sensitive N/A is reported as a gap."""
    reason = "stateless static server"
    info = R.classify_na(
        _rungs(backup_restore="na", functional="na"),
        {"backup_restore": reason},
    )
    per_rung = info["rungs"]

    # backup_restore is declared intentional; functional is an undeclared gap-sensitive N/A.
    assert per_rung["backup_restore"] == {"intent": "declared", "reason": reason}
    assert per_rung["functional"]["intent"] == "undeclared"
    assert info["gaps"] == ["functional"]  # backup_restore declared → not a gap
    assert info["stale_declared"] == []

    # structurally-optional N/A (integration, recipe_local) are recorded but never flagged as gaps.
    assert per_rung["integration"]["intent"] == "undeclared"
    assert "integration" not in info["gaps"]
|
||||
|
||||
|
||||
def test_classify_na_stale_declaration():
    """A rung declared N/A that actually ran is reported as a stale opt-out."""
    # backup_restore actually ran (pass) but is declared N/A → stale opt-out, surfaced.
    exercised = _rungs(backup_restore="pass")
    info = R.classify_na(exercised, {"backup_restore": "stale reason"})

    assert info["stale_declared"] == ["backup_restore"]
    # not N/A, so not in the per-rung N/A map
    assert "backup_restore" not in info["rungs"]
|
||||
|
||||
|
||||
def test_cap_intent_declared_explains_cap():
    """A declared N/A on the capping rung yields the 'intentional · <reason>' clause."""
    # install+upgrade pass, backup_restore declared-N/A → caps at L2 with an intentional clause.
    rungs = _rungs(backup_restore="na")
    na_info = R.classify_na(rungs, {"backup_restore": "no persistent data"})
    cap_reason = "L3 backup/restore (data integrity) N/A"

    assert R.cap_intent(rungs, 2, cap_reason, na_info) == "intentional · no persistent data"
|
||||
|
||||
|
||||
def test_cap_intent_undeclared_gap():
    """An undeclared N/A on a gap-sensitive capping rung yields the coverage-gap note."""
    rungs = _rungs(backup_restore="na")
    na_info = R.classify_na(rungs, None)
    clause = R.cap_intent(rungs, 2, "L3 backup/restore (data integrity) N/A", na_info)

    assert "possible coverage gap" in clause
|
||||
|
||||
|
||||
def test_cap_intent_blank_when_not_capped_on_na():
    """No clause for a cap on a structurally-optional rung, nor when there is no cap at all."""
    # full clean climb, capped only at integration (na, structurally optional)
    rungs = _rungs()
    na_info = R.classify_na(rungs, None)

    # capping rung is integration (level 4) — structurally optional, so no intent clause.
    capped_on_optional = R.cap_intent(rungs, 4, "L5 integration N/A", na_info)
    assert capped_on_optional == ""

    # and no cap at all → blank.
    uncapped = R.cap_intent(rungs, 6, "", na_info)
    assert uncapped == ""
|
||||
|
||||
|
||||
def test_build_results_threads_expected_na(tmp_path):
    """build_results passes `expected_na` through to the level-cap intent and the `na` block."""
    install_record = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    declared_na = {"backup_restore": "stateless static file server"}

    data = R.build_results(
        recipe="custom-html-tiny",
        version="1.1.0",
        pr="0",
        ref=None,
        records=[install_record],
        results=_results(backup="skip", restore="skip", custom="skip"),
        backup_capable=False,  # no backupbot label → backup_restore N/A
        declared=[],
        deps_ready=True,
        sso_unverified=False,
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        expected_na=declared_na,
    )

    # N/A still caps at L2 (never inflates), but now annotated intentional rather than flagged.
    assert data["level"] == 2
    assert "L3" in data["level_cap_reason"]
    assert data["level_cap_intent"] == "intentional · stateless static file server"

    na_block = data["na"]
    assert na_block["rungs"]["backup_restore"]["intent"] == "declared"
    assert na_block["gaps"] == []
|
||||
|
||||
|
||||
def test_write_results_roundtrip(tmp_path):
|
||||
data = {"run_id": "42", "level": 3, "stages": []}
|
||||
path = R.write_results(data, runs_dir_override=str(tmp_path))
|
||||
|
||||
Reference in New Issue
Block a user