feat(harness): intentional skips + custom-html-tiny functional test; 4-rung ladder (#6)
Some checks failed
continuous-integration/drone/push Build is failing

Declare intentional skips + custom-html-tiny functional test; 4-rung level ladder

- recipe_meta.EXPECTED_NA = {rung: reason} lists intentionally-skipped rungs; any
  essential rung skipped and not listed is unintentional. Skips still cap the level
  (never inflate). results.json: skips:{intentional,unintentional} + level_cap_rung.
- Level ladder = the four essential rungs (install, upgrade, backup/restore,
  functional; top = L4). integration & recipe-local are optional, not leveled
  (SSO still enforced for the run verdict, unchanged).
- Card shows skipped rungs as INTENTIONAL SKIP (green, reason below) / UNINTENTIONAL
  SKIP (amber); level badge gains an expected/gap? third segment.
- custom-html-tiny: functional serve test (exact-byte round-trip + 404); declares
  backup_restore intentionally skipped (stateless static server).

Independently verified by the adversary: 138 unit tests pass cold; live full-stage
run on custom-html-tiny green (upgrade tier ran; level 2; correct skips/badge);
clean teardown.
This commit is contained in:
2026-06-09 03:12:11 +00:00
parent f5a6f7196f
commit c51cd84159
10 changed files with 392 additions and 187 deletions

View File

@ -79,10 +79,44 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
) )
def level_badge_svg(level: int, cap_reason: str = "") -> str: # Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not
"""Per-recipe/-run LEVEL badge: 'cc-ci | level N'. Colour by level (R6).""" # in the recipe's intentional list — likely missing coverage) capped the climb; muted = an
msg = f"level {int(level)}" # INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels
return render_badge_svg("cc-ci", msg, level_color(level)) # (no emoji) so the SVG renders anywhere.
GAP_COLOR = "#d29922"  # amber: third badge segment ('gap?') when an UNINTENTIONAL skip capped the level
EXPECT_COLOR = "#6e7681"  # muted: third badge segment ('expected') when an INTENTIONAL skip capped it
def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str:
    """Build the per-recipe/-run LEVEL badge SVG: 'cc-ci | level N', coloured by level (R6).

    When a SKIP capped the climb, a THIRD segment says which kind of skip it was (`cap_skip`):
      - "unintentional" (rung skipped but not in the recipe's intentional list): amber 'gap?'.
      - "intentional" (skip declared in recipe_meta.EXPECTED_NA): muted 'expected'.
      - anything else (clean cap / full climb / a real failure): plain two-segment badge.
    `cap_reason` is accepted for call compatibility but is not rendered into the badge here.
    The badge never inflates — it only annotates the cap the level already reflects."""
    label = "cc-ci"
    msg = f"level {int(level)}"
    lw = _text_width(label)
    mw = _text_width(msg)

    # Pick the optional third segment; no skip-cap means the ordinary two-segment badge.
    if cap_skip == "unintentional":
        txt, tcolor = "gap?", GAP_COLOR
    elif cap_skip == "intentional":
        txt, tcolor = "expected", EXPECT_COLOR
    else:
        return render_badge_svg(label, msg, level_color(level))

    tw = _text_width(txt)
    msg_x = lw  # left edge of the level segment
    third_x = lw + mw  # left edge of the skip-kind segment
    safe_label = html.escape(label)
    safe_msg = html.escape(msg)
    safe_txt = html.escape(txt)
    pieces = [
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{third_x + tw}" height="20" role="img" ',
        f'aria-label="{safe_label}: {safe_msg} ({safe_txt})">',
        f'<rect width="{lw}" height="20" fill="#555"/>',
        f'<rect x="{msg_x}" width="{mw}" height="20" fill="{level_color(level)}"/>',
        f'<rect x="{third_x}" width="{tw}" height="20" fill="{tcolor}"/>',
        '<g fill="#fff" font-family="Verdana,Geneva,sans-serif" font-size="11">',
        f'<text x="6" y="14">{safe_label}</text>',
        f'<text x="{msg_x + 6}" y="14">{safe_msg}</text>',
        f'<text x="{third_x + 6}" y="14">{safe_txt}</text></g></svg>',
    ]
    return "".join(pieces)
def _stage_rows(stages: list[dict]) -> str: def _stage_rows(stages: list[dict]) -> str:
@ -107,6 +141,41 @@ def _stage_rows(stages: list[dict]) -> str:
return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>' return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'
# Friendly rung labels for the skip rows (the four essential rungs). Keys are the rung names as they
# appear in the results.json `skips` dict; _skip_rows falls back to the raw rung name for any key
# that is not listed here.
RUNG_LABEL = {
    "install": "install",
    "upgrade": "upgrade",
    "backup_restore": "backup/restore",
    "functional": "functional",
}
SKIP_GREEN = "#57ab5a"  # muted green — an intentional skip reads like a pass (but labelled, never inflating)
def _skip_rows(skips: dict) -> str:
    """Render the SKIPPED rungs of a results.json `skips` dict as stage-like table rows.

    `skips` has the shape {"intentional": {rung: reason}, "unintentional": [rung, ...]}; either key
    may be missing or None. Each skipped rung yields two <tr> rows:
      - intentional: a muted-green 'intentional skip' status row, then the declared reason;
      - unintentional: an amber 'unintentional skip' status row, then a fixed prompt to add the
        missing test/label or declare the skip.
    Returns the rows joined by newlines ("" when nothing was skipped).

    Rung names and reasons are coerced with str() before escaping (a None reason renders as empty),
    matching how render_card_html escapes its fields — so a results.json carrying a non-string
    value cannot abort card rendering with an AttributeError from html.escape."""
    rows: list[str] = []
    for rung, reason in (skips.get("intentional") or {}).items():
        label = html.escape(str(RUNG_LABEL.get(rung, rung)))
        reason_text = html.escape("" if reason is None else str(reason))
        rows.append(
            f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{SKIP_GREEN}">⊘</span>'
            f'<b>{label}</b></td>'
            f'<td class="st" style="color:{SKIP_GREEN}">intentional skip</td></tr>'
        )
        rows.append(f'<tr class="skipreason"><td></td><td colspan="2">{reason_text}</td></tr>')
    for rung in skips.get("unintentional") or []:
        label = html.escape(str(RUNG_LABEL.get(rung, rung)))
        rows.append(
            f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{GAP_COLOR}">⊘</span>'
            f'<b>{label}</b></td>'
            f'<td class="st" style="color:{GAP_COLOR}">unintentional skip</td></tr>'
        )
        rows.append(
            '<tr class="skipreason"><td></td><td colspan="2">not declared in EXPECTED_NA — add the '
            "missing test/label, or declare the skip with a reason</td></tr>"
        )
    return "\n".join(rows)
def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str: def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str:
"""Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to
the screenshot PNG (same dir as the card) — omitted from the card if None / absent. the screenshot PNG (same dir as the card) — omitted from the card if None / absent.
@ -116,7 +185,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
recipe = html.escape(str(data.get("recipe", "?"))) recipe = html.escape(str(data.get("recipe", "?")))
version = html.escape(str(data.get("version") or data.get("ref") or "")) version = html.escape(str(data.get("version") or data.get("ref") or ""))
level = int(data.get("level", 0)) level = int(data.get("level", 0))
cap = html.escape(str(data.get("level_cap_reason") or "")) cap_reason = str(data.get("level_cap_reason") or "")
cap = html.escape(cap_reason)
sk = data.get("skips", {}) or {}
color = level_color(level) color = level_color(level)
flags = data.get("flags", {}) or {} flags = data.get("flags", {}) or {}
flag_bits = [] flag_bits = []
@ -132,7 +203,7 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
if show_shot if show_shot
else '<div class="shot noshot">no screenshot</div>' else '<div class="shot noshot">no screenshot</div>'
) )
rows = _stage_rows(data.get("stages", [])) rows = _stage_rows(data.get("stages", [])) + "\n" + _skip_rows(sk)
return f"""<!doctype html><html><head><meta charset="utf-8"><style> return f"""<!doctype html><html><head><meta charset="utf-8"><style>
*{{box-sizing:border-box}} *{{box-sizing:border-box}}
body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;background:#0d1117;color:#c9d1d9}} body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;background:#0d1117;color:#c9d1d9}}
@ -157,6 +228,7 @@ tr.stage td{{padding-top:.5rem;border-bottom:1px solid #30363d}}
.test .tmark{{width:1.4rem;text-align:center}} .test .tmark{{width:1.4rem;text-align:center}}
.test .tname{{color:#c9d1d9;font-family:ui-monospace,monospace;font-size:.8rem}} .test .tname{{color:#c9d1d9;font-family:ui-monospace,monospace;font-size:.8rem}}
.test .tms{{text-align:right;color:#8b949e;font-size:.74rem;width:5rem}} .test .tms{{text-align:right;color:#8b949e;font-size:.74rem;width:5rem}}
tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0;padding-bottom:.45rem;border-bottom:1px solid #21262d}}
.shot{{width:360px;flex:none;border:1px solid #30363d;border-radius:8px;overflow:hidden;background:#0d1117}} .shot{{width:360px;flex:none;border:1px solid #30363d;border-radius:8px;overflow:hidden;background:#0d1117}}
.shot img{{width:100%;display:block}} .shot img{{width:100%;display:block}}
.shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}} .shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}}
@ -167,7 +239,7 @@ tr.stage td{{padding-top:.5rem;border-bottom:1px solid #30363d}}
<div class="hd">{FLOWER_SVG} <div class="hd">{FLOWER_SVG}
<div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div> <div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div>
<div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div> <div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div>
<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (6)"}</div> <div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (4)"}</div>
<div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div> <div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div>
<div class="flags">{"".join(flag_bits)}</div> <div class="flags">{"".join(flag_bits)}</div>
</div></body></html>""" </div></body></html>"""

View File

@ -5,37 +5,39 @@ YunoHost semantics: **a gap caps the level** — you only earn level L if every
PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly — with a recorded reason so the level is *fair*, not inflated.
this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
The ladder (§4.1): The ladder is the FOUR essential rungs every recipe is held to:
L0 — install failed / app never became healthy. L0 — install failed / app never became healthy.
L1 — Installs: deploys + passes health/readiness. L1 — Installs: deploys + passes health/readiness.
L2 — Upgrades: previous published version → PR version, stays healthy, data intact. L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
L3 — Backup/restore: seeded data survives backup → wipe → restore. L3 — Backup/restore: seeded data survives backup → wipe → restore.
L4 — Functional: recipe-specific functional tests pass. L4 — Functional: recipe-specific functional tests pass.
L5 — Integration: SSO/OIDC + cross-app integration tests pass.
L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged. Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL**
capabilities — they are NOT part of the level ladder and never cap it. They still run when present
(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a
recipe without an SSO surface or without repo-local tests is simply not penalised on the level.
This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into (`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status
the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3). dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
Rung status vocabulary (each rung ∈ these three): Rung status vocabulary (each rung ∈ these three):
"pass" — the rung was exercised and passed. "pass" — the rung was exercised and passed.
"fail" — the rung was exercised and failed. "fail" — the rung was exercised and failed.
"na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade; "na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct
failure, but it DOES cap the climb (with a distinct cap_reason) so the level never cap_reason) so the level never overstates what was actually verified.
overstates what was actually verified.
""" """
from __future__ import annotations from __future__ import annotations
# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself # The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
# did not pass. Each later rung requires every earlier rung to be a clean PASS. # did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the
RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local") # ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple.
RUNGS = ("install", "upgrade", "backup_restore", "functional")
# Human-readable label per rung level, for cap_reason + the summary card. # Human-readable label per rung level, for cap_reason + the summary card.
RUNG_LABEL = { RUNG_LABEL = {
@ -43,22 +45,20 @@ RUNG_LABEL = {
2: "upgrade (prev published → PR)", 2: "upgrade (prev published → PR)",
3: "backup/restore (data integrity)", 3: "backup/restore (data integrity)",
4: "functional (recipe-specific tests)", 4: "functional (recipe-specific tests)",
5: "integration (SSO/OIDC + cross-app)",
6: "recipe-local (recipe repo tests/)",
} }
VALID = {"pass", "fail", "na"} VALID = {"pass", "fail", "na"}
def compute_level(rungs: dict[str, str]) -> tuple[int, str]: def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
"""Map a rung-status dict → (level 0..6, cap_reason). """Map a rung-status dict → (level 0..4, cap_reason).
`rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
returned when the install rung itself is not "pass" (install failed / never healthy). returned when the install rung itself is not "pass" (install failed / never healthy).
cap_reason explains where the climb stopped: cap_reason explains where the climb stopped:
- "" (empty) when the recipe earned the top rung (L6, full clean climb). - "" (empty) when the recipe earned the top rung (L4, full clean climb).
- "L<k> <label> FAILED" when a rung was exercised and failed. - "L<k> <label> FAILED" when a rung was exercised and failed.
- "L<k> <label> N/A" when a rung does not apply to this recipe. - "L<k> <label> N/A" when a rung does not apply to this recipe.
Returns the reason for the FIRST rung that stopped the climb (the binding constraint). Returns the reason for the FIRST rung that stopped the climb (the binding constraint).

View File

@ -2,7 +2,14 @@
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan: Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}], { recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card } level, level_cap_reason, level_cap_rung, rungs,
skips:{intentional:{rung:reason}, unintentional:[rung]},
flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
never claims a rung it did not verify; this only labels *why* a skip happened.
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`), The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
@ -127,41 +134,24 @@ def collect_stages(records: list[dict]) -> list[dict]:
return stages return stages
def _has_repo_local(records: list[dict]) -> bool:
return any(r.get("source") == "repo-local" for r in records)
def _repo_local_passed(records: list[dict]) -> bool:
repo = [r for r in records if r.get("source") == "repo-local"]
return bool(repo) and all(r.get("rc", 1) == 0 for r in repo)
def derive_rungs( def derive_rungs(
results: dict[str, str], results: dict[str, str],
*, *,
backup_capable: bool, backup_capable: bool,
declared: list[str] | None,
deps_ready: bool,
sso_unverified: bool,
has_custom: bool, has_custom: bool,
has_repo_local: bool,
repo_local_passed: bool,
) -> dict[str, str]: ) -> dict[str, str]:
"""Translate the orchestrator's tier results + deps/SSO signals into the rung-status dict """Translate the orchestrator's tier results into the rung-status dict harness.level consumes —
harness.level consumes. Documented in DECISIONS.md (Phase 3). Conservative by design — never the FOUR essential rungs only. Conservative by design — never reports a rung 'pass' it can't
reports a rung 'pass' it can't substantiate (cardinal guardrail: presentation never inflates). substantiate (cardinal guardrail: presentation never inflates).
L1 install : install tier pass. L1 install : install tier pass.
L2 upgrade : upgrade tier (skip → N/A: only one published version). L2 upgrade : upgrade tier (skip → N/A: only one published version).
L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable). L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable).
L4 functional : the recipe-specific functional (non-deps) tests pass — the custom tier, minus L4 functional : recipe-specific functional tests pass — the custom tier. N/A if none ran.
its SSO/integration tests. N/A if the recipe has no custom tests at all.
L5 integration: SSO/OIDC + cross-app. Applies ONLY if the recipe declares deps (else N/A — the Integration (SSO/OIDC) and recipe-local are OPTIONAL and intentionally NOT rungs here — they
"no integration surface caps at L4" rule, §4.1). pass iff deps wired never cap the level (SSO is still enforced for the run VERDICT in run_recipe_ci.py).
(deps_ready) and not sso_unverified and the custom tier didn't fail.
L6 recipe-loc : the recipe repo's own tests/ (repo-local source) ran and passed (N/A if none).
""" """
declared = declared or []
rungs: dict[str, str] = {} rungs: dict[str, str] = {}
rungs["install"] = level_mod.tier_to_rung(results.get("install")) rungs["install"] = level_mod.tier_to_rung(results.get("install"))
rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade")) rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade"))
@ -170,36 +160,34 @@ def derive_rungs(
) )
custom = results.get("custom") custom = results.get("custom")
# Functional rung (L4): the non-deps custom tests.
if not has_custom or custom == "skip" or custom is None: if not has_custom or custom == "skip" or custom is None:
rungs["functional"] = "na" rungs["functional"] = "na"
elif custom == "fail": elif custom == "fail":
# A custom test failed. With declared deps we cannot cheaply tell functional-vs-SSO apart, so
# conservatively fail the functional rung (caps at L3) — never inflate.
rungs["functional"] = "fail" rungs["functional"] = "fail"
else: # custom == "pass" else: # custom == "pass"
rungs["functional"] = "pass" rungs["functional"] = "pass"
# Integration rung (L5): only recipes with an SSO/integration surface (declared deps) can climb.
if not declared:
rungs["integration"] = "na"
elif sso_unverified or not deps_ready or custom == "fail":
# SSO not wired/verified, or a custom test failed → integration not verified.
rungs["integration"] = "fail"
elif custom == "pass":
rungs["integration"] = "pass"
else:
# declared deps but no custom tests ran — can't claim integration verified
rungs["integration"] = "na"
# Recipe-local rung (L6).
if not has_repo_local:
rungs["recipe_local"] = "na"
else:
rungs["recipe_local"] = "pass" if repo_local_passed else "fail"
return rungs return rungs
def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
    """Classify every SKIPPED ("na") rung as an intentional or an unintentional skip.

    `expected_na` is the recipe's `recipe_meta.EXPECTED_NA = {rung: reason}` map (None or empty
    means nothing is declared); its keys and reasons are stringified. A rung whose status is "na"
    is **intentional** iff it appears in that map, otherwise it is **unintentional** (a coverage
    gap to fill or declare). Rungs with any other status are ignored. This only labels *why* a
    skip happened — it does not compute or change the level. Returns:
        { "intentional": {rung: reason, ...},   # skipped AND declared, in `rungs` order
          "unintentional": [rung, ...] }        # skipped but NOT declared, sorted by name
    """
    declared: dict[str, str] = {}
    for key, why in (expected_na or {}).items():
        declared[str(key)] = str(why)

    intentional: dict[str, str] = {}
    gaps: list[str] = []
    for rung, status in rungs.items():
        if status == "na":
            if rung in declared:
                intentional[rung] = declared[rung]
            else:
                gaps.append(rung)
    gaps.sort()
    return {"intentional": intentional, "unintentional": gaps}
def build_results( def build_results(
*, *,
recipe: str, recipe: str,
@ -209,30 +197,24 @@ def build_results(
records: list[dict], records: list[dict],
results: dict[str, str], results: dict[str, str],
backup_capable: bool, backup_capable: bool,
declared: list[str] | None,
deps_ready: bool,
sso_unverified: bool,
clean_teardown: bool, clean_teardown: bool,
no_secret_leak: bool, no_secret_leak: bool,
finished_ts: float | None, finished_ts: float | None,
screenshot: str | None = None, screenshot: str | None = None,
summary_card: str | None = None, summary_card: str | None = None,
expected_na: dict | None = None,
) -> dict: ) -> dict:
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
stamps it) so this stays pure and deterministic for unit tests.""" stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
declared intentional-skip map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
accidentally-missing coverage."""
stages = collect_stages(records) stages = collect_stages(records)
has_custom = any(r["tier"] == "custom" for r in records) has_custom = any(r["tier"] == "custom" for r in records)
rungs = derive_rungs( rungs = derive_rungs(results, backup_capable=backup_capable, has_custom=has_custom)
results,
backup_capable=backup_capable,
declared=declared,
deps_ready=deps_ready,
sso_unverified=sso_unverified,
has_custom=has_custom,
has_repo_local=_has_repo_local(records),
repo_local_passed=_repo_local_passed(records),
)
lvl, cap_reason = level_mod.compute_level(rungs) lvl, cap_reason = level_mod.compute_level(rungs)
# The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
# (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
capped = level_mod.RUNGS[lvl] if cap_reason else None
return { return {
"schema": 1, "schema": 1,
"run_id": run_id(), "run_id": run_id(),
@ -243,7 +225,9 @@ def build_results(
"finished": finished_ts, "finished": finished_ts,
"level": lvl, "level": lvl,
"level_cap_reason": cap_reason, "level_cap_reason": cap_reason,
"level_cap_rung": capped,
"rungs": rungs, "rungs": rungs,
"skips": skips(rungs, expected_na),
"stages": stages, "stages": stages,
"results": results, "results": results,
"flags": { "flags": {

View File

@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
for k in list(meta) + [ for k in list(meta) + [
"BACKUP_CAPABLE", "BACKUP_CAPABLE",
"SKIP_GENERIC", "SKIP_GENERIC",
"EXPECTED_NA",
"OIDC_AT_INSTALL", "OIDC_AT_INSTALL",
"READY_PROBE", "READY_PROBE",
"UPGRADE_BASE_VERSION", "UPGRADE_BASE_VERSION",
@ -1224,7 +1225,6 @@ def main() -> int:
# a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ---- # a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ----
data: dict | None = None data: dict | None = None
try: try:
sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error
data = results_mod.build_results( data = results_mod.build_results(
recipe=recipe, recipe=recipe,
@ -1234,13 +1234,11 @@ def main() -> int:
records=records, records=records,
results=results, results=results,
backup_capable=backup_cap, backup_capable=backup_cap,
declared=declared,
deps_ready=deps_ready,
sso_unverified=sso_unverified,
clean_teardown=clean_teardown, clean_teardown=clean_teardown,
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
finished_ts=time.time(), finished_ts=time.time(),
expected_na=meta.get("EXPECTED_NA"), # declared intentional-skip map (recipe_meta)
) )
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7). # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
blob = json.dumps(data) blob = json.dumps(data)
@ -1257,6 +1255,15 @@ def main() -> int:
f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''})", f"{'' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
flush=True, flush=True,
) )
# Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
# but is not in the recipe's intentional list — either add the missing coverage or declare it.
for rung in data.get("skips", {}).get("unintentional", []):
print(
f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
flush=True,
)
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7) except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
print( print(
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}", f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
@ -1275,8 +1282,19 @@ def main() -> int:
with open(html_path, "w", encoding="utf-8") as f: with open(html_path, "w", encoding="utf-8") as f:
f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot"))) f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png")) png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
capped = data.get("level_cap_rung")
sk = data.get("skips", {})
cap_skip = (
"intentional" if capped in (sk.get("intentional") or {})
else "unintentional" if capped in (sk.get("unintentional") or [])
else ""
)
with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f: with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
f.write(card_mod.level_badge_svg(data["level"], data.get("level_cap_reason", ""))) f.write(
card_mod.level_badge_svg(
data["level"], data.get("level_cap_reason", ""), cap_skip
)
)
print( print(
f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + " f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
f"badge.svg written into {run_artifact_dir}", f"badge.svg written into {run_artifact_dir}",

View File

@ -0,0 +1,87 @@
"""custom-html-tiny — recipe-specific functional test (static-web-server).
Proves the deployed static-web-server is *actually serving files from its `content` volume* with real
file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub:
1. exact-byte round-trip — write a uniquely-named file with random content into the served volume,
fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random
per run, so only a server that reads this file off the volume can pass.
2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a
200-everything stub or mis-routed host would not 404).
The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume
is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the
same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather
than `docker exec`-ing in a container that has no shell.
Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its
per-run domain). Mirrors install_steps.sh: the app's content volume is named `<stack>_content`, where
`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is
served at `/`.
"""
from __future__ import annotations
import contextlib
import os
import ssl
import subprocess
import urllib.error
import urllib.request
import uuid
def _served_dir(domain: str) -> str:
    """Return the host mountpoint of the app's served `content` volume.

    The volume is named `<stack>_content`, where `stack` is `domain` with dots replaced by
    underscores (same naming as install_steps.sh). The mountpoint is read with
    `docker volume inspect`; a non-zero docker exit raises CalledProcessError, and an empty
    mountpoint fails the assertion."""
    stack = domain.replace(".", "_")
    vol = f"{stack}_content"
    inspect_cmd = ["docker", "volume", "inspect", vol, "--format", "{{.Mountpoint}}"]
    proc = subprocess.run(inspect_cmd, capture_output=True, text=True, check=True)
    mountpoint = proc.stdout.strip()
    assert mountpoint, f"could not resolve mountpoint for volume {vol!r}"
    return mountpoint
def _get(url: str) -> tuple[int, bytes]:
    """GET `url` and return (status, body). A 4xx/5xx is returned, not raised (callers assert on
    the code); other errors (e.g. connection failures, timeouts) still propagate.

    TLS verification is deliberately relaxed (no hostname check, no certificate validation): the
    served wildcard cert is validated separately by the infra check; here we care only about the
    app's response. The request times out after 20 seconds."""
    ctx = ssl.create_default_context()
    # check_hostname must be switched off before verify_mode can be set to CERT_NONE.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    try:
        with urllib.request.urlopen(url, timeout=20, context=ctx) as resp:
            return resp.status, resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError doubles as the response object and holds the connection open; close it after
        # reading so error responses do not leak sockets / emit ResourceWarning.
        try:
            return e.code, e.read()
        finally:
            e.close()
def test_static_file_roundtrip_and_404(live_app):
    """Round-trip a random probe file through the served volume, then check a missing path 404s.

    `live_app` is the per-run domain of the deployed app. The probe file is written straight into
    the volume's host mountpoint, fetched over HTTPS and compared byte-for-byte; afterwards a
    random non-existent path must answer 404. The probe file is removed again in every case.
    """
    served = _served_dir(live_app)
    token = uuid.uuid4().hex
    name = f"ccci-probe-{token}.txt"
    body = f"cc-ci-functional-{token}\n".encode()
    path = os.path.join(served, name)
    try:
        # The write sits inside the try so that a probe file left behind by a failed or partial
        # write is still cleaned up by the finally clause below.
        with open(path, "wb") as fh:
            fh.write(body)
        status, got = _get(f"https://{live_app}/{name}")
        assert status == 200, f"served probe file returned {status} (expected 200)"
        assert got == body, (
            f"content round-trip mismatch: served {got!r}, wrote {body!r} "
            "(static-web-server not serving the content volume?)"
        )
        # A random non-existent path must 404 — proves real static-file semantics, distinguishing a
        # working server from a 200-everything stub or a mis-routed Traefik fallback.
        miss_status, _ = _get(f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt")
        assert miss_status == 404, (
            f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)"
        )
    finally:
        # Best-effort cleanup: the file may never have been created, or may already be gone.
        with contextlib.suppress(OSError):
            os.remove(path)

View File

@ -3,3 +3,14 @@
# (DG5) is detected quickly instead of waiting the default 300s HTTP timeout. # (DG5) is detected quickly instead of waiting the default 300s HTTP timeout.
DEPLOY_TIMEOUT = 120 DEPLOY_TIMEOUT = 120
HTTP_TIMEOUT = 90 HTTP_TIMEOUT = 90
# Intentionally skipped rungs for this recipe, as {rung: reason}. An essential rung that ends up
# skipped (N/A) without an entry here is reported as an *unintentional* skip, i.e. a coverage gap
# to fill or to declare. Declaring a skip never raises the level: a skipped rung caps the level
# either way, because the harness does not claim a rung it did not verify; the entry only records
# that the skip is deliberate. (The level ladder consists of the four essential rungs install/
# upgrade/backup_restore/functional; integration + recipe-local are optional and not leveled.)
# custom-html-tiny is a stateless static-web-server, so it has no backup surface:
EXPECTED_NA = {
    "backup_restore": (
        "stateless static file server: serves an ephemeral content volume seeded at deploy, "
        "with no persistent/user data to back up or restore (no backupbot.backup label)"
    ),
}

View File

@ -14,7 +14,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
from harness import card as C # noqa: E402 from harness import card as C # noqa: E402
def _data(level=4, cap="L5 integration (SSO/OIDC + cross-app) N/A"): def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
return { return {
"recipe": "uptime-kuma", "recipe": "uptime-kuma",
"version": "1.23.0", "version": "1.23.0",
@ -51,6 +51,35 @@ def test_badge_svg_wellformed():
assert svg.startswith("<svg") and svg.endswith("</svg>") assert svg.startswith("<svg") and svg.endswith("</svg>")
assert "level 4" in svg assert "level 4" in svg
assert C.level_color(4) in svg assert C.level_color(4) in svg
# plain cap (no intent) → two-box badge, no third segment
assert "expected" not in svg and "gap?" not in svg
def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
    """The level badge's third segment tells a declared skip apart from an undeclared one."""
    # Declared (intentional) skip capped the climb → muted "expected" segment, never "gap?".
    intentional_svg = C.level_badge_svg(2, "L3 backup/restore N/A", "intentional")
    assert "level 2" in intentional_svg
    assert "expected" in intentional_svg
    assert C.EXPECT_COLOR in intentional_svg
    assert "gap?" not in intentional_svg

    # Undeclared (unintentional) skip → amber "gap?" segment, never "expected".
    unintentional_svg = C.level_badge_svg(2, "L3 backup/restore N/A", "unintentional")
    assert "level 2" in unintentional_svg
    assert "gap?" in unintentional_svg
    assert C.GAP_COLOR in unintentional_svg
    assert "expected" not in unintentional_svg
def test_skip_rows_intentional_and_unintentional():
    """`_skip_rows` renders one kind of row for declared skips and another for undeclared ones."""
    skips = {
        "intentional": {"backup_restore": "no persistent data"},
        "unintentional": ["functional"],
    }
    rows = C._skip_rows(skips)

    # Intentional skip: labelled row (muted green) plus the declared reason.
    assert "intentional skip" in rows
    assert C.SKIP_GREEN in rows
    assert "backup/restore" in rows
    assert "no persistent data" in rows

    # Unintentional skip: amber row plus the prompt to declare it or add coverage.
    assert "unintentional skip" in rows
    assert C.GAP_COLOR in rows
    assert "functional" in rows
    assert "EXPECTED_NA" in rows
def test_skip_rows_empty_when_no_skips():
    """With no skipped rungs at all, `_skip_rows` yields the empty string."""
    no_skips = {"intentional": {}, "unintentional": []}
    rendered = C._skip_rows(no_skips)
    assert rendered == ""
def test_card_html_reports_level_verbatim(): def test_card_html_reports_level_verbatim():

View File

@ -24,7 +24,7 @@ import dashboard # noqa: E402
def _row(**kw): def _row(**kw):
base = { base = {
"recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502", "recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
"version": "db9a95024e9d", "level": 4, "level_cap_reason": "L5 integration N/A", "version": "db9a95024e9d", "level": 4, "level_cap_reason": "",
"has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True}, "has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
"finished": 0, "url": "https://drone.x/cc-ci/4", "finished": 0, "url": "https://drone.x/cc-ci/4",
} }

View File

@ -19,33 +19,23 @@ def _rungs(
upgrade="pass", upgrade="pass",
backup_restore="pass", backup_restore="pass",
functional="pass", functional="pass",
integration="pass",
recipe_local="pass",
): ):
return { return {
"install": install, "install": install,
"upgrade": upgrade, "upgrade": upgrade,
"backup_restore": backup_restore, "backup_restore": backup_restore,
"functional": functional, "functional": functional,
"integration": integration,
"recipe_local": recipe_local,
} }
# ---- the U0 gate: L4-pass and L2-cap ---- # ---- the ladder: four essential rungs, top is L4 (functional) ----
def test_full_clean_climb_to_L6(): def test_full_clean_climb_to_L4():
# All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
lvl, reason = L.compute_level(_rungs()) lvl, reason = L.compute_level(_rungs())
assert lvl == 6
assert reason == ""
def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
# GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
assert lvl == 4 assert lvl == 4
assert "L5" in reason and "N/A" in reason assert reason == ""
def test_fails_at_L2_capped_at_L1(): def test_fails_at_L2_capped_at_L1():
@ -69,34 +59,27 @@ def test_install_fail_is_L0():
def test_higher_pass_does_not_rescue_lower_na(): def test_higher_pass_does_not_rescue_lower_na():
# backup/restore N/A (stateless app) caps at L2 even though functional would pass. # backup/restore N/A (stateless app) caps at L2 even though functional would pass.
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na")) lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
assert lvl == 2 assert lvl == 2
assert "L3" in reason and "N/A" in reason assert "L3" in reason and "N/A" in reason
def test_upgrade_na_caps_at_L1(): def test_upgrade_na_caps_at_L1():
# only one published version → no upgrade possible → N/A caps at L1. # only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
lvl, reason = L.compute_level(_rungs(upgrade="na")) lvl, reason = L.compute_level(_rungs(upgrade="na"))
assert lvl == 1 assert lvl == 1
assert "L2" in reason and "N/A" in reason assert "L2" in reason and "N/A" in reason
def test_integration_fail_caps_at_L4(): def test_functional_na_caps_at_L3():
# SSO declared but unverified (failed) → integration rung fails → cap at L4. # no recipe-specific functional tests → functional N/A caps at L3.
lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na")) lvl, reason = L.compute_level(_rungs(functional="na"))
assert lvl == 4 assert lvl == 3
assert "L5" in reason and "FAILED" in reason assert "L4" in reason and "N/A" in reason
def test_recipe_local_na_caps_at_L5():
# SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
lvl, reason = L.compute_level(_rungs(recipe_local="na"))
assert lvl == 5
assert "L6" in reason and "N/A" in reason
def test_functional_fail_caps_at_L3(): def test_functional_fail_caps_at_L3():
lvl, reason = L.compute_level(_rungs(functional="fail", integration="na")) lvl, reason = L.compute_level(_rungs(functional="fail"))
assert lvl == 3 assert lvl == 3
assert "L4" in reason and "FAILED" in reason assert "L4" in reason and "FAILED" in reason

View File

@ -105,83 +105,31 @@ def _results(**kw):
return base return base
def test_derive_rungs_full_stateful_sso(): def test_derive_rungs_full_climb_four_essential():
rungs = R.derive_rungs( rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
_results(), # only the four essential rungs — integration/recipe-local are optional, not produced here.
backup_capable=True,
declared=["keycloak"],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs == { assert rungs == {
"install": "pass", "install": "pass",
"upgrade": "pass", "upgrade": "pass",
"backup_restore": "pass", "backup_restore": "pass",
"functional": "pass", "functional": "pass",
"integration": "pass",
"recipe_local": "na",
} }
def test_derive_rungs_no_sso_surface_is_integration_na(): def test_derive_rungs_stateless_backup_and_functional_na():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs["integration"] == "na"
assert rungs["functional"] == "pass"
def test_derive_rungs_stateless_backup_na():
rungs = R.derive_rungs( rungs = R.derive_rungs(
_results(backup="skip", restore="skip", custom="skip"), _results(backup="skip", restore="skip", custom="skip"),
backup_capable=False, backup_capable=False,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=False, has_custom=False,
has_repo_local=False,
repo_local_passed=False,
) )
assert rungs["backup_restore"] == "na" assert rungs["backup_restore"] == "na"
assert rungs["functional"] == "na" assert rungs["functional"] == "na"
assert "integration" not in rungs and "recipe_local" not in rungs
def test_derive_rungs_sso_unverified_is_integration_fail(): def test_derive_rungs_functional_fail():
rungs = R.derive_rungs( rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
_results(), assert rungs["functional"] == "fail"
backup_capable=True,
declared=["keycloak"],
deps_ready=False,
sso_unverified=True,
has_custom=True,
has_repo_local=False,
repo_local_passed=False,
)
assert rungs["integration"] == "fail"
def test_derive_rungs_repo_local_pass():
rungs = R.derive_rungs(
_results(),
backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
has_custom=True,
has_repo_local=True,
repo_local_passed=True,
)
assert rungs["recipe_local"] == "pass"
# ---- build_results: end-to-end incl level + flags ---- # ---- build_results: end-to-end incl level + flags ----
@ -212,16 +160,13 @@ def test_build_results_level_and_flags(tmp_path):
records=recs, records=recs,
results=_results(), results=_results(),
backup_capable=True, backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
clean_teardown=True, clean_teardown=True,
no_secret_leak=True, no_secret_leak=True,
finished_ts=1234.0, finished_ts=1234.0,
) )
# stateful, functional pass, no SSO surface, no repo-local → caps at L4 # all four essential rungs pass → full climb to L4 (the top), no cap
assert data["level"] == 4 assert data["level"] == 4
assert "L5" in data["level_cap_reason"] assert data["level_cap_reason"] == ""
assert data["recipe"] == "hedgedoc" assert data["recipe"] == "hedgedoc"
assert data["ref"] == "deadbeefcafe" assert data["ref"] == "deadbeefcafe"
assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True} assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
@ -246,9 +191,6 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
records=recs, records=recs,
results=_results(upgrade="fail"), results=_results(upgrade="fail"),
backup_capable=True, backup_capable=True,
declared=[],
deps_ready=True,
sso_unverified=False,
clean_teardown=True, clean_teardown=True,
no_secret_leak=True, no_secret_leak=True,
finished_ts=0.0, finished_ts=0.0,
@ -257,6 +199,85 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
assert "L2" in data["level_cap_reason"] assert "L2" in data["level_cap_reason"]
# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
def _rungs(**kw):
    """Return a rung → outcome mapping in which every essential rung defaults to "pass".

    Keyword arguments override the default outcome of a rung (or add further keys).
    """
    return {
        "install": "pass",
        "upgrade": "pass",
        "backup_restore": "pass",
        "functional": "pass",
        **kw,
    }
def test_skips_intentional_vs_unintentional():
    """A declared skipped rung is intentional (with its reason); an undeclared one is not."""
    declared = {"backup_restore": "stateless static server"}
    result = R.skips(_rungs(backup_restore="na", functional="na"), declared)
    # backup_restore was declared, so it is listed with its reason.
    assert result["intentional"] == {"backup_restore": "stateless static server"}
    # functional was skipped as well but never declared.
    assert result["unintentional"] == ["functional"]
def test_skips_none_declared_all_unintentional():
    """Without any declaration (None), every skipped rung counts as unintentional."""
    result = R.skips(_rungs(backup_restore="na"), None)
    assert result["intentional"] == {}
    assert result["unintentional"] == ["backup_restore"]
def test_skips_declaration_only_counts_when_actually_skipped():
    """A declaration for a rung that actually ran is inert: the rung shows up in neither bucket."""
    # backup_restore passed, so it is not a skip even though a reason was declared for it.
    result = R.skips(_rungs(backup_restore="pass"), {"backup_restore": "reason"})
    for bucket in ("intentional", "unintentional"):
        assert "backup_restore" not in result[bucket]
def test_build_results_threads_expected_na(tmp_path):
    """`build_results` carries the `expected_na` declaration through to level cap and skips.

    Mirrors custom-html-tiny post-change: install plus a passing functional (custom) test, but no
    backup surface, with backup_restore declared as intentionally skipped.
    """
    install_record = {
        "tier": "install",
        "source": "generic",
        "file": "g/test_install.py",
        "rc": 0,
        "junit": _write(tmp_path, "i.xml", JUNIT_PASS),
    }
    custom_record = {
        "tier": "custom",
        "source": "cc-ci",
        "file": "c/test_serves_content.py",
        "rc": 0,
        "junit": _write(tmp_path, "c.xml", JUNIT_PASS),
    }
    # custom stays at its default (pass), so the functional rung passes.
    stage_results = _results(backup="skip", restore="skip")

    data = R.build_results(
        recipe="custom-html-tiny",
        version="1.1.0",
        pr="0",
        ref=None,
        records=[install_record, custom_record],
        results=stage_results,
        backup_capable=False,  # no backupbot label → backup_restore skipped (N/A)
        clean_teardown=True,
        no_secret_leak=True,
        finished_ts=0.0,
        expected_na={"backup_restore": "stateless static file server"},
    )

    # The skip still caps the climb at L2 (it never inflates the level) even though functional
    # passes above it; the capping rung is the declared, intentional one.
    assert data["level"] == 2
    assert "L3" in data["level_cap_reason"]
    assert data["level_cap_rung"] == "backup_restore"
    assert data["rungs"]["functional"] == "pass"

    skips = data["skips"]
    assert skips["intentional"]["backup_restore"] == "stateless static file server"
    # backup_restore is declared and functional passed, so nothing is left unintentional.
    assert skips["unintentional"] == []
def test_write_results_roundtrip(tmp_path): def test_write_results_roundtrip(tmp_path):
data = {"run_id": "42", "level": 3, "stages": []} data = {"run_id": "42", "level": 3, "stages": []}
path = R.write_results(data, runs_dir_override=str(tmp_path)) path = R.write_results(data, runs_dir_override=str(tmp_path))