diff --git a/JOURNAL-lvl5.md b/JOURNAL-lvl5.md index 79c1a10..f6f9c1b 100644 --- a/JOURNAL-lvl5.md +++ b/JOURNAL-lvl5.md @@ -6,14 +6,3 @@ - Notable: card.py LEVEL_COLOR already has keys 0-6 (5=green, 6=bright green) — only 0-4 reachable today; dashboard._LEVEL_COLOR needs checking for the same. - Lint context: abra.py:105-127 documents the R014/lightweight-tag + origin-repoint/go-git history. Per-run recipe tree = $ABRA_DIR/recipes/, origin = private mirror (SRC) on PR runs, upstream tags fetched in by fetch_recipe. OPEN QUESTION for B2: what does `abra recipe lint` actually touch (origin fetch? auth? R014 against which tags?) — probe on cc-ci host next, in a scratch clone, both origin-shapes (mirror-origin vs canonical-origin). - Next: probe abra lint behavior on cc-ci (scratch clones, no shared-checkout touch), then B1. - -## 2026-06-11 abra lint probe (B2 design input) — all on cc-ci, scratch ABRA_DIR=/tmp/lvl5-lint-probe/abra -- `abra recipe lint hedgedoc` (fresh canonical clone): FATA "inappropriate ioctl for device" rc=1 — needs a PTY even with `-n`. Under `script -qec "abra recipe lint -n hedgedoc" /dev/null`: rc=0, 21-line unicode table R001–R016 (cols: ref|rule|severity|satisfied ✅/❌|skipped|how-to-fix), maxlen 146 no wrapping, wall time 0.7s. -- rc SEMANTICS: rc≠0 ONLY on FATA (cannot lint). Probes: - - rm .env.sample + commit → rc=1 FATA "unable to validate recipe: .env.sample ... no such file" (content-attributable FATA). - - lightweight tag added → table renders R014 error ❌, final line `WARN critical errors present in config`, **rc=0**. So pass/fail MUST be parsed from the table (error-severity ❌ rows), sentinel line as cross-check. Baseline warn-only ❌ (R015) → NO sentinel, rc=0 → pass. - - untracked compose.ccci.yml (CI overlay) in tree → FATA "version mismatched between two composefiles" rc=1 — abra lint globs compose*.yml INCLUDING untracked harness overlays ⇒ lint MUST run on a pristine clone of the exact ref, not the deploy tree. - - origin repointed to auth-required mirror URL → rc=1 FATA "unable to fetch tags in ...: repository not found" — lint force-fetches tags from origin ⇒ scratch clone's origin must be fetchable without auth. Cloning FROM the per-run tree (local path origin) satisfies this offline and preserves the run's true tag set (fetch_recipe pulls upstream tags into the per-run tree). -- run_quick emits no results.json/card (build_results only at run_recipe_ci.py:1248, cold path) → lint rung wiring is full-path only. -- Executor design settled (DECISIONS.md entry to come with B2): scratch ABRA_DIR (recipes/ = `git clone ` + `checkout -f `; catalogue/servers symlinks to canonical), `script -qec "abra recipe lint -n "`, hard 60s timeout, full output → lint.txt artifact, parse table rows; status = fail iff any error-severity row ❌(not skipped) or content-attributable FATA ("unable to validate recipe"); pass iff table rendered & no error-row ❌; anything else (timeout, abra missing, fetch FATA, unparseable) → unver + loud log. No rule filtering needed (mirror pollution solved by context, not by ignoring rules). -- Tier-skip sources mapped for derive_rungs classification (run_recipe_ci.py:1040-1131): upgrade skip ⟺ `prev` falsy ("only one published version", structural-intentional) given install passed; backup/restore skip ⟺ not backup_cap (structural-intentional); install-fail → downstream tiers skip (unintentional); custom skip ⟺ no custom tests (unintentional unless EXPECTED_NA declares functional); tier absent from `stages` (CCCI_STAGES dev escape) → missing key (unintentional). diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py index 18fcf14..48de3d3 100644 --- a/dashboard/dashboard.py +++ b/dashboard/dashboard.py @@ -38,7 +38,6 @@ _RUN_FILES = { "screenshot.png": "image/png", "badge.svg": "image/svg+xml", "summary.html": "text/html; charset=utf-8", - "lint.txt": "text/plain; charset=utf-8", } _RUN_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") @@ -72,7 +71,8 @@ _LEVEL_COLOR = { 2: "#e0823d", 3: "#d9b343", 4: "#a0b93f", - 5: "#3fb950", # bright green — full 5-rung climb incl. lint (phase lvl5) + 5: "#57ab5a", + 6: "#3fb950", } @@ -152,6 +152,7 @@ def _build_row(b): "ref": ref[:8], "version": res.get("version") or ref[:12] or "—", "level": res.get("level"), + "level_cap_reason": res.get("level_cap_reason") or "", "has_screenshot": bool(res.get("screenshot")), "flags": res.get("flags") or {}, "finished": b.get("finished") or 0, @@ -219,6 +220,7 @@ a{color:#58a6ff;text-decoration:none} a:hover{text-decoration:underline} .name{font-weight:700;font-size:1.05rem;color:#e6edf3} .row{display:flex;align-items:center;gap:.5rem;flex-wrap:wrap;font-size:.82rem} .pill{color:#fff;padding:.08rem .5rem;border-radius:.5rem;font-size:.75rem;font-weight:600} +.cap{color:#8b949e;font-size:.75rem} code{background:#0d1117;border:1px solid #21262d;border-radius:.3rem;padding:0 .3rem;font-size:.78rem;color:#c9d1d9} .flags{display:flex;gap:.4rem;font-size:.72rem;color:#8b949e} .foot{margin-top:auto;display:flex;justify-content:space-between;font-size:.8rem;padding-top:.3rem;border-top:1px solid #21262d} @@ -272,12 +274,17 @@ def _card(r): f'' f'no screenshot{_level_pill(r["level"])}' ) + cap = ( + f'
{html.escape(r["level_cap_reason"])}
' + if r["level_cap_reason"] + else "" + ) return ( f'
{shot}
' f'
{html.escape(r["recipe"])}
' f'
{html.escape(r["status"])}' f'{html.escape(r["version"])}
' - f"{_flags_html(r['flags'])}" + f"{cap}{_flags_html(r['flags'])}" f'' f"
" diff --git a/runner/harness/card.py b/runner/harness/card.py index 562f8ba..28be566 100644 --- a/runner/harness/card.py +++ b/runner/harness/card.py @@ -21,24 +21,23 @@ from __future__ import annotations import html import os -# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top (L5 = full -# clean climb incl. lint — phase lvl5). +# Level → colour ramp (YunoHost-ish): red at the floor, climbing to green at the top. LEVEL_COLOR = { 0: "#e5534b", # red — install failed 1: "#e0823d", # orange 2: "#e0823d", 3: "#d9b343", # amber - 4: "#a0b93f", # yellow-green — above functional, lint not earned - 5: "#3fb950", # bright green — full climb (lint passed) + 4: "#a0b93f", # yellow-green + 5: "#57ab5a", # green + 6: "#3fb950", # bright green — full climb } -STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–", "unver": "⊘"} +STATUS_MARK = {"pass": "✔", "fail": "✘", "skip": "–", "error": "✘", "na": "–"} STATUS_COLOR = { "pass": "#3fb950", "fail": "#f85149", "error": "#f85149", "skip": "#8b949e", "na": "#8b949e", - "unver": "#d29922", # amber — exercised? no: should have run and wasn't verified } @@ -80,15 +79,44 @@ def render_badge_svg(label: str, message: str, color: str) -> str: ) -# Amber for UNVERIFIED rung rows in the table (a rung that should have run and wasn't checked). +# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not +# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an +# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels +# (no emoji) so the SVG renders anywhere. GAP_COLOR = "#d29922" +EXPECT_COLOR = "#6e7681" -def level_badge_svg(level: int) -> str: - """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level — NUMBER + COLOUR ONLY - (operator-specified, phase lvl5). 'Why isn't it higher' lives in the card's per-rung table, - never on the badge.""" - return render_badge_svg("cc-ci", f"level {int(level)}", level_color(level)) +def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str: + """Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment + that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`): + - "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'. + - "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'. + - "" (clean cap / full climb / a real failure): no third segment (the level + card carry it). + The badge never inflates — it only annotates the cap the level already reflects.""" + label, msg = "cc-ci", f"level {int(level)}" + lw, mw = _text_width(label), _text_width(msg) + third: tuple[str, str] | None = None + if cap_skip == "unintentional": + third = ("gap?", GAP_COLOR) + elif cap_skip == "intentional": + third = ("expected", EXPECT_COLOR) + if third is None: + return render_badge_svg(label, msg, level_color(level)) + txt, tcolor = third + tw = _text_width(txt) + w = lw + mw + tw + return ( + f'' + f'' + f'' + f'' + f'' + f'{html.escape(label)}' + f'{html.escape(msg)}' + f'{html.escape(txt)}' + ) def _stage_rows(stages: list[dict]) -> str: @@ -113,13 +141,12 @@ def _stage_rows(stages: list[dict]) -> str: return "\n".join(rows) or 'no stages' -# Friendly rung labels for the skip/unverified rows (the five essential rungs). +# Friendly rung labels for the skip rows (the four essential rungs). RUNG_LABEL = { "install": "install", "upgrade": "upgrade", "backup_restore": "backup/restore", "functional": "functional", - "lint": "lint", } SKIP_GREEN = ( "#57ab5a" # muted green — an intentional skip reads like a pass (but labelled, never inflating) @@ -127,10 +154,9 @@ SKIP_GREEN = ( def _skip_rows(skips: dict) -> str: - """Render the non-run rungs as stage-like rows (phase lvl5 semantics). An INTENTIONAL skip - (declared/structural — the rung does not apply, the climb continues past it) is muted green - with its reason on the line below; an UNVERIFIED rung (should have run, wasn't checked — the - level cannot rise above it) is amber 'unverified'.""" + """Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row + but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below; + an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it.""" rows = [] for rung, reason in (skips.get("intentional") or {}).items(): rows.append( @@ -145,11 +171,11 @@ def _skip_rows(skips: dict) -> str: rows.append( f'' f"{html.escape(RUNG_LABEL.get(rung, rung))}" - f'unverified' + f'unintentional skip' ) rows.append( - 'rung did not run / could not be ' - "checked — the level cannot rise above an unverified rung" + 'not declared in EXPECTED_NA — add the ' + "missing test/label, or declare the skip with a reason" ) return "\n".join(rows) @@ -158,15 +184,13 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") """Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to the screenshot PNG (same dir as the card) — omitted from the card if None / absent. - The card shows exactly what the data says: recipe + version, the level, the per-stage/per-test - ✔/✘ table (+ skip/unverified rung rows — the SOLE carrier of "why isn't the level higher"), - the invariant flags, and the app screenshot. No computation here. Tolerates old (schema-1) - artifacts: the ladder height is read off the rungs the artifact actually has.""" + The card shows exactly what the data says: recipe + version, the level badge + cap reason, the + per-stage/per-test ✔/✘ table, the invariant flags, and the app screenshot. No computation here.""" recipe = html.escape(str(data.get("recipe", "?"))) version = html.escape(str(data.get("version") or data.get("ref") or "")) level = int(data.get("level", 0)) - # Old (pre-lvl5) artifacts have a 4-rung ladder — render their "of N" honestly. - ladder_top = 5 if "lint" in (data.get("rungs") or {}) else 4 + cap_reason = str(data.get("level_cap_reason") or "") + cap = html.escape(cap_reason) sk = data.get("skips", {}) or {} color = level_color(level) flags = data.get("flags", {}) or {} @@ -197,7 +221,7 @@ body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;backgroun .lvl .num{{display:inline-block;min-width:64px;padding:.3rem .7rem;border-radius:10px; font-size:1.6rem;font-weight:700;color:#0d1117;background:{color}}} .lvl .lbl{{display:block;color:#8b949e;font-size:.72rem;text-transform:uppercase;margin-top:.2rem}} -.ladder{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}} +.cap{{padding:.4rem 1.3rem;color:#8b949e;font-size:.82rem;border-bottom:1px solid #21262d}} .body{{display:flex;gap:1rem;padding:1rem 1.3rem}} .tbl{{flex:1}} table{{border-collapse:collapse;width:100%;font-size:.85rem}} @@ -214,12 +238,12 @@ tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0 .shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}} .flags{{display:flex;gap:.6rem;padding:.6rem 1.3rem 1rem}} .flag{{border:1px solid;border-radius:6px;padding:.15rem .5rem;font-size:.78rem;color:#c9d1d9}} -.ladder b{{color:#c9d1d9}} +.cap b{{color:#c9d1d9}}
{FLOWER_SVG}

{recipe}

{version}
{level}level
-
level {level} of {ladder_top}
+
{("capped: " + cap) if cap else "full clean climb — top level (4)"}
{rows}
{shot_html}
{"".join(flag_bits)}
""" diff --git a/runner/harness/level.py b/runner/harness/level.py index 1bc08b2..46f5ba6 100644 --- a/runner/harness/level.py +++ b/runner/harness/level.py @@ -1,67 +1,67 @@ -"""The level ladder — five rungs, no capping (phase lvl5, plan-phase-lvl5-lint-rung.md). +"""Phase 3 — the level ladder (plan-phase3-results-ux.md §4.1, R1). -A single integer **level** summarising how far up the quality ladder a recipe run climbed: +A single integer **level** summarising how far up the quality ladder a recipe run climbed, with +YunoHost semantics: **a gap caps the level** — you only earn level L if every rung 1..L was a clean +PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops +the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make +a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail +— with a recorded reason so the level is *fair*, not inflated. + +The ladder is the FOUR essential rungs every recipe is held to: L0 — install failed / app never became healthy. L1 — Installs: deploys + passes health/readiness. L2 — Upgrades: previous published version → PR version, stays healthy, data intact. L3 — Backup/restore: seeded data survives backup → wipe → restore. L4 — Functional: recipe-specific functional tests pass. - L5 — Lint: `abra recipe lint` passes against the exact ref under test. -Semantics (operator-decided 2026-06-11, recorded in DECISIONS.md — replaces the Phase-3 -"N/A caps" rule): +Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL** +capabilities — they are NOT part of the level ladder and never cap it. They still run when present +(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a +recipe without an SSO surface or without repo-local tests is simply not penalised on the level. - level = max i such that rung_i == "pass" and every rung j < i is "pass" or "skip"; 0 if none. +This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit +test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator +(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status +dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3). -A rung has one of FOUR statuses: - "pass" — exercised and passed. - "fail" — exercised and failed. Blocks: no rung above it can count. - "skip" — INTENTIONAL skip: the rung genuinely does not apply to this recipe, from a - declared or structural fact (not backup-capable; only one published version; - declared in recipe_meta.EXPECTED_NA). Does NOT stop the climb. - "unver" — UNINTENTIONAL not-verified: the rung SHOULD have run but didn't (infra error, - missing tool, harness exception, prior-stage abort, timeout). Blocks exactly - like a fail — the level never rises above a rung that wasn't actually checked. - -The per-rung table (results.json `rungs`, card, dashboard) is the SOLE carrier of "why isn't -this level higher" — there is no cap_reason. The classification of every N/A source into -skip-vs-unver lives in derive_rungs (results.py) and is tabulated in DECISIONS.md; anything -unclassifiable defaults to "unver" (conservative: never claim what wasn't checked). - -Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) remain -OPTIONAL capabilities — not rungs, never counted (SSO is still enforced for the run VERDICT -via the deps/SSO checks in run_recipe_ci.py). - -This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the -unit test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). +Rung status vocabulary (each rung ∈ these three): + "pass" — the rung was exercised and passed. + "fail" — the rung was exercised and failed. + "na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade; + not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct + cap_reason) so the level never overstates what was actually verified. """ from __future__ import annotations -# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install -# itself did not pass. These five are the ESSENTIAL rungs — integration/recipe-local are -# optional and deliberately NOT in this tuple. -RUNGS = ("install", "upgrade", "backup_restore", "functional", "lint") +# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself +# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the +# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple. +RUNGS = ("install", "upgrade", "backup_restore", "functional") -# Human-readable label per rung level, for the summary card / docs. +# Human-readable label per rung level, for cap_reason + the summary card. RUNG_LABEL = { 1: "install (deploy + health)", 2: "upgrade (prev published → PR)", 3: "backup/restore (data integrity)", 4: "functional (recipe-specific tests)", - 5: "lint (abra recipe lint)", } -VALID = {"pass", "fail", "skip", "unver"} +VALID = {"pass", "fail", "na"} -def compute_level(rungs: dict[str, str]) -> int: - """Map a rung-status dict → level 0..5. +def compute_level(rungs: dict[str, str]) -> tuple[int, str]: + """Map a rung-status dict → (level 0..4, cap_reason). - `rungs` must contain a status in VALID for every name in RUNGS. The level is the highest - i such that rungs[i] == "pass" and every rung below i is "pass" or "skip" (an intentional - skip does not stop the climb). A "fail" or "unver" rung blocks: rungs above it cannot - count, however green. 0 when no rung qualifies. + `rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the + highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is + returned when the install rung itself is not "pass" (install failed / never healthy). + + cap_reason explains where the climb stopped: + - "" (empty) when the recipe earned the top rung (L4, full clean climb). + - "L