From 3595e80d085dce58f4ee137f79d549ea38c8a03d Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Wed, 17 Jun 2026 16:25:39 +0000 Subject: [PATCH] claim(M1): per-recipe history sourced from local /var/lib/cc-ci-runs artifacts (full history, not Drone 100-build slice) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit history_for() now enumerates run dirs' results.json, groups by recipe, sorts newest-first by finished timestamp (mixed numeric+named ids — timestamp is the only correct key), caps at HISTORY_CAP=30, skips malformed/empty/no-recipe dirs. Overview + badges + /runs + security guards + stdlib-only unchanged. Local verify: 13/13 unit tests; full-fixture vs 308 real results.json → bluesky-pds=8 in exact ts order, plausible capped 30 newest, edge dirs skipped. Co-Authored-By: Claude Opus 4.8 --- dashboard/dashboard.py | 91 ++++++++++++++++++++++++++++++++---- machine-docs/BACKLOG-dash.md | 14 ++++++ machine-docs/DECISIONS.md | 13 ++++++ machine-docs/JOURNAL-dash.md | 44 +++++++++++++++++ machine-docs/STATUS-dash.md | 68 +++++++++++++++++++++++++++ tests/unit/test_dashboard.py | 55 ++++++++++++++++++++++ 6 files changed, 277 insertions(+), 8 deletions(-) create mode 100644 machine-docs/BACKLOG-dash.md create mode 100644 machine-docs/JOURNAL-dash.md create mode 100644 machine-docs/STATUS-dash.md diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py index 18fcf14..9ec36a4 100644 --- a/dashboard/dashboard.py +++ b/dashboard/dashboard.py @@ -25,6 +25,9 @@ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer DRONE_URL = os.environ.get("DRONE_URL", "https://drone.ci.commoninternet.net") CI_REPO = os.environ.get("CI_REPO", "recipe-maintainers/cc-ci") CACHE_TTL = int(os.environ.get("CACHE_TTL", "30")) +# Per-recipe history display cap (phase dash): a long-lived recipe (plausible/custom-html have 30+ +# runs) stays bounded; newest runs are kept (the list is sorted newest-first before the slice). 
+HISTORY_CAP = int(os.environ.get("HISTORY_CAP", "30")) # Phase 3 (R3/R6/U2.3): per-run artifacts (results.json, summary card PNG, app screenshot, level # badge) written by run_recipe_ci.py under this host dir, bind-mounted read-only into the dashboard @@ -51,9 +54,14 @@ def _read(path): DRONE_TOKEN = _read(os.environ["DRONE_TOKEN_FILE"]) _CACHE = {"ts": 0.0, "recipes": []} -# Raw custom builds (newest-first), cached so the overview AND the per-recipe history page share one -# Drone fetch within CACHE_TTL (U4 history reads the same list latest_per_recipe groups from). +# Raw custom builds (newest-first), cached within CACHE_TTL. Feeds the OVERVIEW (latest-per-recipe). +# The per-recipe HISTORY page no longer reads this slice — it sources the full history from the local +# run artifacts instead (see _local_history / phase dash), because this Drone slice is capped at the +# latest 100 builds and drops a recipe's older runs out of view. _BUILDS = {"ts": 0.0, "builds": []} +# Per-recipe history sourced from the LOCAL run artifacts under CCCI_RUNS_DIR (complete: 300+ runs, +# durable, independent of Drone's 100-build window). Whole-dir scan grouped by recipe, cached CACHE_TTL. +_LOCAL = {"ts": 0.0, "by_recipe": {}} _COLORS = { "success": "#3fb950", @@ -172,13 +180,80 @@ def latest_per_recipe(): return [_build_row(latest[r]) for r in sorted(latest)] +def _numeric_id(n): + """run dir name as int for sort tiebreak; -1 for named ids (m2r-*, ab-*) so the PRIMARY sort key + (finished timestamp) decides their position, never int() on a non-numeric id (would crash).""" + try: + return int(n) + except (TypeError, ValueError): + return -1 + + +def _run_status(res): + """Overall pass/fail for a finished run, derived from its per-stage results map (results.json has + no single top-level status field). Any failed/errored stage → failure; all pass/skip → success; + empty/unknown → unknown. 
A skip alone is not a failure.""" + vals = list((res.get("results") or {}).values()) + if any(v in ("fail", "error") for v in vals): + return "failure" + if vals and all(v in ("pass", "skip") for v in vals): + return "success" + return "unknown" + + +def _local_history_row(run_id, res): + """Project a local run artifact (results.json) into the same display-row shape _build_row emits, + so render_history is unchanged. `number` is the run dir name (the /runs/<id>/ path + _results_for + key); link to the Drone build when the id is numeric, else to the local summary card.""" + ref = res.get("ref") or "" + url = f"{DRONE_URL}/{CI_REPO}/{run_id}" if str(run_id).isdigit() else f"/runs/{run_id}/summary.html" + return { + "recipe": res.get("recipe"), + "status": _run_status(res), + "number": run_id, + "ref": ref[:8], + "version": res.get("version") or ref[:12] or "—", + "level": res.get("level"), + "has_screenshot": bool(res.get("screenshot")), + "flags": res.get("flags") or {}, + "finished": res["finished"] if isinstance(res.get("finished"), (int, float)) else 0, + "url": url, + } + + +def _local_history(): + """Scan CCCI_RUNS_DIR once (cached CACHE_TTL), group runs by recipe sorted newest-first by the + `finished` timestamp. Run dirs with no/malformed results.json (in-flight / failed-early) are + skipped via _results_for ({} on miss) — never raises, never emits a garbage row. 
{recipe: [row]}.""" + now = time.time() + if now - _LOCAL["ts"] <= CACHE_TTL and _LOCAL["by_recipe"]: + return _LOCAL["by_recipe"] + by_recipe = {} + try: + names = os.listdir(CCCI_RUNS_DIR) + except OSError as e: + log("local runs scan failed", e) + return _LOCAL["by_recipe"] + for name in names: + res = _results_for(name) # traversal-guarded read; {} on miss / malformed / non-dir + recipe = res.get("recipe") + if not recipe: + continue + by_recipe.setdefault(recipe, []).append(_local_history_row(name, res)) + # Sort newest-first by finished timestamp (ids are MIXED numeric + named, so a numeric/lexical id + # sort would misorder — timestamp is the only correct key); numeric id is a stable tiebreak only. + for rows in by_recipe.values(): + rows.sort(key=lambda r: (r["finished"], _numeric_id(r["number"])), reverse=True) + _LOCAL["by_recipe"] = by_recipe + _LOCAL["ts"] = now + return by_recipe + + def history_for(recipe): - """All runs for one recipe (newest first), augmented from results.json — the per-recipe history - page (R5 'link to history'). [] if none / None on fetch error.""" - builds = _custom_recipe_builds() - if builds is None: - return None - return [_build_row(b) for b in builds if (b.get("params") or {}).get("RECIPE") == recipe] + """All runs for one recipe (newest first, display-capped at HISTORY_CAP), sourced from the LOCAL + run artifacts under CCCI_RUNS_DIR — complete + durable, independent of Drone's 100-build window + (phase dash root cause). [] when the recipe has no local runs.""" + return _local_history().get(recipe, [])[:HISTORY_CAP] def recipes_cached(): diff --git a/machine-docs/BACKLOG-dash.md b/machine-docs/BACKLOG-dash.md new file mode 100644 index 0000000..7d8c90a --- /dev/null +++ b/machine-docs/BACKLOG-dash.md @@ -0,0 +1,14 @@ +# BACKLOG — phase `dash` + +## Build backlog + +- [x] Root-cause confirmed (Drone 100-build window) + host artifact schema inspected. 
+- [x] M1: rewrite `history_for` to source from `/var/lib/cc-ci-runs` local artifacts, newest-first by + `finished`, capped at HISTORY_CAP, malformed/empty dirs skipped, security/other routes unchanged. +- [x] M1: unit test for local sourcing (count/order/cap/skip) + full-fixture verify vs real data. +- [ ] M1: awaiting Adversary PASS in REVIEW-dash.md. +- [ ] M2: deploy (rebuild dashboard image via deploy-dashboard reconcile / nixos-rebuild; content-hash + tag rolls on dashboard.py change), verify live on `/recipe/bluesky-pds` + ≥2 recipes, overview + + badges still 200, host health after. +- [ ] M2: confirm retention does not trim `/var/lib/cc-ci-runs` (record in DECISIONS if a cap needed). +- [ ] DONE: both gates Adversary-PASS in REVIEW-dash.md → write `## DONE` in STATUS-dash.md. diff --git a/machine-docs/DECISIONS.md b/machine-docs/DECISIONS.md index 48ac320..cafbffd 100644 --- a/machine-docs/DECISIONS.md +++ b/machine-docs/DECISIONS.md @@ -1566,3 +1566,16 @@ so the fallback (decouple version-record from retained volume) is NOT needed. Me at the full 20-enrolled set. WC8 disk-hygiene (`ci-docker-prune`) keeps residue bounded. Conclusion: keep all-enrolled with retained volumes; revisit only if `/` free drops below a single recipe's largest restore (~1–2G working set). No recipe dropped for disk. + +## phase dash — per-recipe history sourced from local run artifacts (2026-06-17) +The dashboard's per-recipe history page (`/recipe/`) sources its run list from the local +`/var/lib/cc-ci-runs/*/results.json` artifacts (complete: 308 finished runs; durable; already +bind-mounted read-only), NOT the Drone `…/builds?per_page=100` slice (root cause: that 100-build +window dropped each recipe's older runs out of view after the regall sweep → most recipes showed 1 +run). 
Newest-first by the `results.json` `finished` timestamp (run ids are MIXED numeric + named, so +only a timestamp sort is correct — `int(run_id)` would crash on `m2r-*`/`ab-*`); display-capped at +`HISTORY_CAP=30`. Status derived from the per-stage `results` map (no top-level status field). The +OVERVIEW (`/`) and badges keep their Drone latest-per-recipe source unchanged. Deliberately did NOT +merge Drone live "running" status into history (optional per plan; re-adds the network dependency the +local source removes; overview already shows live status). Retention: 308 parseable runs present, no +trim job observed → adequate; revisit only if a cap is ever needed. diff --git a/machine-docs/JOURNAL-dash.md b/machine-docs/JOURNAL-dash.md new file mode 100644 index 0000000..3cb46be --- /dev/null +++ b/machine-docs/JOURNAL-dash.md @@ -0,0 +1,44 @@ +# JOURNAL — phase `dash` (reasoning; Adversary does not read before verdict) + +## 2026-06-17 — M1 design + implementation + +**Root cause (confirmed against plan §1 + host):** `history_for` read `_custom_recipe_builds()`, +which fetches a single Drone page `…/builds?per_page=100`. The recent `regall` sweep `!testme`'d all +21 recipes once, filling the latest-100 window, so each recipe's older runs fell outside it → most +recipes rendered exactly 1 history row. Host has 432 run dirs (308 parseable `results.json`). + +**Why source from local artifacts, not paginate Drone:** the plan's chosen design. Local artifacts +are complete (308 finished runs vs 100-build Drone window), durable (independent of Drone +retention/pagination), already bind-mounted read-only, and already read per-run by `_results_for`. +Pure-local also removes a network dependency + failure mode from the history page. I deliberately did +NOT merge in Drone "currently running" live status (plan lists it as an optional "e.g." 
value-add): +it re-introduces the Drone dependency and the overview already shows live status; the DoD asks only +that the *historical* list come from local artifacts. Recorded as a decision. + +**Status derivation:** `results.json` (schema 2) has no top-level status field. Derived from the +per-stage `results` map: any `fail`/`error` → failure; all `pass`/`skip` → success; else unknown. +A skip alone is not a failure (e.g. custom-html-bkp-bad: backup=fail → failure; level-5 plausible: +all pass → success). This matches what the run actually did without inventing a Drone call. + +**The sort trap (flagged by Adversary's pre-claim baseline too):** run ids are MIXED numeric +(`753`,`556`) and named (`m2r-bluesky-pds`,`ab-bluesky-pds-oldmain`). `int(run_id)` would crash on +named ids; lexical sort would scatter them and misorder `9…` vs `7…`. The ONLY correct order is by +`finished` timestamp. Sort key = `(finished, _numeric_id)` reverse — finished is primary, numeric id +is a stable tiebreak (named ids get -1, so timestamp always decides their slot). Verified the output +matches the Adversary's independently-derived bluesky-pds order byte-for-byte. + +**Cap:** `HISTORY_CAP=30` (env-overridable). Sorted newest-first BEFORE slicing, so the cap keeps the +30 newest and drops the oldest — verified plausible (33 runs) keeps the newest 30, drops oldest 3. + +**Caching:** `_local_history` scans the whole runs dir once per `CACHE_TTL` (reuses the existing 30s +TTL) and groups by recipe, so a busy page doesn't json-load 300+ files per request. `_results_for` +(already traversal-guarded) is reused for each dir read, so the path-traversal guarantee is unchanged. + +**Retention:** 308 parseable runs present spanning many days — retention is adequate; no trimming of +`/var/lib/cc-ci-runs` observed that would vanish history. Will confirm no cleanlogs/prune job trims it +during M2 and record in DECISIONS if a cap is ever needed (none needed now). 
+ +**Local verification (M1):** 13/13 unit tests pass (incl. new local-sourcing test). Full-fixture run +against all 308 real `results.json` + injected malformed/empty/no-recipe dirs: bluesky-pds=8 in exact +timestamp order, plausible capped 30 (newest kept), 308 total grouped, edge dirs skipped without +raising, security guards (`_RUN_ID_RE`, `_results_for`, `serve_run_file`) all still reject traversal. diff --git a/machine-docs/STATUS-dash.md b/machine-docs/STATUS-dash.md new file mode 100644 index 0000000..cf89c08 --- /dev/null +++ b/machine-docs/STATUS-dash.md @@ -0,0 +1,68 @@ +# STATUS — phase `dash` (per-recipe run history fix) + +SSOT: /srv/cc-ci/cc-ci-plan/plan-phase-dash-recipe-history.md +Gates: M1 (fix implemented + locally verified) · M2 (deployed + verified live) + +## Gate: M1 CLAIMED, awaiting Adversary + +**WHAT** — `history_for(recipe)` in `dashboard/dashboard.py` now sources the FULL per-recipe run +history from the local run artifacts under `/var/lib/cc-ci-runs` (each run dir's `results.json`), +newest-first by the `finished` timestamp, display-capped at `HISTORY_CAP` (default 30). It no longer +reads the Drone `…/builds?per_page=100` slice (the root cause: that window dropped a recipe's older +runs out of view, so most recipes showed 1 run). Overview (`/`), `/badge/<recipe>.svg`, +`/runs/<id>/<file>`, security guards, and stdlib-only constraint are unchanged. + +**WHERE** — +- Commit: see `git log` on origin/main for the `claim(M1)` commit (this push). +- Changed files: `dashboard/dashboard.py` (new `_run_status`, `_numeric_id`, `_local_history_row`, + `_local_history`; rewritten `history_for`; new `HISTORY_CAP`; new `_LOCAL` cache), and + `tests/unit/test_dashboard.py` (new `test_history_sourced_from_local_artifacts`). +- Host artifacts the page reads: `/var/lib/cc-ci-runs/<id>/results.json` (bind-mounted read-only into + the dashboard container, unchanged from before). + +**HOW to verify (cold, from a fresh clone)** — +1. 
Unit suite (stdlib render + new local-sourcing test): + ``` + nix-shell -p 'python3.withPackages(ps:[ps.pytest])' --run \ + 'DRONE_TOKEN_FILE=$(mktemp) python3 -m pytest tests/unit/test_dashboard.py -q' + ``` + EXPECTED: `13 passed`. +2. Verify against the REAL host artifacts. Build a fixture of every `results.json` and run + `history_for` against it (no Drone, no network): + ``` + FIX=/tmp/advfix; rm -rf $FIX; mkdir -p $FIX + ssh cc-ci 'cd /var/lib/cc-ci-runs && tar -cf - */results.json 2>/dev/null' | tar -xf - -C $FIX + printf x > /tmp/t.tok + DRONE_TOKEN_FILE=/tmp/t.tok CCCI_RUNS_DIR=$FIX python3 -c ' + import sys; sys.path.insert(0,"dashboard"); import dashboard as d + r=d.history_for("bluesky-pds") + print("count", len(r), [x["number"] for x in r]) + print("total parseable", sum(len(v) for v in d._local_history().values())) + print("plausible cap", len(d.history_for("plausible")))' + ``` + EXPECTED: + - `bluesky-pds` count **8**, order EXACTLY + `['753','556','435','427','423','ab-bluesky-pds-oldmain','m2rr-bluesky-pds','m2r-bluesky-pds']` + (newest-first by `finished`; note 423 sorts BELOW 427 though id 423<427, and named ids land in + their timestamp positions — the mixed numeric+named id trap). + - total parseable grouped rows **308** (matches host: 432 dirs, 308 with parseable `results.json`). + - `plausible` capped at **30** (of 33), newest kept. + +**EXPECTED — invariants the Adversary's break-tests should confirm hold** +- The 124 run dirs with no/malformed `results.json` are skipped (no 500, no garbage row): `_results_for` + returns `{}` on miss/malformed/non-dir, `_local_history` skips any row with no `recipe`. +- Security preserved (untouched code paths): `/recipe/` still gated by `_RUN_ID_RE` + (`^[A-Za-z0-9][A-Za-z0-9._-]*$` → rejects `../..`, `foo/..`, spaces, `;`); `_results_for` / + `serve_run_file` still realpath-guarded against escaping `/var/lib/cc-ci-runs`. +- stdlib-only: no new imports (still `html,json,os,re,sys,time,urllib,http.server`). 
+- Overview (`/`) and `/badge/<recipe>.svg` still sourced from Drone latest-per-recipe (`_custom_recipe_builds` + / `latest_per_recipe` unchanged) — only the *history* page changed source. +- Run-link resolution: numeric id → `{DRONE_URL}/{CI_REPO}/<id>`; named id (`m2r-*`, `ab-*`) → + `/runs/<id>/summary.html` (local, since no Drone build number exists). +- Status pill derived from the per-stage `results` map (`results.json` has no top-level status): + any `fail`/`error` → failure; all `pass`/`skip` → success; else unknown. + +## Gate: M2 — NOT STARTED (deploy + live verify; begins after M1 PASS) + +## Blocked +(none) diff --git a/tests/unit/test_dashboard.py b/tests/unit/test_dashboard.py index a177fd2..67cc5d6 100644 --- a/tests/unit/test_dashboard.py +++ b/tests/unit/test_dashboard.py @@ -171,6 +171,61 @@ def test_level_badge_shows_level_coloured(monkeypatch): assert "level 5" not in svg and "level 6" not in svg + + +def _write_run(base, run_id, recipe, finished, **kw): + d = os.path.join(base, run_id) + os.makedirs(d, exist_ok=True) + doc = {"recipe": recipe, "finished": finished, "run_id": run_id, + "ref": kw.get("ref", "deadbeefcafe"), "version": kw.get("version"), + "level": kw.get("level", 5), "screenshot": kw.get("screenshot", "screenshot.png"), + "results": kw.get("results", {"install": "pass"}), "flags": kw.get("flags", {})} + with open(os.path.join(d, "results.json"), "w") as fh: + json.dump(doc, fh) + + +def test_history_sourced_from_local_artifacts(tmp_path, monkeypatch): + """phase dash: history_for sources the FULL per-recipe run list from CCCI_RUNS_DIR (not the + capped Drone slice), newest-first by `finished` even with mixed numeric+named run ids, capped, + and skips malformed/empty/no-recipe dirs without raising.""" + base = str(tmp_path) + monkeypatch.setattr(dashboard, "CCCI_RUNS_DIR", base) + monkeypatch.setattr(dashboard, "HISTORY_CAP", 3) + dashboard._LOCAL.update(ts=0.0, by_recipe={}) # bypass scan cache + # mixed numeric + named ids; out-of-order on disk; 
the timestamp MUST decide order, not the id + _write_run(base, "753", "bsky", 1781663348, results={"install": "pass"}) + _write_run(base, "427", "bsky", 1781178768, results={"install": "pass"}) + _write_run(base, "m2r-bsky", "bsky", 1781121610, level=0, results={"install": "pass", "backup": "fail"}) + _write_run(base, "423", "bsky", 1781178063, results={"install": "pass"}) # 423<427 numerically but OLDER + _write_run(base, "9", "other", 1781000000) # different recipe, must not leak in + # graceful-skip cases (the host's in-flight/failed-early dirs) + os.makedirs(os.path.join(base, "EMPTY"), exist_ok=True) # in-flight dir, no results.json + os.makedirs(os.path.join(base, "MALFORMED"), exist_ok=True) + with open(os.path.join(base, "MALFORMED", "results.json"), "w") as fh: + fh.write("{ not json") + os.makedirs(os.path.join(base, "NORECIPE"), exist_ok=True) + with open(os.path.join(base, "NORECIPE", "results.json"), "w") as fh: + json.dump({"finished": 1.0}, fh) + + rows = dashboard.history_for("bsky") + # 4 bsky runs but HISTORY_CAP=3 → 3 newest, in finished-desc order (753,427,423 — NOT by id) + assert [r["number"] for r in rows] == ["753", "427", "423"] + assert [r["finished"] for r in rows] == [1781663348, 1781178768, 1781178063] + # capped: the oldest (m2r-bsky) is dropped, newest kept + assert "m2r-bsky" not in [r["number"] for r in rows] + # status derived from per-stage results map (no top-level status field) + assert rows[0]["status"] == "success" + # numeric id → Drone link; named id → local summary link + assert rows[0]["url"].endswith("/753") + dashboard._LOCAL.update(ts=0.0, by_recipe={}) + monkeypatch.setattr(dashboard, "HISTORY_CAP", 30) + full = dashboard.history_for("bsky") + assert [r["number"] for r in full] == ["753", "427", "423", "m2r-bsky"] + assert full[-1]["url"] == "/runs/m2r-bsky/summary.html" # named id → local summary + # malformed/empty/no-recipe dirs never surface as recipes and never raise + assert "other" not in 
[r["recipe"] for r in full] + assert set(dashboard._local_history().keys()) == {"bsky", "other"} + assert dashboard.history_for("nope") == [] + + def test_status_badge_fallback_when_no_level(): # Recipe with no results.json level → status badge, not a fabricated level. svg = dashboard.render_badge("ghost", "failure")