Files
cc-ci/runner/harness/canonical.py
autonomic-bot 9a7772563a style: repo-wide lint pass — make the lint gate green again
Push builds have been RED on the lint step since ~build 209 from accumulated
formatting drift. This is the mechanical cleanup: ruff format + ruff --fix
(UP038 isinstance unions, SIM105 contextlib.suppress, UP031 f-strings, SIM115
tempfile context manager), shfmt -i 2 -ci, nixpkgs-fmt/statix/deadnix (merged
attrsets, dropped unused lib args), yamllint, and shell quoting fixes in
tests/lasuite-docs/setup_custom_tests.sh. No behaviour changes intended;
lint: PASS, unit tests: 138 passed.
2026-06-09 21:56:15 +00:00

185 lines
7.9 KiB
Python

"""Data-warm canonical registry + lifecycle (Phase 2w / WC2, with WC3 snapshots).
A **canonical** is a per-recipe known-good deployment kept at the STABLE domain
`warm-<recipe>.ci.commoninternet.net`, **data-warm**: deployed while in use, **undeployed-when-idle
with its data volume retained**, so a later `--quick` run (W2) reattaches the volume and boots warm
(skipping fresh DB-init/first-boot). A small declarative registry tracks which recipes are canonical
and **at which known-good commit/version**.
Distinct from W0's *live-warm* keycloak (always running, shared SSO dep). Both use the
`warm-<recipe>` scheme + warmsnap snapshots; the difference is the idle lifecycle (live = up,
data = undeployed-keep-volume).
- **Enrollment (declarative):** `tests/<recipe>/recipe_meta.py` sets `WARM_CANONICAL = True`
(consistent with DEPS/EXTRA_ENV — enrolling stays a tests/<recipe>/ change, D5).
- **Registry state (per recipe), under `/var/lib/ci-warm/<recipe>/canonical.json`:**
`{recipe, domain, version, commit, status, ts}`. The retained data volume + the warmsnap
`snapshot/` live alongside. All of this is **cache, excluded from the D8 closure** (WC8) —
re-seeded by cold runs (WC5), not restored on a VM rebuild.
W1 builds the registry + the data-warm lifecycle and proves it (seed → undeploy-keep-volume →
redeploy-reattach → data survives). The automatic **promote-on-green-cold** seeding/advancement (WC5)
+ nightly refresh (WC6) are W3; here `seed_canonical` is the primitive they will call.
"""
from __future__ import annotations
import json
import os
import subprocess
import time
from . import abra, warm, warmsnap
def is_enrolled(recipe: str) -> bool:
    """Report whether a recipe opts in to being a data-warm canonical.

    Executes `tests/<recipe>/recipe_meta.py` (resolved relative to this module) and returns the
    truthiness of its `WARM_CANONICAL` global. A recipe with no `recipe_meta.py` is not enrolled.
    """
    harness_dir = os.path.dirname(__file__)
    meta_path = os.path.join(harness_dir, "..", "..", "tests", recipe, "recipe_meta.py")
    if os.path.exists(meta_path):
        namespace: dict = {}
        with open(meta_path) as src:
            # The meta file is plain Python living in this repo, so it is run rather than parsed.
            code = compile(src.read(), meta_path, "exec")
            exec(code, namespace)  # noqa: S102 (trusted, in-repo)
        return bool(namespace.get("WARM_CANONICAL"))
    return False
def canonical_domain(recipe: str) -> str:
    """Return the stable domain at which the recipe's data-warm canonical is deployed."""
    # Delegates to the shared `warm` helper so this module and `warm` agree on the domain.
    domain = warm.stable_domain(recipe)
    return domain
def enrolled_recipes() -> list[str]:
    """List, in sorted order, every recipe under `tests/` that is enrolled as a data-warm canonical
    (its `recipe_meta.py` sets `WARM_CANONICAL`). The WC6 nightly sweep uses this to decide which
    canonicals to refresh via a green cold run on latest.

    Best effort: an OSError while scanning ends the scan and returns whatever was collected so far
    (an empty list when `tests/` itself cannot be listed).
    """
    tests_root = os.path.join(os.path.dirname(__file__), "..", "..", "tests")
    enrolled = []
    try:
        candidates = sorted(os.listdir(tests_root))
        for recipe in candidates:
            meta_file = os.path.join(tests_root, recipe, "recipe_meta.py")
            if not os.path.isfile(meta_file):
                continue  # a tests/ entry without recipe metadata cannot be enrolled
            if is_enrolled(recipe):
                enrolled.append(recipe)
    except OSError:
        # Deliberately swallowed: the sweep treats an unreadable tree as "nothing (more) enrolled".
        pass
    return enrolled
def registry_path(recipe: str) -> str:
    """Return the path of the recipe's `canonical.json` registry record inside its warm app dir."""
    recipe_dir = warmsnap.app_dir(recipe)
    return os.path.join(recipe_dir, "canonical.json")
def read_registry(recipe: str) -> dict | None:
    """Load the recipe's canonical registry record, or None when no usable record exists.

    Returns None when the file is missing or unreadable (OSError), when it is not valid JSON
    (ValueError), and when it parses to something other than a JSON object. Callers index the
    record by key (`rec["domain"]`, `rec.get("version")`), so a non-object payload is treated
    the same as a corrupt file instead of surfacing later as a TypeError/AttributeError.
    """
    try:
        with open(registry_path(recipe)) as f:
            rec = json.load(f)
    except (OSError, ValueError):
        return None
    return rec if isinstance(rec, dict) else None
def write_registry(recipe: str, *, version: str, commit: str | None, status: str) -> dict:
    """Persist the canonical registry record for a recipe and return it.

    The record is written to a sibling `.tmp` file and then moved over `canonical.json` with
    `os.replace`, so readers never observe a half-written file. `status` is "warm"
    (deployed/in-use) or "idle" (undeployed, volume retained); `ts` is the current UTC time.
    """
    os.makedirs(warmsnap.app_dir(recipe), exist_ok=True)
    record = dict(
        recipe=recipe,
        domain=canonical_domain(recipe),
        version=version,
        commit=commit,
        status=status,
        ts=time.strftime("%Y%m%dT%H%M%SZ", time.gmtime()),
    )
    final_path = registry_path(recipe)
    scratch_path = final_path + ".tmp"
    with open(scratch_path, "w") as out:
        json.dump(record, out, indent=2)
    # Swap the finished file into place in one step.
    os.replace(scratch_path, final_path)
    return record
def has_canonical(recipe: str) -> bool:
    """Report whether the recipe has a usable canonical: a registry record exists AND
    `warmsnap.stack_volumes` still reports at least one volume for the canonical domain (so a
    redeploy can reattach it). Mirrors WC2's 'data-warm: volume retained'."""
    if read_registry(recipe):
        retained = warmsnap.stack_volumes(canonical_domain(recipe))
        return bool(retained)
    return False
def _set_status(recipe: str, status: str) -> None:
    """Rewrite the recipe's registry record with a new `status`, carrying over the recorded
    version and commit. Does nothing when there is no (non-empty) record to update."""
    current = read_registry(recipe)
    if not current:
        return
    write_registry(
        recipe,
        version=current.get("version"),
        commit=current.get("commit"),
        status=status,
    )
def deploy_canonical(recipe: str, timeout: int = 900) -> None:
    """Bring the recipe's data-warm canonical UP at the version recorded in its registry,
    reattaching the retained data volume (warm boot), then mark the record "warm".

    `timeout` (seconds) bounds the `abra app deploy` subprocess. Raises RuntimeError when the
    recipe has no registry record (seed one with a cold run / the W1 proof first) or when the
    deploy exits non-zero; the message carries the first 300 chars of stderr + stdout.
    """
    record = read_registry(recipe)
    if not record:
        raise RuntimeError(f"no canonical registry for {recipe} — seed one first (cold run)")
    domain = record["domain"]
    version = record["version"]

    # The .env and the retained volume are already in place, so only the recorded known-good
    # version needs redeploying. TYPE=<recipe>:<version> is reset FIRST: a prior --quick upgrade
    # may have left TYPE pointing at a since-removed/broken PR commit, in which case abra cannot
    # resolve the "current deployment" and FATALs with "unable to resolve <commit>".
    abra.env_set(domain, "TYPE", f"{recipe}:{version}")
    abra.recipe_checkout(recipe, version)

    # -f makes the redeploy idempotent.
    deploy_cmd = ["abra", "app", "deploy", domain, version, "-o", "-n", "-f"]
    result = subprocess.run(deploy_cmd, capture_output=True, text=True, timeout=timeout)
    if result.returncode != 0:
        detail = (result.stderr + " " + result.stdout).strip()[:300]
        raise RuntimeError(f"deploy canonical {domain} {version} failed: {detail}")
    _set_status(recipe, "warm")
def undeploy_keep_volume(recipe: str) -> None:
    """Put the canonical into its idle state: undeploy the app (freeing RAM) and mark the registry
    record "idle". Only the undeploy is performed here — volumes, secrets and the .env are left in
    place, which is what keeps the canonical data-warm."""
    abra.undeploy(canonical_domain(recipe))
    _set_status(recipe, "idle")
def prune_stale() -> list[str]:
    """WC8 disk hygiene: remove warm data for DE-ENROLLED canonicals — a `/var/lib/ci-warm/<recipe>/`
    that carries a `canonical.json` but whose recipe is no longer enrolled (WARM_CANONICAL dropped).

    For each such recipe the retained `warm-<recipe>` data volumes are removed first, then the dir
    (snapshot + registry). The dir is only dropped when every `docker volume rm` succeeded:
    `canonical.json` is what marks the recipe as prunable, so deleting it while a volume is still
    present would orphan that volume from every later sweep. A recipe whose volumes could not all
    be removed is left in place and retried on the next call.

    Leaves the live-warm reconciler dirs (keycloak/traefik — they have a `last_good`, no
    `canonical.json`), `alerts/`, and currently-enrolled canonicals untouched.

    Returns the recipes that were fully pruned. An unlistable warm root yields an empty list.
    """
    import shutil

    root = warmsnap.warm_root()
    keep = set(enrolled_recipes())
    pruned: list[str] = []
    try:
        entries = sorted(os.listdir(root))
    except OSError:
        return pruned
    for name in entries:
        d = os.path.join(root, name)
        if not os.path.isdir(d) or name in keep:
            continue
        if not os.path.isfile(os.path.join(d, "canonical.json")):
            continue  # not a data-warm canonical (e.g. keycloak/traefik reconciler dir, alerts/)
        # Drop the retained warm-<recipe> volumes; attempt all of them even if one fails.
        volumes_gone = True
        for vol in warmsnap.stack_volumes(canonical_domain(name)):
            rm = subprocess.run(["docker", "volume", "rm", vol], capture_output=True, text=True)
            if rm.returncode != 0:
                volumes_gone = False
        if not volumes_gone:
            continue  # keep the registry so the next sweep still knows about the leftover volume
        shutil.rmtree(d, ignore_errors=True)
        pruned.append(name)
    return pruned
def seed_canonical(recipe: str, version: str, commit: str | None = None) -> dict:
    """Register the recipe's warm domain, at `version`, as its canonical and snapshot it.

    Expected sequence on the caller's side: deploy and verify healthy, undeploy, then call this
    (WC3: the known-good snapshot is taken while the app is UNDEPLOYED). The registry record is
    written with status "idle" before the snapshot is taken. The retained volume IS the canonical.
    Returns the registry record.
    """
    record = write_registry(recipe, version=version, commit=commit, status="idle")
    domain = canonical_domain(recipe)
    warmsnap.snapshot(recipe, domain, commit=commit, version=version)
    return record