"""Data-warm canonical registry + lifecycle (Phase 2w / WC2, with WC3 snapshots). A **canonical** is a per-recipe known-good deployment kept at the STABLE domain `warm-.ci.commoninternet.net`, **data-warm**: deployed while in use, **undeployed-when-idle with its data volume retained**, so a later `--quick` run (W2) reattaches the volume and boots warm (skipping fresh DB-init/first-boot). A small declarative registry tracks which recipes are canonical and **at which known-good commit/version**. Distinct from W0's *live-warm* keycloak (always running, shared SSO dep). Both use the `warm-` scheme + warmsnap snapshots; the difference is the idle lifecycle (live = up, data = undeployed-keep-volume). - **Enrollment (declarative):** `tests//recipe_meta.py` sets `WARM_CANONICAL = True` (consistent with DEPS/EXTRA_ENV — enrolling stays a tests// change, D5). - **Registry state (per recipe), under `/var/lib/ci-warm//canonical.json`:** `{recipe, domain, version, commit, status, ts}`. The retained data volume + the warmsnap `snapshot/` live alongside. All of this is **cache, excluded from the D8 closure** (WC8) — re-seeded by cold runs (WC5), not restored on a VM rebuild. W1 builds the registry + the data-warm lifecycle and proves it (seed → undeploy-keep-volume → redeploy-reattach → data survives). The automatic **promote-on-green-cold** seeding/advancement (WC5) + nightly refresh (WC6) are W3; here `seed_canonical` is the primitive they will call. """ from __future__ import annotations import json import os import subprocess import time from . import abra, warm, warmsnap def is_enrolled(recipe: str) -> bool: """True if `tests//recipe_meta.py` sets `WARM_CANONICAL = True`. Missing meta → False.""" path = os.path.join(os.path.dirname(__file__), "..", "..", "tests", recipe, "recipe_meta.py") if not os.path.exists(path): return False ns: dict = {} with open(path) as fh: exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) return bool(ns.get("WARM_CANONICAL")) def canonical_domain(recipe: str) -> str: """Stable data-warm domain for the recipe's canonical.""" return warm.stable_domain(recipe) def enrolled_recipes() -> list[str]: """All recipes enrolled as data-warm canonicals (recipe_meta.WARM_CANONICAL=True), sorted. Used by the WC6 nightly sweep to know which canonicals to refresh via a green cold run on latest.""" tests_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests") out = [] try: for name in sorted(os.listdir(tests_dir)): if os.path.isfile(os.path.join(tests_dir, name, "recipe_meta.py")) and is_enrolled(name): out.append(name) except OSError: pass return out def registry_path(recipe: str) -> str: return os.path.join(warmsnap.app_dir(recipe), "canonical.json") def read_registry(recipe: str) -> dict | None: try: with open(registry_path(recipe)) as f: return json.load(f) except (OSError, ValueError): return None def write_registry(recipe: str, *, version: str, commit: str | None, status: str) -> dict: """Atomically write the canonical registry record for a recipe.""" os.makedirs(warmsnap.app_dir(recipe), exist_ok=True) rec = { "recipe": recipe, "domain": canonical_domain(recipe), "version": version, "commit": commit, "status": status, # "warm" (deployed/in-use) | "idle" (undeployed, volume retained) "ts": time.strftime("%Y%m%dT%H%M%SZ", time.gmtime()), } tmp = registry_path(recipe) + ".tmp" with open(tmp, "w") as f: json.dump(rec, f, indent=2) os.replace(tmp, registry_path(recipe)) return rec def has_canonical(recipe: str) -> bool: """True iff a registry record exists AND the data volume(s) are retained on the host (so a redeploy can reattach them). Mirrors WC2's 'data-warm: volume retained'.""" rec = read_registry(recipe) if not rec: return False return bool(warmsnap.stack_volumes(canonical_domain(recipe))) def _set_status(recipe: str, status: str) -> None: rec = read_registry(recipe) if rec: write_registry(recipe, version=rec.get("version"), commit=rec.get("commit"), status=status) def deploy_canonical(recipe: str, timeout: int = 900) -> None: """Bring a data-warm canonical UP at its known-good version, reattaching the retained data volume (warm boot). Requires an existing registry record (seeded by a cold run / W1 proof).""" rec = read_registry(recipe) if not rec: raise RuntimeError(f"no canonical registry for {recipe} — seed one first (cold run)") domain, version = rec["domain"], rec["version"] # The .env + retained volume already exist; redeploy the recorded known-good version. Reset the # recorded TYPE=: FIRST so abra can resolve the "current deployment" even if a # prior --quick upgrade left TYPE pointing at a since-removed/broken PR commit (otherwise abra # FATALs "unable to resolve "). Then checkout the tag + idempotent (-f) redeploy. abra.env_set(domain, "TYPE", f"{recipe}:{version}") abra.recipe_checkout(recipe, version) r = subprocess.run( ["abra", "app", "deploy", domain, version, "-o", "-n", "-f"], capture_output=True, text=True, timeout=timeout, ) if r.returncode != 0: raise RuntimeError(f"deploy canonical {domain} {version} failed: " f"{(r.stderr + ' ' + r.stdout).strip()[:300]}") _set_status(recipe, "warm") def undeploy_keep_volume(recipe: str) -> None: """Make the canonical idle: undeploy (free RAM) but RETAIN the data volume (data-warm). Does NOT remove volumes/secrets/.env — only `abra app undeploy`.""" domain = canonical_domain(recipe) abra.undeploy(domain) _set_status(recipe, "idle") def prune_stale() -> list[str]: """WC8 disk hygiene: remove warm data for DE-ENROLLED canonicals — a `/var/lib/ci-warm//` that carries a `canonical.json` but whose recipe is no longer enrolled (WARM_CANONICAL dropped). Drops the dir (snapshot + registry) AND the retained `warm-` data volumes. Leaves the live-warm reconciler dirs (keycloak/traefik — they have a `last_good`, no `canonical.json`), `alerts/`, and currently-enrolled canonicals untouched. Returns the recipes pruned.""" import shutil import subprocess root = warmsnap.warm_root() keep = set(enrolled_recipes()) pruned: list[str] = [] try: entries = sorted(os.listdir(root)) except OSError: return pruned for name in entries: d = os.path.join(root, name) if not os.path.isdir(d) or name in keep: continue if not os.path.isfile(os.path.join(d, "canonical.json")): continue # not a data-warm canonical (e.g. keycloak/traefik reconciler dir, alerts/) # drop the retained warm- volumes, then the snapshot/registry dir for vol in warmsnap.stack_volumes(canonical_domain(name)): subprocess.run(["docker", "volume", "rm", vol], capture_output=True, text=True) shutil.rmtree(d, ignore_errors=True) pruned.append(name) return pruned def seed_canonical(recipe: str, version: str, commit: str | None = None) -> dict: """Record (already deployed at `version`) as the recipe's canonical: write the registry, then (app must be UNDEPLOYED) take the known-good snapshot. Caller deploys + verifies healthy first, then undeploys before calling this (WC3: snapshot while undeployed). The retained volume IS the canonical. Returns the registry record.""" rec = write_registry(recipe, version=version, commit=commit, status="idle") warmsnap.snapshot(recipe, canonical_domain(recipe), commit=commit, version=version) return rec