"""Warm-infrastructure harness primitive (Phase 2w / WC1+). Phase 2w keeps a small set of apps "warm" at STABLE domains (distinct from the cold per-run `-<6hex>` scheme — see DECISIONS.md Phase-2w): - **live-warm** — actually deployed and running (keycloak today): a shared SSO provider that dependent runs use instead of co-deploying a fresh provider. The per-run *realm* (not the app) is the isolation unit — created at run start, deleted at run end (see harness.sso WC1 helpers). - **data-warm** (W1+) — undeployed-when-idle canonicals whose data volume is retained. This module owns the stable-domain scheme + the "is the warm provider actually usable right now?" probe + the live-app-hex scan used to reap orphan realms concurrency-safely. It deliberately does NOT deploy the warm provider — that's the declarative Nix reconciler's job (nix/modules/warm-keycloak.nix). The harness only *uses* a warm provider when one is up, and falls back to cold co-deploy otherwise. """ from __future__ import annotations import re import ssl import subprocess import urllib.error import urllib.request # Recipes that, when declared as a dep, are served from a shared live-warm instance at a stable # domain instead of being co-deployed per run. Maps dep-recipe -> stable domain. WARM_DOMAINS = { "keycloak": "warm-keycloak.ci.commoninternet.net", } # Health probe per warm provider: (path, ok-codes). Mirrors the recipe_meta health contract. _WARM_HEALTH = { "keycloak": ("/realms/master", (200,)), } _CTX = ssl.create_default_context() _CTX.check_hostname = False _CTX.verify_mode = ssl.CERT_NONE # A cold per-run stack name looks like "-<6hex>_ci_commoninternet_net_"; extract the hex. 
# Group 1 captures the 6-hex run suffix; it must be preceded by a 1-4 character lowercase
# alphanumeric label and a dash, and followed by the underscore-flattened CI domain.
_STACK_HEX_RE = re.compile(r"^[a-z0-9]{1,4}-([0-9a-f]{6})_ci_commoninternet_net_")


def stable_domain(recipe: str) -> str:
    """Return the stable warm domain for a recipe: `warm-<recipe>.ci.commoninternet.net`.

    This is the canonical scheme for BOTH the live-warm keycloak and the data-warm canonicals
    (WC2), distinct from the cold per-run 6hex-suffixed domains.
    (WARM_DOMAINS['keycloak'] equals stable_domain('keycloak').)
    """
    return f"warm-{recipe}.ci.commoninternet.net"


def warm_domain(recipe: str) -> str | None:
    """Return the stable warm domain for a dep recipe, or None if it is not served warm."""
    return WARM_DOMAINS.get(recipe)


def is_warm_up(recipe: str, domain: str | None = None, timeout: int = 10) -> bool:
    """Return True iff the warm provider for `recipe` answers its health endpoint right now.

    Used to decide whether to use the warm path or fall back to cold co-deploy.
    Conservative: any error → False.

    Args:
        recipe: Dep recipe name; selects the health path and accepted status codes.
        domain: Domain to probe. Defaults to the recipe's registered warm domain.
        timeout: Seconds passed to `urlopen` as its timeout.
    """
    domain = domain or warm_domain(recipe)
    if not domain:
        return False
    # Recipes without a registered probe fall back to "/" with a lenient set of codes.
    path, ok = _WARM_HEALTH.get(recipe, ("/", (200, 301, 302)))
    req = urllib.request.Request(f"https://{domain}{path}", method="GET")
    try:
        with urllib.request.urlopen(req, timeout=timeout, context=_CTX) as r:
            return r.status in ok
    except urllib.error.HTTPError as e:
        # urlopen raises for non-2xx answers; the code may still be an accepted one.
        return e.code in ok
    except Exception:  # noqa: BLE001 — down / unreachable / TLS / DNS → not usable
        return False


def _scan_live_app_hexes() -> set[str] | None:
    """Scan docker service names for cold per-run stacks and collect their 6hex suffixes.

    Returns:
        The set of hexes found (possibly empty), or None when the scan itself failed — docker
        could not be run, timed out, or exited non-zero. None lets callers tell "nothing is
        live" apart from "could not determine what is live".
    """
    try:
        res = subprocess.run(
            ["docker", "service", "ls", "--format", "{{.Name}}"],
            capture_output=True,
            text=True,
            timeout=30,
        )
    except Exception:  # noqa: BLE001 — docker missing / timeout / OS error
        return None
    if res.returncode != 0:
        # A failed listing produces no trustworthy names; do not report it as "no live apps".
        return None
    matches = (_STACK_HEX_RE.match(name.strip()) for name in res.stdout.splitlines())
    return {m.group(1) for m in matches if m}


def live_app_hexes() -> set[str]:
    """Return the 6hex suffixes of currently-deployed cold per-run app stacks.

    Used to reap orphan realms safely: a realm whose hex maps to a live stack belongs to an
    in-flight run and is kept. Reads docker service names directly so it works even when an
    app's .env was already removed.

    Returns an empty set when the docker scan fails, so an empty result does NOT prove that
    nothing is live; destructive callers should use `_scan_live_app_hexes` instead.
    """
    hexes = _scan_live_app_hexes()
    return set() if hexes is None else hexes


def reap_orphan_realms(recipe: str, domain: str | None = None) -> list[str]:
    """Reap per-run realms on the warm provider left behind by crashed/killed dependent runs.

    Safe under concurrency: realms whose hex maps to a currently-live app stack are kept, and
    when the live-stack scan fails nothing is reaped at all (an unknown live set must not be
    treated as an empty one, or in-flight runs would lose their realms).

    Args:
        recipe: Warm provider recipe; only "keycloak" is reaped.
        domain: Warm provider domain. Defaults to the recipe's registered warm domain.

    Returns:
        The realms actually deleted; [] on any error (best-effort run-start cleanup, never
        fatal).
    """
    domain = domain or warm_domain(recipe)
    if recipe != "keycloak" or not domain:
        return []
    from . import sso  # local import avoids import cycle at module load

    live = _scan_live_app_hexes()
    if live is None:
        # Could not determine which stacks are live: skip this round rather than risk
        # deleting realms that belong to running dependents.
        return []
    try:
        return sso.reap_orphaned_realms(domain, live)
    except Exception:  # noqa: BLE001 — reaping is hygiene, not correctness-critical
        return []


def realm_for(parent_recipe: str, parent_domain: str) -> str:
    """Return the per-run realm name for a dependent run: "<parent_recipe>-<6hex>".

    The 6hex is the suffix of the parent's per-run domain label. Unique per (parent, pr, ref)
    so concurrent dependents never collide on a shared keycloak, and traceable back to the app
    stack for reaping/debugging. If the label carries no 6hex suffix, the whole label is used
    as the suffix instead.
    """
    label = parent_domain.split(".", 1)[0]  # e.g. "lasu-0a6fb2"
    m = re.search(r"-([0-9a-f]{6})$", label)
    suffix = m.group(1) if m else label
    return f"{parent_recipe}-{suffix}"