"""Generic, recipe-agnostic lifecycle assertions + op helpers (Phase 1d, plan §2.1). These are THE default for each lifecycle op: when a recipe ships no `test_.py` overlay, the generic tier (tests/_generic/test_.py) runs these against the single shared deployment the orchestrator brought up. The lifecycle OPERATIONS (upgrade/backup/restore) live here too — owned by the shared harness, not copy-pasted per recipe (DG7 DRY) — so overlays are assertions-only and may reuse these by composition (`from harness import generic; generic.assert_serving(...)`). Design + precedence: machine-docs/DECISIONS.md (Phase 1d). """ from __future__ import annotations import glob import os import re import socket import ssl from . import abra, lifecycle # A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label. _BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE) def _recipe_dir(recipe: str) -> str: return os.path.expanduser(f"~/.abra/recipes/{recipe}") def backup_capable(recipe: str, meta: dict | None = None) -> bool: """Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3). `recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention).""" if meta and "BACKUP_CAPABLE" in meta: return bool(meta["BACKUP_CAPABLE"]) for path in glob.glob(os.path.join(_recipe_dir(recipe), "compose*.yml")): try: with open(path) as fh: if _BACKUPBOT_RE.search(fh.read()): return True except OSError: continue return False def served_cert(domain: str, port: int = 443) -> tuple[bool, str]: """CA-verified TLS handshake to `domain` (via the gateway passthrough to cc-ci's Traefik). Returns (verified, detail). The pre-issued wildcard is a publicly-trusted Let's Encrypt cert, so a real serve VERIFIES against the system CA bundle and matches the hostname; Traefik's self-signed DEFAULT cert (served only when no router/cert matches the SNI) FAILS verification — so this is a genuine 'not the default cert' assertion with no openssl dependency. detail carries CN+SAN on success, or the failure reason.""" ctx = ssl.create_default_context() # verifies chain against system CAs + checks hostname try: with ( socket.create_connection((domain, port), timeout=20) as sock, ctx.wrap_socket(sock, server_hostname=domain) as ssock, ): cert = ssock.getpeercert() except ssl.SSLCertVerificationError as e: return (False, f"cert did not verify (Traefik default/self-signed?): {e}") except (OSError, ssl.SSLError) as e: return (False, f"TLS handshake error: {e}") cn = next( (v for rdn in cert.get("subject", ()) for k, v in rdn if k == "commonName"), "", ) sans = [v for typ, v in cert.get("subjectAltName", ()) if typ == "DNS"] return (True, f"CN={cn} SAN={sans}") def assert_serving(domain: str, meta: dict) -> None: """The single generic "is the app really serving?" assertion (DG1). Proves, end-to-end: 1. every service in the stack converged (the app's own containers, not just Traefik); 2. a real HTTP(S) response over the run domain with a status in HEALTH_OK — which EXCLUDES 404, so a Traefik unmatched-router fallback fails here; 3. the body is not Traefik's default 404 page; 4. the served TLS cert is the wildcard, not Traefik's default cert. No bare sleeps, no health-only shortcut.""" assert lifecycle.services_converged(domain), f"{domain}: not all services converged" path = meta["HEALTH_PATH"] ok = tuple(meta["HEALTH_OK"]) status = lifecycle.http_get(domain, path) assert status in ok, ( f"{domain}{path}: HTTP {status} not in {ok} — app not serving " "(a Traefik 404 fallback or an unhealthy backend)" ) if status == 200: body = lifecycle.http_body(domain, path) assert ( "404 page not found" not in body ), f"{domain}{path}: served Traefik's default 404 page, not the app" verified, detail = served_cert(domain) assert verified, f"{domain}: TLS cert is not the trusted wildcard — {detail}" assert "commoninternet.net" in detail.lower(), f"{domain}: served cert unexpected — {detail}" def wait_serving(domain: str, meta: dict) -> None: """Wait for converged + healthy (per recipe_meta timeouts), then run the full serving assertion.""" lifecycle.wait_healthy( domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"], ) assert_serving(domain, meta) def do_upgrade(domain: str, target: str | None, meta: dict) -> None: """UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then wait serving.""" lifecycle.upgrade_app(domain, version=target) wait_serving(domain, meta) def snapshots(domain: str) -> list[str]: """Snapshot ids backup-bot-two holds for this app (the backup 'artifact', DG3).""" proc = abra._run(["app", "backup", "snapshots", domain, "-n", "-o"], check=False) ids = [] for ln in proc.stdout.splitlines(): # restic snapshot rows start with an 8-hex short id m = re.match(r"^([0-9a-f]{8})\b", ln.strip()) if m: ids.append(m.group(1)) return ids def do_backup(domain: str) -> list[str]: """BACKUP op: create a snapshot, then assert an artifact now exists (returns snapshot ids).""" lifecycle.backup_app(domain) snaps = snapshots(domain) assert ( snaps ), f"{domain}: backup produced no snapshot artifact (abra app backup snapshots empty)" return snaps def do_restore(domain: str, meta: dict) -> None: """RESTORE op: restore the latest snapshot, then assert the app is healthy + serving again.""" lifecycle.restore_app(domain) wait_serving(domain, meta)