- harness/generic.py: recipe-agnostic assert_serving (converged + real HTTP, 404-excluded + not Traefik 404 body + CA-verified trusted wildcard cert), op helpers, backup_capable detect
- harness/discovery.py: per-op overlay resolution (repo-local > cc-ci > generic), custom + hook
- tests/_generic/: assertion-only tiers (install/upgrade/backup/restore) on the shared deployment
- run_recipe_ci.py: deploy-ONCE orchestrator, per-op summary, deploy-count guard (DG4.1)
- conftest live_app fixture; lifecycle deploy-count + install-steps hook + pin DOMAIN to run domain

DG1 cold-verified green on hedgedoc (pure generic, deploy-count=1, clean teardown). G0 CLAIMED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
6.0 KiB
Python
146 lines
6.0 KiB
Python
"""Generic, recipe-agnostic lifecycle assertions + op helpers (Phase 1d, plan §2.1).
|
|
|
|
These are THE default for each lifecycle op: when a recipe ships no `test_<op>.py` overlay, the
|
|
generic tier (tests/_generic/test_<op>.py) runs these against the single shared deployment the
|
|
orchestrator brought up. The lifecycle OPERATIONS (upgrade/backup/restore) live here too — owned by
|
|
the shared harness, not copy-pasted per recipe (DG7 DRY) — so overlays are assertions-only and may
|
|
reuse these by composition (`from harness import generic; generic.assert_serving(...)`).
|
|
|
|
Design + precedence: machine-docs/DECISIONS.md (Phase 1d).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import glob
|
|
import os
|
|
import re
|
|
import socket
|
|
import ssl
|
|
|
|
from . import abra, lifecycle
|
|
|
|
# A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label.
|
|
_BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE)
|
|
|
|
|
|
def _recipe_dir(recipe: str) -> str:
|
|
return os.path.expanduser(f"~/.abra/recipes/{recipe}")
|
|
|
|
|
|
def backup_capable(recipe: str, meta: dict | None = None) -> bool:
    """Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3).

    `recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's
    compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention).

    Args:
        recipe: recipe name; its compose files are looked up under `_recipe_dir(recipe)`.
        meta: optional recipe metadata; when it contains `BACKUP_CAPABLE`, that value (coerced
            to bool) is returned and no file is read.

    Returns:
        True if the override is truthy, or (without an override) if any readable compose*.yml
        matches `_BACKUPBOT_RE`; False otherwise.
    """
    if meta and "BACKUP_CAPABLE" in meta:
        return bool(meta["BACKUP_CAPABLE"])

    pattern = os.path.join(_recipe_dir(recipe), "compose*.yml")
    for path in glob.glob(pattern):
        try:
            # Decode explicitly as UTF-8 and replace undecodable bytes: with the locale default
            # encoding a stray byte would raise UnicodeDecodeError (not an OSError) and abort the
            # whole detection instead of just this file. The label itself is plain ASCII, so
            # replacement characters elsewhere in the file cannot hide it.
            with open(path, encoding="utf-8", errors="replace") as fh:
                text = fh.read()
        except OSError:
            # Best-effort scan: an unreadable compose file is skipped, not fatal.
            continue
        if _BACKUPBOT_RE.search(text):
            return True
    return False
|
|
|
|
|
|
def served_cert(domain: str, port: int = 443, timeout: float = 20) -> tuple[bool, str]:
    """CA-verified TLS handshake to `domain` (via the gateway passthrough to cc-ci's Traefik).

    The pre-issued wildcard is a publicly-trusted Let's Encrypt cert, so a real serve VERIFIES
    against the system CA bundle and matches the hostname; Traefik's self-signed DEFAULT cert
    (served only when no router/cert matches the SNI) FAILS verification — so this is a genuine
    'not the default cert' assertion with no openssl dependency.

    Args:
        domain: hostname to connect to; also used as the SNI / hostname-check name.
        port: TCP port to connect to.
        timeout: connect timeout in seconds passed to `socket.create_connection`.

    Returns:
        (verified, detail). On success detail is `CN=<common name> SAN=<list of DNS SANs>`;
        on any failure verified is False and detail carries the failure reason. Connection,
        TLS and invalid-hostname errors are reported this way rather than raised.
    """
    ctx = ssl.create_default_context()  # verifies chain against system CAs + checks hostname
    try:
        with socket.create_connection((domain, port), timeout=timeout) as sock:
            with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                cert = ssock.getpeercert()
    except ssl.SSLCertVerificationError as e:
        # Must precede the OSError clause: SSLCertVerificationError is itself an OSError subclass.
        return (False, f"cert did not verify (Traefik default/self-signed?): {e}")
    except OSError as e:
        # Covers ssl.SSLError, DNS failures, refused connections and timeouts.
        return (False, f"TLS handshake error: {e}")
    except UnicodeError as e:
        # A hostname with an empty or over-long label fails IDNA encoding before any connection
        # is attempted; that is a ValueError subclass, not an OSError, so report it explicitly
        # to honour the "(False, reason)" contract instead of raising.
        return (False, f"invalid hostname {domain!r}: {e}")

    subject = cert.get("subject", ())
    cn = next((v for rdn in subject for k, v in rdn if k == "commonName"), "")
    sans = [v for typ, v in cert.get("subjectAltName", ()) if typ == "DNS"]
    return (True, f"CN={cn} SAN={sans}")
|
|
|
|
|
|
def assert_serving(domain: str, meta: dict) -> None:
    """Generic end-to-end "is the app really serving?" check (DG1).

    Asserts, in order:
      1. `lifecycle.services_converged(domain)` is truthy (every service in the stack, not just
         Traefik);
      2. an HTTP request to `meta["HEALTH_PATH"]` returns a status listed in `meta["HEALTH_OK"]`
         (404 is expected to be excluded there, so a Traefik unmatched-router fallback fails);
      3. on a 200, the body does not contain Traefik's default "404 page not found" text;
      4. the served TLS cert verifies and its CN/SAN detail mentions commoninternet.net,
         i.e. it is the wildcard rather than Traefik's default cert.

    No bare sleeps, no health-only shortcut. Raises AssertionError on the first failed check.
    """
    converged = lifecycle.services_converged(domain)
    assert converged, f"{domain}: not all services converged"

    health_path = meta["HEALTH_PATH"]
    accepted = tuple(meta["HEALTH_OK"])
    code = lifecycle.http_get(domain, health_path)
    assert code in accepted, (
        f"{domain}{health_path}: HTTP {code} not in {accepted} — app not serving "
        "(a Traefik 404 fallback or an unhealthy backend)"
    )

    # The body is only fetched and inspected for a plain 200 response.
    if code == 200:
        page = lifecycle.http_body(domain, health_path)
        assert "404 page not found" not in page, (
            f"{domain}{health_path}: served Traefik's default 404 page, not the app"
        )

    cert_ok, cert_detail = served_cert(domain)
    assert cert_ok, f"{domain}: TLS cert is not the trusted wildcard — {cert_detail}"
    assert (
        "commoninternet.net" in cert_detail.lower()
    ), f"{domain}: served cert unexpected — {cert_detail}"
|
|
|
|
|
|
def wait_serving(domain: str, meta: dict) -> None:
    """Block until the app is converged + healthy, then run the full `assert_serving` check.

    The accepted status codes, health path and both timeouts are taken from `meta`
    (`HEALTH_OK`, `HEALTH_PATH`, `DEPLOY_TIMEOUT`, `HTTP_TIMEOUT`).
    """
    wait_options = {
        "ok_codes": tuple(meta["HEALTH_OK"]),
        "path": meta["HEALTH_PATH"],
        "deploy_timeout": meta["DEPLOY_TIMEOUT"],
        "http_timeout": meta["HTTP_TIMEOUT"],
    }
    lifecycle.wait_healthy(domain, **wait_options)
    assert_serving(domain, meta)
|
|
|
|
|
|
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
    """UPGRADE op, performed in place on the shared deployment.

    Calls `lifecycle.upgrade_app` with `target` as the version (may be None), then waits until
    the app is serving again via `wait_serving`.
    """
    # Step 1: trigger the upgrade to the requested version.
    lifecycle.upgrade_app(domain, version=target)
    # Step 2: do not return until the upgraded app passes the full serving assertion.
    wait_serving(domain, meta)
|
|
|
|
|
|
def snapshots(domain: str) -> list[str]:
    """Return the snapshot ids backup-bot-two holds for this app (the backup 'artifact', DG3).

    Runs `abra app backup snapshots <domain> -n -o` without failing on a non-zero exit, and
    collects the leading 8-hex-digit short id from every output line that starts with one
    (restic snapshot rows), in output order.
    """
    result = abra._run(["app", "backup", "snapshots", domain, "-n", "-o"], check=False)
    # restic snapshot rows start with an 8-hex short id
    short_id = re.compile(r"^([0-9a-f]{8})\b")
    candidates = (short_id.match(row.strip()) for row in result.stdout.splitlines())
    return [hit.group(1) for hit in candidates if hit]
|
|
|
|
|
|
def do_backup(domain: str) -> list[str]:
    """BACKUP op: take a backup, then require that at least one snapshot is listed.

    Returns the snapshot ids reported by `snapshots(domain)`; raises AssertionError when that
    list is empty.
    """
    lifecycle.backup_app(domain)
    found = snapshots(domain)
    assert found, (
        f"{domain}: backup produced no snapshot artifact (abra app backup snapshots empty)"
    )
    return found
|
|
|
|
|
|
def do_restore(domain: str, meta: dict) -> None:
    """RESTORE op: run the restore, then require the app to be healthy + serving again.

    Delegates the restore itself to `lifecycle.restore_app` and the post-restore verification
    to `wait_serving`.
    """
    # Step 1: trigger the restore.
    lifecycle.restore_app(domain)
    # Step 2: the op only counts as done once the app passes the full serving assertion.
    wait_serving(domain, meta)
|