feat(1d): G0 — generic install + deploy-once orchestrator (DG1 green on hedgedoc)
- harness/generic.py: recipe-agnostic assert_serving (converged + real HTTP, 404-excluded + not Traefik 404 body + CA-verified trusted wildcard cert), op helpers, backup_capable detect
- harness/discovery.py: per-op overlay resolution (repo-local > cc-ci > generic), custom + hook
- tests/_generic/: assertion-only tiers (install/upgrade/backup/restore) on the shared deployment
- run_recipe_ci.py: deploy-ONCE orchestrator, per-op summary, deploy-count guard (DG4.1)
- conftest live_app fixture; lifecycle deploy-count + install-steps hook + pin DOMAIN to run domain

DG1 cold-verified green on hedgedoc (pure generic, deploy-count=1, clean teardown). G0 CLAIMED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
145
runner/harness/generic.py
Normal file
145
runner/harness/generic.py
Normal file
@@ -0,0 +1,145 @@
|
||||
"""Generic, recipe-agnostic lifecycle assertions + op helpers (Phase 1d, plan §2.1).
|
||||
|
||||
These are THE default for each lifecycle op: when a recipe ships no `test_<op>.py` overlay, the
|
||||
generic tier (tests/_generic/test_<op>.py) runs these against the single shared deployment the
|
||||
orchestrator brought up. The lifecycle OPERATIONS (upgrade/backup/restore) live here too — owned by
|
||||
the shared harness, not copy-pasted per recipe (DG7 DRY) — so overlays are assertions-only and may
|
||||
reuse these by composition (`from harness import generic; generic.assert_serving(...)`).
|
||||
|
||||
Design + precedence: machine-docs/DECISIONS.md (Phase 1d).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import ssl
|
||||
|
||||
from . import abra, lifecycle
|
||||
|
||||
# A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label.
|
||||
_BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE)
|
||||
|
||||
|
||||
def _recipe_dir(recipe: str) -> str:
    """Return the local checkout directory of `recipe` under the user's `~/.abra/recipes`.

    The leading `~` is expanded with `os.path.expanduser`; the path is not checked for existence.
    """
    unexpanded = f"~/.abra/recipes/{recipe}"
    return os.path.expanduser(unexpanded)
|
||||
|
||||
|
||||
def backup_capable(recipe: str, meta: dict | None = None) -> bool:
|
||||
"""Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3).
|
||||
|
||||
`recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's
|
||||
compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention)."""
|
||||
if meta and "BACKUP_CAPABLE" in meta:
|
||||
return bool(meta["BACKUP_CAPABLE"])
|
||||
for path in glob.glob(os.path.join(_recipe_dir(recipe), "compose*.yml")):
|
||||
try:
|
||||
with open(path) as fh:
|
||||
if _BACKUPBOT_RE.search(fh.read()):
|
||||
return True
|
||||
except OSError:
|
||||
continue
|
||||
return False
|
||||
|
||||
|
||||
def served_cert(domain: str, port: int = 443) -> tuple[bool, str]:
    """Perform a CA-verified TLS handshake with `domain` and describe the certificate served.

    The default SSL context verifies the chain against the system CA bundle and checks the
    hostname. Per the harness design, the pre-issued wildcard is a publicly-trusted Let's Encrypt
    cert and therefore verifies, whereas Traefik's self-signed DEFAULT cert (served when no
    router/cert matches the SNI) fails verification — which makes this a 'not the default cert'
    check with no openssl dependency.

    Args:
        domain: host to connect to; also used as the SNI / hostname to verify.
        port: TCP port, 443 by default.

    Returns:
        `(True, "CN=<cn> SAN=<list of DNS names>")` when the handshake verifies, otherwise
        `(False, <reason>)` — verification failures and other connection/TLS errors are reported
        with distinct prefixes. No exception from the connection attempt is propagated.
    """
    tls = ssl.create_default_context()  # chain + hostname verification enabled
    try:
        with socket.create_connection((domain, port), timeout=20) as raw:
            with tls.wrap_socket(raw, server_hostname=domain) as secured:
                peer = secured.getpeercert()
    except ssl.SSLCertVerificationError as err:
        # Must be handled before the broader clause below, which would also match it.
        return (False, f"cert did not verify (Traefik default/self-signed?): {err}")
    except (OSError, ssl.SSLError) as err:
        return (False, f"TLS handshake error: {err}")

    # First commonName found in the subject, or "" when the subject carries none.
    common_name = ""
    for rdn in peer.get("subject", ()):
        hits = [val for key, val in rdn if key == "commonName"]
        if hits:
            common_name = hits[0]
            break

    # Only DNS-type subjectAltName entries are reported.
    dns_names = []
    for kind, value in peer.get("subjectAltName", ()):
        if kind == "DNS":
            dns_names.append(value)

    return (True, f"CN={common_name} SAN={dns_names}")
|
||||
|
||||
|
||||
def assert_serving(domain: str, meta: dict) -> None:
    """Assert, end-to-end, that the app behind `domain` is really serving (DG1).

    Checks, in order:
      1. `lifecycle.services_converged(domain)` is truthy (every service in the stack, not just
         Traefik);
      2. an HTTP(S) request to `meta["HEALTH_PATH"]` returns a status listed in
         `meta["HEALTH_OK"]` — which excludes 404, so a Traefik unmatched-router fallback fails;
      3. when that status is exactly 200, the response body does not contain Traefik's default
         "404 page not found" text (other accepted statuses skip the body check);
      4. the served TLS cert verifies and its CN/SAN detail mentions the expected wildcard domain,
         i.e. it is not Traefik's default cert.

    No bare sleeps, no health-only shortcut.

    Raises:
        AssertionError: on the first failed check, with a message naming the domain.
        KeyError: if `meta` lacks `HEALTH_PATH` or `HEALTH_OK`.
    """
    converged = lifecycle.services_converged(domain)
    assert converged, f"{domain}: not all services converged"

    health_path = meta["HEALTH_PATH"]
    accepted = tuple(meta["HEALTH_OK"])
    code = lifecycle.http_get(domain, health_path)
    assert code in accepted, (
        f"{domain}{health_path}: HTTP {code} not in {accepted} — app not serving "
        "(a Traefik 404 fallback or an unhealthy backend)"
    )

    if code == 200:
        page = lifecycle.http_body(domain, health_path)
        traefik_fallback = "404 page not found" in page
        assert (
            not traefik_fallback
        ), f"{domain}{health_path}: served Traefik's default 404 page, not the app"

    cert_ok, cert_detail = served_cert(domain)
    assert cert_ok, f"{domain}: TLS cert is not the trusted wildcard — {cert_detail}"
    wildcard_named = "commoninternet.net" in cert_detail.lower()
    assert wildcard_named, f"{domain}: served cert unexpected — {cert_detail}"
|
||||
|
||||
|
||||
def wait_serving(domain: str, meta: dict) -> None:
    """Block until the app is converged + healthy, then run the full `assert_serving` check.

    The accepted status codes, health path and both timeouts are taken from `meta`
    (`HEALTH_OK`, `HEALTH_PATH`, `DEPLOY_TIMEOUT`, `HTTP_TIMEOUT`) and handed to
    `lifecycle.wait_healthy`.
    """
    health_args = {
        "ok_codes": tuple(meta["HEALTH_OK"]),
        "path": meta["HEALTH_PATH"],
        "deploy_timeout": meta["DEPLOY_TIMEOUT"],
        "http_timeout": meta["HTTP_TIMEOUT"],
    }
    lifecycle.wait_healthy(domain, **health_args)
    assert_serving(domain, meta)
|
||||
|
||||
|
||||
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
    """UPGRADE op, performed in place on the shared deployment.

    Asks `lifecycle.upgrade_app` to upgrade `domain` to `target` (passed through as `version`,
    may be None), then waits until the app is serving again via `wait_serving`.
    """
    lifecycle.upgrade_app(domain, version=target)
    wait_serving(domain, meta)
|
||||
|
||||
|
||||
def snapshots(domain: str) -> list[str]:
    """List the snapshot ids backup-bot-two holds for this app (the backup 'artifact', DG3).

    Runs `abra app backup snapshots <domain> -n -o` with `check=False` and scans its stdout.
    A line counts as a snapshot row when, after stripping, it starts with an 8-character
    lowercase-hex short id (the restic row format); those ids are returned in output order.
    Output with no such rows yields an empty list.
    """
    listing = abra._run(["app", "backup", "snapshots", domain, "-n", "-o"], check=False)
    short_id = re.compile(r"^([0-9a-f]{8})\b")
    row_matches = (short_id.match(row.strip()) for row in listing.stdout.splitlines())
    return [hit.group(1) for hit in row_matches if hit]
|
||||
|
||||
|
||||
def do_backup(domain: str) -> list[str]:
    """BACKUP op: take a backup, then require that at least one snapshot is listed.

    Returns:
        The snapshot ids reported by `snapshots(domain)` after the backup.

    Raises:
        AssertionError: if no snapshot id is listed after the backup ran.
    """
    lifecycle.backup_app(domain)
    found = snapshots(domain)
    assert (
        found
    ), f"{domain}: backup produced no snapshot artifact (abra app backup snapshots empty)"
    return found
|
||||
|
||||
|
||||
def do_restore(domain: str, meta: dict) -> None:
    """RESTORE op: restore via `lifecycle.restore_app`, then confirm the app serves again.

    The post-restore check is the same `wait_serving` used after deploy/upgrade, driven by the
    health settings and timeouts in `meta`.
    """
    lifecycle.restore_app(domain)
    wait_serving(domain, meta)
|
||||
Reference in New Issue
Block a user