Files
cc-ci/runner/harness/generic.py
autonomic-bot ef44d4658b feat(1d): G0 — generic install + deploy-once orchestrator (DG1 green on hedgedoc)
- harness/generic.py: recipe-agnostic assert_serving (converged + real HTTP, 404-excluded +
  not Traefik 404 body + CA-verified trusted wildcard cert), op helpers, backup_capable detect
- harness/discovery.py: per-op overlay resolution (repo-local > cc-ci > generic), custom + hook
- tests/_generic/: assertion-only tiers (install/upgrade/backup/restore) on the shared deployment
- run_recipe_ci.py: deploy-ONCE orchestrator, per-op summary, deploy-count guard (DG4.1)
- conftest live_app fixture; lifecycle deploy-count + install-steps hook + pin DOMAIN to run domain

DG1 cold-verified green on hedgedoc (pure generic, deploy-count=1, clean teardown). G0 CLAIMED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 23:27:55 +01:00

146 lines
6.0 KiB
Python

"""Generic, recipe-agnostic lifecycle assertions + op helpers (Phase 1d, plan §2.1).
These are THE default for each lifecycle op: when a recipe ships no `test_<op>.py` overlay, the
generic tier (tests/_generic/test_<op>.py) runs these against the single shared deployment the
orchestrator brought up. The lifecycle OPERATIONS (upgrade/backup/restore) live here too — owned by
the shared harness, not copy-pasted per recipe (DG7 DRY) — so overlays are assertions-only and may
reuse these by composition (`from harness import generic; generic.assert_serving(...)`).
Design + precedence: machine-docs/DECISIONS.md (Phase 1d).
"""
from __future__ import annotations
import glob
import os
import re
import socket
import ssl
from . import abra, lifecycle
# A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label.
_BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE)
def _recipe_dir(recipe: str) -> str:
return os.path.expanduser(f"~/.abra/recipes/{recipe}")
def backup_capable(recipe: str, meta: dict | None = None) -> bool:
"""Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3).
`recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's
compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention)."""
if meta and "BACKUP_CAPABLE" in meta:
return bool(meta["BACKUP_CAPABLE"])
for path in glob.glob(os.path.join(_recipe_dir(recipe), "compose*.yml")):
try:
with open(path) as fh:
if _BACKUPBOT_RE.search(fh.read()):
return True
except OSError:
continue
return False
def served_cert(domain: str, port: int = 443) -> tuple[bool, str]:
"""CA-verified TLS handshake to `domain` (via the gateway passthrough to cc-ci's Traefik).
Returns (verified, detail). The pre-issued wildcard is a publicly-trusted Let's Encrypt cert, so
a real serve VERIFIES against the system CA bundle and matches the hostname; Traefik's self-signed
DEFAULT cert (served only when no router/cert matches the SNI) FAILS verification — so this is a
genuine 'not the default cert' assertion with no openssl dependency. detail carries CN+SAN on
success, or the failure reason."""
ctx = ssl.create_default_context() # verifies chain against system CAs + checks hostname
try:
with (
socket.create_connection((domain, port), timeout=20) as sock,
ctx.wrap_socket(sock, server_hostname=domain) as ssock,
):
cert = ssock.getpeercert()
except ssl.SSLCertVerificationError as e:
return (False, f"cert did not verify (Traefik default/self-signed?): {e}")
except (OSError, ssl.SSLError) as e:
return (False, f"TLS handshake error: {e}")
cn = next(
(v for rdn in cert.get("subject", ()) for k, v in rdn if k == "commonName"),
"",
)
sans = [v for typ, v in cert.get("subjectAltName", ()) if typ == "DNS"]
return (True, f"CN={cn} SAN={sans}")
def assert_serving(domain: str, meta: dict) -> None:
"""The single generic "is the app really serving?" assertion (DG1). Proves, end-to-end:
1. every service in the stack converged (the app's own containers, not just Traefik);
2. a real HTTP(S) response over the run domain with a status in HEALTH_OK — which EXCLUDES
404, so a Traefik unmatched-router fallback fails here;
3. the body is not Traefik's default 404 page;
4. the served TLS cert is the wildcard, not Traefik's default cert.
No bare sleeps, no health-only shortcut."""
assert lifecycle.services_converged(domain), f"{domain}: not all services converged"
path = meta["HEALTH_PATH"]
ok = tuple(meta["HEALTH_OK"])
status = lifecycle.http_get(domain, path)
assert status in ok, (
f"{domain}{path}: HTTP {status} not in {ok} — app not serving "
"(a Traefik 404 fallback or an unhealthy backend)"
)
if status == 200:
body = lifecycle.http_body(domain, path)
assert (
"404 page not found" not in body
), f"{domain}{path}: served Traefik's default 404 page, not the app"
verified, detail = served_cert(domain)
assert verified, f"{domain}: TLS cert is not the trusted wildcard — {detail}"
assert "commoninternet.net" in detail.lower(), f"{domain}: served cert unexpected — {detail}"
def wait_serving(domain: str, meta: dict) -> None:
"""Wait for converged + healthy (per recipe_meta timeouts), then run the full serving assertion."""
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert_serving(domain, meta)
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
"""UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then wait serving."""
lifecycle.upgrade_app(domain, version=target)
wait_serving(domain, meta)
def snapshots(domain: str) -> list[str]:
"""Snapshot ids backup-bot-two holds for this app (the backup 'artifact', DG3)."""
proc = abra._run(["app", "backup", "snapshots", domain, "-n", "-o"], check=False)
ids = []
for ln in proc.stdout.splitlines():
# restic snapshot rows start with an 8-hex short id
m = re.match(r"^([0-9a-f]{8})\b", ln.strip())
if m:
ids.append(m.group(1))
return ids
def do_backup(domain: str) -> list[str]:
"""BACKUP op: create a snapshot, then assert an artifact now exists (returns snapshot ids)."""
lifecycle.backup_app(domain)
snaps = snapshots(domain)
assert (
snaps
), f"{domain}: backup produced no snapshot artifact (abra app backup snapshots empty)"
return snaps
def do_restore(domain: str, meta: dict) -> None:
"""RESTORE op: restore the latest snapshot, then assert the app is healthy + serving again."""
lifecycle.restore_app(domain)
wait_serving(domain, meta)