M4: harness + green install stage (custom-html + Playwright); guaranteed teardown; M4 CLAIMED
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
run_recipe_ci.py + conftest + abra/lifecycle wrappers + Nix python/playwright env. deploy_app forces LETS_ENCRYPT_ENV='' (addresses A1). Short per-run domain scheme for the 64-char swarm name limit. 2 passed; teardown leaves zero orphans. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
109
runner/harness/abra.py
Normal file
109
runner/harness/abra.py
Normal file
@ -0,0 +1,109 @@
|
||||
"""Thin, robust wrappers around the `abra` CLI for the CI harness (plan §4.3).
|
||||
|
||||
Bakes in the known abra gotchas (re-verify per installed abra version, currently 0.13.0-beta):
|
||||
- `abra app undeploy` / `abra app volume remove` do NOT accept `--chaos` → never pass it.
|
||||
- plumb a `timeout` through secret generate/insert/remove calls.
|
||||
- `abra app ls -S -m` returns nested {server: {apps: [...]}} — parse the inner structure.
|
||||
- run non-interactively with `-n` (`--no-input`) everywhere.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
# Executable name placed first in the argv of every command that `_run` launches.
ABRA = "abra"
|
||||
|
||||
|
||||
class AbraError(RuntimeError):
    """Error raised by this module when an `abra` command fails."""
|
||||
|
||||
|
||||
def _run(args: list[str], timeout: int = 300, check: bool = True) -> subprocess.CompletedProcess:
    """Run ``abra <args>`` with captured text output and return the completed process.

    Args:
        args: Arguments placed after the ``abra`` executable.
        timeout: Seconds to wait for the command before it is killed.
        check: When true, a non-zero exit raises; when false the call is best-effort and
            failures are only visible through the returned process.

    Returns:
        The ``CompletedProcess`` of the command. With ``check=False`` a timed-out command
        yields a synthetic result with return code 124 and whatever output was captured.

    Raises:
        AbraError: ``check`` is true and the command exited non-zero.
        subprocess.TimeoutExpired: ``check`` is true and the command timed out.
    """
    argv = [ABRA, *args]
    try:
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        if check:
            raise
        # Best-effort callers (the teardown path) must not crash on a hung command,
        # otherwise the remaining cleanup steps would never run.
        # Captured output on timeout may be bytes or None even with text=True.
        captured = []
        for stream in (exc.stdout, exc.stderr):
            if isinstance(stream, bytes):
                stream = stream.decode(errors="replace")
            captured.append(stream or "")
        return subprocess.CompletedProcess(argv, 124, captured[0], captured[1])
    if check and proc.returncode != 0:
        raise AbraError(f"abra {' '.join(args)} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}")
    return proc
|
||||
|
||||
|
||||
def app_new(recipe: str, domain: str, server: str = "default", version: Optional[str] = None,
            secrets: bool = False) -> None:
    """Invoke `abra app new` non-interactively for *recipe* on *domain*.

    The version, when given, follows the recipe name as a positional argument. The
    server and domain are passed via ``-s`` / ``-D``, and ``-S`` is appended when
    *secrets* is true. `_run` is called with its default checking, so a non-zero exit
    raises.
    """
    positional = [recipe, version] if version else [recipe]
    flags = ["-s", server, "-D", domain, "-n"]
    if secrets:
        flags.append("-S")
    _run(["app", "new", *positional, *flags])
|
||||
|
||||
|
||||
def env_set(domain: str, key: str, value: str, server: str = "default") -> None:
    """Set ``key=value`` in the app's .env file (abra has no setter; edit the file directly).

    Every line that assigns *key* is rewritten to ``key=value``. This includes
    commented-out assignments such as ``#KEY=...``, which thereby become active. If no
    line matches, the assignment is appended at the end.

    Args:
        domain: App domain; the file is ``~/.abra/servers/<server>/<domain>.env``.
        key: Variable name to set.
        value: New value, written verbatim after ``=``.
        server: Server directory holding the config. Defaults to ``"default"``, matching
            the other wrappers in this module.

    Raises:
        OSError: The config file cannot be read or written (e.g. it does not exist).
    """
    import os
    import re

    path = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
    with open(path) as fh:
        lines = fh.read().splitlines()

    assignment = f"{key}={value}"
    # Optional leading whitespace and a single '#' so commented templates are reused.
    pat = re.compile(rf"^\s*#?\s*{re.escape(key)}=")
    out, seen = [], False
    for ln in lines:
        if pat.match(ln):
            out.append(assignment)
            seen = True
        else:
            out.append(ln)
    if not seen:
        out.append(assignment)

    with open(path, "w") as fh:
        fh.write("\n".join(out) + "\n")
|
||||
|
||||
|
||||
def secret_generate(domain: str, timeout: int = 300) -> None:
    """Run `abra app secret generate <domain> --all -n`; a non-zero exit is ignored."""
    argv = ["app", "secret", "generate", domain, "--all", "-n"]
    _run(argv, timeout=timeout, check=False)
|
||||
|
||||
|
||||
def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None:
    """Run `abra app deploy <domain> -n`, adding `-C` when *chaos* is true.

    `_run` is called with its default checking, so a non-zero exit raises.
    """
    flags = ["-n", "-C"] if chaos else ["-n"]
    _run(["app", "deploy", domain, *flags], timeout=timeout)
|
||||
|
||||
|
||||
def undeploy(domain: str, timeout: int = 600) -> None:
    """Run `abra app undeploy <domain> -n`; a non-zero exit is ignored."""
    # `--chaos` is deliberately never passed: undeploy does not accept it.
    argv = ["app", "undeploy", domain, "-n"]
    _run(argv, timeout=timeout, check=False)
|
||||
|
||||
|
||||
def volume_remove(domain: str, timeout: int = 300) -> None:
    """Run `abra app volume remove <domain> -f -n`; a non-zero exit is ignored."""
    # `--chaos` is deliberately never passed (unsupported); `-f` skips the prompts.
    argv = ["app", "volume", "remove", domain, "-f", "-n"]
    _run(argv, timeout=timeout, check=False)
|
||||
|
||||
|
||||
def secret_remove_all(domain: str, timeout: int = 300) -> None:
    """Run `abra app secret remove <domain> --all -n`; a non-zero exit is ignored."""
    argv = ["app", "secret", "remove", domain, "--all", "-n"]
    _run(argv, timeout=timeout, check=False)
|
||||
|
||||
|
||||
def app_config_remove(domain: str, server: str = "default") -> None:
    """Remove ``~/.abra/servers/<server>/<domain>.env`` so a later run can recreate it.

    A file that is already absent is not an error; other OS errors propagate.
    """
    import os

    env_file = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
    try:
        os.unlink(env_file)
    except FileNotFoundError:
        # Already gone: nothing to clean up.
        return
|
||||
|
||||
|
||||
def app_ls(server: str = "default") -> list[dict]:
    """Return the app entries `abra app ls -S -m -n` reports for *server*.

    The command's JSON is expected to be nested as ``{server: {"apps": [...]}}``. Any
    deviation from that shape yields an empty list rather than an exception. That covers
    unparsable output, a non-object top level, a missing server, and a missing or
    non-list ``"apps"``. Entries that are not objects are dropped.

    Args:
        server: Key of the server whose apps are wanted.

    Returns:
        The list of app dicts for *server*, possibly empty.
    """
    proc = _run(["app", "ls", "-S", "-m", "-n"], check=False)
    try:
        data = json.loads(proc.stdout)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []
    if not isinstance(data, dict):
        return []
    node = data.get(server)
    if not isinstance(node, dict):
        return []
    apps = node.get("apps")
    if not isinstance(apps, list):
        return []
    return [entry for entry in apps if isinstance(entry, dict)]
|
||||
104
runner/harness/lifecycle.py
Normal file
104
runner/harness/lifecycle.py
Normal file
@ -0,0 +1,104 @@
|
||||
"""App lifecycle for the CI harness: deploy, wait-healthy, teardown, janitor (plan §4.3).
|
||||
|
||||
The teardown guarantee is sacred: a failed test must never leak an app/volume/secret into the
|
||||
next run. Callers wrap deploy()/teardown() in try/finally (or a pytest finalizer).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ssl
|
||||
import subprocess
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
from . import abra
|
||||
|
||||
# NOTE(review): not referenced by any function visible in this module — confirm it is still needed.
GATEWAY_IP = "143.244.213.108" # *.ci.commoninternet.net -> gateway (TLS passthrough to cc-ci)
|
||||
|
||||
|
||||
def deploy_app(recipe: str, domain: str, version: str | None = None, secrets: bool = True) -> None:
    """Create, configure and deploy an app for *recipe* at *domain*.

    Steps, in order: drop any leftover .env for the domain, create the app, blank out
    LETS_ENCRYPT_ENV, generate secrets when *secrets* is true, then deploy.

    LETS_ENCRYPT_ENV is forced to '' so traefik serves the wildcard cert via the file
    provider and NEVER attempts ACME (adversary finding A1).
    """
    # A prior crashed run may have left a stale .env behind; clear it first.
    abra.app_config_remove(domain)
    abra.app_new(recipe, domain, version=version, secrets=secrets)
    abra.env_set(domain, key="LETS_ENCRYPT_ENV", value="")
    if secrets:
        abra.secret_generate(domain)
    abra.deploy(domain)
|
||||
|
||||
|
||||
def _stack_name(domain: str) -> str:
|
||||
# abra derives the swarm stack name from the domain by replacing dots with underscores
|
||||
# and KEEPING hyphens (e.g. custom-html-x.ci.commoninternet.net -> custom-html-x_ci_...).
|
||||
return domain.replace(".", "_")
|
||||
|
||||
|
||||
def services_converged(domain: str) -> bool:
    """Return True when every service in the app's stack reports replicas N/N with N > 0.

    Queries ``docker stack services <stack> --format {{.Replicas}}`` and inspects one row
    per service. Docker may append a qualifier after the counts (for example
    ``1/1 (max 1 per node)``), so only the leading ``cur/want`` token is compared, and
    it is compared numerically.

    Returns False when the stack has no services, when any row cannot be parsed, when
    any service is not at its desired count, or when the docker call does not answer
    within 60 seconds. A hung call counts as "not yet converged" so the caller's own
    deadline stays in control.
    """
    stack = _stack_name(domain)
    try:
        proc = subprocess.run(
            ["docker", "stack", "services", stack, "--format", "{{.Replicas}}"],
            capture_output=True, text=True, timeout=60,
        )
    except subprocess.TimeoutExpired:
        return False

    rows = [row.strip() for row in proc.stdout.splitlines() if row.strip()]
    if not rows:
        return False
    for row in rows:
        counts = row.split()[0]  # drop any trailing "(max N per node)"-style qualifier
        cur, sep, want = counts.partition("/")
        if not sep or not cur.isdigit() or not want.isdigit():
            return False
        if int(want) == 0 or int(cur) != int(want):
            return False
    return True
|
||||
|
||||
|
||||
def http_get(domain: str, path: str = "/", timeout: int = 15) -> int:
    """HTTPS GET ``https://<domain><path>`` and return the HTTP status code.

    The request goes to the app's real hostname. On cc-ci the *.ci.commoninternet.net
    wildcard resolves (public DNS) to the gateway, which SNI-passthroughs to cc-ci's
    traefik, so the real URL keeps SNI correct. Connecting to the bare IP would drop
    SNI and fail to route.

    Certificate and hostname verification are switched off. An HTTP error response
    returns its status code; any other failure returns 0.
    """
    insecure_ctx = ssl.create_default_context()
    insecure_ctx.check_hostname = False
    insecure_ctx.verify_mode = ssl.CERT_NONE

    request = urllib.request.Request(f"https://{domain}{path}", method="GET")
    status = 0
    try:
        with urllib.request.urlopen(request, timeout=timeout, context=insecure_ctx) as response:
            status = response.status
    except urllib.error.HTTPError as http_err:
        status = http_err.code
    except Exception:
        # Connection, TLS or DNS trouble: report "no answer" as status 0.
        status = 0
    return status
|
||||
|
||||
|
||||
def wait_healthy(domain: str, ok_codes=(200, 301, 302), deploy_timeout: int = 600,
                 http_timeout: int = 300) -> None:
    """Wait for the stack's services to converge, then for the app to answer over HTTPS.

    Both phases poll every 5 seconds. Deadlines use the monotonic clock so wall-clock
    adjustments on the host cannot shorten or stretch the timeouts.

    Args:
        domain: App domain to check.
        ok_codes: HTTP status codes accepted as healthy.
        deploy_timeout: Seconds allowed for service convergence.
        http_timeout: Seconds allowed, after convergence, for a healthy HTTPS answer.

    Raises:
        TimeoutError: Either phase did not succeed before its deadline.
    """
    deadline = time.monotonic() + deploy_timeout
    while time.monotonic() < deadline:
        if services_converged(domain):
            break
        time.sleep(5)
    else:
        # Loop ran out without a break: the services never converged.
        raise TimeoutError(f"{domain}: services did not converge in {deploy_timeout}s")

    deadline = time.monotonic() + http_timeout
    last = 0
    while time.monotonic() < deadline:
        last = http_get(domain)
        if last in ok_codes:
            return
        time.sleep(5)
    raise TimeoutError(f"{domain}: not healthy over HTTPS (last status {last})")
|
||||
|
||||
|
||||
def teardown_app(domain: str) -> None:
    """Idempotent, best-effort full teardown. Never raises (finalizer-safe).

    Runs undeploy, volume removal, secret removal and config removal in that order.
    Each step is isolated: an exception from one step is reported on stderr and the
    remaining steps still run, so a single failure cannot leave later resources behind.
    """
    import sys

    steps = (
        ("undeploy", abra.undeploy),
        ("volume remove", abra.volume_remove),
        ("secret remove", abra.secret_remove_all),
        ("config remove", abra.app_config_remove),
    )
    for label, step in steps:
        try:
            step(domain)
        except Exception as exc:  # deliberate: a finalizer must never raise
            print(f"teardown {domain}: {label} failed: {exc!r}", file=sys.stderr)
|
||||
|
||||
|
||||
def janitor(max_age_hours: int = 6) -> None:
    """Remove orphaned *-pr* apps left by crashed runs older than max_age_hours.

    An app is a candidate when its name contains both "-pr" and
    ".ci.commoninternet.net". Age is approximated by the modification time of the
    app's config file, ``~/.abra/servers/default/<name>.env``, because the
    deployed-status/age detail in the listing varies by abra version. Apps whose config
    is newer than the cutoff are left alone, since they may belong to a run that is
    still in progress. An app with no readable config cannot be dated and is treated
    as orphaned.

    Args:
        max_age_hours: Minimum age before a matching app is torn down; 0 removes every
            matching app.
    """
    import os

    cutoff = time.time() - max_age_hours * 3600
    for app in abra.app_ls():
        if not isinstance(app, dict):
            continue
        name = app.get("appName") or app.get("domain") or ""
        if "-pr" not in name or ".ci.commoninternet.net" not in name:
            continue
        env_path = os.path.expanduser(f"~/.abra/servers/default/{name}.env")
        try:
            if os.path.getmtime(env_path) > cutoff:
                continue  # too recent: possibly a live run
        except OSError:
            pass  # undatable: fall through and clean it up
        teardown_app(name)
|
||||
80
runner/run_recipe_ci.py
Normal file
80
runner/run_recipe_ci.py
Normal file
@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Top-level CI orchestrator (plan §4.3), invoked by the Drone pipeline (or by hand).
|
||||
|
||||
Reads the run parameters from env (set by the comment-bridge via Drone build params):
|
||||
RECIPE recipe name (e.g. custom-html) [required]
|
||||
REF PR head commit sha [optional; recorded, used for fetch]
|
||||
PR PR number [optional, default 0]
|
||||
SRC head repo full_name on the mirror [optional]
|
||||
STAGES comma list: install,upgrade,backup [optional, default install]
|
||||
|
||||
It fetches the recipe at REF, then runs the requested per-stage pytest files under
|
||||
tests/<recipe>/. Teardown is guaranteed by the conftest fixture finalizer.
|
||||
|
||||
Run env (python with pytest+playwright, PLAYWRIGHT_BROWSERS_PATH) is provided by `cc-ci-run`
|
||||
(modules/harness.nix); invoke as: cc-ci-run runner/run_recipe_ci.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# Repository root: the parent of the directory that contains this script.
_RUNNER_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(_RUNNER_DIR)

# Stage name -> pytest file looked up under tests/<recipe>/.
STAGE_FILES = {stage: f"test_{stage}.py" for stage in ("install", "upgrade", "backup")}
|
||||
|
||||
|
||||
def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
    """Make the recipe under test available in ``~/.abra/recipes/<recipe>``.

    If both *src* and *ref* are given, any existing copy is removed and the mirror repo
    ``https://git.autonomic.zone/<src>.git`` is cloned and checked out at *ref*.
    Otherwise the catalogue copy is fetched with `abra recipe fetch`.

    The arguments come from the environment, so they are validated before anything
    touches the filesystem. *recipe* must be a plain directory name: it is joined into
    a path that gets deleted. Neither *recipe* nor *ref* may start with ``-``, which
    would be parsed as a command-line option.

    Args:
        recipe: Recipe name (single path component).
        ref: Commit or ref to check out, or None.
        src: ``owner/name`` of the head repo on the mirror, or None.

    Raises:
        ValueError: *recipe* or *ref* fails validation.
        subprocess.CalledProcessError: git or abra exits non-zero.
    """
    import shutil

    if (not recipe or recipe in (".", "..") or recipe.startswith("-")
            or os.path.basename(recipe) != recipe):
        raise ValueError(f"unsafe recipe name: {recipe!r}")
    if ref and ref.startswith("-"):
        raise ValueError(f"unsafe ref: {ref!r}")

    recipes_dir = os.path.expanduser("~/.abra/recipes")
    os.makedirs(recipes_dir, exist_ok=True)
    dest = os.path.join(recipes_dir, recipe)
    if src and ref:
        url = f"https://git.autonomic.zone/{src}.git"
        # Start from a clean slate; tolerate a missing or odd (file/symlink) destination.
        if os.path.islink(dest) or os.path.isfile(dest):
            os.remove(dest)
        else:
            shutil.rmtree(dest, ignore_errors=True)
        subprocess.run(["git", "clone", "--quiet", url, dest], check=True)
        subprocess.run(["git", "-C", dest, "checkout", "--quiet", ref], check=True)
    else:
        subprocess.run(["abra", "recipe", "fetch", recipe, "-n"], check=True)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the requested CI stages for the recipe named in the environment.

    Reads RECIPE (required), REF, SRC, PR and STAGES from the environment, fetches the
    recipe, collects the per-stage pytest files that exist under ``tests/<recipe>/``
    and runs them with pytest from the repository root.

    Returns:
        2 when RECIPE is missing or a stage name is unknown, 1 when no stage test file
        exists, otherwise pytest's exit status.
    """
    env = os.environ
    recipe = env.get("RECIPE")
    if not recipe:
        print("RECIPE env is required", file=sys.stderr)
        return 2

    ref = env.get("REF") or None
    src = env.get("SRC") or None
    stages = []
    for raw in env.get("STAGES", "install").split(","):
        stage_name = raw.strip()
        if stage_name:
            stages.append(stage_name)

    pr = env.get("PR", "0")
    print(f"== cc-ci run: recipe={recipe} ref={ref} pr={pr} stages={stages}")
    fetch_recipe(recipe, ref, src)

    test_dir = os.path.join(ROOT, "tests", recipe)
    targets = []
    for stage in stages:
        fname = STAGE_FILES.get(stage)
        if not fname:
            print(f"unknown stage {stage}", file=sys.stderr)
            return 2
        candidate = os.path.join(test_dir, fname)
        if not os.path.exists(candidate):
            print(f" (skip {stage}: {candidate} not present)")
            continue
        targets.append(candidate)

    # also discover recipe-local tests later (D4); install stage first (M4)
    if not targets:
        print("no stage test files found", file=sys.stderr)
        return 1

    pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", *targets]
    return subprocess.call(pytest_cmd, cwd=ROOT)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: exit with main()'s return value as the process status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user