Files
cc-ci/runner/run_recipe_ci.py
autonomic-bot ef44d4658b feat(1d): G0 — generic install + deploy-once orchestrator (DG1 green on hedgedoc)
- harness/generic.py: recipe-agnostic assert_serving (converged + real HTTP, 404-excluded +
  not Traefik 404 body + CA-verified trusted wildcard cert), op helpers, backup_capable detect
- harness/discovery.py: per-op overlay resolution (repo-local > cc-ci > generic), custom + hook
- tests/_generic/: assertion-only tiers (install/upgrade/backup/restore) on the shared deployment
- run_recipe_ci.py: deploy-ONCE orchestrator, per-op summary, deploy-count guard (DG4.1)
- conftest live_app fixture; lifecycle deploy-count + install-steps hook + pin DOMAIN to run domain

DG1 cold-verified green on hedgedoc (pure generic, deploy-count=1, clean teardown). G0 CLAIMED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 23:27:55 +01:00

306 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""Top-level CI orchestrator (plan §4.3 + Phase 1d), invoked by the Drone pipeline (or by hand).
Phase 1d model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment
(install asserts; upgrade does `abra app upgrade` in place; backup/restore mutate in place; custom
asserts), then ONE teardown in `finally`. Each tier's assertions come from exactly one file — a
recipe overlay if present, else the generic default — discovered by `harness.discovery`
(precedence repo-local > cc-ci > generic). The generic is the default for every op, so ANY recipe is
testable with zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic),
not per-recipe (DG7 DRY).
Run parameters from env (set by the comment-bridge via Drone build params):
RECIPE recipe name (e.g. custom-html) [required]
REF PR head commit sha [optional; used for fetch + run-domain hash]
PR PR number [optional, default 0]
SRC head repo full_name on the mirror [optional]
VERSION upgrade target tag (else newest published) [optional]
STAGES comma filter of tiers to run [optional, default install,upgrade,backup,restore,custom]
Run env (python + pytest + playwright) is provided by `cc-ci-run` (nix/modules/harness.nix);
invoke as: cc-ci-run runner/run_recipe_ci.py
"""
from __future__ import annotations
import glob
import os
import shutil
import subprocess
import sys
import tempfile
# Directory two levels above this file. The script is invoked as runner/run_recipe_ci.py, so this
# is the checkout root; it is used as the subprocess cwd and as the base for tests/<recipe>/ paths.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put <ROOT>/runner first on sys.path so the `harness` import below resolves. This must run
# before that import, which is why the import sits below the top-of-file block (hence noqa E402).
sys.path.insert(0, os.path.join(ROOT, "runner"))
from harness import discovery, generic, lifecycle, naming # noqa: E402
# Every tier name, in the order the run summary prints them; also the default value of STAGES.
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
def _redact_values() -> list[str]:
"""Values to scrub from published logs (D6 redaction filter, plan §4.4). The infra secrets
materialised at /run/secrets/* — if any subprocess ever echoes one, mask it. Only >=8-char
values, so it never false-positives on short strings / SHAs."""
vals = set()
for p in glob.glob("/run/secrets/*"):
try:
with open(p) as f:
v = f.read().strip()
except OSError:
continue
if len(v) >= 8:
vals.add(v)
return sorted(vals, key=len, reverse=True)
_REDACT = _redact_values()
def run_redacted(cmd: list[str], env: dict | None = None) -> int:
    """Run *cmd* from ROOT, echoing its combined stdout/stderr live with known secrets masked.

    Output is forwarded one line at a time (so Drone logs stay tail-able); every value in
    _REDACT is replaced by ***REDACTED*** before the line is written. This is a safety net only:
    the harness never prints secrets and abra doesn't echo generated ones.

    Args:
        cmd: argv of the subprocess.
        env: environment for the child, or None to inherit the current one.

    Returns:
        The child's exit code.
    """
    child = subprocess.Popen(
        cmd,
        cwd=ROOT,
        env=env,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # fold stderr into the single stream being filtered
        text=True,
        bufsize=1,  # line-buffered, so output appears as the child produces it
    )
    assert child.stdout is not None
    for raw in child.stdout:
        masked = raw
        # str.replace is a no-op when the secret is absent, so no membership pre-check is needed.
        for secret in _REDACT:
            masked = masked.replace(secret, "***REDACTED***")
        sys.stdout.write(masked)
        sys.stdout.flush()
    return child.wait()
def _gitea_token() -> str | None:
tok = os.environ.get("GITEA_TOKEN")
if not tok and os.path.exists("/run/secrets/bridge_gitea_token"):
with open("/run/secrets/bridge_gitea_token") as f:
tok = f.read().strip()
return tok or None
def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
    """Make the recipe available under ~/.abra/recipes/<recipe> at the code under test.

    If both SRC and REF are given, clone the mirror repo and check out that ref; otherwise fetch
    the catalogue copy with abra. Private mirror repos need the bot token, which is passed via a
    per-command http.extraHeader (not persisted in .git/config).

    Args:
        recipe: recipe name; also the directory name under ~/.abra/recipes.
        ref: commit to check out, or None.
        src: head repo full_name on the mirror, or None.

    Raises:
        subprocess.CalledProcessError: if the clone, the checkout or the abra fetch fails. For
            the git commands the exception's ``cmd`` deliberately omits the auth header, so an
            uncaught traceback cannot leak the token into the published log.
    """
    recipes_dir = os.path.expanduser("~/.abra/recipes")
    os.makedirs(recipes_dir, exist_ok=True)
    dest = os.path.join(recipes_dir, recipe)

    if not (src and ref):
        # Clean re-fetch from the catalogue. rm first so a leftover dir from a prior SRC+REF run
        # (origin → private mirror, maybe lacking tags) can't poison the catalogue fetch.
        subprocess.run(["rm", "-rf", dest], check=False)
        subprocess.run(["abra", "recipe", "fetch", recipe, "-n"], check=True)
        return

    url = f"https://git.autonomic.zone/{src}.git"
    auth: list[str] = []
    tok = _gitea_token()
    if tok:
        auth = ["-c", f"http.extraHeader=Authorization: token {tok}"]

    def git_checked(*args: str) -> None:
        """Run an authenticated git command; on failure raise without the token in the argv."""
        try:
            subprocess.run(["git", *auth, *args], check=True)
        except subprocess.CalledProcessError as err:
            # The original exception's message embeds the full argv, token included; replace it
            # (and suppress the chained original) so the traceback stays safe to publish.
            raise subprocess.CalledProcessError(err.returncode, ["git", *args]) from None

    subprocess.run(["rm", "-rf", dest], check=False)
    git_checked("clone", "--quiet", url, dest)
    git_checked("-C", dest, "checkout", "--quiet", ref)

    # Bring in published version TAGS from the public upstream so the upgrade tier can deploy a
    # previous published version (mirror PR branches carry no release tags). Read-only + plain git
    # (no bot token to a foreign host). Non-fatal: if unreachable, upgrade degrades to a skip.
    upstream = f"https://git.coopcloud.tech/coop-cloud/{recipe}.git"
    subprocess.run(
        ["git", "-C", dest, "fetch", "--quiet", upstream, "refs/tags/*:refs/tags/*"],
        check=False,
    )
def snapshot_recipe_tests(recipe: str) -> str | None:
"""Copy the recipe-shipped tests/ to a stable temp dir, immune to abra re-checking-out the
recipe to a version tag during the run. Returns the snapshot path, or None if no tests/."""
src = os.path.expanduser(f"~/.abra/recipes/{recipe}/tests")
if not os.path.isdir(src):
return None
has_overlay = glob.glob(os.path.join(src, "test_*.py")) or os.path.isfile(
os.path.join(src, "install_steps.sh")
)
if not has_overlay:
return None
dst = os.path.join(tempfile.gettempdir(), f"ccci-recipe-tests-{recipe}")
shutil.rmtree(dst, ignore_errors=True)
shutil.copytree(src, dst)
return dst
def _load_meta(recipe: str) -> dict:
    """Build the per-recipe settings (timeouts, health path/codes) used for deploy/wait.

    Starts from built-in defaults and, when <ROOT>/tests/<recipe>/recipe_meta.py exists, executes
    it and takes over any of the default keys — plus BACKUP_CAPABLE — that it defines. Mirrors
    tests/conftest._recipe_meta so the orchestrator uses the same config the tiers see.
    """
    settings = {
        "HEALTH_PATH": "/",
        "HEALTH_OK": (200, 301, 302),
        "DEPLOY_TIMEOUT": 600,
        "HTTP_TIMEOUT": 300,
    }
    meta_file = os.path.join(ROOT, "tests", recipe, "recipe_meta.py")
    if not os.path.exists(meta_file):
        return settings

    namespace: dict = {}
    with open(meta_file) as fh:
        exec(compile(fh.read(), meta_file, "exec"), namespace)  # noqa: S102 (trusted, in-repo)

    # Only known keys are taken over; anything else the meta file defines is ignored.
    overridable = [*settings, "BACKUP_CAPABLE"]
    settings.update({key: namespace[key] for key in overridable if key in namespace})
    return settings
def _tier_env(domain: str) -> dict:
return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
def run_op_tier(recipe: str, op: str, repo_local: str | None, domain: str) -> str:
    """Run the one assertion file for lifecycle op *op* against the shared deployment.

    The file (a recipe overlay or the generic default, as resolved by harness.discovery) performs
    the op itself (upgrade/backup/restore) and asserts; for install it only asserts, since the
    app is already deployed.

    Returns:
        'pass' when pytest exits 0, otherwise 'fail'.
    """
    source, path = discovery.resolve_op(recipe, op, repo_local)
    rel = os.path.relpath(path, ROOT)
    print(f"\n===== TIER: {op} ({source}: {rel}) =====", flush=True)

    pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", path]
    if run_redacted(pytest_cmd, env=_tier_env(domain)) != 0:
        return "fail"
    return "pass"
def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
    """Run every discovered non-lifecycle custom test_*.py (both locations, additive).

    All files are run even after one has failed, so the log shows every failure.

    Returns:
        'skip' when no custom tests are defined, 'fail' when any pytest run exits non-zero,
        otherwise 'pass'.
    """
    customs = discovery.custom_tests(recipe, repo_local)
    if not customs:
        return "skip"

    print("\n===== TIER: custom =====", flush=True)
    exit_codes = []
    for source, path in customs:
        rel = os.path.relpath(path, ROOT)
        print(f" custom ({source}): {rel}", flush=True)
        pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", path]
        exit_codes.append(run_redacted(pytest_cmd, env=_tier_env(domain)))
    return "fail" if any(code != 0 for code in exit_codes) else "pass"
def main() -> int:
    """Run one CI pass for $RECIPE: fetch, deploy once, run the selected tiers, tear down, report.

    Parameters come from the environment (RECIPE, REF, SRC, VERSION, PR, STAGES).

    Returns:
        The process exit code: 2 when RECIPE is missing; 1 when the deploy/readiness step failed,
        any tier failed, no tier ran, or the deploy-count guard (DG4.1) was violated; 0 otherwise.
    """
    recipe = os.environ.get("RECIPE")
    if not recipe:
        print("RECIPE env is required", file=sys.stderr)
        return 2
    ref = os.environ.get("REF") or None
    src = os.environ.get("SRC") or None
    target = os.environ.get("VERSION") or None
    pr = os.environ.get("PR", "0")
    stages = {
        s.strip() for s in os.environ.get("STAGES", ",".join(ALL_STAGES)).split(",") if s.strip()
    }
    print(f"== cc-ci run: recipe={recipe} ref={ref} pr={pr} stages={sorted(stages)}")

    fetch_recipe(recipe, ref, src)
    repo_local = snapshot_recipe_tests(recipe)
    meta = _load_meta(recipe)
    domain = naming.app_domain(recipe, pr, ref)

    # Deploy-once base version: previous published version when the upgrade tier will run and one
    # exists (so upgrade goes previous→target in place), else the target (current/$REF). (DECISIONS.)
    want_upgrade = "upgrade" in stages
    prev = lifecycle.previous_version(recipe) if want_upgrade else None
    base = prev or target
    backup_cap = generic.backup_capable(recipe, meta)
    hook = discovery.install_steps(recipe, repo_local)

    # Deploy-count guard (DG4.1): exactly one deploy_app() per run. The counter file starts at 0
    # and its path is exported via CCCI_DEPLOY_COUNT_FILE.
    countfile = os.path.join(tempfile.gettempdir(), f"ccci-deploys-{domain}")
    with open(countfile, "w") as f:
        f.write("0")
    os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile

    results: dict[str, str] = {}
    deploy_ok = False
    lifecycle.janitor()
    try:
        # ---- deploy ONCE + wait ready (the single deployment all tiers share) ----
        try:
            lifecycle.deploy_app(
                recipe, domain, version=base, secrets=True, install_steps_hook=hook
            )
            lifecycle.wait_healthy(
                domain,
                ok_codes=tuple(meta["HEALTH_OK"]),
                path=meta["HEALTH_PATH"],
                deploy_timeout=meta["DEPLOY_TIMEOUT"],
                http_timeout=meta["HTTP_TIMEOUT"],
            )
            deploy_ok = True
        except Exception as e:  # noqa: BLE001 — a failed deploy is a reported failure, not a crash
            print(f"!! deploy/readiness failed: {e}", flush=True)

        # ---- INSTALL tier (when selected; a failed deploy counts as an install failure) ----
        if "install" in stages:
            results["install"] = (
                run_op_tier(recipe, "install", repo_local, domain) if deploy_ok else "fail"
            )

        if deploy_ok:
            # ---- UPGRADE tier ----
            if "upgrade" in stages:
                results["upgrade"] = (
                    run_op_tier(recipe, "upgrade", repo_local, domain)
                    if prev
                    else "skip"  # only one published version → nothing to upgrade from
                )
            # ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
            if "backup" in stages:
                results["backup"] = (
                    run_op_tier(recipe, "backup", repo_local, domain) if backup_cap else "skip"
                )
            if "restore" in stages:
                results["restore"] = (
                    run_op_tier(recipe, "restore", repo_local, domain) if backup_cap else "skip"
                )
            # ---- CUSTOM tier ----
            if "custom" in stages:
                results["custom"] = run_custom(recipe, repo_local, domain)
        else:
            # deploy failed → the shared deployment is dead; remaining tiers cannot run on it.
            for op in ("upgrade", "backup", "restore", "custom"):
                if op in stages:
                    results[op] = "skip"
    finally:
        # ONE teardown, whatever happened above.
        lifecycle.teardown_app(domain, verify=False)

    # ---- deploy-count assertion (DG4.1) ----
    with open(countfile) as f:
        deploy_count = int(f.read().strip() or "0")
    os.remove(countfile)

    # ---- per-op summary (DG6 feed) ----
    print("\n===== RUN SUMMARY =====", flush=True)
    print(f"deploy-count = {deploy_count} (expect 1)")
    for op in (s for s in ALL_STAGES if s in results):
        print(f" {op:8s}: {results[op]}")

    overall = 0
    if deploy_count != 1:
        print(f"!! deploy-count {deploy_count} != 1 (DG4.1 violation)", file=sys.stderr)
        overall = 1
    if any(v == "fail" for v in results.values()):
        overall = 1
    if not deploy_ok:
        # With "install" filtered out of STAGES, a dead deployment only produces "skip" results;
        # without this check such a run would exit 0 although nothing was actually verified.
        print("!! deploy/readiness failed; failing the run", file=sys.stderr)
        overall = 1
    if not results:
        print("no tiers ran", file=sys.stderr)
        return 1
    return overall
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())