style(1b): auto-format + lint-clean the whole codebase (RL1)

Mechanical, semantics-preserving cleanup so the codebase passes the new lint stage:
- ruff format: all 32 Python files (wraps long signatures, normalizes quotes/blank lines).
- nixpkgs-fmt: modules/drone-runner.nix.
- shfmt (-i 2 -ci): scripts/*.sh.

Lint fixes (reviewed, behavior-preserving — no test weakened):
- ruff SIM105: try/except-pass -> contextlib.suppress (abra.py app_config rm; lifecycle.py janitor).
- ruff SIM115: open().read() -> with open() (run_recipe_ci.py redaction-values + gitea-token).
- statix: merge repeated sops `secrets.*` keys into one `secrets = { ... }` (comments kept);
  empty fn pattern `{ ... }:` -> `_:` (packages.nix).
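- deadnix: drop unused lambda args (flake `self`; configuration.nix `lib`; overlay `final` -> `_`).
- ruff pyupgrade: `Optional[str]` -> `str | None` (abra.py); `datetime.timezone.utc` ->
  `datetime.UTC` (lifecycle.py; note the `datetime.UTC` alias requires Python 3.11+).
- ruff isort: sort stdlib imports (custom-html install test).
- shellcheck SC2164: `cd ... || exit 1` (scripts/lint.sh).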

Verified on cc-ci: `scripts/lint.sh` -> lint: PASS; nixosConfigurations.cc-ci evaluates;
all Python byte-compiles. The reformat changes the source hash of the deployed
bridge/dashboard/runner, so cc-ci will be rebuilt to the new closure in W2 before the cold
D1-D10 re-verification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-27 20:52:05 +01:00
parent a0ea2f0aa9
commit 2cede01ed7
35 changed files with 431 additions and 185 deletions

View File

@ -26,6 +26,7 @@ Config (env): BRIDGE_LISTEN, GITEA_API, DRONE_URL, CI_REPO, HMAC_FILE, DRONE_TOK
GITEA_TOKEN_FILE, POLL_INTERVAL (default 30), POLL_REPOS (csv of enrolled repos), AUTH_ALLOWLIST
(csv, optional).
"""
import hashlib
import hmac
import json
@ -118,14 +119,22 @@ def trigger_build(recipe, ref, pr, src):
def post_comment(owner, repo, number, body):
status, c = _api(f"{GITEA_API}/repos/{owner}/{repo}/issues/{number}/comments", GITEA_TOKEN,
method="POST", data={"body": body})
status, c = _api(
f"{GITEA_API}/repos/{owner}/{repo}/issues/{number}/comments",
GITEA_TOKEN,
method="POST",
data={"body": body},
)
return c.get("id") if status in (200, 201) and c else None
def edit_comment(owner, repo, comment_id, body):
_api(f"{GITEA_API}/repos/{owner}/{repo}/issues/comments/{comment_id}", GITEA_TOKEN,
method="PATCH", data={"body": body})
_api(
f"{GITEA_API}/repos/{owner}/{repo}/issues/comments/{comment_id}",
GITEA_TOKEN,
method="PATCH",
data={"body": body},
)
def build_status(num):
@ -140,6 +149,7 @@ def watch_and_reflect(owner, name, number, num, recipe, sha, comment_id, run_url
"""Poll the Drone build to completion, then edit the PR comment to reflect the outcome (D7).
Bounded by the build timeout (60m) + margin."""
import time as _t
deadline = _t.time() + 75 * 60
last = None
while _t.time() < deadline:
@ -150,8 +160,12 @@ def watch_and_reflect(owner, name, number, num, recipe, sha, comment_id, run_url
icon = {"success": ""}.get(last, "")
verdict = "passed" if last == "success" else (last or "did not complete")
if comment_id:
edit_comment(owner, name, comment_id,
f"cc-ci: run for `{recipe}` @ `{sha[:8]}` {icon} **{verdict}** → {run_url}")
edit_comment(
owner,
name,
comment_id,
f"cc-ci: run for `{recipe}` @ `{sha[:8]}` {icon} **{verdict}** → {run_url}",
)
log(f"reflected outcome build {num} ({recipe} PR #{number}): {last}")
@ -192,14 +206,19 @@ def process_testme(full_name, owner, name, number, user, comment_id, source):
post_comment(owner, name, number, "cc-ci: failed to start a CI run (see bridge logs).")
return None, "trigger failed"
run_url = f"{DRONE_URL}/{CI_REPO}/{num}"
cid = post_comment(owner, name, number,
f"cc-ci: started CI run for `{name}` @ `{head['sha'][:8]}` → {run_url}")
log(f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} "
f"(PR #{number}, comment {comment_id}) by {user}")
cid = post_comment(
owner, name, number, f"cc-ci: started CI run for `{name}` @ `{head['sha'][:8]}` → {run_url}"
)
log(
f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} "
f"(PR #{number}, comment {comment_id}) by {user}"
)
# Reflect the final pass/fail back onto that comment when the build finishes (D7).
threading.Thread(target=watch_and_reflect,
args=(owner, name, number, num, name, head["sha"], cid, run_url),
daemon=True).start()
threading.Thread(
target=watch_and_reflect,
args=(owner, name, number, num, name, head["sha"], cid, run_url),
daemon=True,
).start()
return run_url, "ok"
@ -242,9 +261,14 @@ class Handler(BaseHTTPRequestHandler):
return self._send(204, "not a PR")
run_url, reason = process_testme(
repo.get("full_name", ""), (repo.get("owner") or {}).get("login", ""),
repo.get("name", ""), issue.get("number"),
c.get("user", {}).get("login", ""), c.get("id"), "webhook")
repo.get("full_name", ""),
(repo.get("owner") or {}).get("login", ""),
repo.get("name", ""),
issue.get("number"),
c.get("user", {}).get("login", ""),
c.get("id"),
"webhook",
)
if not run_url:
if reason == "duplicate":
return self._send(200, "already handled")

View File

@ -11,6 +11,7 @@ ref, when, and a link to the canonical Drone run. Also serves an embeddable SVG
Config (env): DRONE_URL, CI_REPO, DRONE_TOKEN_FILE, DASH_LISTEN (default 0.0.0.0:8080),
POLL_INTERVAL (default 60), CACHE_TTL (default 30).
"""
import html
import json
import os
@ -34,8 +35,14 @@ DRONE_TOKEN = _read(os.environ["DRONE_TOKEN_FILE"])
_CACHE = {"ts": 0.0, "recipes": []}
_COLORS = {"success": "#3fb950", "failure": "#f85149", "error": "#f85149",
"running": "#d29922", "pending": "#d29922", "killed": "#8b949e"}
_COLORS = {
"success": "#3fb950",
"failure": "#f85149",
"error": "#f85149",
"running": "#d29922",
"pending": "#d29922",
"killed": "#8b949e",
}
def log(*a):
@ -43,8 +50,9 @@ def log(*a):
def _drone(path):
req = urllib.request.Request(f"{DRONE_URL}{path}",
headers={"Authorization": f"Bearer {DRONE_TOKEN}"})
req = urllib.request.Request(
f"{DRONE_URL}{path}", headers={"Authorization": f"Bearer {DRONE_TOKEN}"}
)
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read())
@ -72,14 +80,16 @@ def latest_per_recipe():
rows = []
for recipe, b in sorted(latest.items()):
ref = (b.get("params") or {}).get("REF") or ""
rows.append({
"recipe": recipe,
"status": b.get("status", "unknown"),
"number": b.get("number"),
"ref": ref[:8],
"finished": b.get("finished") or 0,
"url": f"{DRONE_URL}/{CI_REPO}/{b.get('number')}",
})
rows.append(
{
"recipe": recipe,
"status": b.get("status", "unknown"),
"number": b.get("number"),
"ref": ref[:8],
"finished": b.get("finished") or 0,
"url": f"{DRONE_URL}/{CI_REPO}/{b.get('number')}",
}
)
return rows
@ -163,7 +173,7 @@ class Handler(BaseHTTPRequestHandler):
if path in ("/healthz", "/dashboard/healthz"):
return self._send(200, "ok", "text/plain")
if path.startswith("/badge/") and path.endswith(".svg"):
recipe = path[len("/badge/"):-len(".svg")]
recipe = path[len("/badge/") : -len(".svg")]
row = next((r for r in recipes_cached() if r["recipe"] == recipe), None)
status = row["status"] if row else "unknown"
return self._send(200, render_badge(recipe, status), "image/svg+xml")

View File

@ -12,7 +12,7 @@
sops-nix.inputs.nixpkgs.follows = "nixpkgs";
};
outputs = { self, nixpkgs, sops-nix }:
outputs = { nixpkgs, sops-nix, ... }:
let
system = "x86_64-linux";
pkgs = nixpkgs.legacyPackages.${system};

View File

@ -1,7 +1,7 @@
# cc-ci machine config. M0 = faithful reproduction of the baseline (docs/baseline.md)
# so the first flake rebuild is a no-op-then-base. Services (swarm/Traefik/Drone/
# bridge/dashboard) are layered in via ./modules/* in later milestones.
{ pkgs, lib, ... }:
{ pkgs, ... }:
{
imports = [
./hardware.nix

View File

@ -31,7 +31,7 @@ in
environment = {
DRONE_RPC_PROTO = "https";
DRONE_RPC_HOST = "drone.ci.commoninternet.net";
DRONE_RUNNER_CAPACITY = maxTests; # MAX_TESTS concurrency cap (see let-binding above)
DRONE_RUNNER_CAPACITY = maxTests; # MAX_TESTS concurrency cap (see let-binding above)
DRONE_RUNNER_NAME = "cc-ci-exec";
# exec runner needs a writable root for build workspaces
DRONE_RUNNER_ROOT = "/var/lib/drone-runner";

View File

@ -1,9 +1,9 @@
# Project package overlay. `abra` (the Co-op Cloud CLI) is exposed as `pkgs.abra` so every
# module (systemPackages, the proxy/drone reconcile oneshots) can use the same pinned build.
{ ... }:
_:
{
nixpkgs.overlays = [
(final: prev: {
(_: prev: {
abra = prev.stdenv.mkDerivation rec {
pname = "abra";
version = "0.13.0-beta";

View File

@ -19,32 +19,34 @@
# Do not also look for a GPG key.
gnupg.sshKeyPaths = [ ];
# M0 proof secret — confirms the decrypt path works end to end.
secrets.test_secret = { };
secrets = {
# M0 proof secret — confirms the decrypt path works end to end.
test_secret = { };
# M2 Drone (A2 internal secrets). drone_rpc_secret is shared between the swarm-deployed
# Drone server (inserted as the `rpc_secret` swarm secret by scripts/deploy-drone.sh) and
# the host exec runner (read via the env template below). drone_gitea_client_secret is the
# Gitea OAuth app secret, inserted as the server's `client_secret` swarm secret.
secrets.drone_rpc_secret = { };
secrets.drone_gitea_client_secret = { };
# M2 Drone (A2 internal secrets). drone_rpc_secret is shared between the swarm-deployed
# Drone server (inserted as the `rpc_secret` swarm secret by scripts/deploy-drone.sh) and
# the host exec runner (read via the env template below). drone_gitea_client_secret is the
# Gitea OAuth app secret, inserted as the server's `client_secret` swarm secret.
drone_rpc_secret = { };
drone_gitea_client_secret = { };
# M3 comment-bridge (A2). Read by modules/bridge.nix's reconcile oneshot, which copies them
# into swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook.
secrets.bridge_webhook_hmac = { };
secrets.bridge_drone_token = { };
secrets.bridge_gitea_token = { };
# M3 comment-bridge (A2). Read by modules/bridge.nix's reconcile oneshot, which copies them
# into swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook.
bridge_webhook_hmac = { };
bridge_drone_token = { };
bridge_gitea_token = { };
# Phase-1c C2: the wildcard TLS cert+key are now sops secrets (in cc-ci-secrets), decrypted at
# activation to /var/lib/ci-certs/live/{fullchain.pem,privkey.pem} — the exact path the traefik
# reconcile (modules/proxy.nix) already reads. Replaces the prior operator-drops-a-cert-file step.
secrets.wildcard_cert = {
path = "/var/lib/ci-certs/live/fullchain.pem";
mode = "0444"; # leaf+intermediate chain — not secret
};
secrets.wildcard_key = {
path = "/var/lib/ci-certs/live/privkey.pem";
mode = "0400"; # private key — root only
# Phase-1c C2: the wildcard TLS cert+key are now sops secrets (in cc-ci-secrets), decrypted at
# activation to /var/lib/ci-certs/live/{fullchain.pem,privkey.pem} — the exact path the traefik
# reconcile (modules/proxy.nix) already reads. Replaces the prior operator-drops-a-cert-file step.
wildcard_cert = {
path = "/var/lib/ci-certs/live/fullchain.pem";
mode = "0444"; # leaf+intermediate chain — not secret
};
wildcard_key = {
path = "/var/lib/ci-certs/live/privkey.pem";
mode = "0400"; # private key — root only
};
};
# EnvironmentFile for the host exec runner: DRONE_RPC_SECRET rendered from the sops secret.

View File

@ -6,11 +6,11 @@ Bakes in the known abra gotchas (re-verify per installed abra version, currently
- `abra app ls -S -m` returns nested {server: {apps: [...]}} — parse the inner structure.
- run non-interactively with `-n` (`--no-input`) everywhere.
"""
from __future__ import annotations
import json
import subprocess
from typing import Optional
ABRA = "abra"
@ -19,13 +19,17 @@ class AbraError(RuntimeError):
pass
def _run_pty(args: list[str], timeout: int = 900, check: bool = True) -> subprocess.CompletedProcess:
def _run_pty(
args: list[str], timeout: int = 900, check: bool = True
) -> subprocess.CompletedProcess:
"""Run abra under a pseudo-TTY (via util-linux `script`). Needed for commands that exec into
a container interactively (backup create / restore: 'the input device is not a TTY')."""
cmd = "abra " + " ".join(args)
proc = subprocess.run(
["script", "-qec", cmd, "/dev/null"],
capture_output=True, text=True, timeout=timeout,
capture_output=True,
text=True,
timeout=timeout,
)
if check and proc.returncode != 0:
raise AbraError(f"[pty] {cmd} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}")
@ -40,12 +44,19 @@ def _run(args: list[str], timeout: int = 300, check: bool = True) -> subprocess.
timeout=timeout,
)
if check and proc.returncode != 0:
raise AbraError(f"abra {' '.join(args)} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}")
raise AbraError(
f"abra {' '.join(args)} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}"
)
return proc
def app_new(recipe: str, domain: str, server: str = "default", version: Optional[str] = None,
secrets: bool = False) -> None:
def app_new(
recipe: str,
domain: str,
server: str = "default",
version: str | None = None,
secrets: bool = False,
) -> None:
args = ["app", "new", recipe]
args += ["-s", server, "-D", domain, "-o", "-n"]
if version:
@ -64,6 +75,7 @@ def env_set(domain: str, key: str, value: str) -> None:
"""Set a key in the app's .env (abra has no setter; edit the file directly)."""
import os
import re
path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env")
with open(path) as fh:
lines = fh.read().splitlines()
@ -86,8 +98,11 @@ def secret_generate(domain: str, timeout: int = 300) -> None:
# captured by _run and never logged. -C -o keep the recipe at the PR checkout (without -o it
# re-resolves to a version tag, dropping the PR's files incl. tests/). check=False: recipes with
# no secrets are a no-op.
_run(["app", "secret", "generate", domain, "--all", "-m", "-C", "-o", "-n"],
timeout=timeout, check=False)
_run(
["app", "secret", "generate", domain, "--all", "-m", "-C", "-o", "-n"],
timeout=timeout,
check=False,
)
def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None:
@ -97,7 +112,7 @@ def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None:
_run(args, timeout=timeout)
def upgrade(domain: str, version: Optional[str] = None, timeout: int = 900) -> None:
def upgrade(domain: str, version: str | None = None, timeout: int = 900) -> None:
args = ["app", "upgrade", domain]
if version:
args.append(version)
@ -127,9 +142,11 @@ def recipe_versions(recipe: str) -> list[str]:
"""Published versions of a recipe, oldest→newest (from the recipe git tags)."""
import os
import subprocess
path = os.path.expanduser(f"~/.abra/recipes/{recipe}")
proc = subprocess.run(["git", "-C", path, "tag", "--sort=creatordate"],
capture_output=True, text=True)
proc = subprocess.run(
["git", "-C", path, "tag", "--sort=creatordate"], capture_output=True, text=True
)
return [t for t in proc.stdout.split("\n") if t.strip()]
@ -149,12 +166,12 @@ def secret_remove_all(domain: str, timeout: int = 300) -> None:
def app_config_remove(domain: str, server: str = "default") -> None:
"""Delete the app's .env config so a re-run can recreate it (teardown completeness)."""
import contextlib
import os
path = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
try:
with contextlib.suppress(FileNotFoundError):
os.remove(path)
except FileNotFoundError:
pass
def app_ls(server: str = "default") -> list[dict]:

View File

@ -3,8 +3,10 @@
The teardown guarantee is sacred: a failed test must never leak an app/volume/secret into the
next run. Callers wrap deploy()/teardown() in try/finally (or a pytest finalizer).
"""
from __future__ import annotations
import contextlib
import datetime
import os
import re
@ -29,7 +31,8 @@ def _docker_names(kind: str, stack: str) -> list[str]:
"""docker <kind> ls names filtered to a stack (kind: service|volume|secret)."""
proc = subprocess.run(
["docker", kind, "ls", "--filter", f"name={stack}", "--format", "{{.Name}}"],
capture_output=True, text=True,
capture_output=True,
text=True,
)
return [n for n in proc.stdout.split("\n") if n.strip()]
@ -50,16 +53,20 @@ def _stack_age_seconds(stack: str) -> float | None:
return None
oldest = None
for s in svcs:
p = subprocess.run(["docker", "service", "inspect", s, "--format", "{{.CreatedAt}}"],
capture_output=True, text=True)
p = subprocess.run(
["docker", "service", "inspect", s, "--format", "{{.CreatedAt}}"],
capture_output=True,
text=True,
)
ts = p.stdout.strip()
try:
# docker emits e.g. 2026-05-27 00:12:33.123 +0000 UTC -> take the leading 19 chars
dt = datetime.datetime.strptime(ts[:19], "%Y-%m-%d %H:%M:%S").replace(
tzinfo=datetime.timezone.utc)
tzinfo=datetime.UTC
)
except ValueError:
continue
age = (datetime.datetime.now(datetime.timezone.utc) - dt).total_seconds()
age = (datetime.datetime.now(datetime.UTC) - dt).total_seconds()
oldest = age if oldest is None else max(oldest, age)
return oldest
@ -107,7 +114,8 @@ def services_converged(domain: str) -> bool:
stack = _stack_name(domain)
proc = subprocess.run(
["docker", "stack", "services", stack, "--format", "{{.Replicas}}"],
capture_output=True, text=True,
capture_output=True,
text=True,
)
rows = [r for r in proc.stdout.split("\n") if r.strip()]
if not rows:
@ -136,8 +144,13 @@ def http_get(domain: str, path: str = "/", timeout: int = 15) -> int:
return 0
def wait_healthy(domain: str, ok_codes=(200, 301, 302), path: str = "/",
deploy_timeout: int = 600, http_timeout: int = 300) -> None:
def wait_healthy(
domain: str,
ok_codes=(200, 301, 302),
path: str = "/",
deploy_timeout: int = 600,
http_timeout: int = 300,
) -> None:
"""Wait for stack services converged, then for the app to answer ok over HTTPS at `path`.
`path` is per-recipe (recipe_meta.HEALTH_PATH), e.g. keycloak uses /realms/master."""
deadline = time.time() + deploy_timeout
@ -181,7 +194,8 @@ def _app_container(domain: str, service: str = "app") -> str:
name = f"{_stack_name(domain)}_{service}"
proc = subprocess.run(
["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"],
capture_output=True, text=True,
capture_output=True,
text=True,
)
cid = proc.stdout.strip().split("\n")[0]
if not cid:
@ -221,8 +235,8 @@ def teardown_app(domain: str, verify: bool = True) -> None:
stack = _stack_name(domain)
abra.undeploy(domain)
if _docker_names("service", stack):
_force_stack_rm(stack) # fallback: abra undeploy didn't clear it
abra.volume_remove(domain) # needs the .env -> before removing it
_force_stack_rm(stack) # fallback: abra undeploy didn't clear it
abra.volume_remove(domain) # needs the .env -> before removing it
abra.secret_remove_all(domain)
# belt-and-suspenders: drop any volumes/secrets abra missed, by stack name. A volume can be
# briefly held by a just-stopped task after `stack rm`, so retry the volume removal.
@ -238,7 +252,7 @@ def teardown_app(domain: str, verify: bool = True) -> None:
time.sleep(3)
for s in _docker_names("secret", stack):
subprocess.run(["docker", "secret", "rm", s], capture_output=True, text=True)
abra.app_config_remove(domain) # only now (stack gone) drop the .env
abra.app_config_remove(domain) # only now (stack gone) drop the .env
if verify:
residual = _residual(domain)
@ -252,6 +266,7 @@ def janitor(max_age_seconds: int | None = None) -> None:
docker primitives so it works even when the .env is gone (A2/A3). Default 2h, env-overridable
via CCCI_JANITOR_MAX_AGE (e.g. 0 to reap all matching orphans immediately)."""
import os
if max_age_seconds is None:
max_age_seconds = int(os.environ.get("CCCI_JANITOR_MAX_AGE", "7200"))
seen = set()
@ -271,7 +286,5 @@ def janitor(max_age_seconds: int | None = None) -> None:
age = _stack_age_seconds(stack)
if age is not None and age < max_age_seconds:
continue # likely a concurrent in-flight run; leave it
try:
with contextlib.suppress(Exception):
teardown_app(name, verify=False)
except Exception:
pass

View File

@ -3,6 +3,7 @@
Domain = "<recipe[:4]>-<6hex(recipe|pr|ref)>.ci.commoninternet.net" — short enough for Docker's
64-char swarm config/secret name limit, unique per run, collision-safe across recipes (DECISIONS.md).
"""
from __future__ import annotations
import hashlib

View File

@ -14,6 +14,7 @@ tests/<recipe>/. Teardown is guaranteed by the conftest fixture finalizer.
Run env (python with pytest+playwright, PLAYWRIGHT_BROWSERS_PATH) is provided by `cc-ci-run`
(modules/harness.nix); invoke as: cc-ci-run runner/run_recipe_ci.py
"""
from __future__ import annotations
import glob
@ -26,6 +27,7 @@ import tempfile
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(ROOT, "runner"))
from harness import lifecycle, naming # noqa: E402
STAGE_FILES = {
"install": "test_install.py",
"upgrade": "test_upgrade.py",
@ -40,7 +42,8 @@ def _redact_values() -> list[str]:
vals = set()
for p in glob.glob("/run/secrets/*"):
try:
v = open(p).read().strip()
with open(p) as f:
v = f.read().strip()
except OSError:
continue
if len(v) >= 8:
@ -55,8 +58,15 @@ def run_stage_redacted(cmd: list[str], env: dict | None = None) -> int:
"""Run a stage subprocess, streaming its output live (so Drone logs stay tail-able) but masking
any known infra-secret value first. Belt-and-suspenders: the harness already never prints
secrets and abra doesn't echo generated ones."""
proc = subprocess.Popen(cmd, cwd=ROOT, env=env, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, text=True, bufsize=1)
proc = subprocess.Popen(
cmd,
cwd=ROOT,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
)
assert proc.stdout is not None
for line in proc.stdout:
for v in _REDACT:
@ -70,7 +80,8 @@ def run_stage_redacted(cmd: list[str], env: dict | None = None) -> int:
def _gitea_token() -> str | None:
tok = os.environ.get("GITEA_TOKEN")
if not tok and os.path.exists("/run/secrets/bridge_gitea_token"):
tok = open("/run/secrets/bridge_gitea_token").read().strip()
with open("/run/secrets/bridge_gitea_token") as f:
tok = f.read().strip()
return tok or None
@ -97,8 +108,10 @@ def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
# to a foreign host). Non-fatal: if upstream is unreachable, upgrade degrades to a skip.
upstream = f"https://git.coopcloud.tech/coop-cloud/{recipe}.git"
# Explicit tags refspec — a bare `fetch --tags <url>` errors "couldn't find remote ref HEAD".
subprocess.run(["git", "-C", dest, "fetch", "--quiet", upstream,
"refs/tags/*:refs/tags/*"], check=False)
subprocess.run(
["git", "-C", dest, "fetch", "--quiet", upstream, "refs/tags/*:refs/tags/*"],
check=False,
)
else:
# Clean re-fetch from the catalogue. rm first so a leftover dir from a prior SRC+REF run
# (which points origin at the private mirror and may lack version tags) can't poison the
@ -178,7 +191,9 @@ def run_recipe_local(recipe: str, local_tests: str | None) -> int | None:
lifecycle.deploy_app(recipe, domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain)
env = dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
return run_stage_redacted([sys.executable, "-m", "pytest", "-v", "-rA", local_tests], env=env)
return run_stage_redacted(
[sys.executable, "-m", "pytest", "-v", "-rA", local_tests], env=env
)
finally:
lifecycle.teardown_app(domain, verify=False)

View File

@ -10,7 +10,8 @@
# GITEA_USERNAME=autonomic-bot GITEA_PASSWORD=… bash scripts/bootstrap-drone-oauth.sh
# Optionally ACTIVATE a repo: REPO=recipe-maintainers/cc-ci (default).
set -euo pipefail
: "${GITEA_USERNAME:?set GITEA_USERNAME}"; : "${GITEA_PASSWORD:?set GITEA_PASSWORD}"
: "${GITEA_USERNAME:?set GITEA_USERNAME}"
: "${GITEA_PASSWORD:?set GITEA_PASSWORD}"
GITEA="${GITEA:-https://git.autonomic.zone}"
DRONE="${DRONE:-https://drone.ci.commoninternet.net}"
CLIENT_ID="${CLIENT_ID:-ab4cdb9d-ee96-4867-875f-87384505fc52}"
@ -18,7 +19,9 @@ REPO="${REPO:-recipe-maintainers/cc-ci}"
RES=(--resolve "drone.ci.commoninternet.net:443:127.0.0.1")
export PATH=/run/current-system/sw/bin:"$PATH"
cj=$(mktemp); dj=$(mktemp); az=$(mktemp)
cj=$(mktemp)
dj=$(mktemp)
az=$(mktemp)
trap 'rm -f "$cj" "$dj" "$az"' EXIT
# 1) Gitea web login (CSRF cookie -> form field).
@ -31,9 +34,10 @@ curl -s -b "$cj" -c "$cj" -o /dev/null \
"$GITEA/user/login"
# 2) Drone /login -> Gitea authorize URL.
loc=$(curl -sk -c "$dj" -o /dev/null -D - "${RES[@]}" "$DRONE/login" \
| awk 'tolower($1)=="location:"{print $2}' | tr -d '\r')
azh=$(mktemp); trap 'rm -f "$cj" "$dj" "$az" "$azh"' EXIT
loc=$(curl -sk -c "$dj" -o /dev/null -D - "${RES[@]}" "$DRONE/login" |
awk 'tolower($1)=="location:"{print $2}' | tr -d '\r')
azh=$(mktemp)
trap 'rm -f "$cj" "$dj" "$az" "$azh"' EXIT
curl -sk -b "$cj" -c "$cj" -o "$az" -D "$azh" "$loc"
# 3) Either the OAuth app auto-approves (bot already granted it earlier => Gitea 302s straight to the

View File

@ -9,7 +9,7 @@
# (shfmt/shellcheck), YAML (yamllint). Run from the repo root.
set -uo pipefail
cd "$(dirname "$0")/.."
cd "$(dirname "$0")/.." || exit 1
FIX=0
[ "${1:-}" = "--fix" ] && FIX=1
@ -19,7 +19,7 @@ SHFMT_FLAGS=(-i 2 -ci)
fail=0
section() { printf '\n=== %s ===\n' "$1"; }
note() { printf ' %s\n' "$1"; }
note() { printf ' %s\n' "$1"; }
# Nix files (exclude the `secrets/` submodule).
mapfile -t NIX_FILES < <(find . -name '*.nix' -not -path './.git/*' -not -path './secrets/*' | sort)
@ -30,7 +30,10 @@ section "Nix — nixpkgs-fmt"
if [ "$FIX" = 1 ]; then
nixpkgs-fmt "${NIX_FILES[@]}" || fail=1
else
nixpkgs-fmt --check "${NIX_FILES[@]}" || { note "run: scripts/lint.sh --fix"; fail=1; }
nixpkgs-fmt --check "${NIX_FILES[@]}" || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi
section "Nix — statix"
@ -51,7 +54,10 @@ section "Python — ruff format"
if [ "$FIX" = 1 ]; then
ruff format . || fail=1
else
ruff format --check . || { note "run: scripts/lint.sh --fix"; fail=1; }
ruff format --check . || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi
section "Python — ruff check"
@ -66,7 +72,10 @@ if [ "${#SH_FILES[@]}" -gt 0 ]; then
if [ "$FIX" = 1 ]; then
shfmt "${SHFMT_FLAGS[@]}" -w "${SH_FILES[@]}" || fail=1
else
shfmt "${SHFMT_FLAGS[@]}" -d "${SH_FILES[@]}" || { note "run: scripts/lint.sh --fix"; fail=1; }
shfmt "${SHFMT_FLAGS[@]}" -d "${SH_FILES[@]}" || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi
section "Shell — shellcheck"

View File

@ -4,6 +4,7 @@ A run is parameterized by env: RECIPE, REF (PR head sha), PR, SRC (head repo). T
computes a unique app domain per run so concurrent runs never collide, and GUARANTEES teardown
(undeploy + volume + secret removal) via a finalizer, even on failure.
"""
from __future__ import annotations
import os
@ -24,8 +25,12 @@ def _recipe_meta(recipe: str) -> dict:
A recipe may ship tests/<recipe>/recipe_meta.py with any of: HEALTH_PATH (str),
HEALTH_OK (tuple of status codes), DEPLOY_TIMEOUT (int), HTTP_TIMEOUT (int)."""
path = os.path.join(os.path.dirname(__file__), recipe, "recipe_meta.py")
meta = {"HEALTH_PATH": "/", "HEALTH_OK": (200, 301, 302),
"DEPLOY_TIMEOUT": 600, "HTTP_TIMEOUT": 300}
meta = {
"HEALTH_PATH": "/",
"HEALTH_OK": (200, 301, 302),
"DEPLOY_TIMEOUT": 600,
"HTTP_TIMEOUT": 300,
}
if os.path.exists(path):
ns: dict = {}
with open(path) as fh:
@ -57,8 +62,13 @@ def meta(recipe) -> dict:
def _wait_healthy(domain, meta):
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
@pytest.fixture

View File

@ -3,6 +3,7 @@ backup, mutate, restore, assert the restored state matches the pre-mutation (bac
The cryptpad `app` service is labelled `backupbot.backup=true`, so its volumes (incl. cryptpad_data)
are backed up. Marker is checked via `exec_in_app` (data isn't HTTP-served)."""
import os
import sys
@ -26,7 +27,13 @@ def test_backup_mutate_restore(deployed, meta):
# 3) restore -> state returns to the backed-up "original"
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original", \
"restore did not return the pre-mutation state"
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,4 +1,5 @@
"""cryptpad — install stage (recipe #3, stateful/no-DB). D2 install + D3 Playwright."""
import os
import sys
@ -23,7 +24,10 @@ def test_playwright_loads_cryptpad(deployed_app):
ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page()
resp = page.goto(url, wait_until="load", timeout=60000)
assert resp is not None and resp.status in (200, 304), f"page status {resp and resp.status}"
assert resp is not None and resp.status in (
200,
304,
), f"page status {resp and resp.status}"
body = page.content().lower()
assert "cryptpad" in body or "<html" in body, "no cryptpad content served"
finally:

View File

@ -3,6 +3,7 @@ persistent volume, upgrade to current/$REF, assert the app stays healthy and the
cryptpad data isn't HTTP-served as a static file (it's an encrypted datastore), so the marker is
written into the cryptpad_data volume and read back via `exec_in_app` (docker exec), not HTTP."""
import os
import sys
@ -22,8 +23,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev
@ -35,10 +41,16 @@ def test_upgrade_preserves_data(old_app, meta):
# upgrade previous -> current/$REF
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives", \
"data did not survive the upgrade"
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"

View File

@ -1,5 +1,6 @@
"""custom-html — backup/restore stage (D2): backup, mutate state, restore, assert the restored
state matches the pre-mutation (backed-up) state."""
import os
import sys
@ -24,5 +25,6 @@ def test_backup_mutate_restore(deployed):
# 3) restore -> state returns to the backed-up "original"
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain)
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original", \
"restore did not return the pre-mutation state"
assert (
lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,6 +1,7 @@
"""custom-html — install stage (recipe #1, simple/stateless). D2 install + D3 Playwright."""
import sys
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402

View File

@ -1,5 +1,6 @@
"""custom-html — upgrade stage (D2): deploy the previous published version, write data, upgrade
to the current/$REF version, and assert the app stays healthy and data survives."""
import os
import sys
@ -35,5 +36,6 @@ def test_upgrade_preserves_data(old_app):
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") == 200
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives", \
"data did not survive the upgrade"
assert (
lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives"
), "data did not survive the upgrade"

View File

@ -1,5 +1,6 @@
"""Recipe-specific keycloak admin-API helpers (not harness). Used by the upgrade/backup stages to
write a real data marker (a realm) into mariadb and verify it survives upgrade / backup-restore."""
import json
import ssl
import sys
@ -21,12 +22,20 @@ def admin_password(domain: str) -> str:
def admin_token(domain: str, password: str, user: str = "admin") -> str:
data = urllib.parse.urlencode({
"grant_type": "password", "client_id": "admin-cli", "username": user, "password": password,
}).encode()
data = urllib.parse.urlencode(
{
"grant_type": "password",
"client_id": "admin-cli",
"username": user,
"password": password,
}
).encode()
req = urllib.request.Request(
f"https://{domain}/realms/master/protocol/openid-connect/token", data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"}, method="POST")
f"https://{domain}/realms/master/protocol/openid-connect/token",
data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
with urllib.request.urlopen(req, timeout=30, context=_CTX) as r:
return json.load(r)["access_token"]
@ -36,8 +45,9 @@ def _admin(domain, token, path, method="GET", body=None):
headers = {"Authorization": "Bearer " + token}
if data:
headers["Content-Type"] = "application/json"
req = urllib.request.Request(f"https://{domain}/admin{path}", data=data, headers=headers,
method=method)
req = urllib.request.Request(
f"https://{domain}/admin{path}", data=data, headers=headers, method=method
)
try:
with urllib.request.urlopen(req, timeout=30, context=_CTX) as r:
return r.status

View File

@ -1,6 +1,6 @@
# Per-recipe harness config for keycloak (DB-backed: keycloak + mariadb). Read by the shared
# conftest — enrolling this recipe needs NO change to runner/harness code (D5).
HEALTH_PATH = "/realms/master" # 200 JSON once keycloak is up (not "/", which redirects)
HEALTH_PATH = "/realms/master" # 200 JSON once keycloak is up (not "/", which redirects)
HEALTH_OK = (200,)
DEPLOY_TIMEOUT = 600 # JVM + DB migration are slow on a 2-vCPU VM
DEPLOY_TIMEOUT = 600 # JVM + DB migration are slow on a 2-vCPU VM
HTTP_TIMEOUT = 600

View File

@ -1,11 +1,12 @@
"""keycloak — backup/restore stage (D2): create a realm, backup, delete it (mutate), restore,
assert the realm is back (mariadb restored to the backed-up state)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
import kc_admin # noqa: E402
from harness import lifecycle # noqa: E402
def test_backup_mutate_restore(deployed):
@ -24,7 +25,8 @@ def test_backup_mutate_restore(deployed):
# 3) restore -> realm returns
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, path="/realms/master", ok_codes=(200,),
deploy_timeout=600, http_timeout=600)
lifecycle.wait_healthy(
domain, path="/realms/master", ok_codes=(200,), deploy_timeout=600, http_timeout=600
)
tok2 = kc_admin.admin_token(domain, pw)
assert kc_admin.marker_realm_exists(domain, tok2), "restore did not bring back the realm"

View File

@ -1,4 +1,5 @@
"""keycloak — install stage (recipe #2, DB-backed SSO; D2 install + D3 Playwright)."""
import os
import sys
@ -23,6 +24,8 @@ def test_playwright_admin_login(deployed_app):
page.goto(url, wait_until="domcontentloaded", timeout=45000)
# admin console redirects to the login form; wait for a username field to render
page.wait_for_selector("input#username, input[name='username']", timeout=30000)
assert "keycloak" in page.content().lower() or page.locator("input#username").count() > 0
assert (
"keycloak" in page.content().lower() or page.locator("input#username").count() > 0
)
finally:
browser.close()

View File

@ -1,13 +1,14 @@
"""keycloak — upgrade stage (D2): deploy previous version, create a realm (DB data), upgrade to
current/$REF, assert the app is healthy and the realm survived (mariadb data preserved)."""
import os
import sys
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
import kc_admin # noqa: E402
from harness import lifecycle # noqa: E402
@pytest.fixture
@ -18,8 +19,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev
@ -31,8 +37,13 @@ def test_upgrade_preserves_realm(old_app, meta):
assert kc_admin.marker_realm_exists(domain, tok), "marker realm not created"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# re-auth (token from the old instance is fine, but get a fresh one post-upgrade) and verify
tok2 = kc_admin.admin_token(domain, pw)

View File

@ -3,6 +3,7 @@ dumps the DB), mutate (drop it), restore (post-hook reloads), assert the restore
Exercises the recipe's real DB-dump backup hook (postgres + minio are both backupbot-labelled); the
postgres marker is the meaningful Docs-metadata data path."""
import os
import sys
@ -18,16 +19,28 @@ def _psql(domain, sql):
def test_backup_mutate_restore(deployed, meta):
domain = deployed
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');")
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
lifecycle.backup_app(domain)
_psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ("", "NULL"), "drop did not take"
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
assert _psql(domain, "SELECT v FROM ci_marker;") == "original", \
"restore did not return the pre-mutation postgres state"
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -4,6 +4,7 @@ minio + nginx) converges and serves the app over real HTTPS through the gateway.
Login is OIDC-gated (no live OIDC provider in CI), so the functional assertion is that the frontend
SPA is served (unauthenticated landing), not an authenticated flow."""
import os
import sys
@ -27,7 +28,11 @@ def test_playwright_loads_frontend(deployed_app):
ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page()
resp = page.goto(url, wait_until="domcontentloaded", timeout=60000)
assert resp is not None and resp.status in (200, 301, 302), f"page status {resp and resp.status}"
assert resp is not None and resp.status in (
200,
301,
302,
), f"page status {resp and resp.status}"
assert "<html" in page.content().lower(), "no HTML served by the frontend"
finally:
browser.close()

View File

@ -3,6 +3,7 @@ upgrade to current/$REF, assert the app stays healthy and the postgres data surv
Docs metadata lives in postgres, so the marker is a row in a dedicated `ci_marker` table (the app's
own Django migrations don't touch it), read back via `psql` in the `db` service."""
import os
import sys
@ -25,21 +26,35 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev
def test_upgrade_preserves_data(old_app, meta):
domain, prev = old_app
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');")
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives", \
"postgres data did not survive the upgrade"
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

View File

@ -1,7 +1,7 @@
# Per-recipe harness config for matrix-synapse (recipe #4 — DB + media store; the large-volume /
# DB-backed category). Base recipe = synapse `app` + postgres `db` + nginx `web`. server_name is
# DOMAIN (set by abra), so no EXTRA_ENV needed. Synapse + postgres startup is slow -> long timeouts.
HEALTH_PATH = "/_matrix/client/versions" # 200 JSON once synapse is serving the client API
HEALTH_PATH = "/_matrix/client/versions" # 200 JSON once synapse is serving the client API
HEALTH_OK = (200,)
DEPLOY_TIMEOUT = 600
HTTP_TIMEOUT = 600

View File

@ -4,6 +4,7 @@ reloads the dump), assert the restored DB matches the pre-mutation state.
This exercises the real DB-dump backup hook (backupbot.backup.pre-hook / restore.post-hook), not a
plain volume copy — the meaningful data path for a postgres-backed app."""
import os
import sys
@ -20,18 +21,30 @@ def test_backup_mutate_restore(deployed, meta):
domain = deployed
# 1) establish original state in postgres, then back up (pg_backup.sh dumps the DB)
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');")
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
lifecycle.backup_app(domain)
# 2) mutate: drop the marker table (diverge from the backup)
_psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ("", "NULL"), "drop did not take"
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
# 3) restore -> the dumped DB (with the marker) is reloaded
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
assert _psql(domain, "SELECT v FROM ci_marker;") == "original", \
"restore did not return the pre-mutation postgres state"
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -1,6 +1,7 @@
"""matrix-synapse — install stage (recipe #4, DB + media store). D2 install: the synapse client API
answers 200 over real HTTPS through the gateway (nginx -> synapse). The base recipe has no browser
UI (element-web is an addon), so the functional assertion is the JSON client API, not Playwright."""
import json
import os
import sys
@ -18,4 +19,6 @@ def test_client_api_advertises_versions(deployed_app):
"""The client-API version document is real synapse JSON (proves the app, not just a proxy 200)."""
body = lifecycle.http_body(deployed_app, "/_matrix/client/versions")
doc = json.loads(body)
assert isinstance(doc.get("versions"), list) and doc["versions"], "no matrix client versions advertised"
assert (
isinstance(doc.get("versions"), list) and doc["versions"]
), "no matrix client versions advertised"

View File

@ -3,6 +3,7 @@ upgrade to current/$REF, assert the app stays healthy and the postgres data surv
Matrix data lives in postgres, so the marker is a row in a dedicated `ci_marker` table (synapse's
own schema migrations don't touch it), read back via `psql` in the `db` service."""
import os
import sys
@ -25,24 +26,38 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev
def test_upgrade_preserves_data(old_app, meta):
domain, prev = old_app
# write a marker row into postgres (independent of synapse's own tables)
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');")
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
# upgrade previous -> current/$REF
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives", \
"postgres data did not survive the upgrade"
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

View File

@ -3,6 +3,7 @@ mutate, restore, assert the restored state matches the pre-mutation state.
The n8n `app` service is labelled `backupbot.backup=true` with `backupbot.backup.path=/home/node/.n8n`,
so a marker file there is backed up; checked via `exec_in_app`."""
import os
import sys
@ -23,7 +24,13 @@ def test_backup_mutate_restore(deployed, meta):
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original", \
"restore did not return the pre-mutation state"
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,4 +1,5 @@
"""n8n — install stage (recipe #6, workflow automation). D2 install + D3 Playwright."""
import os
import sys
@ -22,7 +23,10 @@ def test_playwright_loads_editor(deployed_app):
ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page()
resp = page.goto(url, wait_until="domcontentloaded", timeout=60000)
assert resp is not None and resp.status in (200, 304), f"page status {resp and resp.status}"
assert resp is not None and resp.status in (
200,
304,
), f"page status {resp and resp.status}"
body = page.content().lower()
assert "n8n" in body or "<html" in body, "no n8n content served"
finally:

View File

@ -3,6 +3,7 @@ persistent /home/node/.n8n volume, upgrade to current/$REF, assert health + data
n8n state lives in the .n8n volume (sqlite + config); the marker is a file there, read back via
`exec_in_app` (not HTTP-served)."""
import os
import sys
@ -22,8 +23,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev
@ -33,9 +39,15 @@ def test_upgrade_preserves_data(old_app, meta):
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"])
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives", \
"data did not survive the upgrade"
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"