style(1b): auto-format + lint-clean the whole codebase (RL1)

Mechanical, semantics-preserving cleanup so the codebase passes the new lint stage:
- ruff format: all 32 Python files (wraps long signatures, normalizes quotes/blank lines).
- nixpkgs-fmt: modules/drone-runner.nix.
- shfmt (-i 2 -ci): scripts/*.sh.

Lint fixes (reviewed, behavior-preserving — no test weakened):
- ruff SIM105: try/except-pass -> contextlib.suppress (abra.py app_config rm; lifecycle.py janitor).
- ruff SIM115: open().read() -> with open() (run_recipe_ci.py redaction-values + gitea-token).
- statix: merge repeated sops `secrets.*` keys into one `secrets = { ... }` (comments kept);
  empty fn pattern `{ ... }:` -> `_:` (packages.nix).
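- deadnix: drop unused lambda args (flake `self`; configuration.nix `lib`; overlay `final` -> `_`).
- annotation/stdlib modernization: `Optional[str]` -> `str | None` (abra.py, dropping the now-unused
  `typing.Optional` import); `datetime.timezone.utc` -> `datetime.UTC` (lifecycle.py; note that
  `datetime.UTC` requires Python >= 3.11).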

Verified on cc-ci: `scripts/lint.sh` -> lint: PASS; nixosConfigurations.cc-ci evaluates;
all Python byte-compiles. The reformat changes the hash of the deployed bridge/dashboard/runner
sources, so cc-ci will be rebuilt to the new closure in W2 before the cold D1-D10 re-verification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-27 20:52:05 +01:00
parent a0ea2f0aa9
commit 2cede01ed7
35 changed files with 431 additions and 185 deletions

View File

@ -26,6 +26,7 @@ Config (env): BRIDGE_LISTEN, GITEA_API, DRONE_URL, CI_REPO, HMAC_FILE, DRONE_TOK
GITEA_TOKEN_FILE, POLL_INTERVAL (default 30), POLL_REPOS (csv of enrolled repos), AUTH_ALLOWLIST GITEA_TOKEN_FILE, POLL_INTERVAL (default 30), POLL_REPOS (csv of enrolled repos), AUTH_ALLOWLIST
(csv, optional). (csv, optional).
""" """
import hashlib import hashlib
import hmac import hmac
import json import json
@ -118,14 +119,22 @@ def trigger_build(recipe, ref, pr, src):
def post_comment(owner, repo, number, body): def post_comment(owner, repo, number, body):
status, c = _api(f"{GITEA_API}/repos/{owner}/{repo}/issues/{number}/comments", GITEA_TOKEN, status, c = _api(
method="POST", data={"body": body}) f"{GITEA_API}/repos/{owner}/{repo}/issues/{number}/comments",
GITEA_TOKEN,
method="POST",
data={"body": body},
)
return c.get("id") if status in (200, 201) and c else None return c.get("id") if status in (200, 201) and c else None
def edit_comment(owner, repo, comment_id, body): def edit_comment(owner, repo, comment_id, body):
_api(f"{GITEA_API}/repos/{owner}/{repo}/issues/comments/{comment_id}", GITEA_TOKEN, _api(
method="PATCH", data={"body": body}) f"{GITEA_API}/repos/{owner}/{repo}/issues/comments/{comment_id}",
GITEA_TOKEN,
method="PATCH",
data={"body": body},
)
def build_status(num): def build_status(num):
@ -140,6 +149,7 @@ def watch_and_reflect(owner, name, number, num, recipe, sha, comment_id, run_url
"""Poll the Drone build to completion, then edit the PR comment to reflect the outcome (D7). """Poll the Drone build to completion, then edit the PR comment to reflect the outcome (D7).
Bounded by the build timeout (60m) + margin.""" Bounded by the build timeout (60m) + margin."""
import time as _t import time as _t
deadline = _t.time() + 75 * 60 deadline = _t.time() + 75 * 60
last = None last = None
while _t.time() < deadline: while _t.time() < deadline:
@ -150,8 +160,12 @@ def watch_and_reflect(owner, name, number, num, recipe, sha, comment_id, run_url
icon = {"success": ""}.get(last, "") icon = {"success": ""}.get(last, "")
verdict = "passed" if last == "success" else (last or "did not complete") verdict = "passed" if last == "success" else (last or "did not complete")
if comment_id: if comment_id:
edit_comment(owner, name, comment_id, edit_comment(
f"cc-ci: run for `{recipe}` @ `{sha[:8]}` {icon} **{verdict}** → {run_url}") owner,
name,
comment_id,
f"cc-ci: run for `{recipe}` @ `{sha[:8]}` {icon} **{verdict}** → {run_url}",
)
log(f"reflected outcome build {num} ({recipe} PR #{number}): {last}") log(f"reflected outcome build {num} ({recipe} PR #{number}): {last}")
@ -192,14 +206,19 @@ def process_testme(full_name, owner, name, number, user, comment_id, source):
post_comment(owner, name, number, "cc-ci: failed to start a CI run (see bridge logs).") post_comment(owner, name, number, "cc-ci: failed to start a CI run (see bridge logs).")
return None, "trigger failed" return None, "trigger failed"
run_url = f"{DRONE_URL}/{CI_REPO}/{num}" run_url = f"{DRONE_URL}/{CI_REPO}/{num}"
cid = post_comment(owner, name, number, cid = post_comment(
f"cc-ci: started CI run for `{name}` @ `{head['sha'][:8]}` → {run_url}") owner, name, number, f"cc-ci: started CI run for `{name}` @ `{head['sha'][:8]}` → {run_url}"
log(f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} " )
f"(PR #{number}, comment {comment_id}) by {user}") log(
f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} "
f"(PR #{number}, comment {comment_id}) by {user}"
)
# Reflect the final pass/fail back onto that comment when the build finishes (D7). # Reflect the final pass/fail back onto that comment when the build finishes (D7).
threading.Thread(target=watch_and_reflect, threading.Thread(
args=(owner, name, number, num, name, head["sha"], cid, run_url), target=watch_and_reflect,
daemon=True).start() args=(owner, name, number, num, name, head["sha"], cid, run_url),
daemon=True,
).start()
return run_url, "ok" return run_url, "ok"
@ -242,9 +261,14 @@ class Handler(BaseHTTPRequestHandler):
return self._send(204, "not a PR") return self._send(204, "not a PR")
run_url, reason = process_testme( run_url, reason = process_testme(
repo.get("full_name", ""), (repo.get("owner") or {}).get("login", ""), repo.get("full_name", ""),
repo.get("name", ""), issue.get("number"), (repo.get("owner") or {}).get("login", ""),
c.get("user", {}).get("login", ""), c.get("id"), "webhook") repo.get("name", ""),
issue.get("number"),
c.get("user", {}).get("login", ""),
c.get("id"),
"webhook",
)
if not run_url: if not run_url:
if reason == "duplicate": if reason == "duplicate":
return self._send(200, "already handled") return self._send(200, "already handled")

View File

@ -11,6 +11,7 @@ ref, when, and a link to the canonical Drone run. Also serves an embeddable SVG
Config (env): DRONE_URL, CI_REPO, DRONE_TOKEN_FILE, DASH_LISTEN (default 0.0.0.0:8080), Config (env): DRONE_URL, CI_REPO, DRONE_TOKEN_FILE, DASH_LISTEN (default 0.0.0.0:8080),
POLL_INTERVAL (default 60), CACHE_TTL (default 30). POLL_INTERVAL (default 60), CACHE_TTL (default 30).
""" """
import html import html
import json import json
import os import os
@ -34,8 +35,14 @@ DRONE_TOKEN = _read(os.environ["DRONE_TOKEN_FILE"])
_CACHE = {"ts": 0.0, "recipes": []} _CACHE = {"ts": 0.0, "recipes": []}
_COLORS = {"success": "#3fb950", "failure": "#f85149", "error": "#f85149", _COLORS = {
"running": "#d29922", "pending": "#d29922", "killed": "#8b949e"} "success": "#3fb950",
"failure": "#f85149",
"error": "#f85149",
"running": "#d29922",
"pending": "#d29922",
"killed": "#8b949e",
}
def log(*a): def log(*a):
@ -43,8 +50,9 @@ def log(*a):
def _drone(path): def _drone(path):
req = urllib.request.Request(f"{DRONE_URL}{path}", req = urllib.request.Request(
headers={"Authorization": f"Bearer {DRONE_TOKEN}"}) f"{DRONE_URL}{path}", headers={"Authorization": f"Bearer {DRONE_TOKEN}"}
)
with urllib.request.urlopen(req, timeout=30) as resp: with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read()) return json.loads(resp.read())
@ -72,14 +80,16 @@ def latest_per_recipe():
rows = [] rows = []
for recipe, b in sorted(latest.items()): for recipe, b in sorted(latest.items()):
ref = (b.get("params") or {}).get("REF") or "" ref = (b.get("params") or {}).get("REF") or ""
rows.append({ rows.append(
"recipe": recipe, {
"status": b.get("status", "unknown"), "recipe": recipe,
"number": b.get("number"), "status": b.get("status", "unknown"),
"ref": ref[:8], "number": b.get("number"),
"finished": b.get("finished") or 0, "ref": ref[:8],
"url": f"{DRONE_URL}/{CI_REPO}/{b.get('number')}", "finished": b.get("finished") or 0,
}) "url": f"{DRONE_URL}/{CI_REPO}/{b.get('number')}",
}
)
return rows return rows
@ -163,7 +173,7 @@ class Handler(BaseHTTPRequestHandler):
if path in ("/healthz", "/dashboard/healthz"): if path in ("/healthz", "/dashboard/healthz"):
return self._send(200, "ok", "text/plain") return self._send(200, "ok", "text/plain")
if path.startswith("/badge/") and path.endswith(".svg"): if path.startswith("/badge/") and path.endswith(".svg"):
recipe = path[len("/badge/"):-len(".svg")] recipe = path[len("/badge/") : -len(".svg")]
row = next((r for r in recipes_cached() if r["recipe"] == recipe), None) row = next((r for r in recipes_cached() if r["recipe"] == recipe), None)
status = row["status"] if row else "unknown" status = row["status"] if row else "unknown"
return self._send(200, render_badge(recipe, status), "image/svg+xml") return self._send(200, render_badge(recipe, status), "image/svg+xml")

View File

@ -12,7 +12,7 @@
sops-nix.inputs.nixpkgs.follows = "nixpkgs"; sops-nix.inputs.nixpkgs.follows = "nixpkgs";
}; };
outputs = { self, nixpkgs, sops-nix }: outputs = { nixpkgs, sops-nix, ... }:
let let
system = "x86_64-linux"; system = "x86_64-linux";
pkgs = nixpkgs.legacyPackages.${system}; pkgs = nixpkgs.legacyPackages.${system};

View File

@ -1,7 +1,7 @@
# cc-ci machine config. M0 = faithful reproduction of the baseline (docs/baseline.md) # cc-ci machine config. M0 = faithful reproduction of the baseline (docs/baseline.md)
# so the first flake rebuild is a no-op-then-base. Services (swarm/Traefik/Drone/ # so the first flake rebuild is a no-op-then-base. Services (swarm/Traefik/Drone/
# bridge/dashboard) are layered in via ./modules/* in later milestones. # bridge/dashboard) are layered in via ./modules/* in later milestones.
{ pkgs, lib, ... }: { pkgs, ... }:
{ {
imports = [ imports = [
./hardware.nix ./hardware.nix

View File

@ -31,7 +31,7 @@ in
environment = { environment = {
DRONE_RPC_PROTO = "https"; DRONE_RPC_PROTO = "https";
DRONE_RPC_HOST = "drone.ci.commoninternet.net"; DRONE_RPC_HOST = "drone.ci.commoninternet.net";
DRONE_RUNNER_CAPACITY = maxTests; # MAX_TESTS concurrency cap (see let-binding above) DRONE_RUNNER_CAPACITY = maxTests; # MAX_TESTS concurrency cap (see let-binding above)
DRONE_RUNNER_NAME = "cc-ci-exec"; DRONE_RUNNER_NAME = "cc-ci-exec";
# exec runner needs a writable root for build workspaces # exec runner needs a writable root for build workspaces
DRONE_RUNNER_ROOT = "/var/lib/drone-runner"; DRONE_RUNNER_ROOT = "/var/lib/drone-runner";

View File

@ -1,9 +1,9 @@
# Project package overlay. `abra` (the Co-op Cloud CLI) is exposed as `pkgs.abra` so every # Project package overlay. `abra` (the Co-op Cloud CLI) is exposed as `pkgs.abra` so every
# module (systemPackages, the proxy/drone reconcile oneshots) can use the same pinned build. # module (systemPackages, the proxy/drone reconcile oneshots) can use the same pinned build.
{ ... }: _:
{ {
nixpkgs.overlays = [ nixpkgs.overlays = [
(final: prev: { (_: prev: {
abra = prev.stdenv.mkDerivation rec { abra = prev.stdenv.mkDerivation rec {
pname = "abra"; pname = "abra";
version = "0.13.0-beta"; version = "0.13.0-beta";

View File

@ -19,32 +19,34 @@
# Do not also look for a GPG key. # Do not also look for a GPG key.
gnupg.sshKeyPaths = [ ]; gnupg.sshKeyPaths = [ ];
# M0 proof secret — confirms the decrypt path works end to end. secrets = {
secrets.test_secret = { }; # M0 proof secret — confirms the decrypt path works end to end.
test_secret = { };
# M2 Drone (A2 internal secrets). drone_rpc_secret is shared between the swarm-deployed # M2 Drone (A2 internal secrets). drone_rpc_secret is shared between the swarm-deployed
# Drone server (inserted as the `rpc_secret` swarm secret by scripts/deploy-drone.sh) and # Drone server (inserted as the `rpc_secret` swarm secret by scripts/deploy-drone.sh) and
# the host exec runner (read via the env template below). drone_gitea_client_secret is the # the host exec runner (read via the env template below). drone_gitea_client_secret is the
# Gitea OAuth app secret, inserted as the server's `client_secret` swarm secret. # Gitea OAuth app secret, inserted as the server's `client_secret` swarm secret.
secrets.drone_rpc_secret = { }; drone_rpc_secret = { };
secrets.drone_gitea_client_secret = { }; drone_gitea_client_secret = { };
# M3 comment-bridge (A2). Read by modules/bridge.nix's reconcile oneshot, which copies them # M3 comment-bridge (A2). Read by modules/bridge.nix's reconcile oneshot, which copies them
# into swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook. # into swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook.
secrets.bridge_webhook_hmac = { }; bridge_webhook_hmac = { };
secrets.bridge_drone_token = { }; bridge_drone_token = { };
secrets.bridge_gitea_token = { }; bridge_gitea_token = { };
# Phase-1c C2: the wildcard TLS cert+key are now sops secrets (in cc-ci-secrets), decrypted at # Phase-1c C2: the wildcard TLS cert+key are now sops secrets (in cc-ci-secrets), decrypted at
# activation to /var/lib/ci-certs/live/{fullchain.pem,privkey.pem} — the exact path the traefik # activation to /var/lib/ci-certs/live/{fullchain.pem,privkey.pem} — the exact path the traefik
# reconcile (modules/proxy.nix) already reads. Replaces the prior operator-drops-a-cert-file step. # reconcile (modules/proxy.nix) already reads. Replaces the prior operator-drops-a-cert-file step.
secrets.wildcard_cert = { wildcard_cert = {
path = "/var/lib/ci-certs/live/fullchain.pem"; path = "/var/lib/ci-certs/live/fullchain.pem";
mode = "0444"; # leaf+intermediate chain — not secret mode = "0444"; # leaf+intermediate chain — not secret
}; };
secrets.wildcard_key = { wildcard_key = {
path = "/var/lib/ci-certs/live/privkey.pem"; path = "/var/lib/ci-certs/live/privkey.pem";
mode = "0400"; # private key — root only mode = "0400"; # private key — root only
};
}; };
# EnvironmentFile for the host exec runner: DRONE_RPC_SECRET rendered from the sops secret. # EnvironmentFile for the host exec runner: DRONE_RPC_SECRET rendered from the sops secret.

View File

@ -6,11 +6,11 @@ Bakes in the known abra gotchas (re-verify per installed abra version, currently
- `abra app ls -S -m` returns nested {server: {apps: [...]}} — parse the inner structure. - `abra app ls -S -m` returns nested {server: {apps: [...]}} — parse the inner structure.
- run non-interactively with `-n` (`--no-input`) everywhere. - run non-interactively with `-n` (`--no-input`) everywhere.
""" """
from __future__ import annotations from __future__ import annotations
import json import json
import subprocess import subprocess
from typing import Optional
ABRA = "abra" ABRA = "abra"
@ -19,13 +19,17 @@ class AbraError(RuntimeError):
pass pass
def _run_pty(args: list[str], timeout: int = 900, check: bool = True) -> subprocess.CompletedProcess: def _run_pty(
args: list[str], timeout: int = 900, check: bool = True
) -> subprocess.CompletedProcess:
"""Run abra under a pseudo-TTY (via util-linux `script`). Needed for commands that exec into """Run abra under a pseudo-TTY (via util-linux `script`). Needed for commands that exec into
a container interactively (backup create / restore: 'the input device is not a TTY').""" a container interactively (backup create / restore: 'the input device is not a TTY')."""
cmd = "abra " + " ".join(args) cmd = "abra " + " ".join(args)
proc = subprocess.run( proc = subprocess.run(
["script", "-qec", cmd, "/dev/null"], ["script", "-qec", cmd, "/dev/null"],
capture_output=True, text=True, timeout=timeout, capture_output=True,
text=True,
timeout=timeout,
) )
if check and proc.returncode != 0: if check and proc.returncode != 0:
raise AbraError(f"[pty] {cmd} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}") raise AbraError(f"[pty] {cmd} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}")
@ -40,12 +44,19 @@ def _run(args: list[str], timeout: int = 300, check: bool = True) -> subprocess.
timeout=timeout, timeout=timeout,
) )
if check and proc.returncode != 0: if check and proc.returncode != 0:
raise AbraError(f"abra {' '.join(args)} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}") raise AbraError(
f"abra {' '.join(args)} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}"
)
return proc return proc
def app_new(recipe: str, domain: str, server: str = "default", version: Optional[str] = None, def app_new(
secrets: bool = False) -> None: recipe: str,
domain: str,
server: str = "default",
version: str | None = None,
secrets: bool = False,
) -> None:
args = ["app", "new", recipe] args = ["app", "new", recipe]
args += ["-s", server, "-D", domain, "-o", "-n"] args += ["-s", server, "-D", domain, "-o", "-n"]
if version: if version:
@ -64,6 +75,7 @@ def env_set(domain: str, key: str, value: str) -> None:
"""Set a key in the app's .env (abra has no setter; edit the file directly).""" """Set a key in the app's .env (abra has no setter; edit the file directly)."""
import os import os
import re import re
path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env") path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env")
with open(path) as fh: with open(path) as fh:
lines = fh.read().splitlines() lines = fh.read().splitlines()
@ -86,8 +98,11 @@ def secret_generate(domain: str, timeout: int = 300) -> None:
# captured by _run and never logged. -C -o keep the recipe at the PR checkout (without -o it # captured by _run and never logged. -C -o keep the recipe at the PR checkout (without -o it
# re-resolves to a version tag, dropping the PR's files incl. tests/). check=False: recipes with # re-resolves to a version tag, dropping the PR's files incl. tests/). check=False: recipes with
# no secrets are a no-op. # no secrets are a no-op.
_run(["app", "secret", "generate", domain, "--all", "-m", "-C", "-o", "-n"], _run(
timeout=timeout, check=False) ["app", "secret", "generate", domain, "--all", "-m", "-C", "-o", "-n"],
timeout=timeout,
check=False,
)
def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None: def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None:
@ -97,7 +112,7 @@ def deploy(domain: str, chaos: bool = True, timeout: int = 900) -> None:
_run(args, timeout=timeout) _run(args, timeout=timeout)
def upgrade(domain: str, version: Optional[str] = None, timeout: int = 900) -> None: def upgrade(domain: str, version: str | None = None, timeout: int = 900) -> None:
args = ["app", "upgrade", domain] args = ["app", "upgrade", domain]
if version: if version:
args.append(version) args.append(version)
@ -127,9 +142,11 @@ def recipe_versions(recipe: str) -> list[str]:
"""Published versions of a recipe, oldest→newest (from the recipe git tags).""" """Published versions of a recipe, oldest→newest (from the recipe git tags)."""
import os import os
import subprocess import subprocess
path = os.path.expanduser(f"~/.abra/recipes/{recipe}") path = os.path.expanduser(f"~/.abra/recipes/{recipe}")
proc = subprocess.run(["git", "-C", path, "tag", "--sort=creatordate"], proc = subprocess.run(
capture_output=True, text=True) ["git", "-C", path, "tag", "--sort=creatordate"], capture_output=True, text=True
)
return [t for t in proc.stdout.split("\n") if t.strip()] return [t for t in proc.stdout.split("\n") if t.strip()]
@ -149,12 +166,12 @@ def secret_remove_all(domain: str, timeout: int = 300) -> None:
def app_config_remove(domain: str, server: str = "default") -> None: def app_config_remove(domain: str, server: str = "default") -> None:
"""Delete the app's .env config so a re-run can recreate it (teardown completeness).""" """Delete the app's .env config so a re-run can recreate it (teardown completeness)."""
import contextlib
import os import os
path = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env") path = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
try: with contextlib.suppress(FileNotFoundError):
os.remove(path) os.remove(path)
except FileNotFoundError:
pass
def app_ls(server: str = "default") -> list[dict]: def app_ls(server: str = "default") -> list[dict]:

View File

@ -3,8 +3,10 @@
The teardown guarantee is sacred: a failed test must never leak an app/volume/secret into the The teardown guarantee is sacred: a failed test must never leak an app/volume/secret into the
next run. Callers wrap deploy()/teardown() in try/finally (or a pytest finalizer). next run. Callers wrap deploy()/teardown() in try/finally (or a pytest finalizer).
""" """
from __future__ import annotations from __future__ import annotations
import contextlib
import datetime import datetime
import os import os
import re import re
@ -29,7 +31,8 @@ def _docker_names(kind: str, stack: str) -> list[str]:
"""docker <kind> ls names filtered to a stack (kind: service|volume|secret).""" """docker <kind> ls names filtered to a stack (kind: service|volume|secret)."""
proc = subprocess.run( proc = subprocess.run(
["docker", kind, "ls", "--filter", f"name={stack}", "--format", "{{.Name}}"], ["docker", kind, "ls", "--filter", f"name={stack}", "--format", "{{.Name}}"],
capture_output=True, text=True, capture_output=True,
text=True,
) )
return [n for n in proc.stdout.split("\n") if n.strip()] return [n for n in proc.stdout.split("\n") if n.strip()]
@ -50,16 +53,20 @@ def _stack_age_seconds(stack: str) -> float | None:
return None return None
oldest = None oldest = None
for s in svcs: for s in svcs:
p = subprocess.run(["docker", "service", "inspect", s, "--format", "{{.CreatedAt}}"], p = subprocess.run(
capture_output=True, text=True) ["docker", "service", "inspect", s, "--format", "{{.CreatedAt}}"],
capture_output=True,
text=True,
)
ts = p.stdout.strip() ts = p.stdout.strip()
try: try:
# docker emits e.g. 2026-05-27 00:12:33.123 +0000 UTC -> take the leading 19 chars # docker emits e.g. 2026-05-27 00:12:33.123 +0000 UTC -> take the leading 19 chars
dt = datetime.datetime.strptime(ts[:19], "%Y-%m-%d %H:%M:%S").replace( dt = datetime.datetime.strptime(ts[:19], "%Y-%m-%d %H:%M:%S").replace(
tzinfo=datetime.timezone.utc) tzinfo=datetime.UTC
)
except ValueError: except ValueError:
continue continue
age = (datetime.datetime.now(datetime.timezone.utc) - dt).total_seconds() age = (datetime.datetime.now(datetime.UTC) - dt).total_seconds()
oldest = age if oldest is None else max(oldest, age) oldest = age if oldest is None else max(oldest, age)
return oldest return oldest
@ -107,7 +114,8 @@ def services_converged(domain: str) -> bool:
stack = _stack_name(domain) stack = _stack_name(domain)
proc = subprocess.run( proc = subprocess.run(
["docker", "stack", "services", stack, "--format", "{{.Replicas}}"], ["docker", "stack", "services", stack, "--format", "{{.Replicas}}"],
capture_output=True, text=True, capture_output=True,
text=True,
) )
rows = [r for r in proc.stdout.split("\n") if r.strip()] rows = [r for r in proc.stdout.split("\n") if r.strip()]
if not rows: if not rows:
@ -136,8 +144,13 @@ def http_get(domain: str, path: str = "/", timeout: int = 15) -> int:
return 0 return 0
def wait_healthy(domain: str, ok_codes=(200, 301, 302), path: str = "/", def wait_healthy(
deploy_timeout: int = 600, http_timeout: int = 300) -> None: domain: str,
ok_codes=(200, 301, 302),
path: str = "/",
deploy_timeout: int = 600,
http_timeout: int = 300,
) -> None:
"""Wait for stack services converged, then for the app to answer ok over HTTPS at `path`. """Wait for stack services converged, then for the app to answer ok over HTTPS at `path`.
`path` is per-recipe (recipe_meta.HEALTH_PATH), e.g. keycloak uses /realms/master.""" `path` is per-recipe (recipe_meta.HEALTH_PATH), e.g. keycloak uses /realms/master."""
deadline = time.time() + deploy_timeout deadline = time.time() + deploy_timeout
@ -181,7 +194,8 @@ def _app_container(domain: str, service: str = "app") -> str:
name = f"{_stack_name(domain)}_{service}" name = f"{_stack_name(domain)}_{service}"
proc = subprocess.run( proc = subprocess.run(
["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"], ["docker", "ps", "--filter", f"name={name}", "--format", "{{.ID}}"],
capture_output=True, text=True, capture_output=True,
text=True,
) )
cid = proc.stdout.strip().split("\n")[0] cid = proc.stdout.strip().split("\n")[0]
if not cid: if not cid:
@ -221,8 +235,8 @@ def teardown_app(domain: str, verify: bool = True) -> None:
stack = _stack_name(domain) stack = _stack_name(domain)
abra.undeploy(domain) abra.undeploy(domain)
if _docker_names("service", stack): if _docker_names("service", stack):
_force_stack_rm(stack) # fallback: abra undeploy didn't clear it _force_stack_rm(stack) # fallback: abra undeploy didn't clear it
abra.volume_remove(domain) # needs the .env -> before removing it abra.volume_remove(domain) # needs the .env -> before removing it
abra.secret_remove_all(domain) abra.secret_remove_all(domain)
# belt-and-suspenders: drop any volumes/secrets abra missed, by stack name. A volume can be # belt-and-suspenders: drop any volumes/secrets abra missed, by stack name. A volume can be
# briefly held by a just-stopped task after `stack rm`, so retry the volume removal. # briefly held by a just-stopped task after `stack rm`, so retry the volume removal.
@ -238,7 +252,7 @@ def teardown_app(domain: str, verify: bool = True) -> None:
time.sleep(3) time.sleep(3)
for s in _docker_names("secret", stack): for s in _docker_names("secret", stack):
subprocess.run(["docker", "secret", "rm", s], capture_output=True, text=True) subprocess.run(["docker", "secret", "rm", s], capture_output=True, text=True)
abra.app_config_remove(domain) # only now (stack gone) drop the .env abra.app_config_remove(domain) # only now (stack gone) drop the .env
if verify: if verify:
residual = _residual(domain) residual = _residual(domain)
@ -252,6 +266,7 @@ def janitor(max_age_seconds: int | None = None) -> None:
docker primitives so it works even when the .env is gone (A2/A3). Default 2h, env-overridable docker primitives so it works even when the .env is gone (A2/A3). Default 2h, env-overridable
via CCCI_JANITOR_MAX_AGE (e.g. 0 to reap all matching orphans immediately).""" via CCCI_JANITOR_MAX_AGE (e.g. 0 to reap all matching orphans immediately)."""
import os import os
if max_age_seconds is None: if max_age_seconds is None:
max_age_seconds = int(os.environ.get("CCCI_JANITOR_MAX_AGE", "7200")) max_age_seconds = int(os.environ.get("CCCI_JANITOR_MAX_AGE", "7200"))
seen = set() seen = set()
@ -271,7 +286,5 @@ def janitor(max_age_seconds: int | None = None) -> None:
age = _stack_age_seconds(stack) age = _stack_age_seconds(stack)
if age is not None and age < max_age_seconds: if age is not None and age < max_age_seconds:
continue # likely a concurrent in-flight run; leave it continue # likely a concurrent in-flight run; leave it
try: with contextlib.suppress(Exception):
teardown_app(name, verify=False) teardown_app(name, verify=False)
except Exception:
pass

View File

@ -3,6 +3,7 @@
Domain = "<recipe[:4]>-<6hex(recipe|pr|ref)>.ci.commoninternet.net" — short enough for Docker's Domain = "<recipe[:4]>-<6hex(recipe|pr|ref)>.ci.commoninternet.net" — short enough for Docker's
64-char swarm config/secret name limit, unique per run, collision-safe across recipes (DECISIONS.md). 64-char swarm config/secret name limit, unique per run, collision-safe across recipes (DECISIONS.md).
""" """
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib

View File

@ -14,6 +14,7 @@ tests/<recipe>/. Teardown is guaranteed by the conftest fixture finalizer.
Run env (python with pytest+playwright, PLAYWRIGHT_BROWSERS_PATH) is provided by `cc-ci-run` Run env (python with pytest+playwright, PLAYWRIGHT_BROWSERS_PATH) is provided by `cc-ci-run`
(modules/harness.nix); invoke as: cc-ci-run runner/run_recipe_ci.py (modules/harness.nix); invoke as: cc-ci-run runner/run_recipe_ci.py
""" """
from __future__ import annotations from __future__ import annotations
import glob import glob
@ -26,6 +27,7 @@ import tempfile
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(ROOT, "runner")) sys.path.insert(0, os.path.join(ROOT, "runner"))
from harness import lifecycle, naming # noqa: E402 from harness import lifecycle, naming # noqa: E402
STAGE_FILES = { STAGE_FILES = {
"install": "test_install.py", "install": "test_install.py",
"upgrade": "test_upgrade.py", "upgrade": "test_upgrade.py",
@ -40,7 +42,8 @@ def _redact_values() -> list[str]:
vals = set() vals = set()
for p in glob.glob("/run/secrets/*"): for p in glob.glob("/run/secrets/*"):
try: try:
v = open(p).read().strip() with open(p) as f:
v = f.read().strip()
except OSError: except OSError:
continue continue
if len(v) >= 8: if len(v) >= 8:
@ -55,8 +58,15 @@ def run_stage_redacted(cmd: list[str], env: dict | None = None) -> int:
"""Run a stage subprocess, streaming its output live (so Drone logs stay tail-able) but masking """Run a stage subprocess, streaming its output live (so Drone logs stay tail-able) but masking
any known infra-secret value first. Belt-and-suspenders: the harness already never prints any known infra-secret value first. Belt-and-suspenders: the harness already never prints
secrets and abra doesn't echo generated ones.""" secrets and abra doesn't echo generated ones."""
proc = subprocess.Popen(cmd, cwd=ROOT, env=env, stdout=subprocess.PIPE, proc = subprocess.Popen(
stderr=subprocess.STDOUT, text=True, bufsize=1) cmd,
cwd=ROOT,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
)
assert proc.stdout is not None assert proc.stdout is not None
for line in proc.stdout: for line in proc.stdout:
for v in _REDACT: for v in _REDACT:
@ -70,7 +80,8 @@ def run_stage_redacted(cmd: list[str], env: dict | None = None) -> int:
def _gitea_token() -> str | None: def _gitea_token() -> str | None:
tok = os.environ.get("GITEA_TOKEN") tok = os.environ.get("GITEA_TOKEN")
if not tok and os.path.exists("/run/secrets/bridge_gitea_token"): if not tok and os.path.exists("/run/secrets/bridge_gitea_token"):
tok = open("/run/secrets/bridge_gitea_token").read().strip() with open("/run/secrets/bridge_gitea_token") as f:
tok = f.read().strip()
return tok or None return tok or None
@ -97,8 +108,10 @@ def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
# to a foreign host). Non-fatal: if upstream is unreachable, upgrade degrades to a skip. # to a foreign host). Non-fatal: if upstream is unreachable, upgrade degrades to a skip.
upstream = f"https://git.coopcloud.tech/coop-cloud/{recipe}.git" upstream = f"https://git.coopcloud.tech/coop-cloud/{recipe}.git"
# Explicit tags refspec — a bare `fetch --tags <url>` errors "couldn't find remote ref HEAD". # Explicit tags refspec — a bare `fetch --tags <url>` errors "couldn't find remote ref HEAD".
subprocess.run(["git", "-C", dest, "fetch", "--quiet", upstream, subprocess.run(
"refs/tags/*:refs/tags/*"], check=False) ["git", "-C", dest, "fetch", "--quiet", upstream, "refs/tags/*:refs/tags/*"],
check=False,
)
else: else:
# Clean re-fetch from the catalogue. rm first so a leftover dir from a prior SRC+REF run # Clean re-fetch from the catalogue. rm first so a leftover dir from a prior SRC+REF run
# (which points origin at the private mirror and may lack version tags) can't poison the # (which points origin at the private mirror and may lack version tags) can't poison the
@ -178,7 +191,9 @@ def run_recipe_local(recipe: str, local_tests: str | None) -> int | None:
lifecycle.deploy_app(recipe, domain, version=os.environ.get("VERSION") or None) lifecycle.deploy_app(recipe, domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain) lifecycle.wait_healthy(domain)
env = dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}") env = dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
return run_stage_redacted([sys.executable, "-m", "pytest", "-v", "-rA", local_tests], env=env) return run_stage_redacted(
[sys.executable, "-m", "pytest", "-v", "-rA", local_tests], env=env
)
finally: finally:
lifecycle.teardown_app(domain, verify=False) lifecycle.teardown_app(domain, verify=False)

View File

@ -10,7 +10,8 @@
# GITEA_USERNAME=autonomic-bot GITEA_PASSWORD=… bash scripts/bootstrap-drone-oauth.sh # GITEA_USERNAME=autonomic-bot GITEA_PASSWORD=… bash scripts/bootstrap-drone-oauth.sh
# Optionally ACTIVATE a repo: REPO=recipe-maintainers/cc-ci (default). # Optionally ACTIVATE a repo: REPO=recipe-maintainers/cc-ci (default).
set -euo pipefail set -euo pipefail
: "${GITEA_USERNAME:?set GITEA_USERNAME}"; : "${GITEA_PASSWORD:?set GITEA_PASSWORD}" : "${GITEA_USERNAME:?set GITEA_USERNAME}"
: "${GITEA_PASSWORD:?set GITEA_PASSWORD}"
GITEA="${GITEA:-https://git.autonomic.zone}" GITEA="${GITEA:-https://git.autonomic.zone}"
DRONE="${DRONE:-https://drone.ci.commoninternet.net}" DRONE="${DRONE:-https://drone.ci.commoninternet.net}"
CLIENT_ID="${CLIENT_ID:-ab4cdb9d-ee96-4867-875f-87384505fc52}" CLIENT_ID="${CLIENT_ID:-ab4cdb9d-ee96-4867-875f-87384505fc52}"
@ -18,7 +19,9 @@ REPO="${REPO:-recipe-maintainers/cc-ci}"
RES=(--resolve "drone.ci.commoninternet.net:443:127.0.0.1") RES=(--resolve "drone.ci.commoninternet.net:443:127.0.0.1")
export PATH=/run/current-system/sw/bin:"$PATH" export PATH=/run/current-system/sw/bin:"$PATH"
cj=$(mktemp); dj=$(mktemp); az=$(mktemp) cj=$(mktemp)
dj=$(mktemp)
az=$(mktemp)
trap 'rm -f "$cj" "$dj" "$az"' EXIT trap 'rm -f "$cj" "$dj" "$az"' EXIT
# 1) Gitea web login (CSRF cookie -> form field). # 1) Gitea web login (CSRF cookie -> form field).
@ -31,9 +34,10 @@ curl -s -b "$cj" -c "$cj" -o /dev/null \
"$GITEA/user/login" "$GITEA/user/login"
# 2) Drone /login -> Gitea authorize URL. # 2) Drone /login -> Gitea authorize URL.
loc=$(curl -sk -c "$dj" -o /dev/null -D - "${RES[@]}" "$DRONE/login" \ loc=$(curl -sk -c "$dj" -o /dev/null -D - "${RES[@]}" "$DRONE/login" |
| awk 'tolower($1)=="location:"{print $2}' | tr -d '\r') awk 'tolower($1)=="location:"{print $2}' | tr -d '\r')
azh=$(mktemp); trap 'rm -f "$cj" "$dj" "$az" "$azh"' EXIT azh=$(mktemp)
trap 'rm -f "$cj" "$dj" "$az" "$azh"' EXIT
curl -sk -b "$cj" -c "$cj" -o "$az" -D "$azh" "$loc" curl -sk -b "$cj" -c "$cj" -o "$az" -D "$azh" "$loc"
# 3) Either the OAuth app auto-approves (bot already granted it earlier => Gitea 302s straight to the # 3) Either the OAuth app auto-approves (bot already granted it earlier => Gitea 302s straight to the

View File

@ -9,7 +9,7 @@
# (shfmt/shellcheck), YAML (yamllint). Run from the repo root. # (shfmt/shellcheck), YAML (yamllint). Run from the repo root.
set -uo pipefail set -uo pipefail
cd "$(dirname "$0")/.." cd "$(dirname "$0")/.." || exit 1
FIX=0 FIX=0
[ "${1:-}" = "--fix" ] && FIX=1 [ "${1:-}" = "--fix" ] && FIX=1
@ -19,7 +19,7 @@ SHFMT_FLAGS=(-i 2 -ci)
fail=0 fail=0
section() { printf '\n=== %s ===\n' "$1"; } section() { printf '\n=== %s ===\n' "$1"; }
note() { printf ' %s\n' "$1"; } note() { printf ' %s\n' "$1"; }
# Nix files (exclude the `secrets/` submodule). # Nix files (exclude the `secrets/` submodule).
mapfile -t NIX_FILES < <(find . -name '*.nix' -not -path './.git/*' -not -path './secrets/*' | sort) mapfile -t NIX_FILES < <(find . -name '*.nix' -not -path './.git/*' -not -path './secrets/*' | sort)
@ -30,7 +30,10 @@ section "Nix — nixpkgs-fmt"
if [ "$FIX" = 1 ]; then if [ "$FIX" = 1 ]; then
nixpkgs-fmt "${NIX_FILES[@]}" || fail=1 nixpkgs-fmt "${NIX_FILES[@]}" || fail=1
else else
nixpkgs-fmt --check "${NIX_FILES[@]}" || { note "run: scripts/lint.sh --fix"; fail=1; } nixpkgs-fmt --check "${NIX_FILES[@]}" || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi fi
section "Nix — statix" section "Nix — statix"
@ -51,7 +54,10 @@ section "Python — ruff format"
if [ "$FIX" = 1 ]; then if [ "$FIX" = 1 ]; then
ruff format . || fail=1 ruff format . || fail=1
else else
ruff format --check . || { note "run: scripts/lint.sh --fix"; fail=1; } ruff format --check . || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi fi
section "Python — ruff check" section "Python — ruff check"
@ -66,7 +72,10 @@ if [ "${#SH_FILES[@]}" -gt 0 ]; then
if [ "$FIX" = 1 ]; then if [ "$FIX" = 1 ]; then
shfmt "${SHFMT_FLAGS[@]}" -w "${SH_FILES[@]}" || fail=1 shfmt "${SHFMT_FLAGS[@]}" -w "${SH_FILES[@]}" || fail=1
else else
shfmt "${SHFMT_FLAGS[@]}" -d "${SH_FILES[@]}" || { note "run: scripts/lint.sh --fix"; fail=1; } shfmt "${SHFMT_FLAGS[@]}" -d "${SH_FILES[@]}" || {
note "run: scripts/lint.sh --fix"
fail=1
}
fi fi
section "Shell — shellcheck" section "Shell — shellcheck"

View File

@ -4,6 +4,7 @@ A run is parameterized by env: RECIPE, REF (PR head sha), PR, SRC (head repo). T
computes a unique app domain per run so concurrent runs never collide, and GUARANTEES teardown computes a unique app domain per run so concurrent runs never collide, and GUARANTEES teardown
(undeploy + volume + secret removal) via a finalizer, even on failure. (undeploy + volume + secret removal) via a finalizer, even on failure.
""" """
from __future__ import annotations from __future__ import annotations
import os import os
@ -24,8 +25,12 @@ def _recipe_meta(recipe: str) -> dict:
A recipe may ship tests/<recipe>/recipe_meta.py with any of: HEALTH_PATH (str), A recipe may ship tests/<recipe>/recipe_meta.py with any of: HEALTH_PATH (str),
HEALTH_OK (tuple of status codes), DEPLOY_TIMEOUT (int), HTTP_TIMEOUT (int).""" HEALTH_OK (tuple of status codes), DEPLOY_TIMEOUT (int), HTTP_TIMEOUT (int)."""
path = os.path.join(os.path.dirname(__file__), recipe, "recipe_meta.py") path = os.path.join(os.path.dirname(__file__), recipe, "recipe_meta.py")
meta = {"HEALTH_PATH": "/", "HEALTH_OK": (200, 301, 302), meta = {
"DEPLOY_TIMEOUT": 600, "HTTP_TIMEOUT": 300} "HEALTH_PATH": "/",
"HEALTH_OK": (200, 301, 302),
"DEPLOY_TIMEOUT": 600,
"HTTP_TIMEOUT": 300,
}
if os.path.exists(path): if os.path.exists(path):
ns: dict = {} ns: dict = {}
with open(path) as fh: with open(path) as fh:
@ -57,8 +62,13 @@ def meta(recipe) -> dict:
def _wait_healthy(domain, meta): def _wait_healthy(domain, meta):
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
@pytest.fixture @pytest.fixture

View File

@ -3,6 +3,7 @@ backup, mutate, restore, assert the restored state matches the pre-mutation (bac
The cryptpad `app` service is labelled `backupbot.backup=true`, so its volumes (incl. cryptpad_data) The cryptpad `app` service is labelled `backupbot.backup=true`, so its volumes (incl. cryptpad_data)
are backed up. Marker is checked via `exec_in_app` (data isn't HTTP-served).""" are backed up. Marker is checked via `exec_in_app` (data isn't HTTP-served)."""
import os import os
import sys import sys
@ -26,7 +27,13 @@ def test_backup_mutate_restore(deployed, meta):
# 3) restore -> state returns to the backed-up "original" # 3) restore -> state returns to the backed-up "original"
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original", \ ok_codes=tuple(meta["HEALTH_OK"]),
"restore did not return the pre-mutation state" path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,4 +1,5 @@
"""cryptpad — install stage (recipe #3, stateful/no-DB). D2 install + D3 Playwright.""" """cryptpad — install stage (recipe #3, stateful/no-DB). D2 install + D3 Playwright."""
import os import os
import sys import sys
@ -23,7 +24,10 @@ def test_playwright_loads_cryptpad(deployed_app):
ctx = browser.new_context(ignore_https_errors=True) ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page() page = ctx.new_page()
resp = page.goto(url, wait_until="load", timeout=60000) resp = page.goto(url, wait_until="load", timeout=60000)
assert resp is not None and resp.status in (200, 304), f"page status {resp and resp.status}" assert resp is not None and resp.status in (
200,
304,
), f"page status {resp and resp.status}"
body = page.content().lower() body = page.content().lower()
assert "cryptpad" in body or "<html" in body, "no cryptpad content served" assert "cryptpad" in body or "<html" in body, "no cryptpad content served"
finally: finally:

View File

@ -3,6 +3,7 @@ persistent volume, upgrade to current/$REF, assert the app stays healthy and the
cryptpad data isn't HTTP-served as a static file (it's an encrypted datastore), so the marker is cryptpad data isn't HTTP-served as a static file (it's an encrypted datastore), so the marker is
written into the cryptpad_data volume and read back via `exec_in_app` (docker exec), not HTTP.""" written into the cryptpad_data volume and read back via `exec_in_app` (docker exec), not HTTP."""
import os import os
import sys import sys
@ -22,8 +23,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor() lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev) lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev return app_domain, prev
@ -35,10 +41,16 @@ def test_upgrade_preserves_data(old_app, meta):
# upgrade previous -> current/$REF # upgrade previous -> current/$REF
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# app healthy and the data written before the upgrade is still there # app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") in (200, 301, 302) assert lifecycle.http_get(domain, "/") in (200, 301, 302)
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives", \ assert (
"data did not survive the upgrade" lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"

View File

@ -1,5 +1,6 @@
"""custom-html — backup/restore stage (D2): backup, mutate state, restore, assert the restored """custom-html — backup/restore stage (D2): backup, mutate state, restore, assert the restored
state matches the pre-mutation (backed-up) state.""" state matches the pre-mutation (backed-up) state."""
import os import os
import sys import sys
@ -24,5 +25,6 @@ def test_backup_mutate_restore(deployed):
# 3) restore -> state returns to the backed-up "original" # 3) restore -> state returns to the backed-up "original"
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain) lifecycle.wait_healthy(domain)
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original", \ assert (
"restore did not return the pre-mutation state" lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,6 +1,7 @@
"""custom-html — install stage (recipe #1, simple/stateless). D2 install + D3 Playwright.""" """custom-html — install stage (recipe #1, simple/stateless). D2 install + D3 Playwright."""
import sys
import os import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402 from harness import lifecycle # noqa: E402

View File

@ -1,5 +1,6 @@
"""custom-html — upgrade stage (D2): deploy the previous published version, write data, upgrade """custom-html — upgrade stage (D2): deploy the previous published version, write data, upgrade
to the current/$REF version, and assert the app stays healthy and data survives.""" to the current/$REF version, and assert the app stays healthy and data survives."""
import os import os
import sys import sys
@ -35,5 +36,6 @@ def test_upgrade_preserves_data(old_app):
# app healthy and the data written before the upgrade is still there # app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") == 200 assert lifecycle.http_get(domain, "/") == 200
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives", \ assert (
"data did not survive the upgrade" lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives"
), "data did not survive the upgrade"

View File

@ -1,5 +1,6 @@
"""Recipe-specific keycloak admin-API helpers (not harness). Used by the upgrade/backup stages to """Recipe-specific keycloak admin-API helpers (not harness). Used by the upgrade/backup stages to
write a real data marker (a realm) into mariadb and verify it survives upgrade / backup-restore.""" write a real data marker (a realm) into mariadb and verify it survives upgrade / backup-restore."""
import json import json
import ssl import ssl
import sys import sys
@ -21,12 +22,20 @@ def admin_password(domain: str) -> str:
def admin_token(domain: str, password: str, user: str = "admin") -> str: def admin_token(domain: str, password: str, user: str = "admin") -> str:
data = urllib.parse.urlencode({ data = urllib.parse.urlencode(
"grant_type": "password", "client_id": "admin-cli", "username": user, "password": password, {
}).encode() "grant_type": "password",
"client_id": "admin-cli",
"username": user,
"password": password,
}
).encode()
req = urllib.request.Request( req = urllib.request.Request(
f"https://{domain}/realms/master/protocol/openid-connect/token", data=data, f"https://{domain}/realms/master/protocol/openid-connect/token",
headers={"Content-Type": "application/x-www-form-urlencoded"}, method="POST") data=data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
method="POST",
)
with urllib.request.urlopen(req, timeout=30, context=_CTX) as r: with urllib.request.urlopen(req, timeout=30, context=_CTX) as r:
return json.load(r)["access_token"] return json.load(r)["access_token"]
@ -36,8 +45,9 @@ def _admin(domain, token, path, method="GET", body=None):
headers = {"Authorization": "Bearer " + token} headers = {"Authorization": "Bearer " + token}
if data: if data:
headers["Content-Type"] = "application/json" headers["Content-Type"] = "application/json"
req = urllib.request.Request(f"https://{domain}/admin{path}", data=data, headers=headers, req = urllib.request.Request(
method=method) f"https://{domain}/admin{path}", data=data, headers=headers, method=method
)
try: try:
with urllib.request.urlopen(req, timeout=30, context=_CTX) as r: with urllib.request.urlopen(req, timeout=30, context=_CTX) as r:
return r.status return r.status

View File

@ -1,6 +1,6 @@
# Per-recipe harness config for keycloak (DB-backed: keycloak + mariadb). Read by the shared # Per-recipe harness config for keycloak (DB-backed: keycloak + mariadb). Read by the shared
# conftest — enrolling this recipe needs NO change to runner/harness code (D5). # conftest — enrolling this recipe needs NO change to runner/harness code (D5).
HEALTH_PATH = "/realms/master" # 200 JSON once keycloak is up (not "/", which redirects) HEALTH_PATH = "/realms/master" # 200 JSON once keycloak is up (not "/", which redirects)
HEALTH_OK = (200,) HEALTH_OK = (200,)
DEPLOY_TIMEOUT = 600 # JVM + DB migration are slow on a 2-vCPU VM DEPLOY_TIMEOUT = 600 # JVM + DB migration are slow on a 2-vCPU VM
HTTP_TIMEOUT = 600 HTTP_TIMEOUT = 600

View File

@ -1,11 +1,12 @@
"""keycloak — backup/restore stage (D2): create a realm, backup, delete it (mutate), restore, """keycloak — backup/restore stage (D2): create a realm, backup, delete it (mutate), restore,
assert the realm is back (mariadb restored to the backed-up state).""" assert the realm is back (mariadb restored to the backed-up state)."""
import os import os
import sys import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
import kc_admin # noqa: E402 import kc_admin # noqa: E402
from harness import lifecycle # noqa: E402
def test_backup_mutate_restore(deployed): def test_backup_mutate_restore(deployed):
@ -24,7 +25,8 @@ def test_backup_mutate_restore(deployed):
# 3) restore -> realm returns # 3) restore -> realm returns
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, path="/realms/master", ok_codes=(200,), lifecycle.wait_healthy(
deploy_timeout=600, http_timeout=600) domain, path="/realms/master", ok_codes=(200,), deploy_timeout=600, http_timeout=600
)
tok2 = kc_admin.admin_token(domain, pw) tok2 = kc_admin.admin_token(domain, pw)
assert kc_admin.marker_realm_exists(domain, tok2), "restore did not bring back the realm" assert kc_admin.marker_realm_exists(domain, tok2), "restore did not bring back the realm"

View File

@ -1,4 +1,5 @@
"""keycloak — install stage (recipe #2, DB-backed SSO; D2 install + D3 Playwright).""" """keycloak — install stage (recipe #2, DB-backed SSO; D2 install + D3 Playwright)."""
import os import os
import sys import sys
@ -23,6 +24,8 @@ def test_playwright_admin_login(deployed_app):
page.goto(url, wait_until="domcontentloaded", timeout=45000) page.goto(url, wait_until="domcontentloaded", timeout=45000)
# admin console redirects to the login form; wait for a username field to render # admin console redirects to the login form; wait for a username field to render
page.wait_for_selector("input#username, input[name='username']", timeout=30000) page.wait_for_selector("input#username, input[name='username']", timeout=30000)
assert "keycloak" in page.content().lower() or page.locator("input#username").count() > 0 assert (
"keycloak" in page.content().lower() or page.locator("input#username").count() > 0
)
finally: finally:
browser.close() browser.close()

View File

@ -1,13 +1,14 @@
"""keycloak — upgrade stage (D2): deploy previous version, create a realm (DB data), upgrade to """keycloak — upgrade stage (D2): deploy previous version, create a realm (DB data), upgrade to
current/$REF, assert the app is healthy and the realm survived (mariadb data preserved).""" current/$REF, assert the app is healthy and the realm survived (mariadb data preserved)."""
import os import os
import sys import sys
import pytest import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
import kc_admin # noqa: E402 import kc_admin # noqa: E402
from harness import lifecycle # noqa: E402
@pytest.fixture @pytest.fixture
@ -18,8 +19,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor() lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev) lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev return app_domain, prev
@ -31,8 +37,13 @@ def test_upgrade_preserves_realm(old_app, meta):
assert kc_admin.marker_realm_exists(domain, tok), "marker realm not created" assert kc_admin.marker_realm_exists(domain, tok), "marker realm not created"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# re-auth (token from the old instance is fine, but get a fresh one post-upgrade) and verify # re-auth (token from the old instance is fine, but get a fresh one post-upgrade) and verify
tok2 = kc_admin.admin_token(domain, pw) tok2 = kc_admin.admin_token(domain, pw)

View File

@ -3,6 +3,7 @@ dumps the DB), mutate (drop it), restore (post-hook reloads), assert the restore
Exercises the recipe's real DB-dump backup hook (postgres + minio are both backupbot-labelled); the Exercises the recipe's real DB-dump backup hook (postgres + minio are both backupbot-labelled); the
postgres marker is the meaningful Docs-metadata data path.""" postgres marker is the meaningful Docs-metadata data path."""
import os import os
import sys import sys
@ -18,16 +19,28 @@ def _psql(domain, sql):
def test_backup_mutate_restore(deployed, meta): def test_backup_mutate_restore(deployed, meta):
domain = deployed domain = deployed
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; " _psql(
"INSERT INTO ci_marker VALUES('original');") domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original" assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
lifecycle.backup_app(domain) lifecycle.backup_app(domain)
_psql(domain, "DROP TABLE ci_marker;") _psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ("", "NULL"), "drop did not take" assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
assert _psql(domain, "SELECT v FROM ci_marker;") == "original", \ ok_codes=tuple(meta["HEALTH_OK"]),
"restore did not return the pre-mutation postgres state" path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -4,6 +4,7 @@ minio + nginx) converges and serves the app over real HTTPS through the gateway.
Login is OIDC-gated (no live OIDC provider in CI), so the functional assertion is that the frontend Login is OIDC-gated (no live OIDC provider in CI), so the functional assertion is that the frontend
SPA is served (unauthenticated landing), not an authenticated flow.""" SPA is served (unauthenticated landing), not an authenticated flow."""
import os import os
import sys import sys
@ -27,7 +28,11 @@ def test_playwright_loads_frontend(deployed_app):
ctx = browser.new_context(ignore_https_errors=True) ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page() page = ctx.new_page()
resp = page.goto(url, wait_until="domcontentloaded", timeout=60000) resp = page.goto(url, wait_until="domcontentloaded", timeout=60000)
assert resp is not None and resp.status in (200, 301, 302), f"page status {resp and resp.status}" assert resp is not None and resp.status in (
200,
301,
302,
), f"page status {resp and resp.status}"
assert "<html" in page.content().lower(), "no HTML served by the frontend" assert "<html" in page.content().lower(), "no HTML served by the frontend"
finally: finally:
browser.close() browser.close()

View File

@ -3,6 +3,7 @@ upgrade to current/$REF, assert the app stays healthy and the postgres data surv
Docs metadata lives in postgres, so the marker is a row in a dedicated `ci_marker` table (the app's Docs metadata lives in postgres, so the marker is a row in a dedicated `ci_marker` table (the app's
own Django migrations don't touch it), read back via `psql` in the `db` service.""" own Django migrations don't touch it), read back via `psql` in the `db` service."""
import os import os
import sys import sys
@ -25,21 +26,35 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor() lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev) lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev return app_domain, prev
def test_upgrade_preserves_data(old_app, meta): def test_upgrade_preserves_data(old_app, meta):
domain, prev = old_app domain, prev = old_app
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; " _psql(
"INSERT INTO ci_marker VALUES('upgrade-survives');") domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives" assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert lifecycle.http_get(domain, "/") in (200, 301, 302) assert lifecycle.http_get(domain, "/") in (200, 301, 302)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives", \ assert (
"postgres data did not survive the upgrade" _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

View File

@ -1,7 +1,7 @@
# Per-recipe harness config for matrix-synapse (recipe #4 — DB + media store; the large-volume / # Per-recipe harness config for matrix-synapse (recipe #4 — DB + media store; the large-volume /
# DB-backed category). Base recipe = synapse `app` + postgres `db` + nginx `web`. server_name is # DB-backed category). Base recipe = synapse `app` + postgres `db` + nginx `web`. server_name is
# DOMAIN (set by abra), so no EXTRA_ENV needed. Synapse + postgres startup is slow -> long timeouts. # DOMAIN (set by abra), so no EXTRA_ENV needed. Synapse + postgres startup is slow -> long timeouts.
HEALTH_PATH = "/_matrix/client/versions" # 200 JSON once synapse is serving the client API HEALTH_PATH = "/_matrix/client/versions" # 200 JSON once synapse is serving the client API
HEALTH_OK = (200,) HEALTH_OK = (200,)
DEPLOY_TIMEOUT = 600 DEPLOY_TIMEOUT = 600
HTTP_TIMEOUT = 600 HTTP_TIMEOUT = 600

View File

@ -4,6 +4,7 @@ reloads the dump), assert the restored DB matches the pre-mutation state.
This exercises the real DB-dump backup hook (backupbot.backup.pre-hook / restore.post-hook), not a This exercises the real DB-dump backup hook (backupbot.backup.pre-hook / restore.post-hook), not a
plain volume copy — the meaningful data path for a postgres-backed app.""" plain volume copy — the meaningful data path for a postgres-backed app."""
import os import os
import sys import sys
@ -20,18 +21,30 @@ def test_backup_mutate_restore(deployed, meta):
domain = deployed domain = deployed
# 1) establish original state in postgres, then back up (pg_backup.sh dumps the DB) # 1) establish original state in postgres, then back up (pg_backup.sh dumps the DB)
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; " _psql(
"INSERT INTO ci_marker VALUES('original');") domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original" assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
lifecycle.backup_app(domain) lifecycle.backup_app(domain)
# 2) mutate: drop the marker table (diverge from the backup) # 2) mutate: drop the marker table (diverge from the backup)
_psql(domain, "DROP TABLE ci_marker;") _psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ("", "NULL"), "drop did not take" assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
# 3) restore -> the dumped DB (with the marker) is reloaded # 3) restore -> the dumped DB (with the marker) is reloaded
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
assert _psql(domain, "SELECT v FROM ci_marker;") == "original", \ ok_codes=tuple(meta["HEALTH_OK"]),
"restore did not return the pre-mutation postgres state" path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -1,6 +1,7 @@
"""matrix-synapse — install stage (recipe #4, DB + media store). D2 install: the synapse client API """matrix-synapse — install stage (recipe #4, DB + media store). D2 install: the synapse client API
answers 200 over real HTTPS through the gateway (nginx -> synapse). The base recipe has no browser answers 200 over real HTTPS through the gateway (nginx -> synapse). The base recipe has no browser
UI (element-web is an addon), so the functional assertion is the JSON client API, not Playwright.""" UI (element-web is an addon), so the functional assertion is the JSON client API, not Playwright."""
import json import json
import os import os
import sys import sys
@ -18,4 +19,6 @@ def test_client_api_advertises_versions(deployed_app):
"""The client-API version document is real synapse JSON (proves the app, not just a proxy 200).""" """The client-API version document is real synapse JSON (proves the app, not just a proxy 200)."""
body = lifecycle.http_body(deployed_app, "/_matrix/client/versions") body = lifecycle.http_body(deployed_app, "/_matrix/client/versions")
doc = json.loads(body) doc = json.loads(body)
assert isinstance(doc.get("versions"), list) and doc["versions"], "no matrix client versions advertised" assert (
isinstance(doc.get("versions"), list) and doc["versions"]
), "no matrix client versions advertised"

View File

@ -3,6 +3,7 @@ upgrade to current/$REF, assert the app stays healthy and the postgres data surv
Matrix data lives in postgres, so the marker is a row in a dedicated `ci_marker` table (synapse's Matrix data lives in postgres, so the marker is a row in a dedicated `ci_marker` table (synapse's
own schema migrations don't touch it), read back via `psql` in the `db` service.""" own schema migrations don't touch it), read back via `psql` in the `db` service."""
import os import os
import sys import sys
@ -25,24 +26,38 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor() lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev) lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev return app_domain, prev
def test_upgrade_preserves_data(old_app, meta): def test_upgrade_preserves_data(old_app, meta):
domain, prev = old_app domain, prev = old_app
# write a marker row into postgres (independent of synapse's own tables) # write a marker row into postgres (independent of synapse's own tables)
_psql(domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; " _psql(
"INSERT INTO ci_marker VALUES('upgrade-survives');") domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives" assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
# upgrade previous -> current/$REF # upgrade previous -> current/$REF
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
# app healthy and the data written before the upgrade is still there # app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200 assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives", \ assert (
"postgres data did not survive the upgrade" _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

View File

@ -3,6 +3,7 @@ mutate, restore, assert the restored state matches the pre-mutation state.
The n8n `app` service is labelled `backupbot.backup=true` with `backupbot.backup.path=/home/node/.n8n`, The n8n `app` service is labelled `backupbot.backup=true` with `backupbot.backup.path=/home/node/.n8n`,
so a marker file there is backed up; checked via `exec_in_app`.""" so a marker file there is backed up; checked via `exec_in_app`."""
import os import os
import sys import sys
@ -23,7 +24,13 @@ def test_backup_mutate_restore(deployed, meta):
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated" assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
lifecycle.restore_app(domain) lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original", \ ok_codes=tuple(meta["HEALTH_OK"]),
"restore did not return the pre-mutation state" path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,4 +1,5 @@
"""n8n — install stage (recipe #6, workflow automation). D2 install + D3 Playwright.""" """n8n — install stage (recipe #6, workflow automation). D2 install + D3 Playwright."""
import os import os
import sys import sys
@ -22,7 +23,10 @@ def test_playwright_loads_editor(deployed_app):
ctx = browser.new_context(ignore_https_errors=True) ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page() page = ctx.new_page()
resp = page.goto(url, wait_until="domcontentloaded", timeout=60000) resp = page.goto(url, wait_until="domcontentloaded", timeout=60000)
assert resp is not None and resp.status in (200, 304), f"page status {resp and resp.status}" assert resp is not None and resp.status in (
200,
304,
), f"page status {resp and resp.status}"
body = page.content().lower() body = page.content().lower()
assert "n8n" in body or "<html" in body, "no n8n content served" assert "n8n" in body or "<html" in body, "no n8n content served"
finally: finally:

View File

@ -3,6 +3,7 @@ persistent /home/node/.n8n volume, upgrade to current/$REF, assert health + data
n8n state lives in the .n8n volume (sqlite + config); the marker is a file there, read back via n8n state lives in the .n8n volume (sqlite + config); the marker is a file there, read back via
`exec_in_app` (not HTTP-served).""" `exec_in_app` (not HTTP-served)."""
import os import os
import sys import sys
@ -22,8 +23,13 @@ def old_app(recipe, app_domain, meta, request):
lifecycle.janitor() lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev) lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) app_domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
return app_domain, prev return app_domain, prev
@ -33,9 +39,15 @@ def test_upgrade_preserves_data(old_app, meta):
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives" assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"], lifecycle.wait_healthy(
deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"]) domain,
ok_codes=tuple(meta["HEALTH_OK"]),
path=meta["HEALTH_PATH"],
deploy_timeout=meta["DEPLOY_TIMEOUT"],
http_timeout=meta["HTTP_TIMEOUT"],
)
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200 assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives", \ assert (
"data did not survive the upgrade" lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"