feat(1e): HC3 additive generic + op/assertion split (orchestrator owns the op)
- orchestrator: per mutating tier, run optional pre-op seed hook (ops.py pre_<op>) → perform the op
ONCE (harness-owned) → run generic assertion (unless opted out) AND overlay assertion, both against
the shared post-op deployment. Op results passed op→assertion via run-scoped CCCI_OP_STATE_FILE.
- opt-out: CCCI_SKIP_GENERIC / CCCI_SKIP_GENERIC_<OP> / recipe_meta.SKIP_GENERIC (declarative).
- generic.py: split do_* into op primitives (perform_upgrade/backup/restore) + assertions
(assert_upgraded/backup_artifact/restore_healthy) reading op_state(); deployed_identity now returns
{version,image,chaos} (chaos label ready for HC1).
- generic test_<op>.py + all 6 recipe overlays migrated to assertion-only; pre-op seeding moved to
per-recipe ops.py (pre_upgrade/pre_backup/pre_restore). install overlays unchanged (no op).
- deploy-count stays 1 (op primitives never call deploy_app). lint PASS; 8 unit tests PASS on cc-ci.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -118,22 +118,49 @@ def assert_serving(domain: str, meta: dict) -> None:
|
||||
assert "commoninternet.net" in detail.lower(), f"{domain}: served cert unexpected — {detail}"
|
||||
|
||||
|
||||
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
|
||||
"""UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then assert it
|
||||
reconverges + still serves AND that the deployment actually MOVED (version label and/or image
|
||||
changed). The move assertion guards against a vacuous no-op upgrade silently passing — the exact
|
||||
F1d-2 failure where a mis-pinned base deployed LATEST so 'upgrade to latest' changed nothing."""
|
||||
before = lifecycle.deployed_identity(domain)
|
||||
lifecycle.upgrade_app(domain, version=target)
|
||||
# ---- Op/assertion split (Phase 1e HC3) -------------------------------------------------------
|
||||
# The orchestrator performs each mutating op ONCE (the harness owns the op), records what an
|
||||
# assertion needs (pre-upgrade identity, backup snapshot_id) into a run-scoped JSON state file at
|
||||
# $CCCI_OP_STATE_FILE, then runs the generic assertion file (unless opted out) AND the overlay
|
||||
# assertion file against the shared post-op state. The assertion functions below read that state via
|
||||
# `op_state()`. They NEVER perform the op — that keeps the op single + lets generic+overlay coexist.
|
||||
|
||||
import json as _json # noqa: E402
|
||||
|
||||
|
||||
def op_state() -> dict:
    """Return the run-scoped op state the orchestrator wrote between op and assertions.

    Reads the JSON file named by $CCCI_OP_STATE_FILE. The state carries e.g.
    {"upgrade": {"before": {...}}, "backup": {"snapshot_id": "..."}}.

    Returns:
        The decoded state dict, or {} when the variable is unset/empty, the file is missing or
        unreadable, the content is not valid JSON, or the top-level JSON value is not an object.
        Callers chain `.get(...)` on the result, so anything but a dict would crash them with an
        AttributeError instead of producing a clear assertion failure.
    """
    path = os.environ.get("CCCI_OP_STATE_FILE")
    if not path:
        return {}
    try:
        # Open directly (EAFP): a missing file raises FileNotFoundError, which is an OSError.
        with open(path, encoding="utf-8") as f:
            state = _json.load(f)
    except (OSError, ValueError):
        # Unreadable file or malformed JSON is treated the same as "no state recorded".
        return {}
    return state if isinstance(state, dict) else {}
|
||||
|
||||
|
||||
def assert_upgraded(domain: str, meta: dict) -> None:
|
||||
"""Generic UPGRADE assertion (post-op): the orchestrator already performed the upgrade once.
|
||||
Assert it reconverged + still serves AND that the deployment actually MOVED — guarding against a
|
||||
vacuous no-op upgrade silently passing (F1d-2). HC1: prev→PR-head may NOT bump the version label,
|
||||
so a MOVE is ANY of: version-label change, image change, or a chaos label now present (a chaos
|
||||
deploy stamps the PR-head commit — THE proof the code under test was deployed)."""
|
||||
before = op_state().get("upgrade", {}).get("before") or {}
|
||||
assert_serving(domain, meta)
|
||||
after = lifecycle.deployed_identity(domain)
|
||||
moved = (before[0] and after[0] and before[0] != after[0]) or (
|
||||
before[1] and after[1] and before[1] != after[1]
|
||||
moved = (
|
||||
(before.get("version") and after.get("version") and before["version"] != after["version"])
|
||||
or (before.get("image") and after.get("image") and before["image"] != after["image"])
|
||||
or (after.get("chaos") and after.get("chaos") != before.get("chaos"))
|
||||
)
|
||||
assert moved, (
|
||||
f"{domain}: upgrade did not move the deployment "
|
||||
f"(version {before[0]}->{after[0]}, image {before[1]}->{after[1]}) — "
|
||||
"not a real previous->target upgrade (DG2 must be non-vacuous)"
|
||||
f"(version {before.get('version')}->{after.get('version')}, "
|
||||
f"image {before.get('image')}->{after.get('image')}, "
|
||||
f"chaos {before.get('chaos')}->{after.get('chaos')}) — "
|
||||
"not a real upgrade to the code under test (HC1/DG2 must be non-vacuous)"
|
||||
)
|
||||
|
||||
|
||||
@ -148,10 +175,10 @@ def parse_snapshot_id(backup_output: str) -> str | None:
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def do_backup(domain: str) -> str:
|
||||
"""BACKUP op: create a backup, then assert a snapshot artifact was produced (returns its id)."""
|
||||
out = lifecycle.backup_app(domain)
|
||||
snap_id = parse_snapshot_id(out)
|
||||
def assert_backup_artifact(domain: str) -> str:
|
||||
"""Generic BACKUP assertion (post-op): the orchestrator already ran the backup once. Assert a
|
||||
snapshot artifact was produced (its id recorded in op state). Returns the id."""
|
||||
snap_id = op_state().get("backup", {}).get("snapshot_id")
|
||||
assert snap_id, (
|
||||
f"{domain}: backup produced no snapshot artifact "
|
||||
"(no snapshot_id in `abra app backup create` output)"
|
||||
@ -159,8 +186,29 @@ def do_backup(domain: str) -> str:
|
||||
return snap_id
|
||||
|
||||
|
||||
def do_restore(domain: str, meta: dict) -> None:
|
||||
"""RESTORE op: restore the latest snapshot, then assert the app is healthy + serving again
|
||||
(assert_serving polls, so the post-restore reconverge settles)."""
|
||||
lifecycle.restore_app(domain)
|
||||
def assert_restore_healthy(domain: str, meta: dict) -> None:
    """Generic RESTORE assertion, run after the orchestrator has already restored.

    Never performs the restore itself; it only delegates to `assert_serving`, which (per the
    original note) polls, so the post-restore reconverge has time to settle before the app is
    judged healthy + serving.
    """
    assert_serving(domain=domain, meta=meta)
|
||||
|
||||
|
||||
# ---- Op primitives (orchestrator-only; perform the op once, never assert) --------------------
|
||||
|
||||
|
||||
def perform_upgrade(domain: str, target: str | None) -> dict[str, str | None]:
    """Run the UPGRADE op exactly once, in place, and hand back the pre-upgrade identity.

    The upgrade goes through `lifecycle.upgrade_app(domain, version=target)`; the original note
    calls this the E1 baseline (`abra app upgrade` -> target) and expects HC1/E2 to redefine it as
    a chaos redeploy of the PR-head checkout.

    Returns:
        The identity captured BEFORE the upgrade, so the orchestrator can record it for
        `assert_upgraded`'s move check.
    """
    # Snapshot first: once the upgrade has run, the "before" identity is gone.
    identity_before = lifecycle.deployed_identity(domain)
    lifecycle.upgrade_app(domain, version=target)
    return identity_before
|
||||
|
||||
|
||||
def perform_backup(domain: str) -> str | None:
    """Run the BACKUP op exactly once and extract the snapshot id from its output.

    Returns:
        Whatever `parse_snapshot_id` finds in the backup output (None if it finds none); the
        assertion tier decides whether a missing id is a failure.
    """
    backup_output = lifecycle.backup_app(domain)
    return parse_snapshot_id(backup_output)
|
||||
|
||||
|
||||
def perform_restore(domain: str) -> None:
    """Run the RESTORE op exactly once via `lifecycle.restore_app`.

    Per the original note this restores the latest snapshot. No assertion happens here; health
    is checked afterwards by the assertion tier.
    """
    lifecycle.restore_app(domain)
|
||||
|
||||
@ -245,11 +245,18 @@ def wait_healthy(
|
||||
raise TimeoutError(f"{domain}: not healthy over HTTPS {path} (last status {last})")
|
||||
|
||||
|
||||
def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, str | None]:
|
||||
"""(coop-cloud version label, image) of the running app service. Used to prove an upgrade
|
||||
actually MOVED the deployment prev→target (not a vacuous no-op — Adversary F1d-2). The version
|
||||
label (`coop-cloud.<stack>.version`) is bumped per published recipe version; the image usually
|
||||
bumps too. Either changing proves the upgrade did something."""
|
||||
def deployed_identity(domain: str, service: str = "app") -> dict[str, str | None]:
|
||||
"""Identity of the running app service: {"version", "image", "chaos"}. Used to prove an upgrade
|
||||
actually MOVED the deployment (not a vacuous no-op — Adversary F1d-2), AND (Phase 1e HC1) that an
|
||||
`abra app deploy --chaos` upgrade actually deployed the PR-head code under test.
|
||||
|
||||
- `version` = the `coop-cloud.<stack>.version` label (bumped per published recipe version).
|
||||
- `image` = the running container image (usually bumps with a published version).
|
||||
- `chaos` = the chaos label value (a chaos deploy stamps the recipe git commit/dirty state here)
|
||||
— present after `abra app deploy --chaos`, absent on a clean pinned-tag deploy. For prev→PR-head
|
||||
this is THE proof PR-head was deployed even when the version label is unbumped (HC1). The exact
|
||||
chaos label key varies by abra version, so we capture any `coop-cloud.<stack>.*` label whose key
|
||||
contains "chaos"."""
|
||||
name = f"{_stack_name(domain)}_{service}"
|
||||
proc = subprocess.run(
|
||||
[
|
||||
@ -265,15 +272,18 @@ def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, st
|
||||
)
|
||||
out = proc.stdout.strip()
|
||||
if "|" not in out:
|
||||
return (None, None)
|
||||
return {"version": None, "image": None, "chaos": None}
|
||||
labels_json, _, image = out.partition("|")
|
||||
ver = None
|
||||
ver = chaos = None
|
||||
with contextlib.suppress(ValueError, json.JSONDecodeError):
|
||||
for k, v in json.loads(labels_json).items():
|
||||
if k.startswith("coop-cloud.") and k.endswith(".version"):
|
||||
if not k.startswith("coop-cloud."):
|
||||
continue
|
||||
if k.endswith(".version"):
|
||||
ver = v
|
||||
break
|
||||
return (ver, image.strip() or None)
|
||||
elif "chaos" in k:
|
||||
chaos = v
|
||||
return {"version": ver, "image": image.strip() or None, "chaos": chaos}
|
||||
|
||||
|
||||
def upgrade_app(domain: str, version: str | None = None) -> None:
|
||||
|
||||
@ -1,13 +1,18 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Top-level CI orchestrator (plan §4.3 + Phase 1d), invoked by the Drone pipeline (or by hand).
|
||||
"""Top-level CI orchestrator (plan §4.3 + Phase 1d/1e), invoked by the Drone pipeline (or by hand).
|
||||
|
||||
Phase 1d model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment
|
||||
(install asserts; upgrade does `abra app upgrade` in place; backup/restore mutate in place; custom
|
||||
asserts), then ONE teardown in `finally`. Each tier's assertions come from exactly one file — a
|
||||
recipe overlay if present, else the generic default — discovered by `harness.discovery`
|
||||
(precedence repo-local > cc-ci > generic). The generic is the default for every op, so ANY recipe is
|
||||
testable with zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic),
|
||||
not per-recipe (DG7 DRY).
|
||||
Model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment, then ONE
|
||||
teardown in `finally`. Per Phase 1e the orchestrator OWNS each mutating op (HC3): for a tier it runs
|
||||
the optional pre-op seed hook (recipe ops.py `pre_<op>`), performs the op exactly ONCE
|
||||
(upgrade/backup/restore — install has none), then runs BOTH the generic assertion file (the floor,
|
||||
unless explicitly opted out) AND the recipe overlay assertion file (if any) against the shared
|
||||
post-op state — generic and overlay are ADDITIVE, not override (HC3). Op results an assertion needs
|
||||
(pre-upgrade identity, snapshot_id) pass op→assertion via a run-scoped JSON state file
|
||||
($CCCI_OP_STATE_FILE). The upgrade op is currently `abra app upgrade` (E1 baseline); HC1 redefines it
|
||||
as an `abra app deploy --chaos` of the PR-HEAD code under test. Repo-local (PR-authored) overlays/hooks run only for allowlist-approved recipes (HC2,
|
||||
gated in harness.discovery). The generic is the default for every op, so ANY recipe is testable with
|
||||
zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic), not per-recipe
|
||||
(DG7 DRY).
|
||||
|
||||
Run parameters from env (set by the comment-bridge via Drone build params):
|
||||
RECIPE recipe name (e.g. custom-html) [required]
|
||||
@ -23,7 +28,10 @@ invoke as: cc-ci-run runner/run_recipe_ci.py
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import glob
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
@ -37,6 +45,10 @@ from harness import discovery, generic, lifecycle, naming # noqa: E402
|
||||
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
|
||||
|
||||
|
||||
def _truthy(v: str | None) -> bool:
|
||||
return str(v or "").strip().lower() in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def _redact_values() -> list[str]:
|
||||
"""Values to scrub from published logs (D6 redaction filter, plan §4.4). The infra secrets
|
||||
materialised at /run/secrets/* — if any subprocess ever echoes one, mask it. Only >=8-char
|
||||
@ -56,6 +68,14 @@ def _redact_values() -> list[str]:
|
||||
_REDACT = _redact_values()
|
||||
|
||||
|
||||
def _scrub(text: str) -> str:
    """Return `text` with every known infra-secret value masked (D6 redaction, plan §4.4).

    Each value in the module-level `_REDACT` list is replaced by the literal `***REDACTED***`.
    """
    masked = text
    for secret in _REDACT:
        # str.replace leaves the string unchanged when the secret is absent, so no membership
        # test is needed first.
        masked = masked.replace(secret, "***REDACTED***")
    return masked
|
||||
|
||||
|
||||
def run_redacted(cmd: list[str], env: dict | None = None) -> int:
|
||||
"""Run a subprocess, streaming output live (so Drone logs stay tail-able) but masking any known
|
||||
infra-secret value first. Belt-and-suspenders: the harness never prints secrets and abra doesn't
|
||||
@ -71,10 +91,7 @@ def run_redacted(cmd: list[str], env: dict | None = None) -> int:
|
||||
)
|
||||
assert proc.stdout is not None
|
||||
for line in proc.stdout:
|
||||
for v in _REDACT:
|
||||
if v in line:
|
||||
line = line.replace(v, "***REDACTED***")
|
||||
sys.stdout.write(line)
|
||||
sys.stdout.write(_scrub(line))
|
||||
sys.stdout.flush()
|
||||
return proc.wait()
|
||||
|
||||
@ -149,7 +166,7 @@ def _load_meta(recipe: str) -> dict:
|
||||
ns: dict = {}
|
||||
with open(path) as fh:
|
||||
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
|
||||
for k in list(meta) + ["BACKUP_CAPABLE"]:
|
||||
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
|
||||
if k in ns:
|
||||
meta[k] = ns[k]
|
||||
return meta
|
||||
@ -159,15 +176,105 @@ def _tier_env(domain: str) -> dict:
|
||||
return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
|
||||
|
||||
|
||||
def run_op_tier(recipe: str, op: str, repo_local: str | None, domain: str) -> str:
|
||||
"""Run the single assertion file for a lifecycle op (overlay or generic) against the shared
|
||||
deployment. The file performs the op (upgrade/backup/restore) + asserts; install asserts only
|
||||
(already deployed). Returns 'pass' | 'fail'."""
|
||||
source, path = discovery.resolve_op(recipe, op, repo_local)
|
||||
rel = os.path.relpath(path, ROOT)
|
||||
print(f"\n===== TIER: {op} ({source}: {rel}) =====", flush=True)
|
||||
rc = run_redacted([sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain))
|
||||
return "pass" if rc == 0 else "fail"
|
||||
def _skip_generic(op: str, meta: dict) -> bool:
    """Decide whether the generic assertion for `op` is opted out (Phase 1e HC3).

    The default is to run the generic assertion (additive). It is skipped when any of these hold:
      - env CCCI_SKIP_GENERIC is truthy (applies to all ops);
      - env CCCI_SKIP_GENERIC_<OP> is truthy (op name upper-cased);
      - the recipe's declarative recipe_meta.SKIP_GENERIC names the op, or "all" / "*".

    Args:
        op: lifecycle op name, e.g. "upgrade"; matched case-insensitively.
        meta: recipe metadata; `SKIP_GENERIC` may be a list of names or a single bare string.

    Returns:
        True when the generic assertion must be skipped for this op.
    """
    if _truthy(os.environ.get("CCCI_SKIP_GENERIC")):
        return True
    if _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")):
        return True
    declared = meta.get("SKIP_GENERIC") or []
    if isinstance(declared, str):
        # A bare string (SKIP_GENERIC = "all") would otherwise be iterated character by
        # character and silently never match; treat it as a one-element list.
        declared = [declared]
    opted_out = {str(entry).strip().lower() for entry in declared}
    # Entries are lower-cased above, so compare against the lower-cased op as well.
    return bool(opted_out & {"all", "*", op.lower()})
|
||||
|
||||
|
||||
def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None:
    """Run the optional pre-op seed hook (recipe ops.py `pre_<op>`) BEFORE the harness performs the op.

    Part of the HC3 op/assertion split: overlays seed data-continuity markers / the backup→restore
    mutation here, then assert post-op in test_<op>.py. Which ops.py is consulted is decided by
    `discovery.pre_op_hook` (per the original note, cc-ci's ops.py is trusted and a repo-local one
    is consulted only for allowlist-approved recipes — the HC2 gate lives inside discovery).

    The hook module is imported in-process. Its directory is put at the front of sys.path for the
    duration of the call so an ops.py can import its sibling helpers, and removed again afterwards
    even if the hook raises.

    Args:
        recipe: recipe name, used for hook discovery and the synthetic module name.
        op: lifecycle op whose `pre_<op>` function is called.
        repo_local: optional repo-local checkout path, forwarded to discovery.
        domain: app domain passed to the hook.
        meta: recipe metadata passed to the hook.

    Raises:
        ImportError: if no import spec/loader can be built for the hook file.
        Exception: anything the hook module or its `pre_<op>` function raises propagates.
    """
    hook = discovery.pre_op_hook(recipe, op, repo_local)
    if not hook:
        return
    source, path = hook
    d = os.path.dirname(path)
    sys.path.insert(0, d)
    try:
        spec = importlib.util.spec_from_file_location(f"ccci_ops_{recipe}_{op}", path)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when it cannot pick a loader for the path;
            # fail with a clear message instead of an AttributeError on None.
            raise ImportError(f"cannot load pre-op hook module from {path}")
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        print(f"  pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True)
        getattr(mod, f"pre_{op}")(domain, meta)
    finally:
        # list.remove drops the first match, i.e. the entry inserted at index 0 above.
        if d in sys.path:
            sys.path.remove(d)
|
||||
|
||||
|
||||
def _perform_op(op: str, domain: str, target: str | None, op_state: dict) -> None:
    """Perform the single mutating op ONCE (the harness owns the op, HC3).

    Results the assertions need are written into `op_state` in place:
      - "upgrade": {"before": <pre-upgrade identity>}
      - "backup":  {"snapshot_id": <id or None>}
    "restore" records nothing, and any other op (install: already deployed) is a no-op.

    Per the original note, none of the op primitives call deploy_app, so the deploy-count guard
    (DG4.1) stays 1 — the in-place upgrade is not a new install (HC1 reconciliation).
    """
    if op == "upgrade":
        identity_before = generic.perform_upgrade(domain, target)
        op_state["upgrade"] = {"before": identity_before}
        return
    if op == "backup":
        snapshot_id = generic.perform_backup(domain)
        op_state["backup"] = {"snapshot_id": snapshot_id}
        return
    if op == "restore":
        generic.perform_restore(domain)
        return
    # install (or anything else): already deployed; no op
|
||||
|
||||
|
||||
def run_lifecycle_tier(
    recipe: str,
    op: str,
    repo_local: str | None,
    domain: str,
    meta: dict,
    target: str | None,
    op_state: dict,
) -> str:
    """Run one additive lifecycle tier (HC3) against the shared deployment.

    Sequence: pre-op seed hook → perform the op ONCE → run the generic assertion file (unless
    opted out) AND the overlay assertion file, each as its own pytest process.

    Args:
        recipe: recipe name, used for overlay/hook discovery.
        op: lifecycle op ("install", "upgrade", "backup", "restore").
        repo_local: optional repo-local checkout path, forwarded to discovery.
        domain: app domain of the shared deployment.
        meta: recipe metadata (consulted for the SKIP_GENERIC opt-out, passed to the hook).
        target: upgrade target version, or None.
        op_state: run-scoped dict mutated by the op and dumped to $CCCI_OP_STATE_FILE.

    Returns:
        'skip' when the generic is opted out and no overlay exists (the op is not performed),
        'fail' when the state file is not configured, the hook/op/state write raises, or any
        assertion file exits non-zero, else 'pass'.
    """
    overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
    skip_gen = _skip_generic(op, meta)
    files: list[tuple[str, str]] = []
    if not skip_gen:
        files.append(discovery.generic_op(op))
    if overlay:
        files.append(overlay)
    if not files:
        # generic opted out AND no overlay → nothing would assert; don't perform a pointless mutating op
        print(f"\n===== TIER: {op} — SKIP (generic opted out, no overlay) =====", flush=True)
        return "skip"

    ov = f"{overlay[0]}:{os.path.relpath(overlay[1], ROOT)}" if overlay else "none"
    print(
        f"\n===== TIER: {op} (generic={'skip' if skip_gen else 'run'}, overlay={ov}) =====",
        flush=True,
    )

    # Resolve the state file BEFORE mutating anything: if it is not configured the assertions
    # could never read the op results, so fail without running the hook or the op.
    state_path = os.environ.get("CCCI_OP_STATE_FILE")
    if not state_path:
        print(f"!! {op} op failed: CCCI_OP_STATE_FILE is not set", flush=True)
        return "fail"

    # 1) pre-op seed hook + 2) the op ONCE (harness-owned). A failure here is an op failure → tier fail.
    try:
        _run_pre_hook(recipe, op, repo_local, domain, meta)
        _perform_op(op, domain, target, op_state)
        with open(state_path, "w") as f:
            json.dump(op_state, f)
    except Exception as e:  # noqa: BLE001 — a failed op is a reported tier failure, not a crash
        print(f"!! {op} op failed: {_scrub(str(e))}", flush=True)
        return "fail"

    # 3) assertions: generic (unless opted out) + overlay, each its own pytest, all against the
    #    single post-op deployment. Generic runs first so an overlay may assume readiness.
    rc_all = 0
    for source, path in files:
        print(f"  assert ({source}): {os.path.relpath(path, ROOT)}", flush=True)
        rc = run_redacted(
            [sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain)
        )
        if rc != 0:
            # Keep going so every assertion file reports, but remember that one failed.
            rc_all = rc
    return "pass" if rc_all == 0 else "fail"
|
||||
|
||||
|
||||
def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
|
||||
@ -223,6 +330,14 @@ def main() -> int:
|
||||
f.write("0")
|
||||
os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile
|
||||
|
||||
# Run-scoped op state (HC3): the orchestrator records op results (pre-upgrade identity, backup
|
||||
# snapshot_id) here for the assertion tiers (generic + overlay) to read via generic.op_state().
|
||||
statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
|
||||
with open(statefile, "w") as f:
|
||||
json.dump({}, f)
|
||||
os.environ["CCCI_OP_STATE_FILE"] = statefile
|
||||
op_state: dict = {}
|
||||
|
||||
results: dict[str, str] = {}
|
||||
lifecycle.janitor()
|
||||
try:
|
||||
@ -243,28 +358,38 @@ def main() -> int:
|
||||
print(f"!! deploy/readiness failed: {e}", flush=True)
|
||||
deploy_ok = False
|
||||
|
||||
# ---- INSTALL tier (always) ----
|
||||
# ---- INSTALL tier (always; additive generic + overlay, no op) ----
|
||||
if "install" in stages:
|
||||
results["install"] = (
|
||||
run_op_tier(recipe, "install", repo_local, domain) if deploy_ok else "fail"
|
||||
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, target, op_state)
|
||||
if deploy_ok
|
||||
else "fail"
|
||||
)
|
||||
|
||||
if deploy_ok:
|
||||
# ---- UPGRADE tier ----
|
||||
# ---- UPGRADE tier (op once → generic + overlay assert) ----
|
||||
if "upgrade" in stages:
|
||||
results["upgrade"] = (
|
||||
run_op_tier(recipe, "upgrade", repo_local, domain)
|
||||
run_lifecycle_tier(
|
||||
recipe, "upgrade", repo_local, domain, meta, target, op_state
|
||||
)
|
||||
if prev
|
||||
else "skip" # only one published version → nothing to upgrade from
|
||||
)
|
||||
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
|
||||
if "backup" in stages:
|
||||
results["backup"] = (
|
||||
run_op_tier(recipe, "backup", repo_local, domain) if backup_cap else "skip"
|
||||
run_lifecycle_tier(recipe, "backup", repo_local, domain, meta, target, op_state)
|
||||
if backup_cap
|
||||
else "skip"
|
||||
)
|
||||
if "restore" in stages:
|
||||
results["restore"] = (
|
||||
run_op_tier(recipe, "restore", repo_local, domain) if backup_cap else "skip"
|
||||
run_lifecycle_tier(
|
||||
recipe, "restore", repo_local, domain, meta, target, op_state
|
||||
)
|
||||
if backup_cap
|
||||
else "skip"
|
||||
)
|
||||
# ---- CUSTOM tier ----
|
||||
if "custom" in stages:
|
||||
@ -281,6 +406,8 @@ def main() -> int:
|
||||
with open(countfile) as f:
|
||||
deploy_count = int(f.read().strip() or "0")
|
||||
os.remove(countfile)
|
||||
with contextlib.suppress(OSError):
|
||||
os.remove(statefile)
|
||||
|
||||
# ---- per-op summary (DG6 feed) ----
|
||||
print("\n===== RUN SUMMARY =====", flush=True)
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
"""Generic BACKUP tier (Phase 1d DG3) — recipe-agnostic, backup-capable recipes only.
|
||||
"""Generic BACKUP tier (Phase 1d DG3 + Phase 1e HC3) — recipe-agnostic, assertion-only.
|
||||
|
||||
Runs `abra app backup create` against the shared live deployment and asserts a snapshot artifact is
|
||||
produced (abra app backup snapshots is non-empty). Honest limit: the generic verifies the backup
|
||||
MECHANISM, not app-specific data integrity — that's a recipe overlay (test_backup.py seeds a marker).
|
||||
For recipes that declare no backup config the orchestrator skips this tier as N/A (not a failure)."""
|
||||
The orchestrator ran `abra app backup create` ONCE against the shared live deployment and recorded
|
||||
the produced snapshot id in the run-scoped op state. This tier ASSERTS a snapshot artifact was
|
||||
produced — it does NOT perform the op. Honest limit: the generic verifies the backup MECHANISM, not
|
||||
app-specific data integrity — that's a recipe overlay (test_backup.py). Runs by default ALONGSIDE any
|
||||
overlay (additive). For recipes that declare no backup config the orchestrator skips this tier (N/A)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -13,5 +14,4 @@ from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_backup_artifact(live_app, meta):
|
||||
snaps = generic.do_backup(live_app)
|
||||
assert snaps, "backup produced no snapshot artifact"
|
||||
assert generic.assert_backup_artifact(live_app), "backup produced no snapshot artifact"
|
||||
|
||||
@ -1,8 +1,9 @@
|
||||
"""Generic RESTORE tier (Phase 1d DG3) — recipe-agnostic, backup-capable recipes only.
|
||||
"""Generic RESTORE tier (Phase 1d DG3 + Phase 1e HC3) — recipe-agnostic, assertion-only.
|
||||
|
||||
Restores the latest snapshot (produced by the backup tier on the same shared deployment) and asserts
|
||||
the restore completes and the app is healthy + serving afterwards. App-specific data-integrity
|
||||
(marker survives) is a recipe overlay (test_restore.py); the generic verifies the restore mechanism."""
|
||||
The orchestrator restored the latest snapshot ONCE (produced by the backup op on the same shared
|
||||
deployment). This tier ASSERTS the restore completed and the app is healthy + serving afterwards — it
|
||||
does NOT perform the op. App-specific data-integrity (marker survives) is a recipe overlay
|
||||
(test_restore.py); the generic verifies the restore mechanism. Runs by default ALONGSIDE any overlay."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -12,4 +13,4 @@ from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_restore_healthy(live_app, meta):
|
||||
generic.do_restore(live_app, meta)
|
||||
generic.assert_restore_healthy(live_app, meta)
|
||||
|
||||
@ -1,9 +1,10 @@
|
||||
"""Generic UPGRADE tier (Phase 1d DG2) — recipe-agnostic.
|
||||
"""Generic UPGRADE tier (Phase 1d DG2 + Phase 1e HC3) — recipe-agnostic, assertion-only.
|
||||
|
||||
The orchestrator deployed the PREVIOUS published version once; this tier upgrades it IN PLACE
|
||||
(abra app upgrade) to the target (VERSION env, else newest published) on the same live deployment,
|
||||
then asserts it reconverges and still serves. Data-continuity is a recipe overlay (test_upgrade.py),
|
||||
not the generic — the generic verifies the upgrade mechanism + still-serving."""
|
||||
The orchestrator deployed the base version once and performed the upgrade ONCE in place (Phase 1e
|
||||
HC1: to the PR-head code under test via `abra app deploy --chaos`), recording the pre-upgrade
|
||||
identity in the run-scoped op state. This tier ASSERTS the upgrade reconverged, still serves, and
|
||||
actually MOVED the deployment (version/image/chaos label) — it does NOT perform the op. Runs by
|
||||
default ALONGSIDE any recipe overlay (additive); skipped only via an explicit opt-out."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -13,5 +14,4 @@ from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_upgrade_reconverges(live_app, meta):
|
||||
target = os.environ.get("VERSION") or None
|
||||
generic.do_upgrade(live_app, target, meta)
|
||||
generic.assert_upgraded(live_app, meta)
|
||||
|
||||
27
tests/cryptpad/ops.py
Normal file
27
tests/cryptpad/ops.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""cryptpad — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
|
||||
matching test_<op>.py asserts post-op (assertion-only). cryptpad data isn't HTTP-served (encrypted
|
||||
datastore), so the marker in the persistent cryptpad_data volume is read back via exec_in_app."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/cryptpad/data/ci-marker.txt"
|
||||
|
||||
|
||||
def _write(domain, val):
    """Overwrite the marker file at MARKER with `val`, via `sh -c` through lifecycle.exec_in_app."""
    shell_cmd = f"echo {val} > {MARKER}"
    lifecycle.exec_in_app(domain, ["sh", "-c", shell_cmd])
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-UPGRADE seed hook: write the marker value the upgrade overlay expects to survive."""
    marker_value = "upgrade-survives"
    _write(domain, marker_value)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-BACKUP seed hook: write the "original" marker state that the backup should capture."""
    marker_value = "original"
    _write(domain, marker_value)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-RESTORE seed hook: overwrite the marker so a successful restore is observable."""
    # Diverge from the backed-up "original"; the restore overlay checks that it comes back.
    marker_value = "mutated"
    _write(domain, marker_value)
|
||||
@ -1,30 +1,19 @@
|
||||
"""cryptpad — BACKUP overlay (Phase 1d, DG4): seed a known state into the backed-up cryptpad_data
|
||||
volume, back it up (assert a snapshot artifact), then mutate so the RESTORE overlay (test_restore.py)
|
||||
can prove the backed-up state returns. Runs on the shared deployment; the mutated marker persists for
|
||||
the restore tier.
|
||||
"""cryptpad — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
The cryptpad `app` service is labelled `backupbot.backup=true`, so its volumes (incl. cryptpad_data)
|
||||
are backed up. Marker is checked via `exec_in_app` (data isn't HTTP-served)."""
|
||||
ops.pre_backup seeded "original" into cryptpad_data; the orchestrator performed the backup once
|
||||
(generic tier asserted a snapshot artifact). This overlay ADDS: the seeded state is intact at backup
|
||||
time. The backup→restore divergence is in ops.pre_restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/cryptpad/data/ci-marker.txt"
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
|
||||
# 1) establish original state in the backed-up volume, then back it up (reuse the generic op:
|
||||
# backup + assert a snapshot artifact was produced)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# 2) mutate state (diverge from the backup)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
|
||||
def test_backup_captures_state(live_app):
    """Assertion-only BACKUP overlay: the marker still reads "original" after the backup op."""
    marker_value = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert marker_value == "original", "the seeded state was not present at backup time"
|
||||
|
||||
@ -1,24 +1,19 @@
|
||||
"""cryptpad — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""cryptpad — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the
|
||||
cryptpad_data marker mutated to "mutated" after backing up "original". This restores the snapshot via
|
||||
the shared op helper (`generic.do_restore`, which also asserts the app is healthy + serving
|
||||
afterwards), then asserts the volume data returned to the pre-mutation "original" — the app-specific
|
||||
data integrity the generic restore cannot check. Reads the marker via `exec_in_app` (data isn't
|
||||
HTTP-served). Assertion-only (no deploy/teardown)."""
|
||||
ops.pre_restore mutated the cryptpad_data marker to "mutated"; the orchestrator restored once
|
||||
(generic tier asserted healthy/serving). This overlay ADDS: the volume data returned to the
|
||||
pre-mutation (backed-up) "original". Read via exec_in_app."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/cryptpad/data/ci-marker.txt"
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
def test_restore_returns_state(live_app):
|
||||
assert (
|
||||
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
|
||||
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "original"
|
||||
), "restore did not return the pre-mutation state"
|
||||
|
||||
@ -1,31 +1,19 @@
|
||||
"""cryptpad — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""cryptpad — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay writes a marker into the
|
||||
persistent cryptpad_data volume (cryptpad data isn't HTTP-served as a static file — it's an encrypted
|
||||
datastore — so the marker is read back via `exec_in_app`, not HTTP), performs the in-place upgrade via
|
||||
the shared op helper (`generic.do_upgrade`, which also asserts reconverge + serving + that the
|
||||
deployment moved), then asserts the data SURVIVED. Assertion-only on the shared deployment."""
|
||||
ops.pre_upgrade seeded a marker into the persistent cryptpad_data volume; the orchestrator performed
|
||||
the upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the data
|
||||
survived the upgrade. Read via exec_in_app (cryptpad data isn't HTTP-served)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/cryptpad/data/ci-marker.txt"
|
||||
|
||||
|
||||
def test_upgrade_preserves_data(live_app, meta):
|
||||
domain = live_app
|
||||
# write a data marker into the persistent cryptpad_data volume
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
# app healthy and the data written before the upgrade is still there
|
||||
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
|
||||
def test_upgrade_preserves_data(live_app):
|
||||
assert (
|
||||
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
|
||||
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "upgrade-survives"
|
||||
), "data did not survive the upgrade"
|
||||
|
||||
32
tests/custom-html/ops.py
Normal file
32
tests/custom-html/ops.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""custom-html — pre-op seed hooks (Phase 1e HC3). The orchestrator runs `pre_<op>(domain, meta)`
|
||||
BEFORE it performs the op; the matching test_<op>.py asserts the post-op state (assertion-only).
|
||||
|
||||
nginx serves the volume at /usr/share/nginx/html, so the marker file survives an upgrade / a
|
||||
backup+restore of that volume and is both HTTP-readable and exec-readable."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||
|
||||
|
||||
def _write(domain: str, val: str) -> None:
    """Write `val` into the marker file inside the app container and confirm it landed.

    The value is written with a shell redirect (`echo … > MARKER_PATH`) and then read back with
    `cat`, so a seed that silently failed is reported here — by the pre-op hook — instead of
    surfacing later as a misleading post-op assertion failure in the overlay test.

    Args:
        domain: the deployed app to exec into.
        val: marker text to store. It is interpolated unquoted into the shell command, so it
            must be a plain token without whitespace or shell metacharacters.

    Raises:
        AssertionError: if the value read back differs from `val`.
    """
    lifecycle.exec_in_app(domain, ["sh", "-c", f"echo {val} > {MARKER_PATH}"])
    # read the file back (echo appends a newline, hence the strip) to prove the seed took
    written = lifecycle.exec_in_app(domain, ["cat", MARKER_PATH]).strip()
    assert written == val, f"{domain}: marker seed did not take: wanted {val!r}, got {written!r}"
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: seed the marker so the overlay can prove the data survives the upgrade.

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "upgrade-survives"
    _write(domain, marker_value)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-backup hook: establish a known original state for the backup op to capture.

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "original"
    _write(domain, marker_value)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-restore hook: diverge from the backed-up state.

    Overwrites the marker with "mutated" so that a successful restore (back to "original") is
    observable. `meta` is part of the hook signature and is not used here.
    """
    marker_value = "mutated"
    _write(domain, marker_value)
|
||||
@ -1,34 +1,21 @@
|
||||
"""custom-html — BACKUP overlay (Phase 1d, DG4): seed a known state, back it up (assert artifact),
|
||||
then mutate so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on
|
||||
the shared deployment; the marker it leaves ("mutated") persists for the restore tier.
|
||||
"""custom-html — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
Reads the marker via `exec_in_app` (the file in the volume), NOT http: backup/restore preserve the
|
||||
VOLUME, and reading it directly is immune to the serving/container-routing race right after
|
||||
backup-bot-two cycles the app container (HTTP briefly served empty). Serving is proven separately by
|
||||
the install/upgrade tiers' assert_serving."""
|
||||
The orchestrator ran `ops.pre_backup` (seeded "original" into the served volume), then performed the
|
||||
backup ONCE. The generic backup tier already asserted a snapshot artifact was produced; this overlay
|
||||
ADDS the recipe-specific check: the seeded "original" state is intact in the volume post-backup
|
||||
(pre-mutation). The backup→restore divergence happens in `ops.pre_restore`. Reads via exec_in_app
|
||||
(volume-direct), immune to the post-backup serving race after backup-bot-two cycles the container."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||
|
||||
|
||||
def _marker(domain: str) -> str:
|
||||
return lifecycle.exec_in_app(domain, ["cat", MARKER_PATH]).strip()
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
# 1) establish a known original state, then back it up (reuse the generic op: backup + assert a
|
||||
# snapshot artifact was produced)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER_PATH}"])
|
||||
assert _marker(domain) == "original"
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# 2) mutate state so a successful restore is observable (diverge from the backup)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"])
|
||||
assert _marker(domain) == "mutated"
|
||||
def test_backup_captures_state(live_app):
    """Assert the marker file in the volume still reads "original" after the backup.

    Assertion-only: reads the file directly with `cat` via `exec_in_app` rather than over HTTP.
    """
    observed = lifecycle.exec_in_app(live_app, ["cat", MARKER_PATH]).strip()
    assert observed == "original", "the seeded state was not present at backup time"
|
||||
|
||||
@ -1,25 +1,22 @@
|
||||
"""custom-html — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""custom-html — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left state
|
||||
mutated to "mutated" after backing up "original". This restores the snapshot via the shared op
|
||||
helper (`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then
|
||||
asserts the VOLUME data returned to the pre-mutation "original" — the app-specific data integrity the
|
||||
generic restore cannot check. Reads the marker via exec_in_app (volume-direct, robust to the
|
||||
post-restore serving race). Assertion-only (no deploy/teardown)."""
|
||||
The orchestrator ran `ops.pre_restore` (mutated the marker to "mutated", diverging from the backed-up
|
||||
"original"), then performed the restore ONCE. The generic restore tier already asserted healthy +
|
||||
serving; this overlay ADDS the recipe-specific check: the volume data returned to the pre-mutation
|
||||
(backed-up) "original". Reads via exec_in_app (volume-direct), robust to the post-restore serving
|
||||
race."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
restored = lifecycle.exec_in_app(domain, ["cat", MARKER_PATH]).strip()
|
||||
def test_restore_returns_state(live_app):
|
||||
restored = lifecycle.exec_in_app(live_app, ["cat", MARKER_PATH]).strip()
|
||||
assert (
|
||||
restored == "original"
|
||||
), f"restore did not return the pre-mutation (backed-up) state: got {restored!r}"
|
||||
|
||||
@ -1,29 +1,21 @@
|
||||
"""custom-html — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""custom-html — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay seeds a marker into the
|
||||
served volume, performs the in-place upgrade via the shared op helper (`generic.do_upgrade`, which
|
||||
also asserts reconverge + serving), then asserts the data SURVIVED. Assertion-only on the shared
|
||||
deployment (no deploy/teardown here)."""
|
||||
The orchestrator deployed the base version, ran `ops.pre_upgrade` (seeded a marker into the served
|
||||
volume), then performed the upgrade ONCE. The generic upgrade tier already asserted reconverge +
|
||||
serving + moved; this overlay runs ALONGSIDE it and ADDS the recipe-specific check: the data written
|
||||
before the upgrade survived it. No op, no deploy/teardown here."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||
|
||||
|
||||
def test_upgrade_preserves_data(live_app, meta):
|
||||
domain = live_app
|
||||
# write a data marker into the served volume (nginx serves /usr/share/nginx/html)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER_PATH}"])
|
||||
assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
# the data written before the upgrade is still there
|
||||
def test_upgrade_preserves_data(live_app):
|
||||
# the marker seeded by ops.pre_upgrade (before the harness upgraded) is still served
|
||||
assert (
|
||||
lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
|
||||
lifecycle.http_fetch(live_app, "/ci-marker.txt")[1].strip() == "upgrade-survives"
|
||||
), "data did not survive the upgrade"
|
||||
|
||||
33
tests/keycloak/ops.py
Normal file
33
tests/keycloak/ops.py
Normal file
@ -0,0 +1,33 @@
|
||||
"""keycloak — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
|
||||
matching test_<op>.py asserts post-op (assertion-only). The data marker is a realm in mariadb,
|
||||
written via the keycloak admin API (kc_admin)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
import kc_admin # noqa: E402
|
||||
from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def _token(domain):
    """Return an admin token for `domain`, obtained with the admin password kc_admin looks up."""
    password = kc_admin.admin_password(domain)
    return kc_admin.admin_token(domain, password)
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: create the marker realm (DB data) so the overlay can prove it survives.

    `meta` is part of the hook signature and is not used here.
    """
    status = kc_admin.create_marker_realm(domain, _token(domain))
    # presumably HTTP statuses: 201 = created now, 409 = realm already exists — both acceptable
    assert status in (201, 409)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-backup hook: make sure the marker realm exists before the backup op captures mariadb.

    `meta` is part of the hook signature and is not used here.
    """
    status = kc_admin.create_marker_realm(domain, _token(domain))
    # presumably HTTP statuses: 201 = created now, 409 = realm already exists — both acceptable
    assert status in (201, 409)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-restore hook: delete the marker realm so a successful restore is observable.

    backup-bot-two cycles the keycloak container during backup, so this first waits for the app
    to be serving again and fetches a fresh token before touching the admin API.
    """
    generic.assert_serving(domain, meta)
    fresh_token = _token(domain)

    # diverge from the backup: remove the realm, then confirm it is really gone
    delete_status = kc_admin.delete_marker_realm(domain, fresh_token)
    assert delete_status in (204, 200)
    still_there = kc_admin.marker_realm_exists(domain, fresh_token)
    assert not still_there, "delete did not take"
|
||||
@ -1,7 +1,9 @@
|
||||
"""keycloak — BACKUP overlay (Phase 1d, DG4): seed a known state (the marker realm in mariadb),
|
||||
back it up (assert a snapshot artifact), then mutate (delete the realm) so the RESTORE overlay
|
||||
(test_restore.py) can prove the backed-up state returns. Runs on the shared deployment; the mutated
|
||||
state persists for the restore tier."""
|
||||
"""keycloak — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
ops.pre_backup created the marker realm before the backup op captured mariadb; the orchestrator
|
||||
performed the backup once (generic tier asserted a snapshot artifact). This overlay ADDS: the marker
|
||||
realm is present at backup time. backup-bot-two cycles the container during backup, so wait for
|
||||
serving + re-auth first. The backup→restore divergence (deleting the realm) is in ops.pre_restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -11,22 +13,7 @@ import kc_admin # noqa: E402
|
||||
from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
pw = kc_admin.admin_password(domain)
|
||||
tok = kc_admin.admin_token(domain, pw)
|
||||
|
||||
# 1) create the marker realm, then back up (reuse the generic op: backup + assert a snapshot)
|
||||
assert kc_admin.create_marker_realm(domain, tok) in (201, 409)
|
||||
assert kc_admin.marker_realm_exists(domain, tok)
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# backup-bot-two cycles the keycloak container during backup, so the admin API is briefly 502.
|
||||
# Wait for it to be serving again, then re-auth, before mutating via the HTTP admin API.
|
||||
generic.assert_serving(domain, meta)
|
||||
tok = kc_admin.admin_token(domain, pw)
|
||||
|
||||
# 2) mutate: delete the realm (diverge from the backup)
|
||||
assert kc_admin.delete_marker_realm(domain, tok) in (204, 200)
|
||||
assert not kc_admin.marker_realm_exists(domain, tok), "delete did not take"
|
||||
def test_backup_captures_realm(live_app, meta):
    """Assert the marker realm is present on the deployment after the backup.

    Assertion-only: waits for the app to serve, authenticates, then checks the realm exists.
    """
    # container cycled during backup; wait for it to be back before using the admin API
    generic.assert_serving(live_app, meta)
    password = kc_admin.admin_password(live_app)
    tok = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, tok)
    assert realm_present, "marker realm not present at backup time"
|
||||
|
||||
@ -1,22 +1,16 @@
|
||||
"""keycloak — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""keycloak — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the marker
|
||||
realm deleted after backing it up. This restores the snapshot via the shared op helper
|
||||
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then asserts the
|
||||
marker realm returned (mariadb restored to the backed-up state) — the app-specific data integrity
|
||||
the generic restore cannot check. Assertion-only (no deploy/teardown)."""
|
||||
ops.pre_restore deleted the marker realm (diverge from the backup); the orchestrator restored once
|
||||
(generic tier asserted healthy/serving). This overlay ADDS: the marker realm returned (mariadb
|
||||
restored to the backed-up state). Re-auths post-restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
import kc_admin # noqa: E402
|
||||
from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
pw = kc_admin.admin_password(domain)
|
||||
tok = kc_admin.admin_token(domain, pw)
|
||||
assert kc_admin.marker_realm_exists(domain, tok), "restore did not bring back the realm"
|
||||
def test_restore_returns_realm(live_app):
    """Assert the marker realm exists again after the restore (fresh token, assertion-only)."""
    password = kc_admin.admin_password(live_app)
    tok = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, tok)
    assert realm_present, "restore did not bring back the realm"
|
||||
|
||||
@ -1,28 +1,16 @@
|
||||
"""keycloak — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""keycloak — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay creates a marker realm
|
||||
(DB data in mariadb) on the live app, performs the in-place upgrade via the shared op helper
|
||||
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
|
||||
asserts the realm SURVIVED (mariadb data preserved). Assertion-only on the shared deployment."""
|
||||
ops.pre_upgrade created a marker realm (mariadb) before the upgrade; the orchestrator performed the
|
||||
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the realm survived
|
||||
(mariadb data preserved). Re-auths post-upgrade."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
import kc_admin # noqa: E402
|
||||
from harness import generic # noqa: E402
|
||||
|
||||
|
||||
def test_upgrade_preserves_realm(live_app, meta):
|
||||
domain = live_app
|
||||
pw = kc_admin.admin_password(domain)
|
||||
tok = kc_admin.admin_token(domain, pw)
|
||||
assert kc_admin.create_marker_realm(domain, tok) in (201, 409)
|
||||
assert kc_admin.marker_realm_exists(domain, tok), "marker realm not created"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
# re-auth (token from the old instance is fine, but get a fresh one post-upgrade) and verify
|
||||
tok2 = kc_admin.admin_token(domain, pw)
|
||||
assert kc_admin.marker_realm_exists(domain, tok2), "realm did not survive the upgrade"
|
||||
def test_upgrade_preserves_realm(live_app):
    """Assert the marker realm still exists after the upgrade (fresh token, assertion-only)."""
    password = kc_admin.admin_password(live_app)
    tok = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, tok)
    assert realm_present, "realm did not survive the upgrade"
|
||||
|
||||
41
tests/lasuite-docs/ops.py
Normal file
41
tests/lasuite-docs/ops.py
Normal file
@ -0,0 +1,41 @@
|
||||
"""lasuite-docs — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
|
||||
matching test_<op>.py asserts post-op (assertion-only). The marker is a dedicated `ci_marker` row in
|
||||
postgres (the app's Django migrations don't touch it), written via psql in the `db` service. The
|
||||
backup path exercises the recipe's pg_backup.sh DB-dump hook (postgres + minio are backupbot-labelled)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
    """Run `sql` with psql inside the `db` service and return its output, stripped.

    The password is read from the /run/secrets/postgres_p file inside the container. `sql` is
    placed inside double quotes in the shell command, so it must not itself contain double
    quotes or other characters the shell expands inside them.
    """
    shell_line = f'PGPASSWORD=$(cat /run/secrets/postgres_p) psql -U docs -d docs -tAc "{sql}"'
    raw = lifecycle.exec_in_app(domain, ["sh", "-c", shell_line], service="db")
    return raw.strip()
|
||||
|
||||
|
||||
def _seed(domain, value):
    """Reset the `ci_marker` table to hold exactly one row, `value`, and verify the write.

    Creates the table if missing, clears it, inserts `value`, then reads it back and asserts
    the stored value matches.
    """
    reset_and_insert = (
        "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
        f"INSERT INTO ci_marker VALUES('{value}');"
    )
    _psql(domain, reset_and_insert)
    stored = _psql(domain, "SELECT v FROM ci_marker;")
    assert stored == value
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: seed the postgres marker row with "upgrade-survives".

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "upgrade-survives"
    _seed(domain, marker_value)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-backup hook: seed the postgres marker row with "original".

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "original"
    _seed(domain, marker_value)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-restore hook: drop the marker table so a successful restore is observable.

    After the drop, looks the table up with `to_regclass` and asserts the lookup comes back
    empty (or the literal "NULL"). `meta` is part of the hook signature and is not used here.
    """
    # diverge from the backup
    _psql(domain, "DROP TABLE ci_marker;")
    lookup = _psql(domain, "SELECT to_regclass('public.ci_marker');")
    assert lookup in ("", "NULL"), "drop did not take"
|
||||
@ -1,16 +1,15 @@
|
||||
"""lasuite-docs — BACKUP overlay (Phase 1d, DG4): seed a postgres marker, back it up (pg_backup.sh
|
||||
pre-hook dumps the DB; assert a snapshot artifact), then mutate (drop it) so the RESTORE overlay
|
||||
(test_restore.py) can prove the backed-up state returns. Runs on the shared deployment; the mutated
|
||||
state persists for the restore tier.
|
||||
"""lasuite-docs — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
Exercises the recipe's real DB-dump backup hook (postgres + minio are both backupbot-labelled); the
|
||||
postgres marker is the meaningful Docs-metadata data path."""
|
||||
ops.pre_backup wrote "original" into postgres before the backup op (pg_backup.sh dumps the DB); the
|
||||
orchestrator performed the backup once (generic tier asserted a snapshot artifact). This overlay
|
||||
ADDS: the seeded row is intact at backup time. The backup→restore divergence (dropping the table) is
|
||||
in ops.pre_restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -18,23 +17,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
|
||||
# 1) establish original state in postgres, then back up (reuse the generic op: backup +
|
||||
# assert a snapshot artifact; pg_backup.sh dumps the DB)
|
||||
_psql(
|
||||
domain,
|
||||
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
|
||||
"INSERT INTO ci_marker VALUES('original');",
|
||||
)
|
||||
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# 2) mutate: drop the marker table (diverge from the backup)
|
||||
_psql(domain, "DROP TABLE ci_marker;")
|
||||
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
|
||||
"",
|
||||
"NULL",
|
||||
), "drop did not take"
|
||||
def test_backup_captures_state(live_app):
    """Assert the postgres marker row still reads "original" (assertion-only, read via psql)."""
    stored = _psql(live_app, "SELECT v FROM ci_marker;")
    assert stored == "original", "the seeded postgres state was not present at backup time"
|
||||
|
||||
@ -1,17 +1,14 @@
|
||||
"""lasuite-docs — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""lasuite-docs — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the postgres
|
||||
marker table dropped after dumping it. This restores the snapshot via the shared op helper
|
||||
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards; the recipe's
|
||||
restore.post-hook reloads the dump), then asserts the restored DB matches the pre-mutation "original"
|
||||
— the app-specific data integrity the generic restore cannot check. Reads via `psql` in the `db`
|
||||
service. Assertion-only (no deploy/teardown)."""
|
||||
ops.pre_restore dropped the marker table (diverge); the orchestrator restored once (generic tier
|
||||
asserted healthy/serving; the recipe's restore.post-hook reloads the dump). This overlay ADDS: the
|
||||
restored DB matches the pre-mutation "original". Read via psql in the `db` service."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -19,9 +16,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
def test_restore_returns_state(live_app):
|
||||
assert (
|
||||
_psql(domain, "SELECT v FROM ci_marker;") == "original"
|
||||
_psql(live_app, "SELECT v FROM ci_marker;") == "original"
|
||||
), "restore did not return the pre-mutation postgres state"
|
||||
|
||||
@ -1,16 +1,14 @@
|
||||
"""lasuite-docs — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""lasuite-docs — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay writes a marker row into
|
||||
postgres (a dedicated `ci_marker` table the app's own Django migrations don't touch, read back via
|
||||
`psql` in the `db` service), performs the in-place upgrade via the shared op helper
|
||||
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
|
||||
asserts the postgres data SURVIVED. Assertion-only on the shared deployment."""
|
||||
ops.pre_upgrade wrote a postgres marker row before the upgrade; the orchestrator performed the
|
||||
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the postgres data
|
||||
survived. Read via psql in the `db` service."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -18,19 +16,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_upgrade_preserves_data(live_app, meta):
|
||||
domain = live_app
|
||||
_psql(
|
||||
domain,
|
||||
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
|
||||
"INSERT INTO ci_marker VALUES('upgrade-survives');",
|
||||
)
|
||||
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
|
||||
def test_upgrade_preserves_data(live_app):
|
||||
assert (
|
||||
_psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
|
||||
_psql(live_app, "SELECT v FROM ci_marker;") == "upgrade-survives"
|
||||
), "postgres data did not survive the upgrade"
|
||||
|
||||
41
tests/matrix-synapse/ops.py
Normal file
41
tests/matrix-synapse/ops.py
Normal file
@ -0,0 +1,41 @@
|
||||
"""matrix-synapse — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
|
||||
matching test_<op>.py asserts post-op (assertion-only). The marker is a dedicated `ci_marker` row in
|
||||
postgres (synapse's own schema migrations don't touch it), written via psql in the `db` service. The
|
||||
backup path exercises the recipe's pg_backup.sh DB-dump hook, not a plain volume copy."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
    """Run `sql` with psql inside the `db` service and return its output, stripped.

    The password is read from the /run/secrets/db_password file inside the container. `sql` is
    placed inside double quotes in the shell command, so it must not itself contain double
    quotes or other characters the shell expands inside them.
    """
    shell_line = f'PGPASSWORD=$(cat /run/secrets/db_password) psql -U synapse -d synapse -tAc "{sql}"'
    raw = lifecycle.exec_in_app(domain, ["sh", "-c", shell_line], service="db")
    return raw.strip()
|
||||
|
||||
|
||||
def _seed(domain, value):
    """Reset the `ci_marker` table to hold exactly one row, `value`, and verify the write.

    Creates the table if missing, clears it, inserts `value`, then reads it back and asserts
    the stored value matches.
    """
    reset_and_insert = (
        "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
        f"INSERT INTO ci_marker VALUES('{value}');"
    )
    _psql(domain, reset_and_insert)
    stored = _psql(domain, "SELECT v FROM ci_marker;")
    assert stored == value
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: seed the postgres marker row with "upgrade-survives".

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "upgrade-survives"
    _seed(domain, marker_value)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-backup hook: seed the postgres marker row with "original".

    `meta` is part of the hook signature and is not used here.
    """
    marker_value = "original"
    _seed(domain, marker_value)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-restore hook: drop the marker table so a successful restore is observable.

    After the drop, looks the table up with `to_regclass` and asserts the lookup comes back
    empty (or the literal "NULL"). `meta` is part of the hook signature and is not used here.
    """
    # diverge from the backup
    _psql(domain, "DROP TABLE ci_marker;")
    lookup = _psql(domain, "SELECT to_regclass('public.ci_marker');")
    assert lookup in ("", "NULL"), "drop did not take"
|
||||
@ -1,16 +1,15 @@
|
||||
"""matrix-synapse — BACKUP overlay (Phase 1d, DG4): seed a postgres marker, back it up (the recipe's
|
||||
pg_backup.sh pre-hook dumps the DB to backup.sql; assert a snapshot artifact), then mutate (drop the
|
||||
marker) so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on the
|
||||
shared deployment; the mutated state persists for the restore tier.
|
||||
"""matrix-synapse — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
This exercises the real DB-dump backup hook (backupbot.backup.pre-hook / restore.post-hook), not a
|
||||
plain volume copy — the meaningful data path for a postgres-backed app."""
|
||||
ops.pre_backup wrote "original" into postgres before the backup op (pg_backup.sh dumps the DB); the
|
||||
orchestrator performed the backup once (generic tier asserted a snapshot artifact). This overlay
|
||||
ADDS: the seeded row is intact at backup time. The backup→restore divergence (dropping the table) is
|
||||
in ops.pre_restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -18,23 +17,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
|
||||
# 1) establish original state in postgres, then back up (reuse the generic op: backup +
|
||||
# assert a snapshot artifact; pg_backup.sh dumps the DB)
|
||||
_psql(
|
||||
domain,
|
||||
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
|
||||
"INSERT INTO ci_marker VALUES('original');",
|
||||
)
|
||||
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# 2) mutate: drop the marker table (diverge from the backup)
|
||||
_psql(domain, "DROP TABLE ci_marker;")
|
||||
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
|
||||
"",
|
||||
"NULL",
|
||||
), "drop did not take"
|
||||
def test_backup_captures_state(live_app):
    """Assert the postgres marker row still reads "original" (assertion-only, read via psql)."""
    stored = _psql(live_app, "SELECT v FROM ci_marker;")
    assert stored == "original", "the seeded postgres state was not present at backup time"
|
||||
|
||||
@ -1,17 +1,14 @@
|
||||
"""matrix-synapse — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""matrix-synapse — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the postgres
|
||||
marker table dropped after dumping it. This restores the snapshot via the shared op helper
|
||||
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards; the recipe's
|
||||
restore.post-hook reloads the dump), then asserts the restored DB matches the pre-mutation "original"
|
||||
— the app-specific data integrity the generic restore cannot check. Reads via `psql` in the `db`
|
||||
service. Assertion-only (no deploy/teardown)."""
|
||||
ops.pre_restore dropped the marker table (diverge); the orchestrator restored once (generic tier
|
||||
asserted healthy/serving; the recipe's restore.post-hook reloads the dump). This overlay ADDS: the
|
||||
restored DB matches the pre-mutation "original". Read via psql in the `db` service."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -19,9 +16,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
def test_restore_returns_state(live_app):
    """Assert the postgres marker row reads back as "original" after the restore.

    Assertion-only: the row is read with ``_psql`` against ``live_app``; no op is performed here.
    """
    # ``live_app`` is the only domain name in scope; this signature defines no ``domain`` local.
    restored = _psql(live_app, "SELECT v FROM ci_marker;")
    assert restored == "original", "restore did not return the pre-mutation postgres state"
|
||||
|
||||
@ -1,16 +1,14 @@
|
||||
"""matrix-synapse — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""matrix-synapse — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay writes a marker row into
|
||||
postgres (a dedicated `ci_marker` table synapse's own schema migrations don't touch, read back via
|
||||
`psql` in the `db` service), performs the in-place upgrade via the shared op helper
|
||||
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
|
||||
asserts the postgres data SURVIVED. Assertion-only on the shared deployment."""
|
||||
ops.pre_upgrade wrote a postgres marker row before the upgrade; the orchestrator performed the
|
||||
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the postgres data
|
||||
survived. Read via psql in the `db` service."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
|
||||
def _psql(domain, sql):
|
||||
@ -18,21 +16,7 @@ def _psql(domain, sql):
|
||||
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
|
||||
|
||||
|
||||
def test_upgrade_preserves_data(live_app, meta):
|
||||
domain = live_app
|
||||
# write a marker row into postgres (independent of synapse's own tables)
|
||||
_psql(
|
||||
domain,
|
||||
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
|
||||
"INSERT INTO ci_marker VALUES('upgrade-survives');",
|
||||
)
|
||||
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
# app healthy and the data written before the upgrade is still there
|
||||
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
|
||||
def test_upgrade_preserves_data(live_app):
    """Assert the postgres marker row still reads "upgrade-survives" after the upgrade.

    Assertion-only: the row is read with ``_psql`` against ``live_app``; no op is performed here.
    """
    # ``live_app`` is the only domain name in scope; this signature defines no ``domain`` local.
    surviving = _psql(live_app, "SELECT v FROM ci_marker;")
    assert surviving == "upgrade-survives", "postgres data did not survive the upgrade"
|
||||
|
||||
27
tests/n8n/ops.py
Normal file
27
tests/n8n/ops.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""n8n — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the matching
|
||||
test_<op>.py asserts post-op (assertion-only). n8n state lives in the persistent /home/node/.n8n
|
||||
volume (sqlite + config); the marker there is read back via exec_in_app (not HTTP-served)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/home/node/.n8n/ci-marker.txt"
|
||||
|
||||
|
||||
def _write(domain, val):
    """Overwrite the MARKER file with ``val`` by running ``echo`` under ``sh -c`` via exec_in_app.

    ``val`` is interpolated into the shell line unquoted; the hooks in this module only pass fixed
    literal tokens.
    """
    shell_line = f"echo {val} > {MARKER}"
    lifecycle.exec_in_app(domain, ["sh", "-c", shell_line])
|
||||
|
||||
|
||||
def pre_upgrade(domain, meta):
    """Pre-upgrade seed hook: write the "upgrade-survives" marker. ``meta`` is accepted but unused."""
    seed = "upgrade-survives"
    _write(domain, seed)
|
||||
|
||||
|
||||
def pre_backup(domain, meta):
    """Pre-backup seed hook: write the "original" marker. ``meta`` is accepted but unused."""
    seed = "original"
    _write(domain, seed)
|
||||
|
||||
|
||||
def pre_restore(domain, meta):
    """Pre-restore hook: overwrite the marker with "mutated". ``meta`` is accepted but unused."""
    # Diverge from the backed-up value so a successful restore is observable.
    diverged = "mutated"
    _write(domain, diverged)
|
||||
@ -1,30 +1,19 @@
|
||||
"""n8n — BACKUP overlay (Phase 1d, DG4): seed a known state into the backed-up /home/node/.n8n path,
|
||||
back it up (assert a snapshot artifact), then mutate so the RESTORE overlay (test_restore.py) can
|
||||
prove the backed-up state returns. Runs on the shared deployment; the mutated marker persists for the
|
||||
restore tier.
|
||||
"""n8n — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
|
||||
|
||||
The n8n `app` service is labelled `backupbot.backup=true` with `backupbot.backup.path=/home/node/.n8n`,
|
||||
so a marker file there is backed up; checked via `exec_in_app`."""
|
||||
ops.pre_backup seeded "original" into the backed-up /home/node/.n8n path; the orchestrator performed
|
||||
the backup once (generic tier asserted a snapshot artifact). This overlay ADDS: the seeded state is
|
||||
intact at backup time. The backup→restore divergence is in ops.pre_restore."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/home/node/.n8n/ci-marker.txt"
|
||||
|
||||
|
||||
def test_backup_captures_state(live_app, meta):
|
||||
domain = live_app
|
||||
|
||||
# 1) establish original state in the backed-up path, then back it up (reuse the generic op:
|
||||
# backup + assert a snapshot artifact was produced)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
|
||||
snap = generic.do_backup(domain)
|
||||
assert snap, "backup produced no snapshot artifact"
|
||||
|
||||
# 2) mutate state (diverge from the backup)
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
|
||||
def test_backup_captures_state(live_app):
    """Assert the MARKER file reads back as "original".

    Assertion-only: the file is read with ``cat`` through ``lifecycle.exec_in_app``; nothing is
    written here.
    """
    marker_text = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert marker_text == "original", "the seeded state was not present at backup time"
|
||||
|
||||
@ -1,24 +1,19 @@
|
||||
"""n8n — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
|
||||
"""n8n — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
|
||||
|
||||
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the
|
||||
/home/node/.n8n marker mutated to "mutated" after backing up "original". This restores the snapshot
|
||||
via the shared op helper (`generic.do_restore`, which also asserts the app is healthy + serving
|
||||
afterwards), then asserts the data returned to the pre-mutation "original" — the app-specific data
|
||||
integrity the generic restore cannot check. Reads via `exec_in_app`. Assertion-only (no
|
||||
deploy/teardown)."""
|
||||
ops.pre_restore mutated the /home/node/.n8n marker to "mutated"; the orchestrator restored once
|
||||
(generic tier asserted healthy/serving). This overlay ADDS: the data returned to the pre-mutation
|
||||
(backed-up) "original". Read via exec_in_app."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/home/node/.n8n/ci-marker.txt"
|
||||
|
||||
|
||||
def test_restore_returns_state(live_app, meta):
|
||||
domain = live_app
|
||||
generic.do_restore(domain, meta) # restore + assert healthy/serving
|
||||
def test_restore_returns_state(live_app):
    """Assert the MARKER file reads back as "original" after the restore.

    Assertion-only: the file is read with ``cat`` through ``lifecycle.exec_in_app``; no op is
    performed here.
    """
    # ``live_app`` is the only domain name in scope; this signature defines no ``domain`` local.
    restored = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert restored == "original", "restore did not return the pre-mutation state"
|
||||
|
||||
@ -1,29 +1,19 @@
|
||||
"""n8n — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
|
||||
"""n8n — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
|
||||
|
||||
The orchestrator deployed the previous published version ONCE; this overlay writes a marker file into
|
||||
the persistent /home/node/.n8n volume (n8n state = sqlite + config; the marker is read back via
|
||||
`exec_in_app`, not HTTP-served), performs the in-place upgrade via the shared op helper
|
||||
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
|
||||
asserts the data SURVIVED. Assertion-only on the shared deployment."""
|
||||
ops.pre_upgrade seeded a marker into /home/node/.n8n; the orchestrator performed the upgrade once
|
||||
(generic tier asserted reconverge/serving/moved). This overlay ADDS: the data survived. Read via
|
||||
exec_in_app (n8n state isn't HTTP-served)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import lifecycle # noqa: E402
|
||||
|
||||
MARKER = "/home/node/.n8n/ci-marker.txt"
|
||||
|
||||
|
||||
def test_upgrade_preserves_data(live_app, meta):
|
||||
domain = live_app
|
||||
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER}"])
|
||||
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
|
||||
|
||||
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
|
||||
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
|
||||
|
||||
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
|
||||
def test_upgrade_preserves_data(live_app):
    """Assert the MARKER file still reads "upgrade-survives" after the upgrade.

    Assertion-only: the file is read with ``cat`` through ``lifecycle.exec_in_app``; no op is
    performed here.
    """
    # ``live_app`` is the only domain name in scope; this signature defines no ``domain`` local.
    surviving = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert surviving == "upgrade-survives", "data did not survive the upgrade"
|
||||
|
||||
@ -26,6 +26,7 @@ def teardown_function():
|
||||
|
||||
# ---- HC3: generic is the floor; overlay resolution is separate + additive --------------------
|
||||
|
||||
|
||||
def test_no_overlay_means_generic_floor():
|
||||
# hedgedoc ships no tests/hedgedoc/ overlay and no repo-local -> no overlay; generic floor exists
|
||||
assert discovery.resolve_overlay_op("hedgedoc", "install", None) is None
|
||||
@ -40,11 +41,15 @@ def test_cc_ci_overlay_found_for_each_op():
|
||||
# custom-html ships cc-ci overlays for all four ops -> resolve_overlay_op returns the cc-ci file
|
||||
for op in discovery.LIFECYCLE_OPS:
|
||||
res = discovery.resolve_overlay_op("custom-html", op, None)
|
||||
assert res == ("cc-ci", os.path.join(discovery.cc_ci_dir("custom-html"), f"test_{op}.py")), op
|
||||
assert res == (
|
||||
"cc-ci",
|
||||
os.path.join(discovery.cc_ci_dir("custom-html"), f"test_{op}.py"),
|
||||
), op
|
||||
|
||||
|
||||
# ---- HC2: repo-local approval gate (default-deny) --------------------------------------------
|
||||
|
||||
|
||||
def test_repo_local_ignored_when_not_approved(tmp_path):
|
||||
# default-deny: a repo-local overlay is NOT consulted for an unapproved recipe -> cc-ci wins
|
||||
_approve(tmp_path) # empty allowlist
|
||||
@ -97,18 +102,20 @@ def test_install_steps_repo_local_gated(tmp_path):
|
||||
|
||||
|
||||
def test_pre_op_hook_repo_local_gated(tmp_path):
|
||||
# hedgedoc has no cc-ci ops.py, so this isolates the repo-local gate (custom-html now ships a
|
||||
# real cc-ci tests/custom-html/ops.py, which would mask the gate).
|
||||
rl = tmp_path / "repo"
|
||||
rl.mkdir()
|
||||
(rl / "ops.py").write_text("def pre_upgrade(domain, meta):\n pass\n")
|
||||
|
||||
_approve(tmp_path) # not approved -> repo-local ops.py ignored
|
||||
assert discovery.pre_op_hook("custom-html", "upgrade", str(rl)) is None
|
||||
_approve(tmp_path) # not approved -> repo-local ops.py ignored (no cc-ci ops.py either)
|
||||
assert discovery.pre_op_hook("hedgedoc", "upgrade", str(rl)) is None
|
||||
|
||||
_approve(tmp_path, "custom-html") # approved -> repo-local pre-op hook honored
|
||||
hook = discovery.pre_op_hook("custom-html", "upgrade", str(rl))
|
||||
_approve(tmp_path, "hedgedoc") # approved -> repo-local pre-op hook honored
|
||||
hook = discovery.pre_op_hook("hedgedoc", "upgrade", str(rl))
|
||||
assert hook == ("repo-local", str(rl / "ops.py"))
|
||||
# an ops.py that does NOT define pre_<op> is not a hook for that op
|
||||
assert discovery.pre_op_hook("custom-html", "backup", str(rl)) is None
|
||||
assert discovery.pre_op_hook("hedgedoc", "backup", str(rl)) is None
|
||||
|
||||
|
||||
def test_default_allowlist_is_empty():
|
||||
|
||||
Reference in New Issue
Block a user