feat(1e): HC3 additive generic + op/assertion split (orchestrator owns the op)

- orchestrator: per mutating tier, run optional pre-op seed hook (ops.py pre_<op>) → perform the op
  ONCE (harness-owned) → run generic assertion (unless opted out) AND overlay assertion, both against
  the shared post-op deployment. Op results passed op→assertion via run-scoped CCCI_OP_STATE_FILE.
- opt-out: CCCI_SKIP_GENERIC / CCCI_SKIP_GENERIC_<OP> / recipe_meta.SKIP_GENERIC (declarative).
- generic.py: split do_* into op primitives (perform_upgrade/backup/restore) + assertions
  (assert_upgraded/backup_artifact/restore_healthy) reading op_state(); deployed_identity now returns
  {version,image,chaos} (chaos label ready for HC1).
- generic test_<op>.py + all 6 recipe overlays migrated to assertion-only; pre-op seeding moved to
  per-recipe ops.py (pre_upgrade/pre_backup/pre_restore). install overlays unchanged (no op).
- deploy-count stays 1 (op primitives never call deploy_app). lint PASS; 8 unit tests PASS on cc-ci.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 03:12:04 +01:00
parent 6a59343996
commit b7e6cbd7be
31 changed files with 623 additions and 412 deletions

View File

@ -118,22 +118,49 @@ def assert_serving(domain: str, meta: dict) -> None:
assert "commoninternet.net" in detail.lower(), f"{domain}: served cert unexpected — {detail}"
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
"""UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then assert it
reconverges + still serves AND that the deployment actually MOVED (version label and/or image
changed). The move assertion guards against a vacuous no-op upgrade silently passing — the exact
F1d-2 failure where a mis-pinned base deployed LATEST so 'upgrade to latest' changed nothing."""
before = lifecycle.deployed_identity(domain)
lifecycle.upgrade_app(domain, version=target)
# ---- Op/assertion split (Phase 1e HC3) -------------------------------------------------------
# The orchestrator performs each mutating op ONCE (the harness owns the op), records what an
# assertion needs (pre-upgrade identity, backup snapshot_id) into a run-scoped JSON state file at
# $CCCI_OP_STATE_FILE, then runs the generic assertion file (unless opted out) AND the overlay
# assertion file against the shared post-op state. The assertion functions below read that state via
# `op_state()`. They NEVER perform the op — that keeps the op single + lets generic+overlay coexist.
import json as _json # noqa: E402
def op_state() -> dict:
    """Return the run-scoped op state the orchestrator wrote between the op and the assertions.

    The state is a JSON object stored at the path named by ``$CCCI_OP_STATE_FILE``, e.g.
    ``{"upgrade": {"before": {...}}, "backup": {"snapshot_id": "..."}}``.

    Returns ``{}`` when the variable is unset/empty, the file is missing or unreadable, the content
    is not valid JSON, or the top-level JSON value is not an object. Callers chain ``.get(...)`` on
    the result, so handing back a list/None/str would crash them instead of failing their assertion.
    """
    path = os.environ.get("CCCI_OP_STATE_FILE")
    if not path:
        return {}
    try:
        # EAFP: a missing file raises OSError here, so no separate exists() check is needed.
        with open(path, encoding="utf-8") as f:
            state = _json.load(f)
    except (OSError, ValueError):
        # unreadable file or malformed JSON/bytes -> treat as "no state recorded"
        return {}
    return state if isinstance(state, dict) else {}
def assert_upgraded(domain: str, meta: dict) -> None:
"""Generic UPGRADE assertion (post-op): the orchestrator already performed the upgrade once.
Assert it reconverged + still serves AND that the deployment actually MOVED — guarding against a
vacuous no-op upgrade silently passing (F1d-2). HC1: prev→PR-head may NOT bump the version label,
so a MOVE is ANY of: version-label change, image change, or a chaos label now present (a chaos
deploy stamps the PR-head commit — THE proof the code under test was deployed)."""
before = op_state().get("upgrade", {}).get("before") or {}
assert_serving(domain, meta)
after = lifecycle.deployed_identity(domain)
moved = (before[0] and after[0] and before[0] != after[0]) or (
before[1] and after[1] and before[1] != after[1]
moved = (
(before.get("version") and after.get("version") and before["version"] != after["version"])
or (before.get("image") and after.get("image") and before["image"] != after["image"])
or (after.get("chaos") and after.get("chaos") != before.get("chaos"))
)
assert moved, (
f"{domain}: upgrade did not move the deployment "
f"(version {before[0]}->{after[0]}, image {before[1]}->{after[1]}) — "
"not a real previous->target upgrade (DG2 must be non-vacuous)"
f"(version {before.get('version')}->{after.get('version')}, "
f"image {before.get('image')}->{after.get('image')}, "
f"chaos {before.get('chaos')}->{after.get('chaos')}) — "
"not a real upgrade to the code under test (HC1/DG2 must be non-vacuous)"
)
@ -148,10 +175,10 @@ def parse_snapshot_id(backup_output: str) -> str | None:
return m.group(1) if m else None
def do_backup(domain: str) -> str:
"""BACKUP op: create a backup, then assert a snapshot artifact was produced (returns its id)."""
out = lifecycle.backup_app(domain)
snap_id = parse_snapshot_id(out)
def assert_backup_artifact(domain: str) -> str:
"""Generic BACKUP assertion (post-op): the orchestrator already ran the backup once. Assert a
snapshot artifact was produced (its id recorded in op state). Returns the id."""
snap_id = op_state().get("backup", {}).get("snapshot_id")
assert snap_id, (
f"{domain}: backup produced no snapshot artifact "
"(no snapshot_id in `abra app backup create` output)"
@ -159,8 +186,29 @@ def do_backup(domain: str) -> str:
return snap_id
def do_restore(domain: str, meta: dict) -> None:
"""RESTORE op: restore the latest snapshot, then assert the app is healthy + serving again
(assert_serving polls, so the post-restore reconverge settles)."""
lifecycle.restore_app(domain)
def assert_restore_healthy(domain: str, meta: dict) -> None:
    """Generic RESTORE assertion (post-op): the orchestrator already restored. Assert the app is
    healthy + serving again (assert_serving polls, so the post-restore reconverge settles).

    Performs no restore itself: it only delegates to `assert_serving(domain, meta)`, so any failure
    raised here is whatever `assert_serving` raises."""
    assert_serving(domain, meta)
# ---- Op primitives (orchestrator-only; perform the op once, never assert) --------------------
def perform_upgrade(domain: str, target: str | None) -> dict[str, str | None]:
    """UPGRADE op primitive: upgrade the deployment in place exactly once, asserting nothing.

    The identity of the running deployment is captured BEFORE `lifecycle.upgrade_app` runs and is
    handed back, so the orchestrator can record it for `assert_upgraded`'s move check. `target` is
    forwarded as the `version` argument of the upgrade (None is passed through unchanged). E1
    baseline is `abra app upgrade` -> target; HC1/E2 redefines this as a chaos redeploy of the
    PR-head checkout."""
    identity_before = lifecycle.deployed_identity(domain)
    lifecycle.upgrade_app(domain, version=target)
    return identity_before
def perform_backup(domain: str) -> str | None:
    """BACKUP op primitive: run the backup exactly once, asserting nothing.

    Returns whatever `parse_snapshot_id` extracts from the backup output (a snapshot id, or None
    when none is found) so the orchestrator can record it for the backup assertion."""
    backup_output = lifecycle.backup_app(domain)
    snapshot_id = parse_snapshot_id(backup_output)
    return snapshot_id
def perform_restore(domain: str) -> None:
    """Perform the RESTORE op once (restore the latest snapshot).

    Op primitive only: calls `lifecycle.restore_app(domain)` and asserts nothing; the matching
    post-op check is `assert_restore_healthy`. Returns None."""
    lifecycle.restore_app(domain)

View File

@ -245,11 +245,18 @@ def wait_healthy(
raise TimeoutError(f"{domain}: not healthy over HTTPS {path} (last status {last})")
def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, str | None]:
"""(coop-cloud version label, image) of the running app service. Used to prove an upgrade
actually MOVED the deployment prev→target (not a vacuous no-op — Adversary F1d-2). The version
label (`coop-cloud.<stack>.version`) is bumped per published recipe version; the image usually
bumps too. Either changing proves the upgrade did something."""
def deployed_identity(domain: str, service: str = "app") -> dict[str, str | None]:
"""Identity of the running app service: {"version", "image", "chaos"}. Used to prove an upgrade
actually MOVED the deployment (not a vacuous no-op — Adversary F1d-2), AND (Phase 1e HC1) that an
`abra app deploy --chaos` upgrade actually deployed the PR-head code under test.
- `version` = the `coop-cloud.<stack>.version` label (bumped per published recipe version).
- `image` = the running container image (usually bumps with a published version).
- `chaos` = the chaos label value (a chaos deploy stamps the recipe git commit/dirty state here)
— present after `abra app deploy --chaos`, absent on a clean pinned-tag deploy. For prev→PR-head
this is THE proof PR-head was deployed even when the version label is unbumped (HC1). The exact
chaos label key varies by abra version, so we capture any `coop-cloud.<stack>.*` label whose key
contains "chaos"."""
name = f"{_stack_name(domain)}_{service}"
proc = subprocess.run(
[
@ -265,15 +272,18 @@ def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, st
)
out = proc.stdout.strip()
if "|" not in out:
return (None, None)
return {"version": None, "image": None, "chaos": None}
labels_json, _, image = out.partition("|")
ver = None
ver = chaos = None
with contextlib.suppress(ValueError, json.JSONDecodeError):
for k, v in json.loads(labels_json).items():
if k.startswith("coop-cloud.") and k.endswith(".version"):
if not k.startswith("coop-cloud."):
continue
if k.endswith(".version"):
ver = v
break
return (ver, image.strip() or None)
elif "chaos" in k:
chaos = v
return {"version": ver, "image": image.strip() or None, "chaos": chaos}
def upgrade_app(domain: str, version: str | None = None) -> None:

View File

@ -1,13 +1,18 @@
#!/usr/bin/env python3
"""Top-level CI orchestrator (plan §4.3 + Phase 1d), invoked by the Drone pipeline (or by hand).
"""Top-level CI orchestrator (plan §4.3 + Phase 1d/1e), invoked by the Drone pipeline (or by hand).
Phase 1d model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment
(install asserts; upgrade does `abra app upgrade` in place; backup/restore mutate in place; custom
asserts), then ONE teardown in `finally`. Each tier's assertions come from exactly one file — a
recipe overlay if present, else the generic default — discovered by `harness.discovery`
(precedence repo-local > cc-ci > generic). The generic is the default for every op, so ANY recipe is
testable with zero config (DG1DG4). The lifecycle OPS live in the shared harness (harness.generic),
not per-recipe (DG7 DRY).
Model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment, then ONE
teardown in `finally`. Per Phase 1e the orchestrator OWNS each mutating op (HC3): for a tier it runs
the optional pre-op seed hook (recipe ops.py `pre_<op>`), performs the op exactly ONCE
(upgrade/backup/restore — install has none), then runs BOTH the generic assertion file (the floor,
unless explicitly opted out) AND the recipe overlay assertion file (if any) against the shared
post-op state — generic and overlay are ADDITIVE, not override (HC3). Op results an assertion needs
(pre-upgrade identity, snapshot_id) pass op→assertion via a run-scoped JSON state file
($CCCI_OP_STATE_FILE). The upgrade op currently runs `abra app upgrade` to the target (E1 baseline);
HC1 redefines it as an `abra app deploy --chaos` of the PR-HEAD code under test. Repo-local
(PR-authored) overlays/hooks run only for allowlist-approved recipes (HC2,
gated in harness.discovery). The generic is the default for every op, so ANY recipe is testable with
zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic), not per-recipe
(DG7 DRY).
Run parameters from env (set by the comment-bridge via Drone build params):
RECIPE recipe name (e.g. custom-html) [required]
@ -23,7 +28,10 @@ invoke as: cc-ci-run runner/run_recipe_ci.py
from __future__ import annotations
import contextlib
import glob
import importlib.util
import json
import os
import shutil
import subprocess
@ -37,6 +45,10 @@ from harness import discovery, generic, lifecycle, naming # noqa: E402
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
def _truthy(v: str | None) -> bool:
return str(v or "").strip().lower() in ("1", "true", "yes", "on")
def _redact_values() -> list[str]:
"""Values to scrub from published logs (D6 redaction filter, plan §4.4). The infra secrets
materialised at /run/secrets/* — if any subprocess ever echoes one, mask it. Only >=8-char
@ -56,6 +68,14 @@ def _redact_values() -> list[str]:
_REDACT = _redact_values()
def _scrub(text: str) -> str:
    """Mask any known infra-secret value in a string (D6 redaction, plan §4.4).

    Every value in the module-level `_REDACT` list that occurs in `text` is replaced with
    ``***REDACTED***``. Returns the masked string (unchanged when nothing matches)."""
    # Longest first: if one secret is a prefix/substring of another, masking the shorter one first
    # would leave the remainder of the longer secret readable in the published log.
    for secret in sorted(_REDACT, key=len, reverse=True):
        if secret:  # an empty value would "match" between every character
            text = text.replace(secret, "***REDACTED***")
    return text
def run_redacted(cmd: list[str], env: dict | None = None) -> int:
"""Run a subprocess, streaming output live (so Drone logs stay tail-able) but masking any known
infra-secret value first. Belt-and-suspenders: the harness never prints secrets and abra doesn't
@ -71,10 +91,7 @@ def run_redacted(cmd: list[str], env: dict | None = None) -> int:
)
assert proc.stdout is not None
for line in proc.stdout:
for v in _REDACT:
if v in line:
line = line.replace(v, "***REDACTED***")
sys.stdout.write(line)
sys.stdout.write(_scrub(line))
sys.stdout.flush()
return proc.wait()
@ -149,7 +166,7 @@ def _load_meta(recipe: str) -> dict:
ns: dict = {}
with open(path) as fh:
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
for k in list(meta) + ["BACKUP_CAPABLE"]:
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
if k in ns:
meta[k] = ns[k]
return meta
@ -159,15 +176,105 @@ def _tier_env(domain: str) -> dict:
return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
def run_op_tier(recipe: str, op: str, repo_local: str | None, domain: str) -> str:
"""Run the single assertion file for a lifecycle op (overlay or generic) against the shared
deployment. The file performs the op (upgrade/backup/restore) + asserts; install asserts only
(already deployed). Returns 'pass' | 'fail'."""
source, path = discovery.resolve_op(recipe, op, repo_local)
rel = os.path.relpath(path, ROOT)
print(f"\n===== TIER: {op} ({source}: {rel}) =====", flush=True)
rc = run_redacted([sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain))
return "pass" if rc == 0 else "fail"
def _skip_generic(op: str, meta: dict) -> bool:
    """Whether the generic assertion for `op` is opted out (Phase 1e HC3). Default: run (additive).

    Opt-out, any of:
    - env CCCI_SKIP_GENERIC truthy (all ops),
    - env CCCI_SKIP_GENERIC_<OP> truthy (this op only),
    - the recipe's declarative recipe_meta.SKIP_GENERIC naming the op, "all" or "*". It may be a
      list of names or a single bare string; matching is case-insensitive.

    Returns True when the generic assertion must be skipped for this op."""
    if _truthy(os.environ.get("CCCI_SKIP_GENERIC")):
        return True
    if _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")):
        return True
    declared = meta.get("SKIP_GENERIC") or []
    if isinstance(declared, str):
        # A bare string ("all", "upgrade") must be treated as one entry; iterating it would compare
        # single characters and silently never match.
        declared = [declared]
    skipped = {str(entry).strip().lower() for entry in declared}
    return bool(skipped & {"all", "*", op.lower()})
def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None:
    """Run the optional pre-op seed hook (recipe ops.py `pre_<op>`) BEFORE the harness performs the
    op (HC3 op/assertion split): overlays seed data-continuity markers / the backup→restore mutation
    here, then assert post-op in test_<op>.py. cc-ci's ops.py is trusted; a repo-local ops.py is
    consulted only for allowlist-approved recipes (HC2 gate is inside discovery.pre_op_hook).

    The hook module is imported in-process and called as `pre_<op>(domain, meta)`. Its directory is
    put first on sys.path for the duration of the call so an ops.py can import sibling helpers, and
    is removed again afterwards even when the hook raises. Does nothing when discovery reports no
    hook.

    Raises ImportError when the hook file cannot be turned into a loadable module; any exception
    raised by the hook itself propagates to the caller."""
    hook = discovery.pre_op_hook(recipe, op, repo_local)
    if not hook:
        return
    source, path = hook
    hook_dir = os.path.dirname(path)
    sys.path.insert(0, hook_dir)
    try:
        spec = importlib.util.spec_from_file_location(f"ccci_ops_{recipe}_{op}", path)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when no loader handles the path; fail with a
            # readable error instead of an AttributeError on None.
            raise ImportError(f"cannot load pre-op hook module from {path}")
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        print(f" pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True)
        getattr(mod, f"pre_{op}")(domain, meta)
    finally:
        if hook_dir in sys.path:
            sys.path.remove(hook_dir)
def _perform_op(op: str, domain: str, target: str | None, op_state: dict) -> None:
    """Run the single mutating op for a tier exactly once (the harness owns the op, HC3).

    Results the assertions need are stored into the caller's `op_state` dict in place:
    - "upgrade": {"before": <value returned by generic.perform_upgrade>}
    - "backup": {"snapshot_id": <value returned by generic.perform_backup>}
    "restore" records nothing, and any other op (install: already deployed) is a no-op. None of the
    primitives call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place upgrade is
    not a new install (HC1 reconciliation)."""
    if op == "restore":
        generic.perform_restore(domain)
        return
    if op == "upgrade":
        identity_before = generic.perform_upgrade(domain, target)
        op_state["upgrade"] = {"before": identity_before}
        return
    if op == "backup":
        snapshot_id = generic.perform_backup(domain)
        op_state["backup"] = {"snapshot_id": snapshot_id}
def run_lifecycle_tier(
    recipe: str,
    op: str,
    repo_local: str | None,
    domain: str,
    meta: dict,
    target: str | None,
    op_state: dict,
) -> str:
    """Run one additive lifecycle tier (HC3) against the shared deployment.

    Sequence: pre-op seed hook → the op, once → persist `op_state` as JSON to the file named by
    $CCCI_OP_STATE_FILE → one pytest run per assertion file (generic unless opted out, then the
    overlay if any). Every assertion file is run even if an earlier one failed.

    Returns 'skip' when there is nothing to assert (generic opted out and no overlay; the op is not
    performed), 'fail' when the hook/op/state write raised or any pytest run exited non-zero, and
    'pass' otherwise."""
    overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
    generic_skipped = _skip_generic(op, meta)

    # Assertion files in run order: the generic floor first, then the recipe overlay.
    assertion_files: list[tuple[str, str]] = []
    if not generic_skipped:
        assertion_files.append(discovery.generic_op(op))
    if overlay:
        assertion_files.append(overlay)

    if not assertion_files:
        # Nothing would assert, so don't perform a pointless mutating op.
        print(f"\n===== TIER: {op} — SKIP (generic opted out, no overlay) =====", flush=True)
        return "skip"

    generic_label = "skip" if generic_skipped else "run"
    if overlay:
        overlay_label = f"{overlay[0]}:{os.path.relpath(overlay[1], ROOT)}"
    else:
        overlay_label = "none"
    print(
        f"\n===== TIER: {op} (generic={generic_label}, overlay={overlay_label}) =====",
        flush=True,
    )

    # Seed, then the harness-owned op, then publish the state for the assertion processes.
    try:
        _run_pre_hook(recipe, op, repo_local, domain, meta)
        _perform_op(op, domain, target, op_state)
        with open(os.environ["CCCI_OP_STATE_FILE"], "w") as state_fh:
            json.dump(op_state, state_fh)
    except Exception as exc:  # noqa: BLE001 — a failed op is a reported tier failure, not a crash
        print(f"!! {op} op failed: {_scrub(str(exc))}", flush=True)
        return "fail"

    # Each assertion file is its own pytest process; generic runs first so an overlay may assume
    # readiness.
    any_failed = False
    for source, path in assertion_files:
        print(f" assert ({source}): {os.path.relpath(path, ROOT)}", flush=True)
        exit_code = run_redacted(
            [sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain)
        )
        if exit_code != 0:
            any_failed = True
    return "fail" if any_failed else "pass"
def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
@ -223,6 +330,14 @@ def main() -> int:
f.write("0")
os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile
# Run-scoped op state (HC3): the orchestrator records op results (pre-upgrade identity, backup
# snapshot_id) here for the assertion tiers (generic + overlay) to read via generic.op_state().
statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
with open(statefile, "w") as f:
json.dump({}, f)
os.environ["CCCI_OP_STATE_FILE"] = statefile
op_state: dict = {}
results: dict[str, str] = {}
lifecycle.janitor()
try:
@ -243,28 +358,38 @@ def main() -> int:
print(f"!! deploy/readiness failed: {e}", flush=True)
deploy_ok = False
# ---- INSTALL tier (always) ----
# ---- INSTALL tier (always; additive generic + overlay, no op) ----
if "install" in stages:
results["install"] = (
run_op_tier(recipe, "install", repo_local, domain) if deploy_ok else "fail"
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, target, op_state)
if deploy_ok
else "fail"
)
if deploy_ok:
# ---- UPGRADE tier ----
# ---- UPGRADE tier (op once → generic + overlay assert) ----
if "upgrade" in stages:
results["upgrade"] = (
run_op_tier(recipe, "upgrade", repo_local, domain)
run_lifecycle_tier(
recipe, "upgrade", repo_local, domain, meta, target, op_state
)
if prev
else "skip" # only one published version → nothing to upgrade from
)
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
if "backup" in stages:
results["backup"] = (
run_op_tier(recipe, "backup", repo_local, domain) if backup_cap else "skip"
run_lifecycle_tier(recipe, "backup", repo_local, domain, meta, target, op_state)
if backup_cap
else "skip"
)
if "restore" in stages:
results["restore"] = (
run_op_tier(recipe, "restore", repo_local, domain) if backup_cap else "skip"
run_lifecycle_tier(
recipe, "restore", repo_local, domain, meta, target, op_state
)
if backup_cap
else "skip"
)
# ---- CUSTOM tier ----
if "custom" in stages:
@ -281,6 +406,8 @@ def main() -> int:
with open(countfile) as f:
deploy_count = int(f.read().strip() or "0")
os.remove(countfile)
with contextlib.suppress(OSError):
os.remove(statefile)
# ---- per-op summary (DG6 feed) ----
print("\n===== RUN SUMMARY =====", flush=True)

View File

@ -1,9 +1,10 @@
"""Generic BACKUP tier (Phase 1d DG3) — recipe-agnostic, backup-capable recipes only.
"""Generic BACKUP tier (Phase 1d DG3 + Phase 1e HC3) — recipe-agnostic, assertion-only.
Runs `abra app backup create` against the shared live deployment and asserts a snapshot artifact is
produced (abra app backup snapshots is non-empty). Honest limit: the generic verifies the backup
MECHANISM, not app-specific data integrity — that's a recipe overlay (test_backup.py seeds a marker).
For recipes that declare no backup config the orchestrator skips this tier as N/A (not a failure)."""
The orchestrator ran `abra app backup create` ONCE against the shared live deployment and recorded
the produced snapshot id in the run-scoped op state. This tier ASSERTS a snapshot artifact was
produced — it does NOT perform the op. Honest limit: the generic verifies the backup MECHANISM, not
app-specific data integrity — that's a recipe overlay (test_backup.py). Runs by default ALONGSIDE any
overlay (additive). For recipes that declare no backup config the orchestrator skips this tier (N/A)."""
import os
import sys
@ -13,5 +14,4 @@ from harness import generic # noqa: E402
def test_backup_artifact(live_app, meta):
snaps = generic.do_backup(live_app)
assert snaps, "backup produced no snapshot artifact"
assert generic.assert_backup_artifact(live_app), "backup produced no snapshot artifact"

View File

@ -1,8 +1,9 @@
"""Generic RESTORE tier (Phase 1d DG3) — recipe-agnostic, backup-capable recipes only.
"""Generic RESTORE tier (Phase 1d DG3 + Phase 1e HC3) — recipe-agnostic, assertion-only.
Restores the latest snapshot (produced by the backup tier on the same shared deployment) and asserts
the restore completes and the app is healthy + serving afterwards. App-specific data-integrity
(marker survives) is a recipe overlay (test_restore.py); the generic verifies the restore mechanism."""
The orchestrator restored the latest snapshot ONCE (produced by the backup op on the same shared
deployment). This tier ASSERTS the restore completed and the app is healthy + serving afterwards — it
does NOT perform the op. App-specific data-integrity (marker survives) is a recipe overlay
(test_restore.py); the generic verifies the restore mechanism. Runs by default ALONGSIDE any overlay."""
import os
import sys
@ -12,4 +13,4 @@ from harness import generic # noqa: E402
def test_restore_healthy(live_app, meta):
generic.do_restore(live_app, meta)
generic.assert_restore_healthy(live_app, meta)

View File

@ -1,9 +1,10 @@
"""Generic UPGRADE tier (Phase 1d DG2) — recipe-agnostic.
"""Generic UPGRADE tier (Phase 1d DG2 + Phase 1e HC3) — recipe-agnostic, assertion-only.
The orchestrator deployed the PREVIOUS published version once; this tier upgrades it IN PLACE
(abra app upgrade) to the target (VERSION env, else newest published) on the same live deployment,
then asserts it reconverges and still serves. Data-continuity is a recipe overlay (test_upgrade.py),
not the generic — the generic verifies the upgrade mechanism + still-serving."""
The orchestrator deployed the base version once and performed the upgrade ONCE in place (currently
`abra app upgrade` to the target; Phase 1e HC1 redefines this as an `abra app deploy --chaos` of the
PR-head code under test), recording the pre-upgrade
identity in the run-scoped op state. This tier ASSERTS the upgrade reconverged, still serves, and
actually MOVED the deployment (version/image/chaos label) — it does NOT perform the op. Runs by
default ALONGSIDE any recipe overlay (additive); skipped only via an explicit opt-out."""
import os
import sys
@ -13,5 +14,4 @@ from harness import generic # noqa: E402
def test_upgrade_reconverges(live_app, meta):
target = os.environ.get("VERSION") or None
generic.do_upgrade(live_app, target, meta)
generic.assert_upgraded(live_app, meta)

27
tests/cryptpad/ops.py Normal file
View File

@ -0,0 +1,27 @@
"""cryptpad — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
matching test_<op>.py asserts post-op (assertion-only). cryptpad data isn't HTTP-served (encrypted
datastore), so the marker in the persistent cryptpad_data volume is read back via exec_in_app."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
MARKER = "/cryptpad/data/ci-marker.txt"
def _write(domain, val):
    """Write `val` as the sole content of the MARKER file inside the app container.

    Runs `sh -c "echo <val> > <MARKER>"` through `lifecycle.exec_in_app`. Both the value and the
    path are shell-quoted, so a value containing spaces or shell metacharacters is written
    literally instead of being interpreted by the shell."""
    import shlex  # local import: this module only imports os/sys at the top

    command = f"echo {shlex.quote(str(val))} > {shlex.quote(MARKER)}"
    lifecycle.exec_in_app(domain, ["sh", "-c", command])
def pre_upgrade(domain, meta):
    """Pre-UPGRADE seed hook: write the marker value "upgrade-survives" before the upgrade op.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    _write(domain, "upgrade-survives")
def pre_backup(domain, meta):
    """Pre-BACKUP seed hook: write the marker value "original" before the backup op runs.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    _write(domain, "original")
def pre_restore(domain, meta):
    """Pre-RESTORE seed hook: overwrite the marker with "mutated" before the restore op runs.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    _write(domain, "mutated")  # diverge so a successful restore is observable

View File

@ -1,30 +1,19 @@
"""cryptpad — BACKUP overlay (Phase 1d, DG4): seed a known state into the backed-up cryptpad_data
volume, back it up (assert a snapshot artifact), then mutate so the RESTORE overlay (test_restore.py)
can prove the backed-up state returns. Runs on the shared deployment; the mutated marker persists for
the restore tier.
"""cryptpad — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
The cryptpad `app` service is labelled `backupbot.backup=true`, so its volumes (incl. cryptpad_data)
are backed up. Marker is checked via `exec_in_app` (data isn't HTTP-served)."""
ops.pre_backup seeded "original" into cryptpad_data; the orchestrator performed the backup once
(generic tier asserted a snapshot artifact). This overlay ADDS: the seeded state is intact at backup
time. The backup→restore divergence is in ops.pre_restore."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/cryptpad/data/ci-marker.txt"
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish original state in the backed-up volume, then back it up (reuse the generic op:
# backup + assert a snapshot artifact was produced)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate state (diverge from the backup)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
def test_backup_captures_state(live_app):
    """Post-backup overlay check: the marker file in the app container still reads "original"."""
    marker_content = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert marker_content == "original", "the seeded state was not present at backup time"

View File

@ -1,24 +1,19 @@
"""cryptpad — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""cryptpad — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the
cryptpad_data marker mutated to "mutated" after backing up "original". This restores the snapshot via
the shared op helper (`generic.do_restore`, which also asserts the app is healthy + serving
afterwards), then asserts the volume data returned to the pre-mutation "original" — the app-specific
data integrity the generic restore cannot check. Reads the marker via `exec_in_app` (data isn't
HTTP-served). Assertion-only (no deploy/teardown)."""
ops.pre_restore mutated the cryptpad_data marker to "mutated"; the orchestrator restored once
(generic tier asserted healthy/serving). This overlay ADDS: the volume data returned to the
pre-mutation (backed-up) "original". Read via exec_in_app."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/cryptpad/data/ci-marker.txt"
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
def test_restore_returns_state(live_app):
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,31 +1,19 @@
"""cryptpad — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""cryptpad — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay writes a marker into the
persistent cryptpad_data volume (cryptpad data isn't HTTP-served as a static file — it's an encrypted
datastore — so the marker is read back via `exec_in_app`, not HTTP), performs the in-place upgrade via
the shared op helper (`generic.do_upgrade`, which also asserts reconverge + serving + that the
deployment moved), then asserts the data SURVIVED. Assertion-only on the shared deployment."""
ops.pre_upgrade seeded a marker into the persistent cryptpad_data volume; the orchestrator performed
the upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the data
survived the upgrade. Read via exec_in_app (cryptpad data isn't HTTP-served)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/cryptpad/data/ci-marker.txt"
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
# write a data marker into the persistent cryptpad_data volume
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
def test_upgrade_preserves_data(live_app):
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"

32
tests/custom-html/ops.py Normal file
View File

@ -0,0 +1,32 @@
"""custom-html — pre-op seed hooks (Phase 1e HC3). The orchestrator runs `pre_<op>(domain, meta)`
BEFORE it performs the op; the matching test_<op>.py asserts the post-op state (assertion-only).
nginx serves the volume at /usr/share/nginx/html, so the marker file survives an upgrade / a
backup+restore of that volume and is both HTTP-readable and exec-readable."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
def _write(domain: str, val: str) -> None:
    """Write `val` as the sole content of the MARKER_PATH file inside the app container.

    Runs `sh -c "echo <val> > <MARKER_PATH>"` through `lifecycle.exec_in_app`. Both the value and
    the path are shell-quoted, so a value containing spaces or shell metacharacters is written
    literally instead of being interpreted by the shell."""
    import shlex  # local import: this module only imports os/sys at the top

    command = f"echo {shlex.quote(str(val))} > {shlex.quote(MARKER_PATH)}"
    lifecycle.exec_in_app(domain, ["sh", "-c", command])
def pre_upgrade(domain, meta):
    """Pre-UPGRADE seed hook: write the marker value "upgrade-survives" before the upgrade op.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    # seed a marker before the upgrade so the overlay can prove the data survives it
    _write(domain, "upgrade-survives")
def pre_backup(domain, meta):
    """Pre-BACKUP seed hook: write the marker value "original" before the backup op runs.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    # establish a known original state before the backup op captures it
    _write(domain, "original")
def pre_restore(domain, meta):
    """Pre-RESTORE seed hook: overwrite the marker with "mutated" before the restore op runs.

    `meta` is accepted because hooks are called as `pre_<op>(domain, meta)`; it is not used here."""
    # diverge from the backed-up state so a successful restore (back to "original") is observable
    _write(domain, "mutated")

View File

@ -1,34 +1,21 @@
"""custom-html — BACKUP overlay (Phase 1d, DG4): seed a known state, back it up (assert artifact),
then mutate so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on
the shared deployment; the marker it leaves ("mutated") persists for the restore tier.
"""custom-html — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
Reads the marker via `exec_in_app` (the file in the volume), NOT http: backup/restore preserve the
VOLUME, and reading it directly is immune to the serving/container-routing race right after
backup-bot-two cycles the app container (HTTP briefly served empty). Serving is proven separately by
the install/upgrade tiers' assert_serving."""
The orchestrator ran `ops.pre_backup` (seeded "original" into the served volume), then performed the
backup ONCE. The generic backup tier already asserted a snapshot artifact was produced; this overlay
ADDS the recipe-specific check: the seeded "original" state is intact in the volume post-backup
(pre-mutation). The backup→restore divergence happens in `ops.pre_restore`. Reads via exec_in_app
(volume-direct), immune to the post-backup serving race after backup-bot-two cycles the container."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
def _marker(domain: str) -> str:
return lifecycle.exec_in_app(domain, ["cat", MARKER_PATH]).strip()
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish a known original state, then back it up (reuse the generic op: backup + assert a
# snapshot artifact was produced)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER_PATH}"])
assert _marker(domain) == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate state so a successful restore is observable (diverge from the backup)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"])
assert _marker(domain) == "mutated"
def test_backup_captures_state(live_app):
    """The marker file read directly from the app container still holds "original"."""
    observed = lifecycle.exec_in_app(live_app, ["cat", MARKER_PATH]).strip()
    assert observed == "original", "the seeded state was not present at backup time"

View File

@ -1,25 +1,22 @@
"""custom-html — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""custom-html — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left state
mutated to "mutated" after backing up "original". This restores the snapshot via the shared op
helper (`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then
asserts the VOLUME data returned to the pre-mutation "original" — the app-specific data integrity the
generic restore cannot check. Reads the marker via exec_in_app (volume-direct, robust to the
post-restore serving race). Assertion-only (no deploy/teardown)."""
The orchestrator ran `ops.pre_restore` (mutated the marker to "mutated", diverging from the backed-up
"original"), then performed the restore ONCE. The generic restore tier already asserted healthy +
serving; this overlay ADDS the recipe-specific check: the volume data returned to the pre-mutation
(backed-up) "original". Reads via exec_in_app (volume-direct), robust to the post-restore serving
race."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
restored = lifecycle.exec_in_app(domain, ["cat", MARKER_PATH]).strip()
def test_restore_returns_state(live_app):
    """After the restore, the marker file must read "original" again (the backed-up value)."""
    raw = lifecycle.exec_in_app(live_app, ["cat", MARKER_PATH])
    restored = raw.strip()
    # the failure message carries the value actually found, to make a bad restore diagnosable
    assert restored == "original", (
        f"restore did not return the pre-mutation (backed-up) state: got {restored!r}"
    )

View File

@ -1,29 +1,21 @@
"""custom-html — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""custom-html — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay seeds a marker into the
served volume, performs the in-place upgrade via the shared op helper (`generic.do_upgrade`, which
also asserts reconverge + serving), then asserts the data SURVIVED. Assertion-only on the shared
deployment (no deploy/teardown here)."""
The orchestrator deployed the base version, ran `ops.pre_upgrade` (seeded a marker into the served
volume), then performed the upgrade ONCE. The generic upgrade tier already asserted reconverge +
serving + moved; this overlay runs ALONGSIDE it and ADDS the recipe-specific check: the data written
before the upgrade survived it. No op, no deploy/teardown here."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
# write a data marker into the served volume (nginx serves /usr/share/nginx/html)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER_PATH}"])
assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
# the data written before the upgrade is still there
def test_upgrade_preserves_data(live_app):
# the marker seeded by ops.pre_upgrade (before the harness upgraded) is still served
assert (
lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
lifecycle.http_fetch(live_app, "/ci-marker.txt")[1].strip() == "upgrade-survives"
), "data did not survive the upgrade"

33
tests/keycloak/ops.py Normal file
View File

@ -0,0 +1,33 @@
"""keycloak — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
matching test_<op>.py asserts post-op (assertion-only). The data marker is a realm in mariadb,
written via the keycloak admin API (kc_admin)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
import kc_admin # noqa: E402
from harness import generic # noqa: E402
def _token(domain):
    """Look up the admin password for ``domain`` and exchange it via ``kc_admin.admin_token``."""
    password = kc_admin.admin_password(domain)
    return kc_admin.admin_token(domain, password)
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: create the marker realm (DB data) so the overlay can prove it survives.

    ``meta`` is accepted as part of the hook signature but is not used here.

    Raises:
        AssertionError: if ``kc_admin.create_marker_realm`` returns a status other than 201/409.
    """
    status = kc_admin.create_marker_realm(domain, _token(domain))
    # 201 and 409 are both accepted (presumably "created" and "already exists" — confirm against
    # kc_admin); report the status actually returned so a failed seed is diagnosable.
    assert status in (201, 409), f"marker realm create returned unexpected status {status!r}"
def pre_backup(domain, meta):
    """Pre-backup hook: establish the marker realm before the backup op captures mariadb.

    ``meta`` is accepted as part of the hook signature but is not used here.

    Raises:
        AssertionError: if ``kc_admin.create_marker_realm`` returns a status other than 201/409.
    """
    status = kc_admin.create_marker_realm(domain, _token(domain))
    # 201 and 409 are both accepted (presumably "created" and "already exists" — confirm against
    # kc_admin); report the status actually returned so a failed seed is diagnosable.
    assert status in (201, 409), f"marker realm create returned unexpected status {status!r}"
def pre_restore(domain, meta):
    """Pre-restore hook: delete the marker realm so a successful restore is observable.

    backup-bot-two cycles the keycloak container during backup, so this first waits for the
    app to be serving again and re-authenticates before touching the admin API.

    Raises:
        AssertionError: if the delete returns a status other than 204/200, or the realm is
            still reported as existing afterwards.
    """
    generic.assert_serving(domain, meta)
    tok = _token(domain)
    status = kc_admin.delete_marker_realm(domain, tok)
    # report the status actually returned so a failed delete is diagnosable
    assert status in (204, 200), f"marker realm delete returned unexpected status {status!r}"
    assert not kc_admin.marker_realm_exists(domain, tok), "delete did not take"

View File

@ -1,7 +1,9 @@
"""keycloak — BACKUP overlay (Phase 1d, DG4): seed a known state (the marker realm in mariadb),
back it up (assert a snapshot artifact), then mutate (delete the realm) so the RESTORE overlay
(test_restore.py) can prove the backed-up state returns. Runs on the shared deployment; the mutated
state persists for the restore tier."""
"""keycloak — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
ops.pre_backup created the marker realm before the backup op captured mariadb; the orchestrator
performed the backup once (generic tier asserted a snapshot artifact). This overlay ADDS: the marker
realm is present at backup time. backup-bot-two cycles the container during backup, so wait for
serving + re-auth first. The backup→restore divergence (deleting the realm) is in ops.pre_restore."""
import os
import sys
@ -11,22 +13,7 @@ import kc_admin # noqa: E402
from harness import generic # noqa: E402
def test_backup_captures_state(live_app, meta):
domain = live_app
pw = kc_admin.admin_password(domain)
tok = kc_admin.admin_token(domain, pw)
# 1) create the marker realm, then back up (reuse the generic op: backup + assert a snapshot)
assert kc_admin.create_marker_realm(domain, tok) in (201, 409)
assert kc_admin.marker_realm_exists(domain, tok)
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# backup-bot-two cycles the keycloak container during backup, so the admin API is briefly 502.
# Wait for it to be serving again, then re-auth, before mutating via the HTTP admin API.
generic.assert_serving(domain, meta)
tok = kc_admin.admin_token(domain, pw)
# 2) mutate: delete the realm (diverge from the backup)
assert kc_admin.delete_marker_realm(domain, tok) in (204, 200)
assert not kc_admin.marker_realm_exists(domain, tok), "delete did not take"
def test_backup_captures_realm(live_app, meta):
    """After the backup op, the marker realm must still be present."""
    # container cycled during backup; wait for it to be back before using the admin API
    generic.assert_serving(live_app, meta)
    password = kc_admin.admin_password(live_app)
    token = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, token)
    assert realm_present, "marker realm not present at backup time"

View File

@ -1,22 +1,16 @@
"""keycloak — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""keycloak — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the marker
realm deleted after backing it up. This restores the snapshot via the shared op helper
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then asserts the
marker realm returned (mariadb restored to the backed-up state) — the app-specific data integrity
the generic restore cannot check. Assertion-only (no deploy/teardown)."""
ops.pre_restore deleted the marker realm (diverge from the backup); the orchestrator restored once
(generic tier asserted healthy/serving). This overlay ADDS: the marker realm returned (mariadb
restored to the backed-up state). Re-auths post-restore."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
import kc_admin # noqa: E402
from harness import generic # noqa: E402
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
pw = kc_admin.admin_password(domain)
tok = kc_admin.admin_token(domain, pw)
assert kc_admin.marker_realm_exists(domain, tok), "restore did not bring back the realm"
def test_restore_returns_realm(live_app):
    """After the restore op, the marker realm must exist again (re-authenticates first)."""
    password = kc_admin.admin_password(live_app)
    token = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, token)
    assert realm_present, "restore did not bring back the realm"

View File

@ -1,28 +1,16 @@
"""keycloak — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""keycloak — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay creates a marker realm
(DB data in mariadb) on the live app, performs the in-place upgrade via the shared op helper
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
asserts the realm SURVIVED (mariadb data preserved). Assertion-only on the shared deployment."""
ops.pre_upgrade created a marker realm (mariadb) before the upgrade; the orchestrator performed the
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the realm survived
(mariadb data preserved). Re-auths post-upgrade."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
import kc_admin # noqa: E402
from harness import generic # noqa: E402
def test_upgrade_preserves_realm(live_app, meta):
domain = live_app
pw = kc_admin.admin_password(domain)
tok = kc_admin.admin_token(domain, pw)
assert kc_admin.create_marker_realm(domain, tok) in (201, 409)
assert kc_admin.marker_realm_exists(domain, tok), "marker realm not created"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
# re-auth (token from the old instance is fine, but get a fresh one post-upgrade) and verify
tok2 = kc_admin.admin_token(domain, pw)
assert kc_admin.marker_realm_exists(domain, tok2), "realm did not survive the upgrade"
def test_upgrade_preserves_realm(live_app):
    """After the upgrade op, the marker realm must still exist (re-authenticates first)."""
    password = kc_admin.admin_password(live_app)
    token = kc_admin.admin_token(live_app, password)
    realm_present = kc_admin.marker_realm_exists(live_app, token)
    assert realm_present, "realm did not survive the upgrade"

41
tests/lasuite-docs/ops.py Normal file
View File

@ -0,0 +1,41 @@
"""lasuite-docs — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
matching test_<op>.py asserts post-op (assertion-only). The marker is a dedicated `ci_marker` row in
postgres (the app's Django migrations don't touch it), written via psql in the `db` service. The
backup path exercises the recipe's pg_backup.sh DB-dump hook (postgres + minio are backupbot-labelled)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
    """Run ``sql`` with ``psql`` in the ``db`` service and return its stripped output.

    Connects as user ``docs`` to database ``docs``; the password is read inside the container
    from ``/run/secrets/postgres_p``. ``-tA`` requests tuples-only, unaligned output.
    """
    import shlex  # local import: only this helper needs it

    # Single-quote the SQL for the shell instead of wrapping it in double quotes, so `$`,
    # backticks or `"` inside a statement reach psql verbatim rather than being expanded by sh.
    cmd = (
        "PGPASSWORD=$(cat /run/secrets/postgres_p) "
        f"psql -U docs -d docs -tAc {shlex.quote(sql)}"
    )
    return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def _seed(domain, value):
    """Reset the ``ci_marker`` table to a single row holding ``value`` and verify the write.

    Creates the table if it is missing, deletes any existing rows, inserts ``value``, then
    reads it back.

    Raises:
        AssertionError: if the value read back differs from ``value``.
    """
    _psql(
        domain,
        "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
        f"INSERT INTO ci_marker VALUES('{value}');",
    )
    stored = _psql(domain, "SELECT v FROM ci_marker;")
    # include both values so a failed seed is diagnosable from the assertion alone
    assert stored == value, f"ci_marker seed did not take: wrote {value!r}, read back {stored!r}"
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: seed the postgres marker row with "upgrade-survives".

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "upgrade-survives"
    _seed(domain, marker_value)
def pre_backup(domain, meta):
    """Pre-backup hook: seed the postgres marker row with "original".

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "original"
    _seed(domain, marker_value)
def pre_restore(domain, meta):
    """Pre-restore hook: drop the marker table so a successful restore is observable.

    After the drop, ``to_regclass`` must no longer resolve the table (empty or "NULL" output).
    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    _psql(domain, "DROP TABLE ci_marker;")
    regclass = _psql(domain, "SELECT to_regclass('public.ci_marker');")
    gone_outputs = ("", "NULL")
    assert regclass in gone_outputs, "drop did not take"

View File

@ -1,16 +1,15 @@
"""lasuite-docs — BACKUP overlay (Phase 1d, DG4): seed a postgres marker, back it up (pg_backup.sh
pre-hook dumps the DB; assert a snapshot artifact), then mutate (drop it) so the RESTORE overlay
(test_restore.py) can prove the backed-up state returns. Runs on the shared deployment; the mutated
state persists for the restore tier.
"""lasuite-docs — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
Exercises the recipe's real DB-dump backup hook (postgres + minio are both backupbot-labelled); the
postgres marker is the meaningful Docs-metadata data path."""
ops.pre_backup wrote "original" into postgres before the backup op (pg_backup.sh dumps the DB); the
orchestrator performed the backup once (generic tier asserted a snapshot artifact). This overlay
ADDS: the seeded row is intact at backup time. The backup→restore divergence (dropping the table) is
in ops.pre_restore."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -18,23 +17,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish original state in postgres, then back up (reuse the generic op: backup +
# assert a snapshot artifact; pg_backup.sh dumps the DB)
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate: drop the marker table (diverge from the backup)
_psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
def test_backup_captures_state(live_app):
    """The ``ci_marker`` row read via psql still holds "original"."""
    observed = _psql(live_app, "SELECT v FROM ci_marker;")
    assert observed == "original", "the seeded postgres state was not present at backup time"

View File

@ -1,17 +1,14 @@
"""lasuite-docs — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""lasuite-docs — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the postgres
marker table dropped after dumping it. This restores the snapshot via the shared op helper
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards; the recipe's
restore.post-hook reloads the dump), then asserts the restored DB matches the pre-mutation "original"
— the app-specific data integrity the generic restore cannot check. Reads via `psql` in the `db`
service. Assertion-only (no deploy/teardown)."""
ops.pre_restore dropped the marker table (diverge); the orchestrator restored once (generic tier
asserted healthy/serving; the recipe's restore.post-hook reloads the dump). This overlay ADDS: the
restored DB matches the pre-mutation "original". Read via psql in the `db` service."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -19,9 +16,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
def test_restore_returns_state(live_app):
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
_psql(live_app, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -1,16 +1,14 @@
"""lasuite-docs — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""lasuite-docs — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay writes a marker row into
postgres (a dedicated `ci_marker` table the app's own Django migrations don't touch, read back via
`psql` in the `db` service), performs the in-place upgrade via the shared op helper
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
asserts the postgres data SURVIVED. Assertion-only on the shared deployment."""
ops.pre_upgrade wrote a postgres marker row before the upgrade; the orchestrator performed the
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the postgres data
survived. Read via psql in the `db` service."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -18,19 +16,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
assert lifecycle.http_get(domain, "/") in (200, 301, 302)
def test_upgrade_preserves_data(live_app):
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
_psql(live_app, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

View File

@ -0,0 +1,41 @@
"""matrix-synapse — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the
matching test_<op>.py asserts post-op (assertion-only). The marker is a dedicated `ci_marker` row in
postgres (synapse's own schema migrations don't touch it), written via psql in the `db` service. The
backup path exercises the recipe's pg_backup.sh DB-dump hook, not a plain volume copy."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
    """Run ``sql`` with ``psql`` in the ``db`` service and return its stripped output.

    Connects as user ``synapse`` to database ``synapse``; the password is read inside the
    container from ``/run/secrets/db_password``. ``-tA`` requests tuples-only, unaligned output.
    """
    import shlex  # local import: only this helper needs it

    # Single-quote the SQL for the shell instead of wrapping it in double quotes, so `$`,
    # backticks or `"` inside a statement reach psql verbatim rather than being expanded by sh.
    cmd = (
        "PGPASSWORD=$(cat /run/secrets/db_password) "
        f"psql -U synapse -d synapse -tAc {shlex.quote(sql)}"
    )
    return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def _seed(domain, value):
    """Reset the ``ci_marker`` table to a single row holding ``value`` and verify the write.

    Creates the table if it is missing, deletes any existing rows, inserts ``value``, then
    reads it back.

    Raises:
        AssertionError: if the value read back differs from ``value``.
    """
    _psql(
        domain,
        "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
        f"INSERT INTO ci_marker VALUES('{value}');",
    )
    stored = _psql(domain, "SELECT v FROM ci_marker;")
    # include both values so a failed seed is diagnosable from the assertion alone
    assert stored == value, f"ci_marker seed did not take: wrote {value!r}, read back {stored!r}"
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: seed the postgres marker row with "upgrade-survives".

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "upgrade-survives"
    _seed(domain, marker_value)
def pre_backup(domain, meta):
    """Pre-backup hook: seed the postgres marker row with "original".

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "original"
    _seed(domain, marker_value)
def pre_restore(domain, meta):
    """Pre-restore hook: drop the marker table so a successful restore is observable.

    After the drop, ``to_regclass`` must no longer resolve the table (empty or "NULL" output).
    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    _psql(domain, "DROP TABLE ci_marker;")
    regclass = _psql(domain, "SELECT to_regclass('public.ci_marker');")
    gone_outputs = ("", "NULL")
    assert regclass in gone_outputs, "drop did not take"

View File

@ -1,16 +1,15 @@
"""matrix-synapse — BACKUP overlay (Phase 1d, DG4): seed a postgres marker, back it up (the recipe's
pg_backup.sh pre-hook dumps the DB to backup.sql; assert a snapshot artifact), then mutate (drop the
marker) so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on the
shared deployment; the mutated state persists for the restore tier.
"""matrix-synapse — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
This exercises the real DB-dump backup hook (backupbot.backup.pre-hook / restore.post-hook), not a
plain volume copy — the meaningful data path for a postgres-backed app."""
ops.pre_backup wrote "original" into postgres before the backup op (pg_backup.sh dumps the DB); the
orchestrator performed the backup once (generic tier asserted a snapshot artifact). This overlay
ADDS: the seeded row is intact at backup time. The backup→restore divergence (dropping the table) is
in ops.pre_restore."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -18,23 +17,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish original state in postgres, then back up (reuse the generic op: backup +
# assert a snapshot artifact; pg_backup.sh dumps the DB)
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('original');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate: drop the marker table (diverge from the backup)
_psql(domain, "DROP TABLE ci_marker;")
assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in (
"",
"NULL",
), "drop did not take"
def test_backup_captures_state(live_app):
    """The ``ci_marker`` row read via psql still holds "original"."""
    observed = _psql(live_app, "SELECT v FROM ci_marker;")
    assert observed == "original", "the seeded postgres state was not present at backup time"

View File

@ -1,17 +1,14 @@
"""matrix-synapse — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""matrix-synapse — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the postgres
marker table dropped after dumping it. This restores the snapshot via the shared op helper
(`generic.do_restore`, which also asserts the app is healthy + serving afterwards; the recipe's
restore.post-hook reloads the dump), then asserts the restored DB matches the pre-mutation "original"
— the app-specific data integrity the generic restore cannot check. Reads via `psql` in the `db`
service. Assertion-only (no deploy/teardown)."""
ops.pre_restore dropped the marker table (diverge); the orchestrator restored once (generic tier
asserted healthy/serving; the recipe's restore.post-hook reloads the dump). This overlay ADDS: the
restored DB matches the pre-mutation "original". Read via psql in the `db` service."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -19,9 +16,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
def test_restore_returns_state(live_app):
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "original"
_psql(live_app, "SELECT v FROM ci_marker;") == "original"
), "restore did not return the pre-mutation postgres state"

View File

@ -1,16 +1,14 @@
"""matrix-synapse — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""matrix-synapse — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay writes a marker row into
postgres (a dedicated `ci_marker` table synapse's own schema migrations don't touch, read back via
`psql` in the `db` service), performs the in-place upgrade via the shared op helper
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
asserts the postgres data SURVIVED. Assertion-only on the shared deployment."""
ops.pre_upgrade wrote a postgres marker row before the upgrade; the orchestrator performed the
upgrade once (generic tier asserted reconverge/serving/moved). This overlay ADDS: the postgres data
survived. Read via psql in the `db` service."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
def _psql(domain, sql):
@ -18,21 +16,7 @@ def _psql(domain, sql):
return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip()
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
# write a marker row into postgres (independent of synapse's own tables)
_psql(
domain,
"CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; "
"INSERT INTO ci_marker VALUES('upgrade-survives');",
)
assert _psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
def test_upgrade_preserves_data(live_app):
assert (
_psql(domain, "SELECT v FROM ci_marker;") == "upgrade-survives"
_psql(live_app, "SELECT v FROM ci_marker;") == "upgrade-survives"
), "postgres data did not survive the upgrade"

27
tests/n8n/ops.py Normal file
View File

@ -0,0 +1,27 @@
"""n8n — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the matching
test_<op>.py asserts post-op (assertion-only). n8n state lives in the persistent /home/node/.n8n
volume (sqlite + config); the marker there is read back via exec_in_app (not HTTP-served)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
MARKER = "/home/node/.n8n/ci-marker.txt"
def _write(domain, val):
    """Overwrite the n8n CI marker file (``MARKER``) in the app container with ``val``.

    Runs ``echo <val> > MARKER`` through ``sh -c`` via ``lifecycle.exec_in_app``.
    """
    import shlex  # local import: only this helper needs it

    # Quote the value so whitespace or shell metacharacters in a marker cannot be word-split
    # or interpreted by the shell; the plain tokens used today pass through unchanged.
    lifecycle.exec_in_app(domain, ["sh", "-c", f"echo {shlex.quote(val)} > {MARKER}"])
def pre_upgrade(domain, meta):
    """Pre-upgrade hook: write "upgrade-survives" to the marker file.

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "upgrade-survives"
    _write(domain, marker_value)
def pre_backup(domain, meta):
    """Pre-backup hook: write "original" to the marker file.

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "original"
    _write(domain, marker_value)
def pre_restore(domain, meta):
    """Pre-restore hook: write "mutated" to diverge, so a successful restore is observable.

    ``meta`` is accepted as part of the hook signature but is not used here.
    """
    marker_value = "mutated"
    _write(domain, marker_value)

View File

@ -1,30 +1,19 @@
"""n8n — BACKUP overlay (Phase 1d, DG4): seed a known state into the backed-up /home/node/.n8n path,
back it up (assert a snapshot artifact), then mutate so the RESTORE overlay (test_restore.py) can
prove the backed-up state returns. Runs on the shared deployment; the mutated marker persists for the
restore tier.
"""n8n — BACKUP overlay (Phase 1e HC3): assertion-only + additive.
The n8n `app` service is labelled `backupbot.backup=true` with `backupbot.backup.path=/home/node/.n8n`,
so a marker file there is backed up; checked via `exec_in_app`."""
ops.pre_backup seeded "original" into the backed-up /home/node/.n8n path; the orchestrator performed
the backup once (generic tier asserted a snapshot artifact). This overlay ADDS: the seeded state is
intact at backup time. The backup→restore divergence is in ops.pre_restore."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/home/node/.n8n/ci-marker.txt"
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish original state in the backed-up path, then back it up (reuse the generic op:
# backup + assert a snapshot artifact was produced)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate state (diverge from the backup)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "mutated"
def test_backup_captures_state(live_app):
    """The marker file read directly from the app container still holds "original"."""
    observed = lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip()
    assert observed == "original", "the seeded state was not present at backup time"

View File

@ -1,24 +1,19 @@
"""n8n — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
"""n8n — RESTORE overlay (Phase 1e HC3): data-integrity, assertion-only + additive.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left the
/home/node/.n8n marker mutated to "mutated" after backing up "original". This restores the snapshot
via the shared op helper (`generic.do_restore`, which also asserts the app is healthy + serving
afterwards), then asserts the data returned to the pre-mutation "original" — the app-specific data
integrity the generic restore cannot check. Reads via `exec_in_app`. Assertion-only (no
deploy/teardown)."""
ops.pre_restore mutated the /home/node/.n8n marker to "mutated"; the orchestrator restored once
(generic tier asserted healthy/serving). This overlay ADDS: the data returned to the pre-mutation
(backed-up) "original". Read via exec_in_app."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/home/node/.n8n/ci-marker.txt"
def test_restore_returns_state(live_app, meta):
domain = live_app
generic.do_restore(domain, meta) # restore + assert healthy/serving
def test_restore_returns_state(live_app):
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "original"
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "original"
), "restore did not return the pre-mutation state"

View File

@ -1,29 +1,19 @@
"""n8n — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
"""n8n — UPGRADE overlay (Phase 1e HC3): data-continuity, assertion-only + additive.
The orchestrator deployed the previous published version ONCE; this overlay writes a marker file into
the persistent /home/node/.n8n volume (n8n state = sqlite + config; the marker is read back via
`exec_in_app`, not HTTP-served), performs the in-place upgrade via the shared op helper
(`generic.do_upgrade`, which also asserts reconverge + serving + that the deployment moved), then
asserts the data SURVIVED. Assertion-only on the shared deployment."""
ops.pre_upgrade seeded a marker into /home/node/.n8n; the orchestrator performed the upgrade once
(generic tier asserted reconverge/serving/moved). This overlay ADDS: the data survived. Read via
exec_in_app (n8n state isn't HTTP-served)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
from harness import lifecycle # noqa: E402
MARKER = "/home/node/.n8n/ci-marker.txt"
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER}"])
assert lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
assert lifecycle.http_get(domain, meta["HEALTH_PATH"]) == 200
def test_upgrade_preserves_data(live_app):
assert (
lifecycle.exec_in_app(domain, ["cat", MARKER]).strip() == "upgrade-survives"
lifecycle.exec_in_app(live_app, ["cat", MARKER]).strip() == "upgrade-survives"
), "data did not survive the upgrade"

View File

@ -26,6 +26,7 @@ def teardown_function():
# ---- HC3: generic is the floor; overlay resolution is separate + additive --------------------
def test_no_overlay_means_generic_floor():
# hedgedoc ships no tests/hedgedoc/ overlay and no repo-local -> no overlay; generic floor exists
assert discovery.resolve_overlay_op("hedgedoc", "install", None) is None
@ -40,11 +41,15 @@ def test_cc_ci_overlay_found_for_each_op():
# custom-html ships cc-ci overlays for all four ops -> resolve_overlay_op returns the cc-ci file
for op in discovery.LIFECYCLE_OPS:
res = discovery.resolve_overlay_op("custom-html", op, None)
assert res == ("cc-ci", os.path.join(discovery.cc_ci_dir("custom-html"), f"test_{op}.py")), op
assert res == (
"cc-ci",
os.path.join(discovery.cc_ci_dir("custom-html"), f"test_{op}.py"),
), op
# ---- HC2: repo-local approval gate (default-deny) --------------------------------------------
def test_repo_local_ignored_when_not_approved(tmp_path):
# default-deny: a repo-local overlay is NOT consulted for an unapproved recipe -> cc-ci wins
_approve(tmp_path) # empty allowlist
@ -97,18 +102,20 @@ def test_install_steps_repo_local_gated(tmp_path):
def test_pre_op_hook_repo_local_gated(tmp_path):
# hedgedoc has no cc-ci ops.py, so this isolates the repo-local gate (custom-html now ships a
# real cc-ci tests/custom-html/ops.py, which would mask the gate).
rl = tmp_path / "repo"
rl.mkdir()
(rl / "ops.py").write_text("def pre_upgrade(domain, meta):\n pass\n")
_approve(tmp_path) # not approved -> repo-local ops.py ignored
assert discovery.pre_op_hook("custom-html", "upgrade", str(rl)) is None
_approve(tmp_path) # not approved -> repo-local ops.py ignored (no cc-ci ops.py either)
assert discovery.pre_op_hook("hedgedoc", "upgrade", str(rl)) is None
_approve(tmp_path, "custom-html") # approved -> repo-local pre-op hook honored
hook = discovery.pre_op_hook("custom-html", "upgrade", str(rl))
_approve(tmp_path, "hedgedoc") # approved -> repo-local pre-op hook honored
hook = discovery.pre_op_hook("hedgedoc", "upgrade", str(rl))
assert hook == ("repo-local", str(rl / "ops.py"))
# an ops.py that does NOT define pre_<op> is not a hook for that op
assert discovery.pre_op_hook("custom-html", "backup", str(rl)) is None
assert discovery.pre_op_hook("hedgedoc", "backup", str(rl)) is None
def test_default_allowlist_is_empty():