feat(1e): HC3 additive generic + op/assertion split (orchestrator owns the op)

- orchestrator: per mutating tier, run optional pre-op seed hook (ops.py pre_<op>) → perform the op
  ONCE (harness-owned) → run generic assertion (unless opted out) AND overlay assertion, both against
  the shared post-op deployment. Op results passed op→assertion via run-scoped CCCI_OP_STATE_FILE.
- opt-out: CCCI_SKIP_GENERIC / CCCI_SKIP_GENERIC_<OP> / recipe_meta.SKIP_GENERIC (declarative).
- generic.py: split do_* into op primitives (perform_upgrade/backup/restore) + assertions
  (assert_upgraded/backup_artifact/restore_healthy) reading op_state(); deployed_identity now returns
  {version,image,chaos} (chaos label ready for HC1).
- generic test_<op>.py + all 6 recipe overlays migrated to assertion-only; pre-op seeding moved to
  per-recipe ops.py (pre_upgrade/pre_backup/pre_restore). install overlays unchanged (no op).
- deploy-count stays 1 (op primitives never call deploy_app). lint PASS; 8 unit tests PASS on cc-ci.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 03:12:04 +01:00
parent 6a59343996
commit b7e6cbd7be
31 changed files with 623 additions and 412 deletions

View File

@ -1,13 +1,18 @@
#!/usr/bin/env python3
"""Top-level CI orchestrator (plan §4.3 + Phase 1d), invoked by the Drone pipeline (or by hand).
"""Top-level CI orchestrator (plan §4.3 + Phase 1d/1e), invoked by the Drone pipeline (or by hand).
Phase 1d model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment
(install asserts; upgrade does `abra app upgrade` in place; backup/restore mutate in place; custom
asserts), then ONE teardown in `finally`. Each tier's assertions come from exactly one file — a
recipe overlay if present, else the generic default — discovered by `harness.discovery`
(precedence repo-local > cc-ci > generic). The generic is the default for every op, so ANY recipe is
testable with zero config (DG1DG4). The lifecycle OPS live in the shared harness (harness.generic),
not per-recipe (DG7 DRY).
Model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment, then ONE
teardown in `finally`. Per Phase 1e the orchestrator OWNS each mutating op (HC3): for a tier it runs
the optional pre-op seed hook (recipe ops.py `pre_<op>`), performs the op exactly ONCE
(upgrade/backup/restore — install has none), then runs BOTH the generic assertion file (the floor,
unless explicitly opted out) AND the recipe overlay assertion file (if any) against the shared
post-op state — generic and overlay are ADDITIVE, not override (HC3). Op results an assertion needs
(pre-upgrade identity, snapshot_id) pass op→assertion via a run-scoped JSON state file
($CCCI_OP_STATE_FILE). The upgrade op deploys the PR-HEAD code under test via `abra app deploy
--chaos` (HC1). Repo-local (PR-authored) overlays/hooks run only for allowlist-approved recipes (HC2,
gated in harness.discovery). The generic is the default for every op, so ANY recipe is testable with
zero config (DG1DG4). The lifecycle OPS live in the shared harness (harness.generic), not per-recipe
(DG7 DRY).
Run parameters from env (set by the comment-bridge via Drone build params):
RECIPE recipe name (e.g. custom-html) [required]
@ -23,7 +28,10 @@ invoke as: cc-ci-run runner/run_recipe_ci.py
from __future__ import annotations
import contextlib
import glob
import importlib.util
import json
import os
import shutil
import subprocess
@ -37,6 +45,10 @@ from harness import discovery, generic, lifecycle, naming # noqa: E402
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
def _truthy(v: str | None) -> bool:
return str(v or "").strip().lower() in ("1", "true", "yes", "on")
def _redact_values() -> list[str]:
"""Values to scrub from published logs (D6 redaction filter, plan §4.4). The infra secrets
materialised at /run/secrets/* — if any subprocess ever echoes one, mask it. Only >=8-char
@ -56,6 +68,14 @@ def _redact_values() -> list[str]:
_REDACT = _redact_values()
def _scrub(text: str) -> str:
"""Mask any known infra-secret value in a string (D6 redaction, plan §4.4)."""
for v in _REDACT:
if v in text:
text = text.replace(v, "***REDACTED***")
return text
def run_redacted(cmd: list[str], env: dict | None = None) -> int:
"""Run a subprocess, streaming output live (so Drone logs stay tail-able) but masking any known
infra-secret value first. Belt-and-suspenders: the harness never prints secrets and abra doesn't
@ -71,10 +91,7 @@ def run_redacted(cmd: list[str], env: dict | None = None) -> int:
)
assert proc.stdout is not None
for line in proc.stdout:
for v in _REDACT:
if v in line:
line = line.replace(v, "***REDACTED***")
sys.stdout.write(line)
sys.stdout.write(_scrub(line))
sys.stdout.flush()
return proc.wait()
@ -149,7 +166,7 @@ def _load_meta(recipe: str) -> dict:
ns: dict = {}
with open(path) as fh:
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
for k in list(meta) + ["BACKUP_CAPABLE"]:
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
if k in ns:
meta[k] = ns[k]
return meta
@ -159,15 +176,105 @@ def _tier_env(domain: str) -> dict:
return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
def run_op_tier(recipe: str, op: str, repo_local: str | None, domain: str) -> str:
"""Run the single assertion file for a lifecycle op (overlay or generic) against the shared
deployment. The file performs the op (upgrade/backup/restore) + asserts; install asserts only
(already deployed). Returns 'pass' | 'fail'."""
source, path = discovery.resolve_op(recipe, op, repo_local)
rel = os.path.relpath(path, ROOT)
print(f"\n===== TIER: {op} ({source}: {rel}) =====", flush=True)
rc = run_redacted([sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain))
return "pass" if rc == 0 else "fail"
def _skip_generic(op: str, meta: dict) -> bool:
"""Whether the generic assertion for `op` is opted out (Phase 1e HC3). Default: run (additive).
Opt-out, any of: env CCCI_SKIP_GENERIC (all ops), env CCCI_SKIP_GENERIC_<OP>, or the recipe's
declarative recipe_meta.SKIP_GENERIC list (op name, or "all"/"*")."""
if _truthy(os.environ.get("CCCI_SKIP_GENERIC")):
return True
if _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")):
return True
sg = [str(s).lower() for s in (meta.get("SKIP_GENERIC") or [])]
return "all" in sg or "*" in sg or op in sg
def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None:
"""Run the optional pre-op seed hook (recipe ops.py `pre_<op>`) BEFORE the harness performs the
op (HC3 op/assertion split): overlays seed data-continuity markers / the backup→restore mutation
here, then assert post-op in test_<op>.py. cc-ci's ops.py is trusted; a repo-local ops.py is
consulted only for allowlist-approved recipes (HC2 gate is inside discovery.pre_op_hook). Imported
in-process; the recipe dir is put on sys.path so an ops.py can import its sibling helpers."""
hook = discovery.pre_op_hook(recipe, op, repo_local)
if not hook:
return
source, path = hook
d = os.path.dirname(path)
sys.path.insert(0, d)
try:
spec = importlib.util.spec_from_file_location(f"ccci_ops_{recipe}_{op}", path)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
print(f" pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True)
getattr(mod, f"pre_{op}")(domain, meta)
finally:
if d in sys.path:
sys.path.remove(d)
def _perform_op(op: str, domain: str, target: str | None, op_state: dict) -> None:
"""Perform the single mutating op ONCE (the harness owns the op, HC3). install has no op. Records
what the assertions need (pre-upgrade identity, backup snapshot_id) into op_state. None of these
call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place upgrade is not a new
install (HC1 reconciliation)."""
if op == "upgrade":
op_state["upgrade"] = {"before": generic.perform_upgrade(domain, target)}
elif op == "backup":
op_state["backup"] = {"snapshot_id": generic.perform_backup(domain)}
elif op == "restore":
generic.perform_restore(domain)
# install: already deployed; no op
def run_lifecycle_tier(
recipe: str,
op: str,
repo_local: str | None,
domain: str,
meta: dict,
target: str | None,
op_state: dict,
) -> str:
"""Additive lifecycle tier (HC3): seed (pre-op hook) → perform the op ONCE → run the generic
assertion file (unless opted out) AND the overlay assertion file, both against the shared post-op
deployment. Returns 'pass' | 'fail' | 'skip'."""
overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
skip_gen = _skip_generic(op, meta)
files: list[tuple[str, str]] = []
if not skip_gen:
files.append(discovery.generic_op(op))
if overlay:
files.append(overlay)
if not files:
# generic opted out AND no overlay → nothing would assert; don't perform a pointless mutating op
print(f"\n===== TIER: {op} — SKIP (generic opted out, no overlay) =====", flush=True)
return "skip"
ov = f"{overlay[0]}:{os.path.relpath(overlay[1], ROOT)}" if overlay else "none"
print(
f"\n===== TIER: {op} (generic={'skip' if skip_gen else 'run'}, overlay={ov}) =====",
flush=True,
)
# 1) pre-op seed hook + 2) the op ONCE (harness-owned). A failure here is an op failure → tier fail.
try:
_run_pre_hook(recipe, op, repo_local, domain, meta)
_perform_op(op, domain, target, op_state)
with open(os.environ["CCCI_OP_STATE_FILE"], "w") as f:
json.dump(op_state, f)
except Exception as e: # noqa: BLE001 — a failed op is a reported tier failure, not a crash
print(f"!! {op} op failed: {_scrub(str(e))}", flush=True)
return "fail"
# 3) assertions: generic (unless opted out) + overlay, each its own pytest, all against the
# single post-op deployment. Generic runs first so an overlay may assume readiness.
rc_all = 0
for source, path in files:
print(f" assert ({source}): {os.path.relpath(path, ROOT)}", flush=True)
rc = run_redacted(
[sys.executable, "-m", "pytest", "-v", "-rA", path], env=_tier_env(domain)
)
if rc != 0:
rc_all = rc
return "pass" if rc_all == 0 else "fail"
def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
@ -223,6 +330,14 @@ def main() -> int:
f.write("0")
os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile
# Run-scoped op state (HC3): the orchestrator records op results (pre-upgrade identity, backup
# snapshot_id) here for the assertion tiers (generic + overlay) to read via generic.op_state().
statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
with open(statefile, "w") as f:
json.dump({}, f)
os.environ["CCCI_OP_STATE_FILE"] = statefile
op_state: dict = {}
results: dict[str, str] = {}
lifecycle.janitor()
try:
@ -243,28 +358,38 @@ def main() -> int:
print(f"!! deploy/readiness failed: {e}", flush=True)
deploy_ok = False
# ---- INSTALL tier (always) ----
# ---- INSTALL tier (always; additive generic + overlay, no op) ----
if "install" in stages:
results["install"] = (
run_op_tier(recipe, "install", repo_local, domain) if deploy_ok else "fail"
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, target, op_state)
if deploy_ok
else "fail"
)
if deploy_ok:
# ---- UPGRADE tier ----
# ---- UPGRADE tier (op once → generic + overlay assert) ----
if "upgrade" in stages:
results["upgrade"] = (
run_op_tier(recipe, "upgrade", repo_local, domain)
run_lifecycle_tier(
recipe, "upgrade", repo_local, domain, meta, target, op_state
)
if prev
else "skip" # only one published version → nothing to upgrade from
)
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
if "backup" in stages:
results["backup"] = (
run_op_tier(recipe, "backup", repo_local, domain) if backup_cap else "skip"
run_lifecycle_tier(recipe, "backup", repo_local, domain, meta, target, op_state)
if backup_cap
else "skip"
)
if "restore" in stages:
results["restore"] = (
run_op_tier(recipe, "restore", repo_local, domain) if backup_cap else "skip"
run_lifecycle_tier(
recipe, "restore", repo_local, domain, meta, target, op_state
)
if backup_cap
else "skip"
)
# ---- CUSTOM tier ----
if "custom" in stages:
@ -281,6 +406,8 @@ def main() -> int:
with open(countfile) as f:
deploy_count = int(f.read().strip() or "0")
os.remove(countfile)
with contextlib.suppress(OSError):
os.remove(statefile)
# ---- per-op summary (DG6 feed) ----
print("\n===== RUN SUMMARY =====", flush=True)