feat(1e): HC3 additive generic + op/assertion split (orchestrator owns the op)
- orchestrator: per mutating tier, run optional pre-op seed hook (ops.py pre_<op>) → perform the op
ONCE (harness-owned) → run generic assertion (unless opted out) AND overlay assertion, both against
the shared post-op deployment. Op results passed op→assertion via run-scoped CCCI_OP_STATE_FILE.
- opt-out: CCCI_SKIP_GENERIC / CCCI_SKIP_GENERIC_<OP> / recipe_meta.SKIP_GENERIC (declarative).
- generic.py: split do_* into op primitives (perform_upgrade/backup/restore) + assertions
(assert_upgraded/backup_artifact/restore_healthy) reading op_state(); deployed_identity now returns
{version,image,chaos} (chaos label ready for HC1).
- generic test_<op>.py + all 6 recipe overlays migrated to assertion-only; pre-op seeding moved to
per-recipe ops.py (pre_upgrade/pre_backup/pre_restore). install overlays unchanged (no op).
- deploy-count stays 1 (op primitives never call deploy_app). lint PASS; 8 unit tests PASS on cc-ci.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -1,13 +1,18 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Top-level CI orchestrator (plan §4.3 + Phase 1d), invoked by the Drone pipeline (or by hand).
|
||||
"""Top-level CI orchestrator (plan §4.3 + Phase 1d/1e), invoked by the Drone pipeline (or by hand).
|
||||
|
||||
Phase 1d model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment
|
||||
(install asserts; upgrade does `abra app upgrade` in place; backup/restore mutate in place; custom
|
||||
asserts), then ONE teardown in `finally`. Each tier's assertions come from exactly one file — a
|
||||
recipe overlay if present, else the generic default — discovered by `harness.discovery`
|
||||
(precedence repo-local > cc-ci > generic). The generic is the default for every op, so ANY recipe is
|
||||
testable with zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic),
|
||||
not per-recipe (DG7 DRY).
|
||||
Model: deploy the app ONCE, then run lifecycle TIERS against that single shared deployment, then ONE
|
||||
teardown in `finally`. Per Phase 1e the orchestrator OWNS each mutating op (HC3): for a tier it runs
|
||||
the optional pre-op seed hook (recipe ops.py `pre_<op>`), performs the op exactly ONCE
|
||||
(upgrade/backup/restore — install has none), then runs BOTH the generic assertion file (the floor,
|
||||
unless explicitly opted out) AND the recipe overlay assertion file (if any) against the shared
|
||||
post-op state — generic and overlay are ADDITIVE, not override (HC3). Op results an assertion needs
|
||||
(pre-upgrade identity, snapshot_id) pass op→assertion via a run-scoped JSON state file
|
||||
($CCCI_OP_STATE_FILE). The upgrade op deploys the PR-HEAD code under test via `abra app deploy
|
||||
--chaos` (HC1). Repo-local (PR-authored) overlays/hooks run only for allowlist-approved recipes (HC2,
|
||||
gated in harness.discovery). The generic is the default for every op, so ANY recipe is testable with
|
||||
zero config (DG1–DG4). The lifecycle OPS live in the shared harness (harness.generic), not per-recipe
|
||||
(DG7 DRY).
|
||||
|
||||
Run parameters from env (set by the comment-bridge via Drone build params):
|
||||
RECIPE recipe name (e.g. custom-html) [required]
|
||||
@ -23,7 +28,10 @@ invoke as: cc-ci-run runner/run_recipe_ci.py
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import glob
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
@ -37,6 +45,10 @@ from harness import discovery, generic, lifecycle, naming # noqa: E402
|
||||
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
|
||||
|
||||
|
||||
def _truthy(v: str | None) -> bool:
|
||||
return str(v or "").strip().lower() in ("1", "true", "yes", "on")
|
||||
|
||||
|
||||
def _redact_values() -> list[str]:
|
||||
"""Values to scrub from published logs (D6 redaction filter, plan §4.4). The infra secrets
|
||||
materialised at /run/secrets/* — if any subprocess ever echoes one, mask it. Only >=8-char
|
||||
@ -56,6 +68,14 @@ def _redact_values() -> list[str]:
|
||||
_REDACT = _redact_values()
|
||||
|
||||
|
||||
def _scrub(text: str) -> str:
    """Return `text` with every known infra-secret value masked (D6 redaction, plan §4.4).

    Each value in the module-level `_REDACT` list that occurs in `text` is replaced by the
    literal marker ``***REDACTED***``.
    """
    masked = text
    for secret in _REDACT:
        # str.replace leaves the string unchanged when the value is absent, so no separate
        # membership check is needed.
        masked = masked.replace(secret, "***REDACTED***")
    return masked
|
||||
|
||||
|
||||
def run_redacted(cmd: list[str], env: dict | None = None) -> int:
|
||||
"""Run a subprocess, streaming output live (so Drone logs stay tail-able) but masking any known
|
||||
infra-secret value first. Belt-and-suspenders: the harness never prints secrets and abra doesn't
|
||||
@ -71,10 +91,7 @@ def run_redacted(cmd: list[str], env: dict | None = None) -> int:
|
||||
)
|
||||
assert proc.stdout is not None
|
||||
for line in proc.stdout:
|
||||
for v in _REDACT:
|
||||
if v in line:
|
||||
line = line.replace(v, "***REDACTED***")
|
||||
sys.stdout.write(line)
|
||||
sys.stdout.write(_scrub(line))
|
||||
sys.stdout.flush()
|
||||
return proc.wait()
|
||||
|
||||
@ -149,7 +166,7 @@ def _load_meta(recipe: str) -> dict:
|
||||
ns: dict = {}
|
||||
with open(path) as fh:
|
||||
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
|
||||
for k in list(meta) + ["BACKUP_CAPABLE"]:
|
||||
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
|
||||
if k in ns:
|
||||
meta[k] = ns[k]
|
||||
return meta
|
||||
@ -159,15 +176,105 @@ def _tier_env(domain: str) -> dict:
|
||||
return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}")
|
||||
|
||||
|
||||
def run_op_tier(recipe: str, op: str, repo_local: str | None, domain: str) -> str:
    """Run the one assertion file resolved for lifecycle `op` against the shared deployment.

    `discovery.resolve_op` picks the file (a recipe overlay or the generic default) and reports
    where it came from. The file is run under pytest through `run_redacted`, with the tier
    environment built by `_tier_env(domain)`. In this single-file model the test file itself is
    expected to perform the op (upgrade/backup/restore) and assert; install only asserts because
    the app is already deployed.

    Returns 'pass' when pytest exits 0, otherwise 'fail'.
    """
    origin, test_path = discovery.resolve_op(recipe, op, repo_local)
    shown_path = os.path.relpath(test_path, ROOT)
    print(f"\n===== TIER: {op} ({origin}: {shown_path}) =====", flush=True)

    pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", test_path]
    exit_code = run_redacted(pytest_cmd, env=_tier_env(domain))
    if exit_code == 0:
        return "pass"
    return "fail"
|
||||
def _skip_generic(op: str, meta: dict) -> bool:
    """Tell whether the generic assertion for `op` is opted out (Phase 1e HC3).

    The default is to run the generic assertion (it is additive to any overlay). It is skipped
    when any of the following holds:

    * env ``CCCI_SKIP_GENERIC`` is truthy (applies to every op);
    * env ``CCCI_SKIP_GENERIC_<OP>`` is truthy (``<OP>`` is the upper-cased op name);
    * the recipe's declarative ``recipe_meta.SKIP_GENERIC`` names the op, or "all" / "*".

    ``SKIP_GENERIC`` may be a list of names or a single bare string. A bare string is treated as
    a one-element list; iterating it directly would compare individual characters and silently
    never match. Names are compared case-insensitively with surrounding whitespace ignored.

    Args:
        op: lifecycle op name (e.g. "upgrade").
        meta: the recipe meta dict; only the "SKIP_GENERIC" key is read.

    Returns:
        True if the generic assertion must be skipped for this op, else False.
    """
    if _truthy(os.environ.get("CCCI_SKIP_GENERIC")):
        return True
    if _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")):
        return True

    declared = meta.get("SKIP_GENERIC") or []
    if isinstance(declared, str):
        # SKIP_GENERIC = "all" must mean ["all"], not ["a", "l", "l"].
        declared = [declared]
    names = {str(entry).strip().lower() for entry in declared}
    return bool(names & {"all", "*", op.lower()})
|
||||
|
||||
|
||||
def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None:
    """Run the optional pre-op seed hook (recipe ops.py ``pre_<op>``) before the op is performed.

    Part of the HC3 op/assertion split: an overlay seeds its data-continuity markers (or the
    backup→restore mutation) here and asserts on them post-op in test_<op>.py. The hook file is
    located by `discovery.pre_op_hook`; when it returns nothing this function is a no-op. Per the
    orchestrator notes, the HC2 allowlist gate for repo-local hooks is applied inside
    `discovery.pre_op_hook`.

    The module is imported in-process under a per-recipe/per-op name. Its directory is put at the
    front of ``sys.path`` for the duration of the call, so ops.py can import sibling helpers, and
    is removed again afterwards even if the hook raises.

    Args:
        recipe: recipe name.
        op: lifecycle op name; the hook function called is ``pre_<op>``.
        repo_local: optional repo-local overlay root, forwarded to discovery.
        domain: app domain, passed to the hook.
        meta: recipe meta dict, passed to the hook.

    Raises:
        ImportError: if no import spec/loader can be built for the hook file.
        Exception: anything raised while importing the module or running the hook propagates.
    """
    hook = discovery.pre_op_hook(recipe, op, repo_local)
    if not hook:
        return
    source, path = hook
    hook_dir = os.path.dirname(path)
    sys.path.insert(0, hook_dir)
    try:
        spec = importlib.util.spec_from_file_location(f"ccci_ops_{recipe}_{op}", path)
        if spec is None or spec.loader is None:
            # spec_from_file_location returns None when no loader matches the file; fail with a
            # clear message instead of an AttributeError on None.
            raise ImportError(f"cannot load pre-op hook module from {path}")
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        print(f" pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True)
        getattr(mod, f"pre_{op}")(domain, meta)
    finally:
        if hook_dir in sys.path:
            sys.path.remove(hook_dir)
|
||||
|
||||
|
||||
def _perform_op(op: str, domain: str, target: str | None, op_state: dict) -> None:
|
||||
"""Perform the single mutating op ONCE (the harness owns the op, HC3). install has no op. Records
|
||||
what the assertions need (pre-upgrade identity, backup snapshot_id) into op_state. None of these
|
||||
call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place upgrade is not a new
|
||||
install (HC1 reconciliation)."""
|
||||
if op == "upgrade":
|
||||
op_state["upgrade"] = {"before": generic.perform_upgrade(domain, target)}
|
||||
elif op == "backup":
|
||||
op_state["backup"] = {"snapshot_id": generic.perform_backup(domain)}
|
||||
elif op == "restore":
|
||||
generic.perform_restore(domain)
|
||||
# install: already deployed; no op
|
||||
|
||||
|
||||
def run_lifecycle_tier(
    recipe: str,
    op: str,
    repo_local: str | None,
    domain: str,
    meta: dict,
    target: str | None,
    op_state: dict,
) -> str:
    """Run one additive lifecycle tier (HC3) against the shared deployment.

    Steps, in order:

    1. Work out which assertion files apply: the generic file (unless `_skip_generic` says it is
       opted out), then the recipe overlay (if `discovery.resolve_overlay_op` finds one). If
       neither applies, the tier is skipped without performing the op.
    2. Run the optional pre-op seed hook, perform the op once via `_perform_op`, then write
       `op_state` as JSON to the file named by ``$CCCI_OP_STATE_FILE``. Any exception in this
       step is reported (scrubbed of secrets) and makes the tier fail.
    3. Run every assertion file as its own pytest process. The generic file runs first; all
       files run even if an earlier one fails.

    Returns:
        'skip' when there is nothing to assert, 'fail' when the op step raised or any pytest
        run exited non-zero, otherwise 'pass'.
    """
    overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
    generic_opted_out = _skip_generic(op, meta)

    assertion_files: list[tuple[str, str]] = []
    if not generic_opted_out:
        assertion_files.append(discovery.generic_op(op))
    if overlay:
        assertion_files.append(overlay)

    if not assertion_files:
        # generic opted out AND no overlay → nothing would assert, so skip the mutating op too
        print(f"\n===== TIER: {op} — SKIP (generic opted out, no overlay) =====", flush=True)
        return "skip"

    if overlay:
        overlay_desc = f"{overlay[0]}:{os.path.relpath(overlay[1], ROOT)}"
    else:
        overlay_desc = "none"
    generic_desc = "skip" if generic_opted_out else "run"
    print(
        f"\n===== TIER: {op} (generic={generic_desc}, overlay={overlay_desc}) =====",
        flush=True,
    )

    # Seed hook, then the op ONCE (harness-owned), then persist the op state for the assertions.
    try:
        _run_pre_hook(recipe, op, repo_local, domain, meta)
        _perform_op(op, domain, target, op_state)
        with open(os.environ["CCCI_OP_STATE_FILE"], "w") as state_fh:
            json.dump(op_state, state_fh)
    except Exception as exc:  # noqa: BLE001 — a failed op is a reported tier failure, not a crash
        print(f"!! {op} op failed: {_scrub(str(exc))}", flush=True)
        return "fail"

    # Assertions: each file is its own pytest run against the single post-op deployment.
    # Generic comes first so an overlay may assume readiness.
    any_failed = False
    for source, path in assertion_files:
        print(f" assert ({source}): {os.path.relpath(path, ROOT)}", flush=True)
        pytest_cmd = [sys.executable, "-m", "pytest", "-v", "-rA", path]
        exit_code = run_redacted(pytest_cmd, env=_tier_env(domain))
        if exit_code != 0:
            any_failed = True
    return "fail" if any_failed else "pass"
|
||||
|
||||
|
||||
def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
|
||||
@ -223,6 +330,14 @@ def main() -> int:
|
||||
f.write("0")
|
||||
os.environ["CCCI_DEPLOY_COUNT_FILE"] = countfile
|
||||
|
||||
# Run-scoped op state (HC3): the orchestrator records op results (pre-upgrade identity, backup
|
||||
# snapshot_id) here for the assertion tiers (generic + overlay) to read via generic.op_state().
|
||||
statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
|
||||
with open(statefile, "w") as f:
|
||||
json.dump({}, f)
|
||||
os.environ["CCCI_OP_STATE_FILE"] = statefile
|
||||
op_state: dict = {}
|
||||
|
||||
results: dict[str, str] = {}
|
||||
lifecycle.janitor()
|
||||
try:
|
||||
@ -243,28 +358,38 @@ def main() -> int:
|
||||
print(f"!! deploy/readiness failed: {e}", flush=True)
|
||||
deploy_ok = False
|
||||
|
||||
# ---- INSTALL tier (always) ----
|
||||
# ---- INSTALL tier (always; additive generic + overlay, no op) ----
|
||||
if "install" in stages:
|
||||
results["install"] = (
|
||||
run_op_tier(recipe, "install", repo_local, domain) if deploy_ok else "fail"
|
||||
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, target, op_state)
|
||||
if deploy_ok
|
||||
else "fail"
|
||||
)
|
||||
|
||||
if deploy_ok:
|
||||
# ---- UPGRADE tier ----
|
||||
# ---- UPGRADE tier (op once → generic + overlay assert) ----
|
||||
if "upgrade" in stages:
|
||||
results["upgrade"] = (
|
||||
run_op_tier(recipe, "upgrade", repo_local, domain)
|
||||
run_lifecycle_tier(
|
||||
recipe, "upgrade", repo_local, domain, meta, target, op_state
|
||||
)
|
||||
if prev
|
||||
else "skip" # only one published version → nothing to upgrade from
|
||||
)
|
||||
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
|
||||
if "backup" in stages:
|
||||
results["backup"] = (
|
||||
run_op_tier(recipe, "backup", repo_local, domain) if backup_cap else "skip"
|
||||
run_lifecycle_tier(recipe, "backup", repo_local, domain, meta, target, op_state)
|
||||
if backup_cap
|
||||
else "skip"
|
||||
)
|
||||
if "restore" in stages:
|
||||
results["restore"] = (
|
||||
run_op_tier(recipe, "restore", repo_local, domain) if backup_cap else "skip"
|
||||
run_lifecycle_tier(
|
||||
recipe, "restore", repo_local, domain, meta, target, op_state
|
||||
)
|
||||
if backup_cap
|
||||
else "skip"
|
||||
)
|
||||
# ---- CUSTOM tier ----
|
||||
if "custom" in stages:
|
||||
@ -281,6 +406,8 @@ def main() -> int:
|
||||
with open(countfile) as f:
|
||||
deploy_count = int(f.read().strip() or "0")
|
||||
os.remove(countfile)
|
||||
with contextlib.suppress(OSError):
|
||||
os.remove(statefile)
|
||||
|
||||
# ---- per-op summary (DG6 feed) ----
|
||||
print("\n===== RUN SUMMARY =====", flush=True)
|
||||
|
||||
Reference in New Issue
Block a user