feat(2w): W2 --quick mode in run_recipe_ci.py (WC4+WC7)
run_quick(): opt-in fast lane (CCCI_QUICK=1 / MODE=quick) — reattach the data-warm canonical (canonical.deploy_canonical, known-good volume) → deps wiring (warm keycloak + per-run realm) → UPGRADE to PR head (chaos, run_lifecycle_tier 'upgrade': reconverge+moved+serving + overlay) → custom tier. PASS → undeploy_keep_volume, known-good UNCHANGED (NEVER promote); FAIL → warmsnap.restore last-known-good + undeploy (roll back, data safe). Always deletes per-run warm realm. mode=quick labelled lower-confidence (WC7); skips install/backup/restore; no deploy-count guard (no deploy_app). main() dispatches to run_quick when a canonical exists, else clean no-canonical fallback to COLD. Cold path byte-identical (deps wiring intentionally mirrored, not refactored). 61 unit pass; cold untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -40,7 +40,17 @@ import tempfile
|
||||
|
||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, os.path.join(ROOT, "runner"))
|
||||
from harness import deps as deps_mod, discovery, generic, lifecycle, naming, warm # noqa: E402
|
||||
from harness import ( # noqa: E402
|
||||
abra,
|
||||
canonical,
|
||||
deps as deps_mod,
|
||||
discovery,
|
||||
generic,
|
||||
lifecycle,
|
||||
naming,
|
||||
warm,
|
||||
warmsnap,
|
||||
)
|
||||
|
||||
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
|
||||
|
||||
@ -386,6 +396,202 @@ def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
|
||||
return "pass" if rc_all == 0 else "fail"
|
||||
|
||||
|
||||
def _wait_undeployed(domain: str, timeout: int = 120) -> None:
    """Poll until the stack for `domain` has no services left after an undeploy.

    Used before warmsnap.restore (which requires the stack to be undeployed) so the restore
    doesn't race a half-removed stack.

    Args:
        domain: app domain whose stack name is derived via lifecycle._stack_name.
        timeout: maximum number of seconds to keep polling (checked every 2 seconds).

    Returns:
        None. Best-effort: if services are still listed when the timeout expires, a warning is
        printed and the function returns anyway; it never raises for a timeout.
    """
    stack = lifecycle._stack_name(domain)  # noqa: SLF001
    # Monotonic clock: a wall-clock adjustment must not shorten or stretch the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if not lifecycle._docker_names("service", stack):  # noqa: SLF001
            return
        time.sleep(2)
    # Timed out: say so instead of returning silently, so a later restore failure is explainable.
    print(f"!! {stack}: services still present {timeout}s after undeploy; continuing anyway",
          flush=True)
|
||||
|
||||
|
||||
def run_quick(recipe: str, ref: str | None, head_ref: str | None, repo_local: str | None,
              meta: dict) -> int:
    """WC4 `--quick` opt-in fast lane (plan §2). Reattach the data-warm canonical (known-good volume)
    → upgrade IN PLACE to the PR head (chaos) → assert generic UPGRADE (reconverge+moved+serving) +
    overlay + custom. PASS → undeploy-keep-volume, **known-good UNCHANGED (NEVER promote)**; FAIL →
    restore the last-known-good snapshot + undeploy (roll back, data safe). Lower-confidence; does
    NOT gate merge (WC7). Caller has confirmed a canonical exists.

    NB: the deps wiring + temp-state scaffolding intentionally mirror main()'s cold path rather than
    refactoring it — keeping the gate-passed cold flow byte-identical (zero regression risk).

    Args:
        recipe: recipe name; the canonical domain and registry entry are looked up from it.
        ref: PR ref (may be None); printed in the banner and passed to deps_mod.deploy_deps.
        head_ref: ref handed to the upgrade tier (run_lifecycle_tier).
        repo_local: local tests snapshot path (may be None), passed to the upgrade and custom tiers.
        meta: recipe metadata; reads DEPLOY_TIMEOUT, HEALTH_OK, HEALTH_PATH and HTTP_TIMEOUT.

    Returns:
        0 when no tier failed, the canonical reattached, SSO was not left unverified and teardown
        was clean; 1 otherwise.

    Side effects:
        Sets CCCI_OP_STATE_FILE, CCCI_DEPS_FILE, CCCI_DEPS_SKIP_REPORT, CCCI_DEPS_READY and
        CCCI_DEPS_NOT_READY_REASON in os.environ; creates (and finally removes) the matching temp
        files. An exception escaping a tier still runs the teardown and then propagates.
    """
    import contextlib

    domain = canonical.canonical_domain(recipe)
    reg = canonical.read_registry(recipe) or {}
    print(
        f"\n== cc-ci run [MODE=quick]: recipe={recipe} canonical={domain} "
        f"known-good={reg.get('version')} ref={ref}\n"
        " quick = LOWER-CONFIDENCE opt-in fast lane; does NOT gate merge; NEVER promotes the canonical",
        flush=True,
    )

    # Per-run scratch files, advertised to the tiers through the environment.
    statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
    with open(statefile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_OP_STATE_FILE"] = statefile
    depsfile = os.path.join(tempfile.gettempdir(), f"ccci-deps-{domain}.json")
    with open(depsfile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_DEPS_FILE"] = depsfile
    skipfile = os.path.join(tempfile.gettempdir(), f"ccci-depskip-{domain}.txt")
    # A stale skip report from an earlier run must not be counted for this one.
    with contextlib.suppress(OSError):
        os.remove(skipfile)
    os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile

    op_state: dict = {}
    results: dict[str, str] = {}
    declared = deps_mod.declared_deps(recipe)
    deps_state: dict = {}
    deps_ready = True
    deps_not_ready_reason = ""
    dep_teardown_error: str | None = None
    warm_ok = False
    rolled_back = False
    # Becomes True only when the try body runs to its end. If a tier raises (or the run is
    # interrupted) after "upgrade" already recorded "pass", the teardown must NOT treat the
    # aborted run as a PASS and keep the modified volume.
    body_completed = False

    lifecycle.janitor()
    try:
        # 1) reattach the canonical (warm boot at the known-good version + retained volume)
        try:
            canonical.deploy_canonical(recipe, timeout=int(meta.get("DEPLOY_TIMEOUT", 900)))
            lifecycle.wait_healthy(
                domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
                deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"],
            )
            warm_ok = True
        except Exception as e:  # noqa: BLE001
            print(f"!! canonical reattach/readiness failed: {_scrub(str(e))}", flush=True)

        if warm_ok:
            # 2) deps (warm keycloak + per-run realm) — mirrors main()'s warm/cold split
            if declared:
                print(f"\n===== setup_custom_tests (quick): deps {declared} =====", flush=True)
                try:
                    warm_deps, cold_deps = [], []
                    for d in declared:
                        wd = warm.warm_domain(d)
                        (warm_deps if (wd and warm.is_warm_up(d, wd)) else cold_deps).append(d)
                    dep_metas = {d: _load_meta(d) for d in cold_deps}
                    deps_list = (
                        deps_mod.deploy_deps(recipe, os.environ.get("PR", "0"), ref, cold_deps,
                                             meta_for=dep_metas)
                        if cold_deps else []
                    )
                    for d in warm_deps:
                        wd = warm.warm_domain(d)
                        warm.reap_orphan_realms(d, wd)
                        deps_list.append({"recipe": d, "domain": wd, "warm": True})
                        print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
                    deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
                    deps_mod.write_run_state(deps_state)
                    _run_setup_custom_tests_hook(recipe, domain, depsfile)
                except Exception as e:  # noqa: BLE001
                    # Deps failure does not abort the run: the tiers still execute and the
                    # not-ready state is exported to the custom tier below.
                    deps_ready = False
                    deps_not_ready_reason = _scrub(str(e))[:300]
                    print(f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
                          flush=True)

            # 3) UPGRADE to PR head (chaos) + assert (generic reconverge+moved+serving + overlay)
            results["upgrade"] = run_lifecycle_tier(
                recipe, "upgrade", repo_local, domain, meta, head_ref, op_state
            )
            # 4) custom tier
            os.environ["CCCI_DEPS_READY"] = "1" if deps_ready else "0"
            os.environ["CCCI_DEPS_NOT_READY_REASON"] = deps_not_ready_reason
            results["custom"] = run_custom(recipe, repo_local, domain)
        else:
            results["upgrade"] = "fail"
            results["custom"] = "skip"
        body_completed = True
    finally:
        # F2-11 skip count (read before deciding pass/fail)
        requires_deps_skipped = 0
        try:
            with open(skipfile) as f:
                requires_deps_skipped = sum(int(x) for x in f.read().split() if x.strip())
        except OSError:
            pass
        sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
        passed = (
            body_completed
            and warm_ok and bool(results) and all(v != "fail" for v in results.values())
            and not sso_unverified
        )

        # dep teardown: delete per-run warm realms; undeploy cold deps (mirrors cold)
        if deps_state:
            ordered = ([deps_state[d] for d in declared if d in deps_state]
                       if isinstance(deps_state, dict) else deps_state)
            for e in [x for x in ordered if x.get("warm")]:
                try:
                    from harness import sso
                    sso.delete_keycloak_realm(e["domain"], e["realm"])
                    print(f" dep: deleted per-run realm {e['realm']} on warm {e['recipe']}", flush=True)
                except Exception as ex:  # noqa: BLE001
                    dep_teardown_error = f"warm realm delete failed for {e.get('realm')}: {ex}"
                    print(f"!! {dep_teardown_error}", flush=True)
            try:
                deps_mod.teardown_deps([x for x in ordered if not x.get("warm")])
            except lifecycle.TeardownError as e:
                dep_teardown_error = str(e)
                print(f"!! {dep_teardown_error}", flush=True)

        # canonical teardown — the WC4 contract:
        #   PASS → undeploy, KEEP volume, known-good UNCHANGED (never promote)
        #   FAIL → restore last-known-good snapshot (data safe) then leave undeployed (idle)
        # NOTE(review): when the reattach itself failed (warm_ok False) neither branch runs, so a
        # partially deployed canonical is left as-is here — confirm something else cleans it up.
        try:
            if warm_ok and passed:
                canonical.undeploy_keep_volume(recipe)
                print(" quick PASS → canonical undeployed, volume retained, known-good UNCHANGED",
                      flush=True)
            elif warm_ok:
                print(" quick FAIL → rolling back canonical to last-known-good snapshot", flush=True)
                abra.undeploy(domain)
                _wait_undeployed(domain)
                warmsnap.restore(recipe, domain)
                canonical._set_status(recipe, "idle")  # noqa: SLF001
                rolled_back = True
                print(" quick FAIL → restored known-good data; canonical idle (NOT promoted)",
                      flush=True)
        except Exception as e:  # noqa: BLE001
            # Scrub like the other failure paths, and only add the " | " separator when there is
            # an earlier teardown error to append to.
            rollback_reason = _scrub(str(e))
            rollback_error = f"quick teardown/rollback: {rollback_reason}"
            dep_teardown_error = (
                f"{dep_teardown_error} | {rollback_error}" if dep_teardown_error
                else rollback_error
            )
            print(f"!! quick teardown/rollback error: {rollback_reason}", flush=True)

        with contextlib.suppress(OSError):
            os.remove(statefile)
        with contextlib.suppress(OSError):
            os.remove(depsfile)
        with contextlib.suppress(OSError):
            os.remove(skipfile)

    print("\n===== RUN SUMMARY =====", flush=True)
    print("mode = quick (LOWER-CONFIDENCE; opt-in; does not gate merge)")
    print(f"canonical = {domain} known-good = {reg.get('version')} (UNCHANGED; quick never promotes)")
    if rolled_back:
        print("rolled-back = yes (restored last-known-good snapshot)")
    for op in ("upgrade", "custom"):
        if op in results:
            suffix = ""
            if op == "custom" and requires_deps_skipped:
                suffix = f" ({requires_deps_skipped} requires_deps SKIPPED — SSO UNVERIFIED)"
            print(f" {op:8s}: {results[op]}{suffix}")

    overall = 0
    if any(v == "fail" for v in results.values()) or not warm_ok:
        overall = 1
    if sso_unverified:
        print(f"!! DEPS={declared} but setup_custom_tests failed and {requires_deps_skipped} "
              "requires_deps SKIPPED — SSO NOT verified (F2-11)", file=sys.stderr)
        overall = 1
    if dep_teardown_error:
        print(f"!! teardown leaked/erred: {dep_teardown_error}", file=sys.stderr)
        overall = 1
    if not results:
        print("no tiers ran", file=sys.stderr)
        return 1
    return overall
|
||||
|
||||
|
||||
def main() -> int:
|
||||
recipe = os.environ.get("RECIPE")
|
||||
if not recipe:
|
||||
@ -408,6 +614,18 @@ def main() -> int:
|
||||
head_ref = ref or lifecycle.recipe_head_commit(recipe)
|
||||
repo_local = snapshot_recipe_tests(recipe)
|
||||
meta = _load_meta(recipe)
|
||||
|
||||
# WC4/WC7: opt-in `--quick` fast lane. Requires an existing data-warm canonical; if none, fall
|
||||
# back cleanly to the full COLD run below so the PR is still tested (DECISIONS Phase-2w).
|
||||
if os.environ.get("CCCI_QUICK") == "1" or os.environ.get("MODE") == "quick":
|
||||
if canonical.has_canonical(recipe):
|
||||
return run_quick(recipe, ref, head_ref, repo_local, meta)
|
||||
print(
|
||||
f"MODE=quick requested but no canonical for {recipe} — falling back to COLD run "
|
||||
"(no-canonical fallback, WC7)",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)
|
||||
|
||||
# Deploy-once base version: previous published version when the upgrade tier will run and one
|
||||
|
||||
Reference in New Issue
Block a user