feat(2w): W2 --quick mode in run_recipe_ci.py (WC4+WC7)
run_quick(): opt-in fast lane (CCCI_QUICK=1 / MODE=quick) — reattach the data-warm canonical (canonical.deploy_canonical, known-good volume) → deps wiring (warm keycloak + per-run realm) → UPGRADE to PR head (chaos, run_lifecycle_tier 'upgrade': reconverge+moved+serving + overlay) → custom tier. PASS → undeploy_keep_volume, known-good UNCHANGED (NEVER promote); FAIL → warmsnap.restore last-known-good + undeploy (roll back, data safe). Always deletes per-run warm realm. mode=quick labelled lower-confidence (WC7); skips install/backup/restore; no deploy-count guard (no deploy_app). main() dispatches to run_quick when a canonical exists, else clean no-canonical fallback to COLD. Cold path byte-identical (deps wiring intentionally mirrored, not refactored). 61 unit pass; cold untouched. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -40,7 +40,17 @@ import tempfile
|
|||||||
|
|
||||||
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
sys.path.insert(0, os.path.join(ROOT, "runner"))
|
sys.path.insert(0, os.path.join(ROOT, "runner"))
|
||||||
from harness import deps as deps_mod, discovery, generic, lifecycle, naming, warm # noqa: E402
|
from harness import ( # noqa: E402
|
||||||
|
abra,
|
||||||
|
canonical,
|
||||||
|
deps as deps_mod,
|
||||||
|
discovery,
|
||||||
|
generic,
|
||||||
|
lifecycle,
|
||||||
|
naming,
|
||||||
|
warm,
|
||||||
|
warmsnap,
|
||||||
|
)
|
||||||
|
|
||||||
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
|
ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom")
|
||||||
|
|
||||||
@ -386,6 +396,202 @@ def run_custom(recipe: str, repo_local: str | None, domain: str) -> str:
|
|||||||
return "pass" if rc_all == 0 else "fail"
|
return "pass" if rc_all == 0 else "fail"
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_undeployed(domain: str, timeout: int = 120) -> None:
    """Block until the stack's services are gone after an undeploy.

    warmsnap.restore requires the stack to be undeployed, so this keeps the caller from racing a
    half-removed stack.

    Args:
        domain: app domain; the docker stack name is derived from it via lifecycle._stack_name.
        timeout: maximum number of seconds to wait.

    Returns:
        None, both when the services disappeared and when the wait timed out. A timeout is
        best-effort: it is reported on stdout and the caller proceeds.
    """
    stack = lifecycle._stack_name(domain)  # noqa: SLF001
    # Monotonic clock: a wall-clock adjustment (NTP step, manual change) must not stretch or cut
    # the wait short.
    deadline = time.monotonic() + timeout
    while True:
        # Check first so the state is probed at least once (even with timeout <= 0) and once more
        # after the final sleep.
        if not lifecycle._docker_names("service", stack):  # noqa: SLF001
            return
        if time.monotonic() >= deadline:
            break
        time.sleep(2)
    # Timed out: keep the original best-effort contract (no exception), but leave a trace so a
    # subsequent restore failure can be attributed to the still-present stack.
    print(f"!! timed out after {timeout}s waiting for stack {stack} to be undeployed", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def run_quick(recipe: str, ref: str | None, head_ref: str | None, repo_local: str | None,
              meta: dict) -> int:
    """WC4 `--quick` opt-in fast lane (plan §2).

    Reattach the data-warm canonical (known-good volume) → upgrade IN PLACE to the PR head (chaos)
    → assert generic UPGRADE (reconverge+moved+serving) + overlay + custom. PASS →
    undeploy-keep-volume, **known-good UNCHANGED (NEVER promote)**; FAIL → restore the
    last-known-good snapshot + undeploy (roll back, data safe). Lower-confidence; does NOT gate
    merge (WC7). Caller has confirmed a canonical exists.

    NB: the deps wiring + temp-state scaffolding intentionally mirror main()'s cold path rather than
    refactoring it — keeping the gate-passed cold flow byte-identical (zero regression risk).

    Args:
        recipe: recipe under test.
        ref: ref of the change under test (may be None); used for cold dep deployment and logging.
        head_ref: ref handed to the upgrade tier as the upgrade target.
        repo_local: local snapshot of the recipe's tests, handed to the tiers (may be None).
        meta: recipe metadata; HEALTH_OK, HEALTH_PATH, DEPLOY_TIMEOUT and HTTP_TIMEOUT are read.

    Returns:
        0 when the canonical came up, no tier failed, SSO was not left unverified and teardown
        was clean; 1 otherwise.
    """
    import contextlib

    domain = canonical.canonical_domain(recipe)
    reg = canonical.read_registry(recipe) or {}
    print(
        f"\n== cc-ci run [MODE=quick]: recipe={recipe} canonical={domain} "
        f"known-good={reg.get('version')} ref={ref}\n"
        " quick = LOWER-CONFIDENCE opt-in fast lane; does NOT gate merge; NEVER promotes the canonical",
        flush=True,
    )

    # Per-run scratch files, published to the tiers/hooks through the environment.
    statefile = os.path.join(tempfile.gettempdir(), f"ccci-opstate-{domain}.json")
    with open(statefile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_OP_STATE_FILE"] = statefile
    depsfile = os.path.join(tempfile.gettempdir(), f"ccci-deps-{domain}.json")
    with open(depsfile, "w") as f:
        json.dump({}, f)
    os.environ["CCCI_DEPS_FILE"] = depsfile
    skipfile = os.path.join(tempfile.gettempdir(), f"ccci-depskip-{domain}.txt")
    with contextlib.suppress(OSError):
        os.remove(skipfile)  # drop any stale report left by a previous run
    os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile

    op_state: dict = {}
    results: dict[str, str] = {}
    declared = deps_mod.declared_deps(recipe)
    deps_state: dict = {}
    deps_ready = True
    deps_not_ready_reason = ""
    dep_teardown_error: str | None = None
    warm_ok = False
    rolled_back = False

    lifecycle.janitor()
    try:
        # 1) reattach the canonical (warm boot at the known-good version + retained volume)
        try:
            canonical.deploy_canonical(recipe, timeout=int(meta.get("DEPLOY_TIMEOUT", 900)))
            lifecycle.wait_healthy(
                domain, ok_codes=tuple(meta["HEALTH_OK"]), path=meta["HEALTH_PATH"],
                deploy_timeout=meta["DEPLOY_TIMEOUT"], http_timeout=meta["HTTP_TIMEOUT"],
            )
            warm_ok = True
        except Exception as e:  # noqa: BLE001
            print(f"!! canonical reattach/readiness failed: {_scrub(str(e))}", flush=True)

        if warm_ok:
            # 2) deps (warm keycloak + per-run realm) — mirrors main()'s warm/cold split
            if declared:
                print(f"\n===== setup_custom_tests (quick): deps {declared} =====", flush=True)
                try:
                    warm_deps, cold_deps = [], []
                    for d in declared:
                        wd = warm.warm_domain(d)
                        (warm_deps if (wd and warm.is_warm_up(d, wd)) else cold_deps).append(d)
                    dep_metas = {d: _load_meta(d) for d in cold_deps}
                    deps_list = (
                        deps_mod.deploy_deps(recipe, os.environ.get("PR", "0"), ref, cold_deps,
                                             meta_for=dep_metas)
                        if cold_deps else []
                    )
                    for d in warm_deps:
                        wd = warm.warm_domain(d)
                        warm.reap_orphan_realms(d, wd)
                        deps_list.append({"recipe": d, "domain": wd, "warm": True})
                        print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
                    deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
                    deps_mod.write_run_state(deps_state)
                    _run_setup_custom_tests_hook(recipe, domain, depsfile)
                except Exception as e:  # noqa: BLE001
                    # Deps failing is not fatal here: the tiers still run and the custom tier is
                    # told about it through CCCI_DEPS_READY / CCCI_DEPS_NOT_READY_REASON.
                    deps_ready = False
                    deps_not_ready_reason = _scrub(str(e))[:300]
                    print(f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
                          flush=True)

            # 3) UPGRADE to PR head (chaos) + assert (generic reconverge+moved+serving + overlay)
            results["upgrade"] = run_lifecycle_tier(
                recipe, "upgrade", repo_local, domain, meta, head_ref, op_state
            )
            # 4) custom tier
            os.environ["CCCI_DEPS_READY"] = "1" if deps_ready else "0"
            os.environ["CCCI_DEPS_NOT_READY_REASON"] = deps_not_ready_reason
            results["custom"] = run_custom(recipe, repo_local, domain)
        else:
            results["upgrade"] = "fail"
            results["custom"] = "skip"
    finally:
        # F2-11 skip count (read before deciding pass/fail).
        # Nothing in this parse may raise: an exception escaping here would abort the finally
        # block BEFORE the realm deletion and the canonical teardown/rollback below.
        requires_deps_skipped = 0
        try:
            with open(skipfile) as f:
                skip_tokens = f.read().split()
        except (OSError, ValueError):
            # report missing or unreadable/undecodable → the count stays 0
            skip_tokens = []
        for tok in skip_tokens:
            try:
                requires_deps_skipped += int(tok)
            except ValueError:
                print(f"!! ignoring malformed entry in deps-skip report: {tok[:40]!r}", flush=True)
        sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
        passed = (
            warm_ok and bool(results) and all(v != "fail" for v in results.values())
            and not sso_unverified
        )

        # dep teardown: delete per-run warm realms; undeploy cold deps (mirrors cold)
        if deps_state:
            ordered = ([deps_state[d] for d in declared if d in deps_state]
                       if isinstance(deps_state, dict) else deps_state)
            for entry in [x for x in ordered if x.get("warm")]:
                try:
                    from harness import sso
                    sso.delete_keycloak_realm(entry["domain"], entry["realm"])
                    print(f" dep: deleted per-run realm {entry['realm']} on warm {entry['recipe']}",
                          flush=True)
                except Exception as ex:  # noqa: BLE001
                    dep_teardown_error = f"warm realm delete failed for {entry.get('realm')}: {ex}"
                    print(f"!! {dep_teardown_error}", flush=True)
            try:
                deps_mod.teardown_deps([x for x in ordered if not x.get("warm")])
            except lifecycle.TeardownError as e:
                dep_teardown_error = str(e)
                print(f"!! {dep_teardown_error}", flush=True)

        # canonical teardown — the WC4 contract:
        #   PASS → undeploy, KEEP volume, known-good UNCHANGED (never promote)
        #   FAIL → restore last-known-good snapshot (data safe) then leave undeployed (idle)
        try:
            if warm_ok and passed:
                canonical.undeploy_keep_volume(recipe)
                print(" quick PASS → canonical undeployed, volume retained, known-good UNCHANGED",
                      flush=True)
            elif warm_ok:
                print(" quick FAIL → rolling back canonical to last-known-good snapshot", flush=True)
                abra.undeploy(domain)
                _wait_undeployed(domain)
                warmsnap.restore(recipe, domain)
                canonical._set_status(recipe, "idle")  # noqa: SLF001
                rolled_back = True
                print(" quick FAIL → restored known-good data; canonical idle (NOT promoted)",
                      flush=True)
        except Exception as e:  # noqa: BLE001
            # Append to an earlier dep-teardown error if there is one; never emit a dangling
            # leading separator when this is the only error.
            rollback_error = f"quick teardown/rollback: {e}"
            dep_teardown_error = (
                f"{dep_teardown_error} | {rollback_error}" if dep_teardown_error else rollback_error
            )
            print(f"!! quick teardown/rollback error: {e}", flush=True)

        for scratch in (statefile, depsfile, skipfile):
            with contextlib.suppress(OSError):
                os.remove(scratch)

    print("\n===== RUN SUMMARY =====", flush=True)
    print("mode = quick (LOWER-CONFIDENCE; opt-in; does not gate merge)")
    print(f"canonical = {domain} known-good = {reg.get('version')} (UNCHANGED; quick never promotes)")
    if rolled_back:
        print("rolled-back = yes (restored last-known-good snapshot)")
    for op in ("upgrade", "custom"):
        if op in results:
            suffix = ""
            if op == "custom" and requires_deps_skipped:
                suffix = f" ({requires_deps_skipped} requires_deps SKIPPED — SSO UNVERIFIED)"
            print(f" {op:8s}: {results[op]}{suffix}")

    overall = 0
    if any(v == "fail" for v in results.values()) or not warm_ok:
        overall = 1
    if sso_unverified:
        print(f"!! DEPS={declared} but setup_custom_tests failed and {requires_deps_skipped} "
              "requires_deps SKIPPED — SSO NOT verified (F2-11)", file=sys.stderr)
        overall = 1
    if dep_teardown_error:
        print(f"!! teardown leaked/erred: {dep_teardown_error}", file=sys.stderr)
        overall = 1
    if not results:
        # Defensive guard kept from the original: never report success when no tier ran.
        print("no tiers ran", file=sys.stderr)
        return 1
    return overall
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
recipe = os.environ.get("RECIPE")
|
recipe = os.environ.get("RECIPE")
|
||||||
if not recipe:
|
if not recipe:
|
||||||
@ -408,6 +614,18 @@ def main() -> int:
|
|||||||
head_ref = ref or lifecycle.recipe_head_commit(recipe)
|
head_ref = ref or lifecycle.recipe_head_commit(recipe)
|
||||||
repo_local = snapshot_recipe_tests(recipe)
|
repo_local = snapshot_recipe_tests(recipe)
|
||||||
meta = _load_meta(recipe)
|
meta = _load_meta(recipe)
|
||||||
|
|
||||||
|
# WC4/WC7: opt-in `--quick` fast lane. Requires an existing data-warm canonical; if none, fall
|
||||||
|
# back cleanly to the full COLD run below so the PR is still tested (DECISIONS Phase-2w).
|
||||||
|
if os.environ.get("CCCI_QUICK") == "1" or os.environ.get("MODE") == "quick":
|
||||||
|
if canonical.has_canonical(recipe):
|
||||||
|
return run_quick(recipe, ref, head_ref, repo_local, meta)
|
||||||
|
print(
|
||||||
|
f"MODE=quick requested but no canonical for {recipe} — falling back to COLD run "
|
||||||
|
"(no-canonical fallback, WC7)",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)
|
domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)
|
||||||
|
|
||||||
# Deploy-once base version: previous published version when the upgrade tier will run and one
|
# Deploy-once base version: previous published version when the upgrade tier will run and one
|
||||||
|
|||||||
Reference in New Issue
Block a user