feat(2): lasuite-drive Q3.2a Part A — wire OIDC at INSTALL, eliminate flaky redeploy
Q3.2a / plan-lasuite-drive-oidc-robustness.md Part A. The old setup_custom_tests.sh did a post-deploy in-place `abra app deploy --force --chaos` of the heavy 12-service stack to apply the OIDC env — flaky (collabora WOPI-discovery race + gunicorn-perms; JOURNAL Step 0). Since the OIDC env only affects backend/app and keycloak is live-warm, provision the per-run realm BEFORE the single deploy and wire OIDC into the .env at install time (no reconverge). - runner/run_recipe_ci.py: new _provision_deps() helper (warm/cold split + SSO enrich + write $CCCI_DEPS_FILE), used by both paths. New per-recipe OIDC_AT_INSTALL meta flag (added to _load_meta whitelist). When set + deps live-warm: provision BEFORE deploy_app; the install tier's install_steps.sh wires OIDC into the single deploy; post-deploy step runs only the MinIO bucket one-shot — no re-provision, no redeploy. Legacy post-deploy path unchanged for all other dep recipes (gated on `not oidc_at_install`). - tests/lasuite-drive/install_steps.sh (NEW): install-time OIDC env + secret wiring; no-ops on empty deps file (recipe still boots, OIDC test skips → F2-11 RED). - tests/lasuite-drive/setup_custom_tests.sh: trimmed to MinIO-bucket-only (OIDC moved out). - tests/lasuite-drive/recipe_meta.py: OIDC_AT_INSTALL = True. - JOURNAL-2: Step-0 root-cause failure logs captured before the fix. NOT a claim — validating 3x green (incl. now-required upgrade tier) before claiming Q3.2. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -194,7 +194,7 @@ def _load_meta(recipe: str) -> dict:
|
||||
ns: dict = {}
|
||||
with open(path) as fh:
|
||||
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
|
||||
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
|
||||
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC", "OIDC_AT_INSTALL"]:
|
||||
if k in ns:
|
||||
meta[k] = ns[k]
|
||||
return meta
|
||||
@ -361,6 +361,45 @@ def _enrich_deps_with_sso(parent_recipe: str, parent_domain: str, deps_list) ->
|
||||
return out
|
||||
|
||||
|
||||
def _provision_deps(recipe: str, domain: str, ref: str | None, declared: list[str]) -> dict[str, dict]:
    """Provision every declared dep of a run, persist the result, and return it.

    Steps, in order:

    1. Classify each declared dep. A dep is "live-warm" when `warm.warm_domain`
       yields a domain for it AND `warm.is_warm_up` reports that provider as up;
       every other dep is "cold" (a dep with a warm domain that is not up is
       logged as a cold fallback).
    2. Cold deps are deployed through `deps_mod.deploy_deps` (skipped entirely
       when there are none); its return value seeds the list of dep entries.
    3. For each live-warm dep, orphan realms are reaped on the provider and a
       `{"recipe", "domain", "warm": True}` entry is appended — no deploy.
    4. The entries are enriched by `_enrich_deps_with_sso` and the resulting
       mapping is persisted with `deps_mod.write_run_state` (the state the
       `setup_custom_tests.sh` / `install_steps.sh` hooks are expected to read
       via `$CCCI_DEPS_FILE` — NOTE(review): confirm against `write_run_state`).

    Shared by both wiring paths: the legacy post-deploy path (provision after
    the generic tiers) and the install-time path (`OIDC_AT_INSTALL`, provision
    before the single deploy).

    Args:
        recipe: Parent recipe whose deps are being provisioned.
        domain: Domain of the parent recipe's deployment.
        ref: Ref forwarded unchanged to `deps_mod.deploy_deps`; may be None.
        declared: Names of the dep recipes declared by the parent.

    Returns:
        The enriched deps state returned by `_enrich_deps_with_sso`.

    Raises:
        Whatever any helper raises; nothing is caught here, so the caller is
        responsible for marking deps-not-ready.
    """
    live_names: list[str] = []
    cold_names: list[str] = []
    for name in declared:
        provider = warm.warm_domain(name)
        if provider and warm.is_warm_up(name, provider):
            live_names.append(name)
            continue
        if provider:
            # A warm provider is configured but not reachable right now.
            print(f" dep: {name} warm provider {provider} not up — cold fallback", flush=True)
        cold_names.append(name)

    if cold_names:
        # Cold deps are co-deployed for this run, each with its own recipe meta.
        cold_metas = {name: _load_meta(name) for name in cold_names}
        entries = deps_mod.deploy_deps(
            recipe, os.environ.get("PR", "0"), ref, cold_names, meta_for=cold_metas
        )
    else:
        entries = []

    for name in live_names:
        provider = warm.warm_domain(name)
        orphans = warm.reap_orphan_realms(name, provider)
        if orphans:
            print(f" dep: reaped {len(orphans)} orphan realm(s) on warm {name}: {orphans}", flush=True)
        entries.append({"recipe": name, "domain": provider, "warm": True})
        print(f" dep: using live-warm {name} @ {provider} (per-run realm)", flush=True)

    state = _enrich_deps_with_sso(recipe, domain, entries)
    deps_mod.write_run_state(state)
    return state
|
||||
|
||||
|
||||
def _run_setup_custom_tests_hook(recipe: str, domain: str, deps_file: str) -> None:
|
||||
"""Run `tests/<recipe>/setup_custom_tests.sh` if present (operator-2026-05-28 SSO-dep plan
|
||||
§3.2). The hook reads `$CCCI_DEPS_FILE`, sets OIDC env via `abra app config set` + secret
|
||||
@ -712,8 +751,14 @@ def main() -> int:
|
||||
os.remove(skipfile)
|
||||
os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile
|
||||
declared = deps_mod.declared_deps(recipe)
|
||||
# Q3.2a: a recipe that tolerates OIDC env at first boot AND whose deps are live-warm wires OIDC
|
||||
# at INSTALL time (provision the realm BEFORE the single deploy; install_steps.sh writes the env
|
||||
# into it) instead of the post-deploy in-place `--chaos` redeploy — which is flaky on the heavy
|
||||
# 12-service lasuite-drive stack (collabora WOPI race; see JOURNAL Step 0). Opt-in per recipe.
|
||||
oidc_at_install = bool(meta.get("OIDC_AT_INSTALL")) and bool(declared)
|
||||
if declared:
|
||||
print(f"\n===== DEPS declared (deploy AFTER generic tiers): {declared} =====", flush=True)
|
||||
when = "BEFORE deploy (install-time OIDC)" if oidc_at_install else "AFTER generic tiers"
|
||||
print(f"\n===== DEPS declared (provision {when}): {declared} =====", flush=True)
|
||||
deps_state: dict[str, dict] = {} # new shape: recipe→entry dict (sso-dep plan §1)
|
||||
deps_ready = True
|
||||
deps_not_ready_reason: str = ""
|
||||
@ -722,6 +767,20 @@ def main() -> int:
|
||||
lifecycle.janitor()
|
||||
dep_teardown_error: str | None = None
|
||||
try:
|
||||
# ---- (Q3.2a) install-time OIDC: provision the warm-dep realm BEFORE the single deploy so
|
||||
# install_steps.sh can read $CCCI_DEPS_FILE and wire the OIDC env into that one deploy. On
|
||||
# failure we mark deps-not-ready but STILL deploy the recipe alone (install_steps.sh no-ops
|
||||
# on an empty deps file) so the generic tiers run; the OIDC custom test then skips → F2-11. ----
|
||||
if oidc_at_install:
|
||||
print(f"\n===== install-time OIDC: provisioning deps {declared} BEFORE deploy =====", flush=True)
|
||||
try:
|
||||
deps_state = _provision_deps(recipe, domain, ref, declared)
|
||||
print(" install-time OIDC: deps provisioned; install_steps.sh will wire OIDC env", flush=True)
|
||||
except Exception as e: # noqa: BLE001 — isolated; recipe still deploys, OIDC test skips
|
||||
deps_ready = False
|
||||
deps_not_ready_reason = _scrub(str(e))[:300]
|
||||
print(f"!! install-time dep provisioning failed (deps-not-ready): {deps_not_ready_reason}", flush=True)
|
||||
|
||||
# ---- deploy RECIPE (legacy path: alone, no deps yet — generic tiers run recipe-only; install-time OIDC path: deps were already provisioned above) ----
|
||||
try:
|
||||
lifecycle.deploy_app(
|
||||
@ -784,44 +843,12 @@ def main() -> int:
|
||||
# setup_custom_tests.sh hook + in-place redeploy. Failure here marks deps-not-ready
|
||||
# but does NOT abort the run — @pytest.mark.requires_deps tests skip with reason;
|
||||
# non-deps custom tests still run normally.
|
||||
if declared:
|
||||
if declared and not oidc_at_install:
|
||||
# LEGACY post-deploy path: provision deps AFTER generic tiers, then wire OIDC env
|
||||
# into the parent via the setup_custom_tests.sh hook + an in-place `--chaos` redeploy.
|
||||
print("\n===== setup_custom_tests: deps + OIDC wiring =====", flush=True)
|
||||
try:
|
||||
# WC1: split deps into live-warm (shared provider at a stable domain + per-run
|
||||
# realm) vs cold (co-deploy per run). A warm dep is used ONLY if its provider is
|
||||
# actually up right now; otherwise it falls back to cold so a from-scratch host
|
||||
# (before the warm reconciler has run) still works.
|
||||
warm_deps, cold_deps = [], []
|
||||
for d in declared:
|
||||
wd = warm.warm_domain(d)
|
||||
if wd and warm.is_warm_up(d, wd):
|
||||
warm_deps.append(d)
|
||||
else:
|
||||
if wd:
|
||||
print(f" dep: {d} warm provider {wd} not up — cold fallback", flush=True)
|
||||
cold_deps.append(d)
|
||||
# Cold deps: co-deploy per run (existing path).
|
||||
dep_metas = {d: _load_meta(d) for d in cold_deps}
|
||||
deps_list = (
|
||||
deps_mod.deploy_deps(
|
||||
recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas
|
||||
)
|
||||
if cold_deps
|
||||
else []
|
||||
)
|
||||
# Warm deps: no deploy. Reap orphan realms first (concurrency-safe), then point
|
||||
# at the stable domain; _enrich creates the per-run realm on it.
|
||||
for d in warm_deps:
|
||||
wd = warm.warm_domain(d)
|
||||
reaped = warm.reap_orphan_realms(d, wd)
|
||||
if reaped:
|
||||
print(f" dep: reaped {len(reaped)} orphan realm(s) on warm {d}: {reaped}", flush=True)
|
||||
deps_list.append({"recipe": d, "domain": wd, "warm": True})
|
||||
print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
|
||||
# Enrich each dep entry with SSO creds (realm/client/secret). The dict form is
|
||||
# what setup_custom_tests.sh reads.
|
||||
deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
|
||||
deps_mod.write_run_state(deps_state)
|
||||
deps_state = _provision_deps(recipe, domain, ref, declared)
|
||||
# Run the per-recipe post-deps hook (jq-driven OIDC wiring + in-place redeploy)
|
||||
_run_setup_custom_tests_hook(recipe, domain, depsfile)
|
||||
except Exception as e: # noqa: BLE001 — setup failure is ISOLATED to dep-marked tests
|
||||
@ -831,6 +858,21 @@ def main() -> int:
|
||||
f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
|
||||
flush=True,
|
||||
)
|
||||
elif declared and oidc_at_install and deps_ready:
|
||||
# INSTALL-TIME path (Q3.2a): deps were provisioned BEFORE the single deploy and the
|
||||
# install-tier install_steps.sh hook already wired OIDC env into that one deploy —
|
||||
# so NO re-provision, NO reconverge here. Run only the post-deploy setup hook
|
||||
# (e.g. lasuite-drive's minio-createbuckets one-shot), which needs the live stack.
|
||||
print("\n===== post-deploy setup (OIDC already wired at install) =====", flush=True)
|
||||
try:
|
||||
_run_setup_custom_tests_hook(recipe, domain, depsfile)
|
||||
except Exception as e: # noqa: BLE001 — isolated to dep-marked / state-dependent tests
|
||||
deps_ready = False
|
||||
deps_not_ready_reason = _scrub(str(e))[:300]
|
||||
print(
|
||||
f"!! post-deploy setup failed: {deps_not_ready_reason}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
# ---- CUSTOM tier ----
|
||||
if "custom" in stages:
|
||||
|
||||
Reference in New Issue
Block a user