feat(2): lasuite-drive Q3.2a Part A — wire OIDC at INSTALL, eliminate flaky redeploy

Q3.2a / plan-lasuite-drive-oidc-robustness.md Part A. The old setup_custom_tests.sh did a
post-deploy in-place `abra app deploy --force --chaos` of the heavy 12-service stack to apply
the OIDC env — flaky (collabora WOPI-discovery race + gunicorn-perms; JOURNAL Step 0). Since
the OIDC env only affects backend/app and keycloak is live-warm, provision the per-run realm
BEFORE the single deploy and wire OIDC into the .env at install time (no reconverge).

- runner/run_recipe_ci.py: new _provision_deps() helper (warm/cold split + SSO enrich + write
  $CCCI_DEPS_FILE), used by both paths. New per-recipe OIDC_AT_INSTALL meta flag (added to
  _load_meta whitelist). When set + deps live-warm: provision BEFORE deploy_app; the install
  tier's install_steps.sh wires OIDC into the single deploy; post-deploy step runs only the
  MinIO bucket one-shot — no re-provision, no redeploy. Legacy post-deploy path unchanged for
  all other dep recipes (gated on `not oidc_at_install`).
- tests/lasuite-drive/install_steps.sh (NEW): install-time OIDC env + secret wiring; no-ops on
  empty deps file (recipe still boots, OIDC test skips → F2-11 RED).
- tests/lasuite-drive/setup_custom_tests.sh: trimmed to MinIO-bucket-only (OIDC moved out).
- tests/lasuite-drive/recipe_meta.py: OIDC_AT_INSTALL = True.
- JOURNAL-2: Step-0 root-cause failure logs captured before the fix.

NOT a claim — validating 3x green (incl. now-required upgrade tier) before claiming Q3.2.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-29 10:10:05 +01:00
parent 4356f0009c
commit a151489996
5 changed files with 210 additions and 117 deletions

View File

@ -194,7 +194,7 @@ def _load_meta(recipe: str) -> dict:
ns: dict = {}
with open(path) as fh:
exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo)
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC"]:
for k in list(meta) + ["BACKUP_CAPABLE", "SKIP_GENERIC", "OIDC_AT_INSTALL"]:
if k in ns:
meta[k] = ns[k]
return meta
@ -361,6 +361,45 @@ def _enrich_deps_with_sso(parent_recipe: str, parent_domain: str, deps_list) ->
return out
def _provision_deps(recipe: str, domain: str, ref: str | None, declared: list[str]) -> dict[str, dict]:
"""Provision a run's declared deps and write `$CCCI_DEPS_FILE`; return the recipe→entry deps_state.
Splits deps into live-warm (shared provider at a stable domain + a per-run realm) vs cold
(co-deployed per run), provisions each dep's SSO realm/client/user, and persists the enriched
dict the `setup_custom_tests.sh`/`install_steps.sh` hooks + dependent tests read. Raises on any
failure (the caller marks deps-not-ready). Used by BOTH wiring paths:
- post-deploy (legacy): provision AFTER generic tiers, then `setup_custom_tests.sh` does an
in-place OIDC redeploy.
- install-time (`OIDC_AT_INSTALL`, Q3.2a): provision BEFORE the single deploy so the
install-tier `install_steps.sh` hook wires OIDC env into that one deploy — no reconverge.
"""
warm_deps, cold_deps = [], []
for d in declared:
wd = warm.warm_domain(d)
if wd and warm.is_warm_up(d, wd):
warm_deps.append(d)
else:
if wd:
print(f" dep: {d} warm provider {wd} not up — cold fallback", flush=True)
cold_deps.append(d)
dep_metas = {d: _load_meta(d) for d in cold_deps}
deps_list = (
deps_mod.deploy_deps(recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas)
if cold_deps
else []
)
for d in warm_deps:
wd = warm.warm_domain(d)
reaped = warm.reap_orphan_realms(d, wd)
if reaped:
print(f" dep: reaped {len(reaped)} orphan realm(s) on warm {d}: {reaped}", flush=True)
deps_list.append({"recipe": d, "domain": wd, "warm": True})
print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
deps_mod.write_run_state(deps_state)
return deps_state
def _run_setup_custom_tests_hook(recipe: str, domain: str, deps_file: str) -> None:
"""Run `tests/<recipe>/setup_custom_tests.sh` if present (operator-2026-05-28 SSO-dep plan
§3.2). The hook reads `$CCCI_DEPS_FILE`, sets OIDC env via `abra app config set` + secret
@ -712,8 +751,14 @@ def main() -> int:
os.remove(skipfile)
os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile
declared = deps_mod.declared_deps(recipe)
# Q3.2a: a recipe that tolerates OIDC env at first boot AND whose deps are live-warm wires OIDC
# at INSTALL time (provision the realm BEFORE the single deploy; install_steps.sh writes the env
# into it) instead of the post-deploy in-place `--chaos` redeploy — which is flaky on the heavy
# 12-service lasuite-drive stack (collabora WOPI race; see JOURNAL Step 0). Opt-in per recipe.
oidc_at_install = bool(meta.get("OIDC_AT_INSTALL")) and bool(declared)
if declared:
print(f"\n===== DEPS declared (deploy AFTER generic tiers): {declared} =====", flush=True)
when = "BEFORE deploy (install-time OIDC)" if oidc_at_install else "AFTER generic tiers"
print(f"\n===== DEPS declared (provision {when}): {declared} =====", flush=True)
deps_state: dict[str, dict] = {} # new shape: recipe→entry dict (sso-dep plan §1)
deps_ready = True
deps_not_ready_reason: str = ""
@ -722,6 +767,20 @@ def main() -> int:
lifecycle.janitor()
dep_teardown_error: str | None = None
try:
# ---- (Q3.2a) install-time OIDC: provision the warm-dep realm BEFORE the single deploy so
# install_steps.sh can read $CCCI_DEPS_FILE and wire the OIDC env into that one deploy. On
# failure we mark deps-not-ready but STILL deploy the recipe alone (install_steps.sh no-ops
# on an empty deps file) so the generic tiers run; the OIDC custom test then skips → F2-11. ----
if oidc_at_install:
print(f"\n===== install-time OIDC: provisioning deps {declared} BEFORE deploy =====", flush=True)
try:
deps_state = _provision_deps(recipe, domain, ref, declared)
print(" install-time OIDC: deps provisioned; install_steps.sh will wire OIDC env", flush=True)
except Exception as e: # noqa: BLE001 — isolated; recipe still deploys, OIDC test skips
deps_ready = False
deps_not_ready_reason = _scrub(str(e))[:300]
print(f"!! install-time dep provisioning failed (deps-not-ready): {deps_not_ready_reason}", flush=True)
# ---- deploy RECIPE FIRST, alone (no deps yet — generic tiers run recipe-only) ----
try:
lifecycle.deploy_app(
@ -784,44 +843,12 @@ def main() -> int:
# setup_custom_tests.sh hook + in-place redeploy. Failure here marks deps-not-ready
# but does NOT abort the run — @pytest.mark.requires_deps tests skip with reason;
# non-deps custom tests still run normally.
if declared:
if declared and not oidc_at_install:
# LEGACY post-deploy path: provision deps AFTER generic tiers, then wire OIDC env
# into the parent via the setup_custom_tests.sh hook + an in-place `--chaos` redeploy.
print("\n===== setup_custom_tests: deps + OIDC wiring =====", flush=True)
try:
# WC1: split deps into live-warm (shared provider at a stable domain + per-run
# realm) vs cold (co-deploy per run). A warm dep is used ONLY if its provider is
# actually up right now; otherwise it falls back to cold so a from-scratch host
# (before the warm reconciler has run) still works.
warm_deps, cold_deps = [], []
for d in declared:
wd = warm.warm_domain(d)
if wd and warm.is_warm_up(d, wd):
warm_deps.append(d)
else:
if wd:
print(f" dep: {d} warm provider {wd} not up — cold fallback", flush=True)
cold_deps.append(d)
# Cold deps: co-deploy per run (existing path).
dep_metas = {d: _load_meta(d) for d in cold_deps}
deps_list = (
deps_mod.deploy_deps(
recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas
)
if cold_deps
else []
)
# Warm deps: no deploy. Reap orphan realms first (concurrency-safe), then point
# at the stable domain; _enrich creates the per-run realm on it.
for d in warm_deps:
wd = warm.warm_domain(d)
reaped = warm.reap_orphan_realms(d, wd)
if reaped:
print(f" dep: reaped {len(reaped)} orphan realm(s) on warm {d}: {reaped}", flush=True)
deps_list.append({"recipe": d, "domain": wd, "warm": True})
print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True)
# Enrich each dep entry with SSO creds (realm/client/secret). The dict form is
# what setup_custom_tests.sh reads.
deps_state = _enrich_deps_with_sso(recipe, domain, deps_list)
deps_mod.write_run_state(deps_state)
deps_state = _provision_deps(recipe, domain, ref, declared)
# Run the per-recipe post-deps hook (jq-driven OIDC wiring + in-place redeploy)
_run_setup_custom_tests_hook(recipe, domain, depsfile)
except Exception as e: # noqa: BLE001 — setup failure is ISOLATED to dep-marked tests
@ -831,6 +858,21 @@ def main() -> int:
f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}",
flush=True,
)
elif declared and oidc_at_install and deps_ready:
# INSTALL-TIME path (Q3.2a): deps were provisioned BEFORE the single deploy and the
# install-tier install_steps.sh hook already wired OIDC env into that one deploy —
# so NO re-provision, NO reconverge here. Run only the post-deploy setup hook
# (e.g. lasuite-drive's minio-createbuckets one-shot), which needs the live stack.
print("\n===== post-deploy setup (OIDC already wired at install) =====", flush=True)
try:
_run_setup_custom_tests_hook(recipe, domain, depsfile)
except Exception as e: # noqa: BLE001 — isolated to dep-marked / state-dependent tests
deps_ready = False
deps_not_ready_reason = _scrub(str(e))[:300]
print(
f"!! post-deploy setup failed: {deps_not_ready_reason}",
flush=True,
)
# ---- CUSTOM tier ----
if "custom" in stages: