"""lasuite-drive — pre-op seed hooks (Phase 1e HC3). The orchestrator runs these BEFORE the op; the matching test_.py asserts post-op (assertion-only). The marker is a dedicated `ci_marker` row in postgres (independent of the app's Django migrations — CREATE TABLE IF NOT EXISTS), written via psql in the `db` service. The backup path exercises the recipe's pg_backup.sh DB-dump hook (postgres is backupbot-labelled).""" import os import subprocess import sys import time sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) from harness import lifecycle # noqa: E402 def pre_install(ctx): """Post-deploy seed for the custom tier (the former setup_custom_tests.sh, moved here in rcust P2b — install_steps.sh runs PRE-deploy and cannot touch the live stack). The deploy alone does NOT create the MinIO bucket: `minio-createbuckets` is a `replicas:0` one-shot (restart_policy: none) that must be triggered. The MinIO storage test asserts the bucket exists, so trigger it here and poll. `--detach` is REQUIRED: the job creates the bucket then EXITS 0, so it never holds a steady 1/1 replica — a blocking scale would wait forever. BEST-EFFORT, like the setup_custom_tests.sh it replaced: on poll timeout we WARN and continue (the one-shot often lands just after the window). The custom-tier MinIO storage test is the real gate for a genuinely missing bucket — failing the install op here was an rcust M2 regression (the original hook fell through on timeout by design).""" stack = ctx.domain.replace(".", "_") print(" pre_install: creating MinIO bucket via the minio-createbuckets one-shot", flush=True) subprocess.run( ["docker", "service", "scale", "--detach", f"{stack}_minio-createbuckets=1"], capture_output=True, check=False, ) check = ( 'mc alias set _c http://localhost:9000 "$(cat /run/secrets/minio_ru)" ' '"$(cat /run/secrets/minio_rp)" >/dev/null 2>&1 && ' "mc ls _c/drive-media-storage >/dev/null 2>&1" ) for i in range(30): cid = subprocess.run( ["docker", "ps", "-q", "-f", f"name={stack}_minio.1"], capture_output=True, text=True, check=False, ).stdout.split() if cid and ( subprocess.run( ["docker", "exec", cid[0], "sh", "-c", check], capture_output=True, check=False ).returncode == 0 ): print( f" pre_install: bucket drive-media-storage present after {i + 1} poll(s)", flush=True, ) return time.sleep(3) print( " !! pre_install: minio-createbuckets one-shot did not create drive-media-storage in 90s " "— continuing (best-effort, as the pre-restructure hook did); the custom-tier MinIO test " "gates a genuinely missing bucket", flush=True, ) def _wait_collabora_ready(domain, timeout=420): """Gate the upgrade op on collabora being FULLY ready (WOPI discovery endpoint → 200), not just container 1/1 'running'. coolwsd takes ~2min to boot (pre-reads 1300+ l10n files + RSA keygen); the install wait_healthy returns on container 1/1 while coolwsd is still loading. An in-place `abra app deploy --chaos` upgrade that lands on a still-booting collabora SIGTERMs it mid-init ("Shutdown requested while starting up", forced exit 70) → abra aborts the deploy (Q3.2a run 1, JOURNAL 2026-05-29). Waiting for discovery=200 first makes the redeploy replace a ready collabora cleanly. collabora routes on the COLLABORA_DOMAIN sibling (collabora-); /hosting/discovery is the WOPI discovery endpoint celery's configure_wopi calls.""" host = f"collabora-{domain}" deadline = time.time() + timeout last = 0 while time.time() < deadline: last = lifecycle.http_get(host, "/hosting/discovery", timeout=15) if last == 200: print(f" pre_upgrade: collabora WOPI discovery ready (200) on {host}", flush=True) return time.sleep(5) raise AssertionError( f"collabora WOPI discovery not ready on {host} (last status {last}) within {timeout}s" ) def _psql(domain, sql): cmd = f'PGPASSWORD=$(cat /run/secrets/postgres_p) psql -U drive -d drive -tAc "{sql}"' return lifecycle.exec_in_app(domain, ["sh", "-c", cmd], service="db").strip() def _seed(domain, value): _psql( domain, "CREATE TABLE IF NOT EXISTS ci_marker(v text); DELETE FROM ci_marker; " f"INSERT INTO ci_marker VALUES('{value}');", ) assert _psql(domain, "SELECT v FROM ci_marker;") == value def pre_upgrade(ctx): # Gate the chaos redeploy on a fully-ready collabora (else it kills a still-booting coolwsd and # abra aborts the upgrade deploy — Q3.2a run 1). Then seed the data-integrity marker. _wait_collabora_ready(ctx.domain) _seed(ctx.domain, "upgrade-survives") def pre_backup(ctx): _seed(ctx.domain, "original") def pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable _psql(ctx.domain, "DROP TABLE ci_marker;") assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take"