# Phase 2w / WC1 — a live-warm, shared keycloak SSO provider, deployed via abra at a STABLE domain
# (distinct from cold per-run `-<6hex>`; see DECISIONS.md Phase-2w). SSO-dependent
# recipe runs use this one instance (creating + deleting a per-run namespaced realm) instead of
# co-deploying a fresh keycloak each run — the highest-ROI warm layer (W0).
#
# Declared as an idempotent-RECONCILE systemd oneshot (like deploy-proxy / swarm-init): it inspects
# current state and converges every activation/boot, self-healing drift (redeploys if the stack is
# gone). No run-once sentinel. So a from-scratch install re-warms keycloak with just
# `nixos-rebuild switch` (D8 / WC8 "re-warmable from scratch"). The keycloak is declarative INFRA
# (in the D8 closure); only warm *volumes/snapshots* (W1+) are cache excluded from D8. Its realm
# data is ephemeral per-run.
#
# Secrets are generated ONLY if missing — never rotated — so a reconcile against a running provider
# does not invalidate the admin/db creds the harness reads from inside the container.
{ pkgs, ... }:
let
  # Pinned known-good keycloak version (latest published as of 2026-05-28). Bump deliberately.
  kcVersion = "10.7.1+26.6.2";

  # The reconcile script. writeShellApplication runs it under bash with errexit/nounset/pipefail,
  # so every unguarded failing command below aborts the unit.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-warm-keycloak";
    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
    text = ''
      DOMAIN="warm-keycloak.ci.commoninternet.net"
      VERSION="${kcVersion}"
      ENV_FILE="$HOME/.abra/servers/default/$DOMAIN.env"
      RECIPE_DIR="$HOME/.abra/recipes/keycloak"
      # Swarm stack name for this app: DOMAIN with the dots replaced by underscores. Used to
      # recognise THIS stack's service and secrets among everything else on the swarm.
      stack="warm-keycloak_ci_commoninternet_net"

      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch keycloak -n >/dev/null

      # Create the app config once (records ENV VERSION). No -S here: secrets are generated below,
      # guarded, so a reconcile never rotates a running provider's creds.
      [ -f "$ENV_FILE" ] || abra app new keycloak -s default -D "$DOMAIN" "$VERSION" -o -n

      # set_env KEY VALUE — drop any existing (possibly commented-out) KEY= line from the app env
      # file, then append KEY=VALUE.
      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # Ensure the file ends in a newline before appending — keycloak's .env.sample ends with a
        # newline-less comment line (#COMPOSE_FILE=...), so a bare append would glue the var onto
        # that comment (commenting it out → KC_HOSTNAME=https:// with no host → crash). `$(tail -c1)`
        # is empty iff the last byte is already a newline. (Same bite as backupbot.nix.)
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then
          printf '\n' >> "$ENV_FILE"
        fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env DOMAIN "$DOMAIN"
      set_env LETS_ENCRYPT_ENV ""

      # Pin the on-disk recipe to the version tag so a non-chaos deploy genuinely deploys VERSION
      # (a chaos deploy would ignore ENV VERSION and use the current checkout — see abra.recipe_checkout).
      git -C "$RECIPE_DIR" checkout --quiet "$VERSION"

      # have_secret NAME — true iff THIS stack's v1 secret NAME exists in the swarm.
      # Matches the exact name <stack>_<NAME>_v1 rather than a bare suffix, so another stack's
      # secret of the same name (e.g. a cold per-run keycloak) cannot satisfy the check and leave
      # the warm provider without secrets. The listing is captured before grepping: under
      # pipefail, `docker ... | grep -q` can report failure on a real match when grep exits early
      # and the producer dies on SIGPIPE.
      have_secret() {
        local names
        names="$(docker secret ls --format '{{.Name}}')"
        grep -Fxq -- "''${stack}_$1_v1" <<<"$names"
      }

      # Generate secrets only if absent (idempotent; never rotate a live provider).
      if ! have_secret admin_password; then
        abra app secret generate "$DOMAIN" --all -m -o -n
      fi

      # health — print the HTTP status of /realms/master, probed via 127.0.0.1 with DOMAIN as the
      # TLS/Host name. Never fails: curl errors are swallowed and its own status output is kept.
      health() {
        curl -sk -o /dev/null -w '%{http_code}' --max-time 10 \
          --resolve "$DOMAIN:443:127.0.0.1" "https://$DOMAIN/realms/master" 2>/dev/null || true
      }

      # app_service_present — true iff the stack's `app` service exists. Same capture-then-grep
      # shape as have_secret, for the same pipefail reason.
      app_service_present() {
        local names
        names="$(docker service ls --format '{{.Name}}')"
        grep -Fxq -- "''${stack}_app" <<<"$names"
      }

      # Converge WITHOUT churning a healthy provider: only (re)deploy if it is not already serving.
      # This makes every activation/boot a true no-op when keycloak is up (no JVM restart blip), and
      # self-heals when the stack is gone or crash-looping. (To roll a new kcVersion, `abra app
      # undeploy` first so this redeploys — a deliberate, rare op; keycloak is the SSO dep, not under
      # test.) `-f` because a plain non-chaos deploy FATALs "already deployed".
      if [ "$(health)" = "200" ] && app_service_present; then
        echo "warm keycloak already healthy ($DOMAIN) — no-op converge"
        exit 0
      fi

      abra app deploy "$DOMAIN" -o -n -f

      # Wait until keycloak actually answers /realms/master (JVM + DB migration is slow). Surface a
      # failed unit if it never comes up rather than reporting success on a half-booted provider.
      # Up to 90 probes, 10s apart; each probe may itself take up to 10s (curl --max-time).
      for _ in $(seq 1 90); do
        [ "$(health)" = "200" ] && { echo "warm keycloak healthy ($DOMAIN)"; exit 0; }
        sleep 10
      done
      echo "FATAL: warm keycloak $DOMAIN did not become healthy" >&2
      exit 1
    '';
  };
in
{
  systemd.services.warm-keycloak = {
    description = "Reconcile the live-warm shared keycloak SSO provider (WC1) via abra";
    after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "deploy-proxy.service" "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      # Generous: a cold keycloak boot (JVM + DB migration) can take ~10min on this 2-vCPU node.
      TimeoutStartSec = "1200";
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-warm-keycloak";
    };
  };
}