From c8e9ddb68120153c83e5ff9f78fa465b0139ad64 Mon Sep 17 00:00:00 2001
From: autonomic-bot
Date: Thu, 28 May 2026 23:28:44 +0100
Subject: [PATCH] feat(2w): W0.3 declarative warm-keycloak reconciler (WC1)

nix/modules/warm-keycloak.nix: idempotent systemd oneshot (like deploy-proxy) that
converges a live-warm shared keycloak at warm-keycloak.ci.commoninternet.net pinned to
10.7.1+26.6.2, secrets generated only-if-missing (never rotate a live provider), waits
/realms/master=200. Re-warmable from scratch (D8/WC8). Wired into hosts/cc-ci/configuration.nix.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 nix/hosts/cc-ci/configuration.nix |  1 +
 nix/modules/warm-keycloak.nix     | 94 +++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 nix/modules/warm-keycloak.nix

diff --git a/nix/hosts/cc-ci/configuration.nix b/nix/hosts/cc-ci/configuration.nix
index 23296c5..91e15c1 100644
--- a/nix/hosts/cc-ci/configuration.nix
+++ b/nix/hosts/cc-ci/configuration.nix
@@ -16,6 +16,7 @@
     ../../modules/dashboard.nix
     ../../modules/backupbot.nix
     ../../modules/harness.nix
+    ../../modules/warm-keycloak.nix
   ];
 
   # --- Tailscale (ACCESS-CRITICAL: do not break, this is the only route in) ---
diff --git a/nix/modules/warm-keycloak.nix b/nix/modules/warm-keycloak.nix
new file mode 100644
index 0000000..0198baa
--- /dev/null
+++ b/nix/modules/warm-keycloak.nix
@@ -0,0 +1,94 @@
+# Phase 2w / WC1 — a live-warm, shared keycloak SSO provider, deployed via abra at a STABLE domain
+# (distinct from cold per-run `-<6hex>`; see DECISIONS.md Phase-2w). SSO-dependent
+# recipe runs use this one instance (creating + deleting a per-run namespaced realm) instead of
+# co-deploying a fresh keycloak each run — the highest-ROI warm layer (W0).
+#
+# Declared as an idempotent-RECONCILE systemd oneshot (like deploy-proxy / swarm-init): it inspects
+# current state and converges every activation/boot, self-healing drift (redeploys if the stack is
+# gone). No run-once sentinel. So a from-scratch install re-warms keycloak with just
+# `nixos-rebuild switch` (D8 / WC8 "re-warmable from scratch"). The keycloak is declarative INFRA
+# (in the D8 closure); only warm *volumes/snapshots* (W1+) are cache excluded from D8. Its realm
+# data is ephemeral per-run.
+#
+# Secrets are generated ONLY if missing — never rotated — so a reconcile against a running provider
+# does not invalidate the admin/db creds the harness reads from inside the container.
+{ pkgs, ... }:
+let
+  # Pinned known-good keycloak version (latest published as of 2026-05-28). Bump deliberately.
+  kcVersion = "10.7.1+26.6.2";
+  reconcile = pkgs.writeShellApplication {
+    name = "cc-ci-reconcile-warm-keycloak";
+    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
+    text = ''
+      DOMAIN="warm-keycloak.ci.commoninternet.net"
+      VERSION="${kcVersion}"
+      ENV_FILE="$HOME/.abra/servers/default/$DOMAIN.env"
+      RECIPE_DIR="$HOME/.abra/recipes/keycloak"
+
+      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
+      abra recipe fetch keycloak -n >/dev/null
+
+      # Create the app config once (records ENV VERSION). No -S here: secrets are generated below,
+      # guarded, so a reconcile never rotates a running provider's creds.
+      [ -f "$ENV_FILE" ] || abra app new keycloak -s default -D "$DOMAIN" "$VERSION" -o -n
+
+      # Replace-or-append KEY=VALUE: drop any existing (or commented-out) KEY= line, then append.
+      set_env() {
+        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
+        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
+      }
+      set_env DOMAIN "$DOMAIN"
+      set_env LETS_ENCRYPT_ENV ""
+
+      # Pin the on-disk recipe to the version tag so a non-chaos deploy genuinely deploys VERSION
+      # (a chaos deploy would ignore ENV VERSION and use the current checkout — see abra.recipe_checkout).
+      git -C "$RECIPE_DIR" checkout --quiet "$VERSION"
+
+      # Generate secrets only if absent (idempotent; never rotate a live provider).
+      # Match the exact name in THIS app's stack (abra's stack name is the domain with dots turned
+      # into underscores): a suffix match would also hit other apps' admin_password_v1 secrets.
+      # List first, then grep: under pipefail `docker ... | grep -q` can misreport a hit as a miss
+      # (SIGPIPE on early grep exit), and a failed listing must abort rather than mean "missing".
+      SECRET_PREFIX="$(printf '%s_' "$DOMAIN" | tr . _)"
+      SECRET_NAMES="$(docker secret ls --format '{{.Name}}')"
+      have_secret() { grep -qxF -- "$SECRET_PREFIX$1_v1" <<<"$SECRET_NAMES"; }
+      if ! have_secret admin_password; then
+        abra app secret generate "$DOMAIN" --all -m -o -n
+      fi
+
+      # Converge the stack (non-chaos => the pinned checkout). Idempotent: no-op if already correct.
+      abra app deploy "$DOMAIN" -o -n
+
+      # Wait until keycloak actually answers /realms/master (JVM + DB migration is slow). Surface a
+      # failed unit if it never comes up rather than reporting success on a half-booted provider.
+      # The probe is sent to 127.0.0.1:443 (--resolve) with certificate checks off (-k).
+      for _ in $(seq 1 60); do
+        code=$(curl -sk -o /dev/null -w '%{http_code}' --max-time 10 \
+          --resolve "$DOMAIN:443:127.0.0.1" "https://$DOMAIN/realms/master" || true)
+        [ "$code" = "200" ] && { echo "warm keycloak healthy ($DOMAIN)"; exit 0; }
+        sleep 10
+      done
+      echo "FATAL: warm keycloak $DOMAIN did not become healthy" >&2
+      exit 1
+    '';
+  };
+in
+{
+  systemd.services.warm-keycloak = {
+    description = "Reconcile the live-warm shared keycloak SSO provider (WC1) via abra";
+    after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
+    requires = [ "swarm-init.service" "docker.service" ];
+    wants = [ "deploy-proxy.service" "network-online.target" ];
+    wantedBy = [ "multi-user.target" ];
+    environment.HOME = "/root";
+    serviceConfig = {
+      Type = "oneshot";
+      RemainAfterExit = true;
+      # Generous: a cold keycloak boot (JVM + DB migration) can take ~10min on this 2-vCPU node.
+      # Must exceed the script's own worst-case health wait (60 x (10s curl + 10s sleep) = 1200s)
+      # plus the fetch/deploy time before it, so a timeout surfaces as the script's FATAL message.
+      TimeoutStartSec = "1800";
+      ExecStart = "${reconcile}/bin/cc-ci-reconcile-warm-keycloak";
+    };
+  };
+}