feat(2w): W0.3 declarative warm-keycloak reconciler (WC1)

nix/modules/warm-keycloak.nix: idempotent systemd oneshot (like deploy-proxy)
that converges a live-warm shared keycloak at warm-keycloak.ci.commoninternet.net,
pinned to 10.7.1+26.6.2, with secrets generated only if missing (never
rotate a live provider); it waits until /realms/master returns 200. Re-warmable from scratch
(D8/WC8). Wired into hosts/cc-ci/configuration.nix.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 23:28:44 +01:00
parent 1b8d26b504
commit c8e9ddb681
2 changed files with 85 additions and 0 deletions

View File

@ -16,6 +16,7 @@
../../modules/dashboard.nix
../../modules/backupbot.nix
../../modules/harness.nix
../../modules/warm-keycloak.nix
];
# --- Tailscale (ACCESS-CRITICAL: do not break, this is the only route in) ---

View File

@ -0,0 +1,84 @@
# Phase 2w / WC1 — a live-warm, shared keycloak SSO provider, deployed via abra at a STABLE domain
# (distinct from cold per-run `<recipe[:4]>-<6hex>`; see DECISIONS.md Phase-2w). SSO-dependent
# recipe runs use this one instance (creating + deleting a per-run namespaced realm) instead of
# co-deploying a fresh keycloak each run — the highest-ROI warm layer (W0).
#
# Declared as an idempotent-RECONCILE systemd oneshot (like deploy-proxy / swarm-init): it inspects
# current state and converges every activation/boot, self-healing drift (redeploys if the stack is
# gone). No run-once sentinel. So a from-scratch install re-warms keycloak with just
# `nixos-rebuild switch` (D8 / WC8 "re-warmable from scratch"). The keycloak is declarative INFRA
# (in the D8 closure); only warm *volumes/snapshots* (W1+) are cache, and so excluded from D8. Its realm
# data is ephemeral per-run.
#
# Secrets are generated ONLY if missing — never rotated — so a reconcile against a running provider
# does not invalidate the admin/db creds the harness reads from inside the container.
{ pkgs, ... }:
let
# Pinned known-good keycloak version (latest published as of 2026-05-28). Bump deliberately.
# Interpolated into the reconcile script as VERSION, where it is used both as the version
# argument to `abra app new` and as the git ref checked out in the on-disk recipe.
kcVersion = "10.7.1+26.6.2";
# Reconcile script for the warm keycloak. Every run: ensures the abra server + recipe exist,
# creates the app config if missing, normalises the env file, pins the recipe checkout to
# kcVersion, generates secrets only if this app has none, deploys, then polls /realms/master
# until it answers 200 (exit 0) or the retry budget is exhausted (exit 1).
reconcile = pkgs.writeShellApplication {
  name = "cc-ci-reconcile-warm-keycloak";
  runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
  text = ''
    DOMAIN="warm-keycloak.ci.commoninternet.net"
    VERSION="${kcVersion}"
    ENV_FILE="$HOME/.abra/servers/default/$DOMAIN.env"
    RECIPE_DIR="$HOME/.abra/recipes/keycloak"
    # Swarm stack name of this app, used to scope the secret lookup below.
    # NOTE(review): assumes abra derives the stack name from the domain by replacing '.' with
    # '_' (and that this domain is short enough not to be truncated) - confirm against abra.
    STACK="$(printf '%s' "$DOMAIN" | tr '.' '_')"

    # Best-effort: register the local server if abra does not know one yet.
    abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
    abra recipe fetch keycloak -n >/dev/null

    # Create the app config once (records ENV VERSION). No -S here: secrets are generated below,
    # guarded, so a reconcile never rotates a running provider's creds.
    [ -f "$ENV_FILE" ] || abra app new keycloak -s default -D "$DOMAIN" "$VERSION" -o -n

    # set_env KEY VALUE: drop any existing (possibly commented-out) KEY= line from the env file,
    # then append KEY=VALUE, so repeated runs leave exactly one definition.
    set_env() {
      sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
      printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
    }
    set_env DOMAIN "$DOMAIN"
    set_env LETS_ENCRYPT_ENV ""

    # Pin the on-disk recipe to the version tag so a non-chaos deploy genuinely deploys VERSION
    # (a chaos deploy would ignore ENV VERSION and use the current checkout; see
    # abra.recipe_checkout).
    git -C "$RECIPE_DIR" checkout --quiet "$VERSION"

    # Generate secrets only if absent (idempotent; never rotate a live provider).
    # The secret list is captured once, as a plain assignment, so that a failing
    # `docker secret ls` aborts the unit (errexit) instead of being misread as "no secrets".
    # It is also matched without a pipeline: under pipefail, `docker ... | grep -q` can report
    # failure when grep exits early on a match, which would wrongly trigger generation.
    secret_names="$(docker secret ls --format '{{.Name}}')"
    # have_secret NAME: true iff this app's own stack has secret NAME at version v1. The match
    # is exact and stack-scoped, so a same-named secret of another stack does not count.
    have_secret() {
      local want="$STACK"_"$1"_v1
      grep -Fxq -- "$want" <<<"$secret_names"
    }
    if ! have_secret admin_password; then
      abra app secret generate "$DOMAIN" --all -m -o -n
    fi

    # Converge the stack (non-chaos => the pinned checkout). Idempotent: no-op if already correct.
    abra app deploy "$DOMAIN" -o -n

    # Wait until keycloak actually answers /realms/master (JVM + DB migration is slow). Surface a
    # failed unit if it never comes up rather than reporting success on a half-booted provider.
    # Up to 60 probes; each probe is capped at 10s and followed by a 10s pause.
    for _ in $(seq 1 60); do
      # --resolve pins the domain to 127.0.0.1:443; -k skips certificate verification.
      # `|| true` keeps a failed probe from tripping errexit.
      code=$(curl -sk -o /dev/null -w '%{http_code}' --max-time 10 \
        --resolve "$DOMAIN:443:127.0.0.1" "https://$DOMAIN/realms/master" || true)
      [ "$code" = "200" ] && { echo "warm keycloak healthy ($DOMAIN)"; exit 0; }
      sleep 10
    done
    echo "FATAL: warm keycloak $DOMAIN did not become healthy" >&2
    exit 1
  '';
};
in
{
# Oneshot unit that runs the reconcile script; pulled in by multi-user.target.
systemd.services.warm-keycloak = {
  description = "Reconcile the live-warm shared keycloak SSO provider (WC1) via abra";
  wantedBy = [ "multi-user.target" ];

  # Hard dependencies: the swarm and the docker daemon.
  requires = [ "swarm-init.service" "docker.service" ];
  # Soft dependencies: the proxy and a network that is online.
  wants = [ "deploy-proxy.service" "network-online.target" ];
  # Start only after every dependency above has been started.
  after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];

  # The reconcile script resolves its abra paths under $HOME.
  environment = { HOME = "/root"; };

  serviceConfig.Type = "oneshot";
  serviceConfig.RemainAfterExit = true;
  # Generous: a cold keycloak boot (JVM + DB migration) can take ~10min on this 2-vCPU node.
  serviceConfig.TimeoutStartSec = "1200";
  serviceConfig.ExecStart = "${reconcile}/bin/cc-ci-reconcile-warm-keycloak";
};
}