fix(2w): W0.3 warm-keycloak reconciler — newline bite + skip-if-healthy
- set_env: ensure the file ends with a newline before appending. Keycloak's .env.sample ends with a newline-less #COMPOSE_FILE comment, so a bare append glued DOMAIN onto that comment -> DOMAIN unset -> KC_HOSTNAME=https:// -> crash-loop. The same bug was fixed in backupbot.nix.
- converge: skip the (forced) redeploy when keycloak already serves 200, so an activation/boot is a true no-op (no JVM-restart blip); it only redeploys when keycloak is down or crash-looping.
- Health-wait extended to 15 min.

Verified on cc-ci: nixos-rebuild switch -> warm-keycloak.service active, 'no-op converge', system running (0 failed), /realms/master=200.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -34,6 +34,11 @@ let
|
||||
|
||||
# set_env KEY VALUE
# Upsert KEY=VALUE in the dotenv file named by $ENV_FILE (set by the enclosing script).
#   $1 - variable name; matched literally (regex metacharacters are escaped below).
#   $2 - value, written verbatim after the "=".
# Every existing assignment of KEY — active or commented out, with optional leading
# whitespace — is removed first, then a single KEY=VALUE line is appended at the end.
# NOTE(review): this snippet appears to be embedded in a Nix indented string; keep it free of
# dollar-brace expansions and doubled single quotes, which Nix would interpret — confirm.
set_env() {
  local key_re
  # Escape ERE metacharacters (and the "/" address delimiter) so that a name such as A.B
  # cannot delete an unrelated line like AXB=... .
  key_re=$(printf '%s' "$1" | sed -e 's,[][\.*^$+?(){}|/],\\&,g')
  sed -i -E "/^[[:space:]]*#?[[:space:]]*$key_re=/d" "$ENV_FILE"
  # Ensure the file ends in a newline before appending — keycloak's .env.sample ends with a
  # newline-less comment line (#COMPOSE_FILE=...), so a bare append would glue the var onto
  # that comment (commenting it out → KC_HOSTNAME=https:// with no host → crash). `$(tail -c1)`
  # is empty iff the last byte is already a newline. (Same bite as backupbot.nix.)
  if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then
    printf '\n' >> "$ENV_FILE"
  fi
  printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
}
|
||||
set_env DOMAIN "$DOMAIN"
|
||||
@ -49,15 +54,27 @@ let
|
||||
abra app secret generate "$DOMAIN" --all -m -o -n
|
||||
fi
|
||||
|
||||
# Converge the stack (non-chaos => the pinned checkout). Idempotent: no-op if already correct.
|
||||
abra app deploy "$DOMAIN" -o -n
|
||||
# health
# Print the HTTP status code returned by https://$DOMAIN/realms/master, with $DOMAIN:443
# pinned to 127.0.0.1 and certificate verification disabled. curl's stderr is discarded and
# its exit status is ignored, so the function itself always succeeds; callers compare the
# printed code.
health() {
  curl --silent --insecure --output /dev/null --write-out '%{http_code}' --max-time 10 \
    --resolve "$DOMAIN:443:127.0.0.1" "https://$DOMAIN/realms/master" 2>/dev/null || :
}
|
||||
|
||||
# Converge without disturbing a provider that is already serving. When /realms/master answers
# 200 and the stack's app service is listed, this run is a no-op, so an activation/boot causes
# no JVM restart blip. Anything else (stack gone, crash-looping) falls through to a redeploy.
# Rolling a new kcVersion therefore needs a manual `abra app undeploy` first — a deliberate,
# rare op; keycloak is the SSO dep, not under test.
stack="warm-keycloak_ci_commoninternet_net"
if [ "$(health)" = "200" ]; then
  if docker service ls --format '{{.Name}}' | grep -q "^''${stack}_app$"; then
    echo "warm keycloak already healthy ($DOMAIN) — no-op converge"
    exit 0
  fi
fi
# Forced with -f because a plain non-chaos deploy FATALs "already deployed".
abra app deploy "$DOMAIN" -o -n -f
|
||||
|
||||
# Wait until keycloak actually answers /realms/master (JVM + DB migration is slow). Surface a
|
||||
# failed unit if it never comes up rather than reporting success on a half-booted provider.
|
||||
# NOTE(review): two loop headers follow (60 and 90 iterations) but only one `done` is visible.
# This looks like a diff rendering that shows the replaced inline-curl loop next to its
# health()-based successor without +/- markers — confirm against the real file before editing.
for _ in $(seq 1 60); do
|
||||
code=$(curl -sk -o /dev/null -w '%{http_code}' --max-time 10 \
|
||||
--resolve "$DOMAIN:443:127.0.0.1" "https://$DOMAIN/realms/master" || true)
|
||||
[ "$code" = "200" ] && { echo "warm keycloak healthy ($DOMAIN)"; exit 0; }
|
||||
# Poll via health(): exit 0 on the first 200, otherwise sleep 10 seconds and try again.
for _ in $(seq 1 90); do
|
||||
[ "$(health)" = "200" ] && { echo "warm keycloak healthy ($DOMAIN)"; exit 0; }
|
||||
sleep 10
|
||||
done
|
||||
# Reached only when no poll returned 200: report the failure on stderr.
echo "FATAL: warm keycloak $DOMAIN did not become healthy" >&2
|
||||
|
||||
Reference in New Issue
Block a user