fix(backup): block reconnections via pg_hba during pg restore (app reconnect broke reimport)

This commit is contained in:
2026-05-30 23:37:31 +00:00
parent 7a2e0e044c
commit 3758522cf8

View File

@ -5,12 +5,14 @@
# backupbot.backup.volumes.postgresql_data.path = "backup.sql"
# backupbot.restore.post-hook = "/pg_backup.sh restore"
# Backup dumps the DB to backup.sql (gzip) inside the postgresql_data volume; backupbot archives it.
# Restore reimports it. Discourse (the rails app + sidekiq) keeps TCP connections open to the DB, so
# restore must terminate them and FORCE-drop before recreating, then reimport the dump deterministically.
# The previous recipe shipped a pg_dump backup but NO restore hook — a file-level restore did not reload
# into the running postgres, so a restored backup silently kept the live (un-restored) state. cc-ci
# caught this: a seeded ci_marker row was gone after restore. Same pattern as the immich / mattermost-lts
# / ghost recipe-PRs.
# Restore reimports it. Discourse (the rails app + sidekiq) keeps many TCP connections open to the DB
# and reconnects within milliseconds, so a one-shot pg_terminate_backend is NOT enough: restore must
# first block all non-local connections at the pg_hba level (so the app cannot reconnect and interfere
# mid-reimport), then FORCE-drop, recreate, and deterministically reimport the dump, then restore
# pg_hba. (Mirrors the proven matrix-synapse restore hook.) The previous recipe shipped a pg_dump
# backup but NO restore hook — a file-level restore did not reload into the running postgres, so a
# restored backup silently kept the live (un-restored) state. cc-ci caught this: a seeded ci_marker row
# was gone after restore. Same pattern as the immich / mattermost-lts / ghost recipe-PRs.
set -e
@ -24,6 +26,22 @@ function backup {
}
function restore {
cd /var/lib/postgresql/data/
# Block all non-local connections so the running discourse app + sidekiq cannot reconnect and
# interfere with the drop/recreate/reimport (a one-shot pg_terminate_backend is not enough — the
# app reconnects within ms over TCP). The original pg_hba.conf is put back by the EXIT/INT/TERM trap.
# Put the saved pg_hba.conf back in place and reload postgres so the normal connection rules
# apply again. Operates on pg_hba.conf / pg_hba.conf.bak relative to the current directory.
# This is used as a trap handler for EXIT, INT and TERM, so it may run more than once in a single
# restore (on the signal, then again on EXIT). It must therefore be idempotent: without the guard,
# a second call would truncate pg_hba.conf through the `>` redirect before `cat` fails on the
# already-removed .bak file, leaving postgres with an empty pg_hba.conf.
restore_hba() {
  # No backup file left means a previous call already restored it; nothing to do.
  [ -f pg_hba.conf.bak ] || return 0
  # Overwrite in place (rather than mv) — presumably to keep the existing file's owner and mode.
  cat pg_hba.conf.bak > pg_hba.conf
  rm -f pg_hba.conf.bak
  su postgres -c 'pg_ctl reload'
}
cp pg_hba.conf pg_hba.conf.bak
echo 'local all all trust' > pg_hba.conf
su postgres -c 'pg_ctl reload'
trap restore_hba EXIT INT TERM
# Terminate lingering local sessions, then FORCE-drop + recreate + deterministic reimport.
psql -U "$DB_USER" -d postgres -c \
"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
psql -U "$DB_USER" -d postgres -c "DROP DATABASE ${DB_NAME} WITH (FORCE);"