fix(backup): add pg_backup.sh + proper backup/restore hooks, 20m start_period
This commit is contained in:
14
compose.yml
14
compose.yml
@ -49,7 +49,7 @@ services:
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 6
|
||||
start_period: 5m
|
||||
start_period: 20m
|
||||
|
||||
db:
|
||||
image: postgres:16
|
||||
@ -63,6 +63,9 @@ services:
|
||||
- source: db_entrypoint
|
||||
target: /docker-entrypoint.sh
|
||||
mode: 0555
|
||||
- source: pg_backup
|
||||
target: /pg_backup.sh
|
||||
mode: 0555
|
||||
entrypoint: /docker-entrypoint.sh
|
||||
environment:
|
||||
- POSTGRES_HOST_AUTH_METHOD=trust
|
||||
@ -72,9 +75,9 @@ services:
|
||||
deploy:
|
||||
labels:
|
||||
backupbot.backup: "true"
|
||||
backupbot.backup.pre-hook: "bash -c 'PGPASSWORD=$$(cat $${POSTGRES_PASSWORD_FILE}) pg_dump -U $${POSTGRES_USER} $${POSTGRES_DB} > /tmp/backup.sql'"
|
||||
backupbot.backup.post-hook: "rm -rf /tmp/backup.sql"
|
||||
backupbot.backup.path: "/tmp/backup.sql"
|
||||
backupbot.backup.pre-hook: "/pg_backup.sh backup"
|
||||
backupbot.backup.volumes.postgresql_data.path: "backup.sql"
|
||||
backupbot.restore.post-hook: "/pg_backup.sh restore"
|
||||
|
||||
redis:
|
||||
image: redis:7.4-alpine
|
||||
@ -132,3 +135,6 @@ configs:
|
||||
name: ${STACK_NAME}_db_entrypoint_${DB_ENTRYPOINT_VERSION}
|
||||
file: entrypoint.postgres.sh.tmpl
|
||||
template_driver: golang
|
||||
pg_backup:
|
||||
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
|
||||
file: pg_backup.sh
|
||||
|
||||
52
pg_backup.sh
Executable file
52
pg_backup.sh
Executable file
@ -0,0 +1,52 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Postgres backup/restore hook for the discourse `db` service. Invoked by backupbot-two via:
|
||||
# backupbot.backup.pre-hook = "/pg_backup.sh backup"
|
||||
# backupbot.backup.volumes.postgresql_data.path = "backup.sql"
|
||||
# backupbot.restore.post-hook = "/pg_backup.sh restore"
|
||||
# Backup dumps the DB to backup.sql (gzip) inside the postgresql_data volume; backupbot archives it.
|
||||
# Restore reimports it. Discourse (the rails app + sidekiq) keeps many TCP connections open to the DB
|
||||
# and reconnects within milliseconds, so a one-shot pg_terminate_backend is NOT enough: restore must
|
||||
# first block all non-local connections at the pg_hba level (so the app cannot reconnect and interfere
|
||||
# mid-reimport), then FORCE-drop, recreate, and deterministically reimport the dump, then restore
|
||||
# pg_hba. (Mirrors the proven matrix-synapse restore hook.) The previous recipe shipped a pg_dump
|
||||
# backup but NO restore hook — a file-level restore did not reload into the running postgres, so a
|
||||
# restored backup silently kept the live (un-restored) state. cc-ci caught this: a seeded ci_marker row
|
||||
# was gone after restore. Same pattern as the immich / mattermost-lts / ghost recipe-PRs.
|
||||
|
||||
# Abort on the first failing command, and make a pipeline fail when ANY of its
# stages fails. Without pipefail only the last stage's status counts, so a
# failing `pg_dump | gzip` or `gunzip | psql` would be reported as success.
set -e
set -o pipefail

# Location of the gzip-compressed SQL dump written by `backup` and read by `restore`.
BACKUP_FILE='/var/lib/postgresql/data/backup.sql'

# Export the database password for the libpq client tools (pg_dump, psql, createdb).
# Assignment and export are kept separate: `export VAR=$(cmd)` returns the status
# of `export`, which hides a failing `cat` from `set -e`.
PASSWORD_FILE="${POSTGRES_PASSWORD_FILE:-/run/secrets/db_password}"
if [ -r "$PASSWORD_FILE" ]; then
    PGPASSWORD=$(cat "$PASSWORD_FILE")
    export PGPASSWORD
else
    # Keep going: local connections may not need a password at all.
    echo "pg_backup: password file '${PASSWORD_FILE}' is not readable; continuing without PGPASSWORD" >&2
fi

# Role and database to operate on, overridable through the container environment.
DB_USER="${POSTGRES_USER:-discourse}"
DB_NAME="${POSTGRES_DB:-discourse}"
|
||||
|
||||
# Dump $DB_NAME as gzip-compressed SQL into $BACKUP_FILE.
#
# The dump is first written to a temporary file next to the target and only
# moved into place once both pg_dump and gzip have succeeded, so a failed dump
# never replaces a previous good archive with an empty or truncated one.
#
# Returns non-zero (and leaves any existing $BACKUP_FILE untouched) on failure.
function backup {
    local tmp_file="${BACKUP_FILE}.tmp"

    # Without pipefail the pipeline status is gzip's alone, so a pg_dump
    # failure would be reported as a successful backup.
    set -o pipefail

    if ! pg_dump -U "$DB_USER" "$DB_NAME" | gzip > "$tmp_file"; then
        rm -f "$tmp_file"
        echo "pg_backup: pg_dump of database '${DB_NAME}' failed; existing backup left untouched" >&2
        return 1
    fi

    # Same directory, hence same filesystem: the rename is atomic.
    mv -f "$tmp_file" "$BACKUP_FILE"
}
|
||||
|
||||
# Reimport $BACKUP_FILE into a freshly recreated $DB_NAME.
#
# Order of operations:
#   1. verify the dump exists and is an intact gzip stream BEFORE anything
#      destructive happens;
#   2. replace pg_hba.conf with a single local-only rule so remote clients
#      cannot reconnect and interfere with the drop/recreate/reimport;
#   3. terminate remaining sessions, force-drop, recreate and reimport the
#      database inside a single transaction;
#   4. put the original pg_hba.conf back -- on success, failure or signal.
#
# Returns non-zero without touching the database when the dump is unusable.
function restore {
    cd /var/lib/postgresql/data/

    # Make a failing gunzip fail the reimport pipeline instead of letting psql
    # commit whatever partial SQL it received.
    set -o pipefail

    # Refuse to drop the live database unless there is something valid to load.
    if [ ! -s "$BACKUP_FILE" ] || ! gzip -t "$BACKUP_FILE"; then
        echo "pg_backup: '${BACKUP_FILE}' is missing, empty or corrupt; database left untouched" >&2
        return 1
    fi

    # Reinstate the saved pg_hba.conf. Idempotent: a second invocation finds no
    # .bak and does nothing, instead of truncating pg_hba.conf through a failed
    # `cat missing > pg_hba.conf`.
    restore_hba() {
        [ -f pg_hba.conf.bak ] || return 0
        # Copy the content back rather than renaming the .bak over the file,
        # so pg_hba.conf keeps its original owner and permissions.
        cat pg_hba.conf.bak > pg_hba.conf
        rm -f pg_hba.conf.bak
        su postgres -c 'pg_ctl reload'
    }

    # Install the cleanup BEFORE modifying pg_hba.conf so a failure in the
    # lockdown itself is rolled back too. Signals are turned into a normal exit
    # so the EXIT trap runs exactly once and the script does not carry on.
    trap restore_hba EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM

    # A leftover .bak means an earlier run was killed before cleanup; it holds
    # the real configuration and must not be overwritten with the lockdown file.
    if [ ! -f pg_hba.conf.bak ]; then
        cp pg_hba.conf pg_hba.conf.bak
    fi
    echo 'local all all trust' > pg_hba.conf
    su postgres -c 'pg_ctl reload'

    # Terminate lingering sessions, then force-drop, recreate and reimport.
    psql -U "$DB_USER" -d postgres -c \
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
    # --if-exists lets a retry succeed after a previous restore died between
    # drop and create; passing the name as an argument avoids SQL identifier
    # quoting problems.
    dropdb -U "$DB_USER" --force --if-exists "$DB_NAME"
    createdb -U "$DB_USER" "$DB_NAME"
    # -1 wraps the import in one transaction; ON_ERROR_STOP aborts it on the
    # first failing statement.
    gunzip -c "$BACKUP_FILE" | psql -U "$DB_USER" -d "$DB_NAME" -1 -v ON_ERROR_STOP=1 -f -
}
|
||||
|
||||
# Run the requested subcommand. Only the two documented entry points are
# accepted, so a typo or an empty hook command fails loudly instead of being
# executed as an arbitrary command or silently doing nothing.
case "${1:-}" in
    backup|restore)
        "$@"
        ;;
    *)
        echo "Usage: $0 {backup|restore}" >&2
        exit 2
        ;;
esac
|
||||
Reference in New Issue
Block a user