3 Commits

Author SHA1 Message Date
3758522cf8 fix(backup): block reconnections via pg_hba during pg restore (app reconnect broke reimport) 2026-05-30 23:37:31 +00:00
7a2e0e044c fix(backup): reimport postgres dump on restore (restore was a no-op)
The db service dumped the DB on backup (pg_dump pre-hook) but shipped no
restore hook, and a file-level restore does not reload into the running
postgres, so a restored backup silently kept the live (un-restored) state.
Add pg_backup.sh (backup=pg_dump|gzip into the postgresql_data volume;
restore=terminate conns + DROP DATABASE WITH FORCE + createdb + reimport),
mount it via a config, and wire the backupbot backup/restore hooks. Same
fix as the immich / mattermost-lts / ghost recipes.
2026-05-30 15:19:26 +00:00
7b7ddd70bc fix(image): re-pin bitnami/discourse -> bitnamilegacy/discourse:3.3.1
Docker Hub emptied the bitnami/discourse namespace (manifest 3.3.1 -> 404, tags:[]); Bitnami moved
archived images to the bitnamilegacy namespace. bitnamilegacy/discourse:3.3.1 is the byte-identical
drop-in (manifest -> 200, same /bitnami/discourse paths + /opt/bitnami scripts). Re-pin both app +
sidekiq services; bump 0.7.0 -> 0.8.0. Found via cc-ci enrollment.
2026-05-30 09:52:27 +00:00
4 changed files with 67 additions and 7 deletions

View File

@ -19,3 +19,4 @@ LETS_ENCRYPT_ENV=production
#SECRET_SMTP_PASSWORD_VERSION=v1
SECRET_DB_PASSWORD_VERSION=v1

View File

@ -1 +1,2 @@
export DB_ENTRYPOINT_VERSION=v1
export PG_BACKUP_VERSION=v1

View File

@ -3,7 +3,7 @@ version: "3.8"
services:
app:
image: bitnami/discourse:3.3.1
image: bitnamilegacy/discourse:3.3.1
networks:
- proxy
- internal
@ -43,13 +43,13 @@ services:
#- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
- "coop-cloud.${STACK_NAME}.version=0.7.0+3.3.1"
- "coop-cloud.${STACK_NAME}.version=0.8.0+3.3.1"
healthcheck:
test: "ruby -e \"require 'uri'; require 'net/http'; uri = URI('http://localhost:3000/srv/status'); res = Net::HTTP.get_response(uri); if res.is_a?(Net::HTTPSuccess) then exit (0) else exit (1) end\""
interval: 30s
timeout: 10s
retries: 6
start_period: 5m
start_period: 20m
db:
image: postgres:13
@ -63,6 +63,9 @@ services:
- source: db_entrypoint
target: /docker-entrypoint.sh
mode: 0555
- source: pg_backup
target: /pg_backup.sh
mode: 0555
entrypoint: /docker-entrypoint.sh
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
@ -72,9 +75,9 @@ services:
deploy:
labels:
backupbot.backup: "true"
backupbot.backup.pre-hook: "bash -c 'PGPASSWORD=$$(cat $${POSTGRES_PASSWORD_FILE}) pg_dump -U $${POSTGRES_USER} $${POSTGRES_DB} > /tmp/backup.sql'"
backupbot.backup.post-hook: "rm -rf /tmp/backup.sql"
backupbot.backup.path: "/tmp/backup.sql"
backupbot.backup.pre-hook: "/pg_backup.sh backup"
backupbot.backup.volumes.postgresql_data.path: "backup.sql"
backupbot.restore.post-hook: "/pg_backup.sh restore"
redis:
image: redis:7.4-alpine
@ -84,7 +87,7 @@ services:
- 'redis_data:/data'
sidekiq:
image: bitnami/discourse:3.3.1
image: bitnamilegacy/discourse:3.3.1
networks:
- proxy
- internal
@ -132,3 +135,6 @@ configs:
name: ${STACK_NAME}_db_entrypoint_${DB_ENTRYPOINT_VERSION}
file: entrypoint.postgres.sh.tmpl
template_driver: golang
pg_backup:
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
file: pg_backup.sh

52
pg_backup.sh Executable file
View File

@ -0,0 +1,52 @@
#!/bin/bash
# Postgres backup/restore hook for the discourse `db` service. Invoked by backupbot-two via:
# backupbot.backup.pre-hook = "/pg_backup.sh backup"
# backupbot.backup.volumes.postgresql_data.path = "backup.sql"
# backupbot.restore.post-hook = "/pg_backup.sh restore"
# Backup writes a gzip-compressed pg_dump to backup.sql (gzip data despite the .sql name) inside the
# postgresql_data volume; backupbot then archives that file.
# Restore reimports it. Discourse (the rails app + sidekiq) keeps many TCP connections open to the DB
# and reconnects within milliseconds, so a one-shot pg_terminate_backend is NOT enough: restore must
# first block all non-local connections at the pg_hba level (so the app cannot reconnect and interfere
# mid-reimport), then FORCE-drop, recreate, and deterministically reimport the dump, then restore
# pg_hba. (Mirrors the proven matrix-synapse restore hook.) The previous recipe shipped a pg_dump
# backup but NO restore hook — a file-level restore did not reload into the running postgres, so a
# restored backup silently kept the live (un-restored) state. cc-ci caught this: a seeded ci_marker row
# was gone after restore. Same pattern as the immich / mattermost-lts / ghost recipe-PRs.
# Abort on the first failing command so a broken step is reported to the caller as a non-zero exit.
set -e

# Location of the gzip-compressed dump that backup writes and restore reads.
BACKUP_FILE='/var/lib/postgresql/data/backup.sql'

# Load the DB password for the libpq client tools (pg_dump / psql / createdb read PGPASSWORD).
# Assignment and export are kept separate: `export VAR=$(cmd)` returns the status of `export`,
# which hides a failing `cmd` from `set -e`. A missing/unreadable secret file stays non-fatal
# (as before: PGPASSWORD ends up empty), but is now handled explicitly instead of by accident.
PASSWORD_FILE="${POSTGRES_PASSWORD_FILE:-/run/secrets/db_password}"
if [ -r "$PASSWORD_FILE" ]; then
    PGPASSWORD=$(cat "$PASSWORD_FILE")
else
    echo "pg_backup.sh: password file '$PASSWORD_FILE' is not readable; continuing with an empty PGPASSWORD" >&2
    PGPASSWORD=''
fi
export PGPASSWORD

# Role and database to operate on; fall back to 'discourse' when the container env does not set them.
DB_USER="${POSTGRES_USER:-discourse}"
DB_NAME="${POSTGRES_DB:-discourse}"
# Dump $DB_NAME as user $DB_USER, gzip-compressed, to $BACKUP_FILE.
# Returns non-zero (and leaves any previous $BACKUP_FILE untouched) when the dump fails.
function backup {
    # Without pipefail the pipeline's status is gzip's, so a failing pg_dump would go unnoticed
    # and an empty or truncated archive would be reported as a successful backup.
    set -o pipefail

    # Write to a sibling temp file and rename on success: redirecting straight into $BACKUP_FILE
    # would truncate the last good dump before we know the new one is complete.
    local tmp_file="${BACKUP_FILE}.tmp"
    if ! pg_dump -U "$DB_USER" "$DB_NAME" | gzip > "$tmp_file"; then
        rm -f "$tmp_file"
        echo "pg_backup.sh: pg_dump of '${DB_NAME}' failed; keeping the previous ${BACKUP_FILE}" >&2
        return 1
    fi
    mv -f "$tmp_file" "$BACKUP_FILE"
}
# Reimport the gzip-compressed dump at $BACKUP_FILE into a freshly recreated $DB_NAME.
# Sequence: validate the dump, swap pg_hba.conf for a local-only rule set, drop + recreate the
# database, reimport in a single transaction, and put the original pg_hba.conf back on exit.
# Returns/exits non-zero on any failure.
function restore {
    # A failure on either side of the `gunzip | psql` pipeline must fail the hook.
    set -o pipefail
    cd /var/lib/postgresql/data/

    # Validate the dump BEFORE anything destructive: a missing, empty or corrupt archive must not
    # cost us the live database.
    if [ ! -s "$BACKUP_FILE" ] || ! gzip -t "$BACKUP_FILE"; then
        echo "pg_backup.sh: ${BACKUP_FILE} is missing, empty or not a valid gzip file; refusing to drop '${DB_NAME}'" >&2
        return 1
    fi

    # Put the original pg_hba.conf back and reload. Safe to call more than once: without the guard
    # a second call would truncate pg_hba.conf via the redirect and then fail on the missing .bak.
    # `cat >` (rather than mv) rewrites the existing file in place instead of replacing it.
    restore_hba() {
        [ -f pg_hba.conf.bak ] || return 0
        cat pg_hba.conf.bak > pg_hba.conf
        rm -f pg_hba.conf.bak
        su postgres -c 'pg_ctl reload'
    }

    # A leftover .bak means an earlier restore was killed while pg_hba.conf was locked down; that
    # .bak is the real original, so never overwrite it with the locked-down file.
    if [ ! -f pg_hba.conf.bak ]; then
        cp pg_hba.conf pg_hba.conf.bak
    fi

    # Arm the cleanup before touching pg_hba.conf so a failing reload cannot leave the lock-down
    # in place. INT/TERM are turned into an exit so the EXIT trap does the cleanup exactly once and
    # the script does not carry on with the drop/reimport after being interrupted.
    trap restore_hba EXIT
    trap 'exit 130' INT
    trap 'exit 143' TERM

    # Block all non-local connections so the running discourse app + sidekiq cannot reconnect and
    # interfere with the drop/recreate/reimport (a one-shot pg_terminate_backend is not enough: the
    # app reconnects within ms over TCP).
    echo 'local all all trust' > pg_hba.conf
    su postgres -c 'pg_ctl reload'

    # Terminate lingering sessions on the target database, then FORCE-drop it. IF EXISTS keeps the
    # restore re-runnable after an earlier attempt that failed between the drop and createdb.
    psql -U "$DB_USER" -d postgres -c \
        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
    psql -U "$DB_USER" -d postgres -c "DROP DATABASE IF EXISTS ${DB_NAME} WITH (FORCE);"
    createdb -U "$DB_USER" "$DB_NAME"

    # -1 wraps the import in one transaction and ON_ERROR_STOP aborts on the first SQL error.
    gunzip -c "$BACKUP_FILE" | psql -U "$DB_USER" -d "$DB_NAME" -1 -v ON_ERROR_STOP=1 -f -
}
# Dispatch: run the function named by the first CLI argument ("backup" or "restore"), passing any
# remaining arguments through. Quoted so arguments are not re-split or glob-expanded by the shell.
"$@"