Compare commits
3 Commits
fix-pg-mig
...
ci/bitnami
| Author | SHA1 | Date | |
|---|---|---|---|
| | 3758522cf8 | | |
| | 7a2e0e044c | | |
| | 7b7ddd70bc | | |
@@ -19,3 +19,4 @@ LETS_ENCRYPT_ENV=production
|
||||
#SECRET_SMTP_PASSWORD_VERSION=v1
|
||||
|
||||
SECRET_DB_PASSWORD_VERSION=v1
|
||||
|
||||
|
||||
4
abra.sh
4
abra.sh
@@ -1,2 +1,2 @@
|
||||
export DB_ENTRYPOINT_VERSION=v3
|
||||
export PG_BACKUP_VERSION=v2
|
||||
export DB_ENTRYPOINT_VERSION=v1
|
||||
export PG_BACKUP_VERSION=v1
|
||||
|
||||
@@ -3,7 +3,7 @@ version: "3.8"
|
||||
|
||||
services:
|
||||
app:
|
||||
image: bitnamilegacy/discourse:3.5.0
|
||||
image: bitnamilegacy/discourse:3.3.1
|
||||
networks:
|
||||
- proxy
|
||||
- internal
|
||||
@@ -43,7 +43,7 @@ services:
|
||||
#- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
|
||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
|
||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
|
||||
- "coop-cloud.${STACK_NAME}.version=0.8.0+3.5.0"
|
||||
- "coop-cloud.${STACK_NAME}.version=0.8.0+3.3.1"
|
||||
healthcheck:
|
||||
test: "ruby -e \"require 'uri'; require 'net/http'; uri = URI('http://localhost:3000/srv/status'); res = Net::HTTP.get_response(uri); if res.is_a?(Net::HTTPSuccess) then exit (0) else exit (1) end\""
|
||||
interval: 30s
|
||||
@@ -52,7 +52,7 @@ services:
|
||||
start_period: 20m
|
||||
|
||||
db:
|
||||
image: pgvector/pgvector:pg17
|
||||
image: postgres:13
|
||||
networks:
|
||||
- internal
|
||||
secrets:
|
||||
@@ -87,7 +87,7 @@ services:
|
||||
- 'redis_data:/data'
|
||||
|
||||
sidekiq:
|
||||
image: bitnamilegacy/discourse:3.5.0
|
||||
image: bitnamilegacy/discourse:3.3.1
|
||||
networks:
|
||||
- proxy
|
||||
- internal
|
||||
|
||||
@@ -2,23 +2,16 @@
|
||||
|
||||
set -e
|
||||
|
||||
MIGRATION_MARKER=$PGDATA/migration_in_progress
|
||||
OLDDATA=$PGDATA/old_data
|
||||
NEWDATA=$PGDATA/new_data
|
||||
|
||||
echo "Running as $(id)"
|
||||
|
||||
# The migration uses $OLDDATA/$NEWDATA as scratch and removes them when it
|
||||
# finishes; a leftover *empty* one means a run was interrupted before any data
|
||||
# moved (data still intact at $PGDATA) so we clear it and retry, while a
|
||||
# *non-empty* one means data may live only there, so we stop for manual recovery.
|
||||
for scratch in $OLDDATA $NEWDATA; do
|
||||
if [ -d "$scratch" ] && [ -n "$(ls -A "$scratch")" ]; then
|
||||
echo "FATAL: $scratch exists and is not empty - a previous migration did not"
|
||||
echo "complete and the data may only exist there. manual recovery necessary."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
rm -rf $OLDDATA $NEWDATA
|
||||
if [ -e $MIGRATION_MARKER ]; then
|
||||
echo "FATAL: migration was started but did not complete in a previous run. manual recovery necessary"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -f $PGDATA/PG_VERSION ]; then
|
||||
DATA_VERSION=$(cat $PGDATA/PG_VERSION)
|
||||
@@ -30,33 +23,22 @@ if [ -f $PGDATA/PG_VERSION ]; then
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
postgresql-$DATA_VERSION \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# pg_upgrade must run as the old cluster's bootstrap superuser (the "install
|
||||
# user", oid 10), and the new cluster must be initialised with that same
|
||||
# user. It is not necessarily $POSTGRES_USER (e.g. clusters created with the
|
||||
# default "postgres" superuser and a separate app role), so read it from the
|
||||
# old cluster: briefly start it and ask, connecting as the app role we know.
|
||||
PGBIN=/usr/lib/postgresql/$DATA_VERSION/bin
|
||||
gosu postgres $PGBIN/pg_ctl -D $PGDATA -w \
|
||||
-o "-c listen_addresses= -c unix_socket_directories=/tmp" start
|
||||
INSTALL_USER=$(gosu postgres psql -h /tmp -U "$POSTGRES_USER" -d postgres -tAc \
|
||||
"select rolname from pg_roles where oid = 10")
|
||||
gosu postgres $PGBIN/pg_ctl -D $PGDATA -w stop
|
||||
echo "old cluster install user: $INSTALL_USER"
|
||||
echo "shuffling around"
|
||||
gosu postgres mkdir $OLDDATA $NEWDATA
|
||||
chmod 700 $OLDDATA $NEWDATA
|
||||
mv $PGDATA/* $OLDDATA/ || true
|
||||
touch $MIGRATION_MARKER
|
||||
echo "running initdb"
|
||||
# abuse entrypoint script for initdb by making server error out; initialise
|
||||
# the new cluster with the same superuser as the old one so pg_upgrade matches
|
||||
gosu postgres bash -c "export PGDATA=$NEWDATA POSTGRES_USER=$INSTALL_USER ; /usr/local/bin/docker-entrypoint.sh --invalid-arg || true"
|
||||
# abuse the entrypoint script to run initdb: the invalid argument makes the server error out, and `|| true` ignores that failure
|
||||
gosu postgres bash -c "export PGDATA=$NEWDATA ; /usr/local/bin/docker-entrypoint.sh --invalid-arg || true"
|
||||
echo "running pg_upgrade"
|
||||
cd /tmp
|
||||
gosu postgres pg_upgrade --link -b /usr/lib/postgresql/$DATA_VERSION/bin -d $OLDDATA -D $NEWDATA -U $INSTALL_USER
|
||||
gosu postgres pg_upgrade --link -b /usr/lib/postgresql/$DATA_VERSION/bin -d $OLDDATA -D $NEWDATA -U $POSTGRES_USER
|
||||
cp $OLDDATA/pg_hba.conf $NEWDATA/
|
||||
mv $NEWDATA/* $PGDATA
|
||||
rm -rf $OLDDATA
|
||||
rmdir $NEWDATA
|
||||
rm $MIGRATION_MARKER
|
||||
echo "migration complete"
|
||||
fi
|
||||
fi
|
||||
|
||||
24
pg_backup.sh
24
pg_backup.sh
@@ -1,6 +1,18 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Postgres backup/restore hook for the discourse `db` service.
|
||||
# Postgres backup/restore hook for the discourse `db` service. Invoked by backupbot-two via:
|
||||
# backupbot.backup.pre-hook = "/pg_backup.sh backup"
|
||||
# backupbot.backup.volumes.postgresql_data.path = "backup.sql"
|
||||
# backupbot.restore.post-hook = "/pg_backup.sh restore"
|
||||
# Backup dumps the DB to a gzip-compressed backup.sql inside the postgresql_data volume; backupbot then archives it.
|
||||
# Restore reimports it. Discourse (the rails app + sidekiq) keeps many TCP connections open to the DB
|
||||
# and reconnects within milliseconds, so a one-shot pg_terminate_backend is NOT enough: restore must
|
||||
# first block all non-local connections at the pg_hba level (so the app cannot reconnect and interfere
|
||||
# mid-reimport), then FORCE-drop, recreate, and deterministically reimport the dump, then restore
|
||||
# pg_hba. (Mirrors the proven matrix-synapse restore hook.) The previous recipe shipped a pg_dump
|
||||
# backup but NO restore hook — a file-level restore did not reload into the running postgres, so a
|
||||
# restored backup silently kept the live (un-restored) state. cc-ci caught this: a seeded ci_marker row
|
||||
# was gone after restore. Same pattern as the immich / mattermost-lts / ghost recipe-PRs.
|
||||
|
||||
set -e
|
||||
|
||||
@@ -17,7 +29,8 @@ function restore {
|
||||
cd /var/lib/postgresql/data/
|
||||
|
||||
# Block all non-local connections so the running discourse app + sidekiq cannot reconnect and
|
||||
# interfere with the drop/recreate/reimport. Restored on exit.
|
||||
# interfere with the drop/recreate/reimport (a one-shot pg_terminate_backend is not enough — the
|
||||
# app reconnects within ms over TCP). Restored on exit.
|
||||
restore_hba() {
|
||||
cat pg_hba.conf.bak > pg_hba.conf
|
||||
rm -f pg_hba.conf.bak
|
||||
@@ -28,16 +41,11 @@ function restore {
|
||||
su postgres -c 'pg_ctl reload'
|
||||
trap restore_hba EXIT INT TERM
|
||||
|
||||
# terminate any lingering local sessions before recreate
|
||||
# see https://stackoverflow.com/questions/5108876/kill-a-postgresql-session-connection
|
||||
# Terminate lingering local sessions, then FORCE-drop + recreate + deterministic reimport.
|
||||
psql -U "$DB_USER" -d postgres -c \
|
||||
"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
|
||||
|
||||
# drop database and then recreate it
|
||||
psql -U "$DB_USER" -d postgres -c "DROP DATABASE ${DB_NAME} WITH (FORCE);"
|
||||
createdb -U "$DB_USER" "$DB_NAME"
|
||||
|
||||
# reimport data
|
||||
gunzip -c "$BACKUP_FILE" | psql -U "$DB_USER" -d "$DB_NAME" -1 -v ON_ERROR_STOP=1 -f -
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user