From 7b7ddd70bc753608d086884b8de1ad3c327d9ac5 Mon Sep 17 00:00:00 2001
From: autonomic-bot
Date: Sat, 30 May 2026 09:52:27 +0000
Subject: [PATCH 1/3] fix(image): re-pin bitnami/discourse ->
 bitnamilegacy/discourse:3.3.1

Docker Hub emptied the bitnami/discourse namespace (manifest 3.3.1 -> 404,
tags:[]); Bitnami moved archived images to the bitnamilegacy namespace.
bitnamilegacy/discourse:3.3.1 is the byte-identical drop-in (manifest -> 200,
same /bitnami/discourse paths + /opt/bitnami scripts). Re-pin both app +
sidekiq services; bump 0.7.0 -> 0.8.0. Found via cc-ci enrollment.
---
 compose.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compose.yml b/compose.yml
index 79edcd7..370d7ce 100644
--- a/compose.yml
+++ b/compose.yml
@@ -3,7 +3,7 @@ version: "3.8"
 
 services:
   app:
-    image: bitnami/discourse:3.3.1
+    image: bitnamilegacy/discourse:3.3.1
     networks:
       - proxy
       - internal
@@ -43,7 +43,7 @@ services:
         #- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
         #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
         #- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
-        - "coop-cloud.${STACK_NAME}.version=0.7.0+3.3.1"
+        - "coop-cloud.${STACK_NAME}.version=0.8.0+3.3.1"
     healthcheck:
       test: "ruby -e \"require 'uri'; require 'net/http'; uri = URI('http://localhost:3000/srv/status'); res = Net::HTTP.get_response(uri); if res.is_a?(Net::HTTPSuccess) then exit (0) else exit (1) end\""
       interval: 30s
@@ -84,7 +84,7 @@ services:
       - 'redis_data:/data'
 
   sidekiq:
-    image: bitnami/discourse:3.3.1
+    image: bitnamilegacy/discourse:3.3.1
     networks:
       - proxy
       - internal
-- 
2.49.0

From 7a2e0e044cfd301aa7790e297adf0ac2aafb369b Mon Sep 17 00:00:00 2001
From: autonomic-bot
Date: Sat, 30 May 2026 14:03:18 +0000
Subject: [PATCH 2/3] fix(backup): reimport postgres dump on restore (restore
 was a no-op)

The db service dumped the DB on backup (pg_dump pre-hook) but shipped no
restore hook, and a file-level restore does not reload into the running
postgres, so a restored backup silently kept the live (un-restored) state.
Add pg_backup.sh (backup=pg_dump|gzip into the postgresql_data volume;
restore=terminate conns + DROP DATABASE WITH FORCE + createdb + reimport),
mount it via a config, and wire the backupbot backup/restore hooks. The
script runs with pipefail so a failed pg_dump (or gunzip) fails the hook
instead of archiving/importing a truncated dump. Also raise the app
healthcheck start_period from 5m to 20m. Same fix as the immich /
mattermost-lts / ghost recipes.
---
Review notes (ignored by git am):
  * pg_backup.sh uses `set -eo pipefail` and a quoted "$@" dispatcher.
  * The whitespace-only .env.sample hunk (appended one blank line) was dropped.
  * The pg_backup.sh blob id on the index line predates these fixes;
    regenerate the series with git format-patch before using `git am -3`.

 abra.sh      |  1 +
 compose.yml  | 14 ++++++++++----
 pg_backup.sh | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 4 deletions(-)
 create mode 100755 pg_backup.sh

diff --git a/abra.sh b/abra.sh
index 1c66304..def0c38 100644
--- a/abra.sh
+++ b/abra.sh
@@ -1 +1,2 @@
 export DB_ENTRYPOINT_VERSION=v1
+export PG_BACKUP_VERSION=v1
diff --git a/compose.yml b/compose.yml
index 370d7ce..22b5dc2 100644
--- a/compose.yml
+++ b/compose.yml
@@ -49,7 +49,7 @@ services:
       interval: 30s
       timeout: 10s
       retries: 6
-      start_period: 5m
+      start_period: 20m
 
   db:
     image: postgres:13
@@ -63,6 +63,9 @@ services:
       - source: db_entrypoint
         target: /docker-entrypoint.sh
         mode: 0555
+      - source: pg_backup
+        target: /pg_backup.sh
+        mode: 0555
     entrypoint: /docker-entrypoint.sh
     environment:
       - POSTGRES_HOST_AUTH_METHOD=trust
@@ -72,9 +75,9 @@ services:
     deploy:
       labels:
         backupbot.backup: "true"
-        backupbot.backup.pre-hook: "bash -c 'PGPASSWORD=$$(cat $${POSTGRES_PASSWORD_FILE}) pg_dump -U $${POSTGRES_USER} $${POSTGRES_DB} > /tmp/backup.sql'"
-        backupbot.backup.post-hook: "rm -rf /tmp/backup.sql"
-        backupbot.backup.path: "/tmp/backup.sql"
+        backupbot.backup.pre-hook: "/pg_backup.sh backup"
+        backupbot.backup.volumes.postgresql_data.path: "backup.sql"
+        backupbot.restore.post-hook: "/pg_backup.sh restore"
 
   redis:
     image: redis:7.4-alpine
@@ -132,3 +135,6 @@ configs:
     name: ${STACK_NAME}_db_entrypoint_${DB_ENTRYPOINT_VERSION}
     file: entrypoint.postgres.sh.tmpl
     template_driver: golang
+  pg_backup:
+    name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
+    file: pg_backup.sh
diff --git a/pg_backup.sh b/pg_backup.sh
new file mode 100755
index 0000000..55a8365
--- /dev/null
+++ b/pg_backup.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Postgres backup/restore hook for the discourse `db` service. Invoked by backupbot-two via:
+#   backupbot.backup.pre-hook = "/pg_backup.sh backup"
+#   backupbot.backup.volumes.postgresql_data.path = "backup.sql"
+#   backupbot.restore.post-hook = "/pg_backup.sh restore"
+# Backup dumps the DB to backup.sql (gzip) inside the postgresql_data volume; backupbot archives it.
+# Restore reimports it. Discourse (the rails app + sidekiq) keeps TCP connections open to the DB, so
+# restore must terminate them and FORCE-drop before recreating, then reimport the dump deterministically.
+# The previous recipe shipped a pg_dump backup but NO restore hook — a file-level restore did not reload
+# into the running postgres, so a restored backup silently kept the live (un-restored) state. cc-ci
+# caught this: a seeded ci_marker row was gone after restore. Same pattern as the immich / mattermost-lts
+# / ghost recipe-PRs.
+
+set -eo pipefail
+
+BACKUP_FILE='/var/lib/postgresql/data/backup.sql'
+export PGPASSWORD=$(cat "${POSTGRES_PASSWORD_FILE:-/run/secrets/db_password}")
+DB_USER="${POSTGRES_USER:-discourse}"
+DB_NAME="${POSTGRES_DB:-discourse}"
+
+function backup {
+    pg_dump -U "$DB_USER" "$DB_NAME" | gzip > "$BACKUP_FILE"
+}
+
+function restore {
+    psql -U "$DB_USER" -d postgres -c \
+        "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
+    psql -U "$DB_USER" -d postgres -c "DROP DATABASE ${DB_NAME} WITH (FORCE);"
+    createdb -U "$DB_USER" "$DB_NAME"
+    gunzip -c "$BACKUP_FILE" | psql -U "$DB_USER" -d "$DB_NAME" -1 -v ON_ERROR_STOP=1 -f -
+}
+
+"$@"
-- 
2.49.0

From 3758522cf8702e97e88cd38d47165cf14defe74e Mon Sep 17 00:00:00 2001
From: autonomic-bot
Date: Sat, 30 May 2026 23:37:31 +0000
Subject: [PATCH 3/3] fix(backup): block reconnections via pg_hba during pg
 restore (app reconnect broke reimport)

Discourse (rails app + sidekiq) reconnects within milliseconds, so a one-shot
pg_terminate_backend is not enough. Lock pg_hba.conf down to local
connections for the duration of the drop/recreate/reimport and put the
original back on exit.

The cleanup trap is armed before pg_hba.conf is touched and is idempotent,
so it can never truncate pg_hba.conf from an already-removed backup. INT/TERM
abort the restore instead of resuming it with connections unblocked. A
pg_hba.conf.bak left behind by a killed restore is kept as the real config
rather than overwritten with the lock-down.
---
Review notes (ignored by git am):
  * Blob ids on the index line predate the review fixes; regenerate the
    series with git format-patch before using `git am -3`.

 pg_backup.sh | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/pg_backup.sh b/pg_backup.sh
index 55a8365..5a2e6d8 100755
--- a/pg_backup.sh
+++ b/pg_backup.sh
@@ -5,12 +5,14 @@
 #   backupbot.backup.volumes.postgresql_data.path = "backup.sql"
 #   backupbot.restore.post-hook = "/pg_backup.sh restore"
 # Backup dumps the DB to backup.sql (gzip) inside the postgresql_data volume; backupbot archives it.
-# Restore reimports it. Discourse (the rails app + sidekiq) keeps TCP connections open to the DB, so
-# restore must terminate them and FORCE-drop before recreating, then reimport the dump deterministically.
-# The previous recipe shipped a pg_dump backup but NO restore hook — a file-level restore did not reload
-# into the running postgres, so a restored backup silently kept the live (un-restored) state. cc-ci
-# caught this: a seeded ci_marker row was gone after restore. Same pattern as the immich / mattermost-lts
-# / ghost recipe-PRs.
+# Restore reimports it. Discourse (the rails app + sidekiq) keeps many TCP connections open to the DB
+# and reconnects within milliseconds, so a one-shot pg_terminate_backend is NOT enough: restore must
+# first block all non-local connections at the pg_hba level (so the app cannot reconnect and interfere
+# mid-reimport), then FORCE-drop, recreate, and deterministically reimport the dump, then restore
+# pg_hba. (Mirrors the proven matrix-synapse restore hook.) The previous recipe shipped a pg_dump
+# backup but NO restore hook — a file-level restore did not reload into the running postgres, so a
+# restored backup silently kept the live (un-restored) state. cc-ci caught this: a seeded ci_marker row
+# was gone after restore. Same pattern as the immich / mattermost-lts / ghost recipe-PRs.
 
 set -eo pipefail
 
@@ -24,6 +26,29 @@ function backup {
 }
 
 function restore {
+    cd /var/lib/postgresql/data/
+
+    # Block all non-local connections so the running discourse app + sidekiq cannot reconnect and
+    # interfere with the drop/recreate/reimport (a one-shot pg_terminate_backend is not enough — the
+    # app reconnects within ms over TCP). Restored on exit.
+    restore_hba() {
+        # Idempotent: nothing to do if the backup was already put back (or never taken).
+        [ -f pg_hba.conf.bak ] || return 0
+        # cat (not mv) so pg_hba.conf keeps its original owner and mode.
+        cat pg_hba.conf.bak > pg_hba.conf
+        rm -f pg_hba.conf.bak
+        su postgres -c 'pg_ctl reload'
+    }
+    # Arm the cleanup BEFORE touching pg_hba.conf so a failed reload cannot leave the lock-down in
+    # place; INT/TERM exit (which fires the EXIT trap once) instead of resuming the restore.
+    trap restore_hba EXIT
+    trap 'exit 1' INT TERM
+    # Keep an existing .bak: it is the real pg_hba.conf left behind by an interrupted restore.
+    [ -f pg_hba.conf.bak ] || cp pg_hba.conf pg_hba.conf.bak
+    echo 'local all all trust' > pg_hba.conf
+    su postgres -c 'pg_ctl reload'
+
+    # Terminate already-established sessions (the reload only gates NEW connections), then
+    # FORCE-drop + recreate + deterministic reimport.
     psql -U "$DB_USER" -d postgres -c \
         "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${DB_NAME}' AND pid<>pg_backend_pid();"
     psql -U "$DB_USER" -d postgres -c "DROP DATABASE ${DB_NAME} WITH (FORCE);"
-- 
2.49.0