Compare commits
6 Commits
main
...
upgrade-3.
| Author | SHA1 | Date | |
|---|---|---|---|
| 13458fac56 | |||
| 270c8404ce | |||
| 4cab6b5146 | |||
| 9f8bcbc9e3 | |||
| b90a8c4239 | |||
| 50a3715caa |
@ -26,4 +26,3 @@
|
||||
|
||||
[`abra`]: https://git.coopcloud.tech/coop-cloud/abra
|
||||
[`coop-cloud/traefik`]: https://git.coopcloud.tech/coop-cloud/traefik
|
||||
p-cloud/traefik
|
||||
|
||||
5
abra.sh
5
abra.sh
@ -1,4 +1,5 @@
|
||||
export CLICKHOUSE_CONF_VERSION=v2
|
||||
export CLICKHOUSE_USER_CONF_VERSION=v2
|
||||
export DB_ENTRYPOINT_VERSION=v1
|
||||
export CLICKHOUSE_ENTRYPOINT_VERSION=v2
|
||||
export CLICKHOUSE_ENTRYPOINT_VERSION=v6
|
||||
export PG_BACKUP_VERSION=v1
|
||||
export CLICKHOUSE_BACKUP_SCRIPT_VERSION=v1
|
||||
|
||||
30
clickhouse_backup.sh
Normal file
30
clickhouse_backup.sh
Normal file
@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# clickhouse-backup output lives inside the event-data volume (snapshotted via
|
||||
# backupbot.backup.volumes.event-data.path). Restoring the raw data files under a
|
||||
# running server is unsafe, so restore performs a logical restore instead.
|
||||
BACKUP_DIR=/var/lib/clickhouse/backup/events
|
||||
MIGRATIONS_TSV="$BACKUP_DIR/schema_migrations.tsv"
|
||||
|
||||
# Pre-backup hook: create the ClickHouse backup named "events" and export the
# migration ledger next to it.
#
# Globals:  BACKUP_DIR (read; removed and re-created), MIGRATIONS_TSV (written)
# Returns:  0 on success, 1 as soon as any step fails.
backup() {
  # A directory left over from an interrupted run (post-hook never fired) would
  # collide with the fixed backup name "events", so start from a clean slate.
  rm -rf -- "${BACKUP_DIR:?}" || return 1

  clickhouse-backup create events || return 1

  # schema_migrations is a TinyLog table — clickhouse-backup only FREEZEs MergeTree
  # data, so its rows aren't captured. Export them alongside the backup, else a restore
  # leaves the ledger empty and the next boot re-runs every migration (DUPLICATE_COLUMN).
  clickhouse-client --query "SELECT * FROM plausible_events_db.schema_migrations FORMAT TSV" \
    > "$MIGRATIONS_TSV" || return 1
}
|
||||
|
||||
# Post-backup hook: delete the on-volume backup directory once it has been snapshotted.
#
# Globals:  BACKUP_DIR (read; the directory is removed recursively)
# Returns:  rm's exit status; a directory that is already gone is not an error.
backup_cleanup() {
  # `:?` aborts instead of running a recursive delete on an empty path, and `--`
  # stops option parsing should the path ever start with a dash.
  rm -rf -- "${BACKUP_DIR:?}"
}
|
||||
|
||||
# Post-restore hook: restore the "events" backup through clickhouse-backup, then
# reload the migration ledger from the TSV exported at backup time.
#
# Globals:  BACKUP_DIR (read; removed on success), MIGRATIONS_TSV (read)
# Returns:  0 on success, 1 if the ledger export is missing or any step fails.
restore() {
  # Check the ledger export BEFORE touching anything: `restore --rm` replaces the
  # live tables, and without the TSV the ledger would end up empty, which makes the
  # next boot re-run every migration.
  if [ ! -r "$MIGRATIONS_TSV" ]; then
    echo "clickhouse restore: $MIGRATIONS_TSV not found or unreadable — aborting before any table is touched" >&2
    return 1
  fi

  clickhouse-backup restore --rm events || return 1

  # Replace whatever ledger rows exist with the ones captured at backup time.
  clickhouse-client --query "TRUNCATE TABLE plausible_events_db.schema_migrations" || return 1
  clickhouse-client --query "INSERT INTO plausible_events_db.schema_migrations FORMAT TSV" \
    < "$MIGRATIONS_TSV" || return 1

  rm -rf -- "${BACKUP_DIR:?}"
}
|
||||
|
||||
"$@"
|
||||
57
compose.yml
57
compose.yml
@ -7,11 +7,12 @@ services:
|
||||
command: sh -c "sleep 10 && /entrypoint.sh db createdb && /entrypoint.sh db migrate && /entrypoint.sh run"
|
||||
depends_on:
|
||||
- db
|
||||
- events_db
|
||||
- plausible_events_db
|
||||
environment:
|
||||
- BASE_URL=https://$DOMAIN
|
||||
- SECRET_KEY_BASE
|
||||
- DATABASE_URL=postgres://plausible:plausible@${STACK_NAME}_db:5432/plausible
|
||||
- CLICKHOUSE_DATABASE_URL=http://${STACK_NAME}_plausible_events_db:8123/plausible_events_db
|
||||
- SMTP_HOST_ADDR
|
||||
- MAILER_EMAIL
|
||||
- SMTP_HOST_PORT
|
||||
@ -25,38 +26,45 @@ services:
|
||||
- internal
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
# `any`, not `on-failure`: a restore disrupts postgres under the app and Erlang then
|
||||
# shuts down gracefully (exit 0), which on-failure treats as done and never restarts.
|
||||
condition: any
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=8000"
|
||||
- "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})"
|
||||
- "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- coop-cloud.${STACK_NAME}.version=3.0.1+v2.0.0
|
||||
- coop-cloud.${STACK_NAME}.version=3.1.0+v2.0.0
|
||||
db:
|
||||
image: postgres:13.12
|
||||
configs:
|
||||
- source: db_entrypoint
|
||||
target: /docker-entrypoint.sh
|
||||
mode: 0555
|
||||
# Custom docker entrypoint to handle major Postgres version upgrades
|
||||
image: pgautoupgrade/pgautoupgrade:18-alpine
|
||||
volumes:
|
||||
- db-data:/var/lib/postgresql/data
|
||||
entrypoint: /docker-entrypoint.sh
|
||||
environment:
|
||||
# pin legacy PGDATA so the existing cluster on the volume is upgraded in place, not re-init'd
|
||||
- PGDATA=/var/lib/postgresql/data
|
||||
- POSTGRES_USER=plausible
|
||||
- POSTGRES_PASSWORD=plausible
|
||||
- POSTGRES_DB=plausible
|
||||
networks:
|
||||
- internal
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -U plausible -d plausible"]
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
retries: 60
|
||||
configs:
|
||||
- source: pg_backup
|
||||
target: /pg_backup.sh
|
||||
mode: 0555
|
||||
deploy:
|
||||
labels:
|
||||
backupbot.backup: "true"
|
||||
backupbot.backup.pre-hook: sh -c 'pg_dump -U "$$POSTGRES_USER" -Fc "$$POSTGRES_DB" | gzip > "/postgres.dump.gz"'
|
||||
backupbot.backup.path: "/postgres.dump.gz"
|
||||
backupbot.backup.post-hook: "rm -f /postgres.dump.gz"
|
||||
backupbot.backup.volumes.db-data.path: "postgres.dump.gz"
|
||||
backupbot.backup.pre-hook: "/pg_backup.sh backup"
|
||||
backupbot.backup.post-hook: "/pg_backup.sh backup_cleanup"
|
||||
backupbot.restore: "true"
|
||||
backupbot.restore.post-hook: sh -c 'gzip -d /postgres.dump.gz && pg_restore --clean -U "$$POSTGRES_USER" --dbname="$$PLAUSIBLE_DB" < /postgres.dump && rm -f /postgres.dump'
|
||||
backupbot.restore.post-hook: "/pg_backup.sh restore"
|
||||
|
||||
plausible_events_db:
|
||||
image: clickhouse/clickhouse-server:23.4.2.11-alpine
|
||||
@ -71,16 +79,19 @@ services:
|
||||
- source: clickhouse_entrypoint
|
||||
target: /custom-entrypoint.sh
|
||||
mode: 0555
|
||||
- source: clickhouse_backup
|
||||
target: /clickhouse_backup.sh
|
||||
mode: 0555
|
||||
networks:
|
||||
- internal
|
||||
deploy:
|
||||
labels:
|
||||
backupbot.backup: "true"
|
||||
backupbot.backup.pre-hook: clickhouse-backup create events
|
||||
backupbot.backup.path: "/var/lib/clickhouse/backup/events"
|
||||
backupbot.backup.post-hook: "rm -rf /var/lib/clickhouse/backup/events"
|
||||
backupbot.backup.volumes.event-data.path: "backup/events"
|
||||
backupbot.backup.pre-hook: "/clickhouse_backup.sh backup"
|
||||
backupbot.backup.post-hook: "/clickhouse_backup.sh backup_cleanup"
|
||||
backupbot.restore: "true"
|
||||
backupbot.restore.post-hook: clickhouse-backup restore --rm events && rm -rf /var/lib/clickhouse/backup/events"
|
||||
backupbot.restore.post-hook: "/clickhouse_backup.sh restore"
|
||||
|
||||
volumes:
|
||||
db-data:
|
||||
@ -98,10 +109,12 @@ configs:
|
||||
clickhouse-user-config:
|
||||
name: ${STACK_NAME}_clickhouse_user_config_${CLICKHOUSE_USER_CONF_VERSION}
|
||||
file: clickhouse-user-config.xml
|
||||
db_entrypoint:
|
||||
name: ${STACK_NAME}_db_entrypoint_${DB_ENTRYPOINT_VERSION}
|
||||
file: entrypoint.postgres.sh.tmpl
|
||||
template_driver: golang
|
||||
clickhouse_entrypoint:
|
||||
name: ${STACK_NAME}_clickhouse_entrypoint_${CLICKHOUSE_ENTRYPOINT_VERSION}
|
||||
file: entrypoint.clickhouse.sh
|
||||
pg_backup:
|
||||
name: ${STACK_NAME}_pg_backup_${PG_BACKUP_VERSION}
|
||||
file: pg_backup.sh
|
||||
clickhouse_backup:
|
||||
name: ${STACK_NAME}_clickhouse_backup_${CLICKHOUSE_BACKUP_SCRIPT_VERSION}
|
||||
file: clickhouse_backup.sh
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
#!/bin/bash
|
||||
# Install clickhouse-backup (powers this recipe's backup/restore hooks) before starting the
|
||||
# server. The binary is cached on the persistent volume keyed by version (downloaded at most
|
||||
# once per app) and fetched with bounded retries + a read timeout; the binary is verified before
|
||||
# being trusted or cached. If it truly cannot be installed the deploy fails loudly rather than
|
||||
# silently shipping broken backups.
|
||||
|
||||
set -ex
|
||||
set -e
|
||||
|
||||
CLICKHOUSE_BACKUP_VERSION=2.4.2
|
||||
|
||||
@ -17,13 +22,38 @@ elif [[ $ARCH =~ "x86_64" ]]; then
|
||||
ARCH="amd64"
|
||||
fi
|
||||
|
||||
wget \
|
||||
--quiet \
|
||||
--continue \
|
||||
--no-clobber \
|
||||
--output-document=/tmp/clickhouse-backup.tar.gz \
|
||||
"https://github.com/AlexAkulov/clickhouse-backup/releases/download/v${CLICKHOUSE_BACKUP_VERSION}/clickhouse-backup-linux-${ARCH}.tar.gz" 2>/dev/null
|
||||
CACHE_DIR=/var/lib/clickhouse/.ccci-bin
|
||||
CACHED="${CACHE_DIR}/clickhouse-backup-v${CLICKHOUSE_BACKUP_VERSION}"
|
||||
BIN=/usr/local/bin/clickhouse-backup
|
||||
URL="https://github.com/Altinity/clickhouse-backup/releases/download/v${CLICKHOUSE_BACKUP_VERSION}/clickhouse-backup-linux-${ARCH}.tar.gz"
|
||||
|
||||
tar -xf /tmp/clickhouse-backup.tar.gz --directory=/usr/local/bin --strip-components=3
|
||||
# Smoke-test an executable by running it with --version.
#
# Arguments: $1 - path (or name) of the program to probe
# Outputs:   nothing; the probe's stdout and stderr are discarded
# Returns:   the probe's exit status (0 means it ran and reported success)
binary_ok() {
  local candidate=$1
  "$candidate" --version &>/dev/null
}
|
||||
|
||||
/entrypoint.sh
|
||||
# Install clickhouse-backup into $BIN, preferring a previously verified copy.
#
# Globals:   CACHE_DIR, CACHED, BIN, URL (all read); calls binary_ok
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 once a binary that passes binary_ok is in place, 1 after 5 failed attempts
install_clickhouse_backup() {
  local tarball=/tmp/clickhouse-backup.tar.gz
  local attempt

  mkdir -p "$CACHE_DIR"

  # Fast path: a cached binary that still executes is copied into place as-is.
  if [ -x "$CACHED" ] && binary_ok "$CACHED"; then
    cp -f "$CACHED" "$BIN"
    echo "clickhouse-backup: using verified cached binary ($CACHED)"
    return 0
  fi
  rm -f "$CACHED"  # absent or fails to execute — re-fetch

  for attempt in 1 2 3 4 5; do
    if wget -T 30 --continue --output-document="$tarball" "$URL"; then
      if tar -xf "$tarball" --directory=/usr/local/bin --strip-components=3 \
        && binary_ok "$BIN"; then
        # Caching is best-effort: a failed copy must not fail an otherwise good install.
        cp -f "$BIN" "$CACHED" 2>/dev/null || true
        echo "clickhouse-backup: downloaded, verified + cached (attempt ${attempt})"
        return 0
      fi
      # The download finished but the archive did not yield a working binary.
      # With --continue the same bytes would be reused on every retry, so drop
      # them and let the next attempt start from scratch. (A download that
      # itself failed is kept so --continue can resume it.)
      rm -f "$tarball"
    fi
    echo "clickhouse-backup: fetch attempt ${attempt}/5 failed" >&2
    # Linear back-off between attempts; no pointless sleep after the last one.
    if [ "$attempt" -lt 5 ]; then
      sleep $((attempt * 10))
    fi
  done

  echo "clickhouse-backup: could not install after 5 attempts — failing the deploy (without it backup/restore would be silently broken)" >&2
  return 1
}
|
||||
|
||||
install_clickhouse_backup
|
||||
|
||||
exec /entrypoint.sh
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
MIGRATION_MARKER=$PGDATA/migration_in_progress
|
||||
OLDDATA=$PGDATA/old_data
|
||||
NEWDATA=$PGDATA/new_data
|
||||
|
||||
if [ -e $MIGRATION_MARKER ]; then
|
||||
echo "FATAL: migration was started but did not complete in a previous run. manual recovery necessary"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -f $PGDATA/PG_VERSION ]; then
|
||||
DATA_VERSION=$(cat $PGDATA/PG_VERSION)
|
||||
|
||||
if [ -n "$DATA_VERSION" -a "$PG_MAJOR" != "$DATA_VERSION" ]; then
|
||||
echo "postgres data version $DATA_VERSION found, but need $PG_MAJOR. Starting migration"
|
||||
echo "Installing postgres $DATA_VERSION"
|
||||
sed -i "s/$/ $DATA_VERSION/" /etc/apt/sources.list.d/pgdg.list
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
postgresql-$DATA_VERSION \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
echo "shuffling around"
|
||||
gosu postgres mkdir $OLDDATA $NEWDATA
|
||||
chmod 700 $OLDDATA $NEWDATA
|
||||
mv $PGDATA/* $OLDDATA/ || true
|
||||
touch $MIGRATION_MARKER
|
||||
echo "running initdb"
|
||||
# abuse entrypoint script for initdb by making server error out
|
||||
gosu postgres bash -c "export PGDATA=$NEWDATA ; /usr/local/bin/docker-entrypoint.sh --invalid-arg || true"
|
||||
echo "running pg_upgrade"
|
||||
cd /tmp
|
||||
gosu postgres pg_upgrade --link -b /usr/lib/postgresql/$DATA_VERSION/bin -d $OLDDATA -D $NEWDATA -U $POSTGRES_USER
|
||||
cp $OLDDATA/pg_hba.conf $NEWDATA/
|
||||
mv $NEWDATA/* $PGDATA
|
||||
rm -rf $OLDDATA
|
||||
rmdir $NEWDATA
|
||||
rm $MIGRATION_MARKER
|
||||
echo "migration complete"
|
||||
fi
|
||||
fi
|
||||
|
||||
/usr/local/bin/docker-entrypoint.sh postgres
|
||||
29
pg_backup.sh
Normal file
29
pg_backup.sh
Normal file
@ -0,0 +1,29 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
# The dump lives at the db-data volume root: backup-bot-two v2 snapshots paths inside
|
||||
# named volumes (backupbot.backup.volumes.db-data.path), not the container root fs.
|
||||
DUMP=/var/lib/postgresql/data/postgres.dump
|
||||
|
||||
# Pre-backup hook: write a gzip-compressed custom-format dump to "$DUMP.gz".
#
# Globals:  DUMP (read), POSTGRES_USER, POSTGRES_DB (read)
# Returns:  0 on success; 1 if pg_dump or gzip fails, in which case no archive is left.
backup() {
  # Clear anything a previous, interrupted run left behind.
  rm -f -- "$DUMP" "$DUMP.gz"

  # Dump to a file rather than piping into gzip: plain sh has no pipefail, so in a
  # pipeline a failing pg_dump would go unnoticed and a truncated archive would be
  # snapshotted as if it were a good backup.
  if ! pg_dump -U "$POSTGRES_USER" -Fc -f "$DUMP" "$POSTGRES_DB"; then
    rm -f -- "$DUMP"
    echo "pg_backup: pg_dump failed — no dump written" >&2
    return 1
  fi

  # Produces "$DUMP.gz" and removes the uncompressed file.
  gzip -f "$DUMP" || return 1
}
|
||||
|
||||
# Post-backup hook: remove the dump files from the data volume.
#
# Globals:  DUMP (read)
# Returns:  rm's exit status; files that are already gone are not an error.
backup_cleanup() {
  # Also remove an uncompressed dump: a restore that died after decompressing
  # leaves one behind, and nothing else would ever clean it up. `:?` refuses to
  # run with an empty path.
  rm -f -- "${DUMP:?}.gz" "$DUMP"
}
|
||||
|
||||
# Post-restore hook: load "$DUMP.gz" into the live database, then drop every other
# connection to it so clients reconnect.
#
# Globals:  DUMP (read), POSTGRES_USER, POSTGRES_DB (read)
# Returns:  0 on success; 1 if decompression or pg_restore fails. The uncompressed
#           dump is removed in both cases.
restore() {
  # -f: overwrite an uncompressed dump left by an earlier failed restore instead of
  # refusing to decompress.
  gzip -df "$DUMP.gz" || return 1

  # --if-exists: otherwise DROPs on objects absent from the live db error out and
  # pg_restore exits 1, killing the chain and leaving the dump behind.
  if ! pg_restore --clean --if-exists -U "$POSTGRES_USER" --dbname="$POSTGRES_DB" < "$DUMP"; then
    # Never leave the dump sitting inside the data directory, even on failure.
    rm -f -- "$DUMP"
    echo "pg_backup: pg_restore failed — dump removed" >&2
    return 1
  fi
  rm -f -- "$DUMP"

  # pg_restore --clean recreates objects under the live app, so its pooled connections
  # keep stale type-OID caches ('cache lookup failed for type ...' crash loops, e.g.
  # Oban). Terminate them so Ecto reconnects fresh.
  psql -U "$POSTGRES_USER" -d "$POSTGRES_DB" -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = current_database() AND pid <> pg_backend_pid();"
}
|
||||
|
||||
"$@"
|
||||
Reference in New Issue
Block a user