diff --git a/tests/discourse/compose.ccci.yml b/tests/discourse/compose.ccci.yml new file mode 100644 index 0000000..1053580 --- /dev/null +++ b/tests/discourse/compose.ccci.yml @@ -0,0 +1,29 @@ +--- +version: "3.8" +# cc-ci overlay (Phase 2 Q4.6) — minimal, single-purpose: make the UPGRADE-tier BASE deploy (the +# previous published discourse version) deployable so upgrade-to-latest can run. +# +# WHY THIS OVERLAY EXISTS (plan-ccci-compose-overlay-policy.md §1 "minimal justified fallback" + +# the §1 mandate that upgrade-to-latest must ALWAYS run): the harness base-deploys the from-version +# (UPGRADE_BASE_VERSION = 0.7.0+3.3.1), then `deploy --chaos` to the recipe-PR head. Two blockers on +# that published base, both resolved here, NEITHER weakening any test: +# 1. RE-PIN: every published discourse tag pins `bitnami/discourse:3.3.1` (and 0.6.3 → 3.1.2), +# but Docker Hub REMOVED the bitnami/discourse namespace (404). The recipe-PR (recipe-maintainers/ +# discourse#1) re-pins app+sidekiq to `bitnamilegacy/discourse:3.3.1` (the legit upstream +# relocation of the identical image). This overlay applies the SAME namespace-only re-pin to the +# BASE 0.7.0 (identical version 3.3.1, identical image content) so the from-version pulls — exactly +# the policy-blessed "minimal bitnami→bitnamilegacy re-pin overlay on the 0.7.0 from-version". +# 2. GRACE: discourse's Rails cold first boot (DB migrate + asset precompile) is 15-25min on cc-ci, +# exceeding the published 5m start_period → swarm kills the still-booting app. start_period CANNOT +# be an env var (abra validates the literal 'duration' BEFORE substitution → FATA; Adversary- +# reproduced, REVIEW-2 4b862f6), so we widen it to a literal 20m on the BASE. The PR head already +# ships 20m, so this overlay is idempotent on the head (it persists untracked across the checkout). 
+# Both changes are namespace/grace-only: identical image content, a healthy check still marks healthy +# immediately → NO assertion is weakened and no defect is masked. +services: + app: + image: bitnamilegacy/discourse:3.3.1 + healthcheck: + start_period: 20m + sidekiq: + image: bitnamilegacy/discourse:3.3.1 diff --git a/tests/discourse/install_steps.sh b/tests/discourse/install_steps.sh new file mode 100755 index 0000000..330a5cc --- /dev/null +++ b/tests/discourse/install_steps.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# discourse — INSTALL-TIME hook (Phase 2 Q4.6). Runs during the install tier AFTER `abra app new` + +# EXTRA_ENV + `abra app secret generate` and BEFORE the single `abra app deploy` +# (lifecycle.py::_run_install_steps), with CCCI_RECIPE / CCCI_APP_DOMAIN in env. +# +# Purpose: provide the cc-ci re-pin+grace overlay (compose.ccci.yml) to the recipe checkout so the +# UPGRADE-tier BASE deploy (published 0.7.0+3.3.1, whose compose pins the Docker-Hub-removed +# `bitnami/discourse:3.3.1` and ships a too-tight 5m start_period) is deployable and can survive the +# 15-25min Rails cold boot — so upgrade-to-latest can run. See compose.ccci.yml's header for the full +# rationale. The overlay is referenced by recipe_meta COMPOSE_FILE; it is a cc-ci file (not part of the +# recipe), so copying it here makes it resolvable. It persists across the later `git checkout <head>` +# (untracked) so the head deploy also merges it (idempotent — the PR head already re-pins + ships 20m). +# CHAOS_BASE_DEPLOY=True is set so abra's pinned-deploy clean-tree check doesn't FATA on the overlay. +set -euo pipefail + +: "${CCCI_RECIPE:?missing CCCI_RECIPE}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +RECIPE_DIR="${HOME}/.abra/recipes/${CCCI_RECIPE}" + +if [ ! 
-d "$RECIPE_DIR" ]; then + echo " discourse install_steps: recipe dir $RECIPE_DIR missing — cannot provide compose.ccci.yml" >&2 + exit 1 +fi + +cp "$SCRIPT_DIR/compose.ccci.yml" "$RECIPE_DIR/compose.ccci.yml" +echo " discourse install_steps: provided compose.ccci.yml (bitnamilegacy re-pin + 20m start_period grace) to recipe checkout (${CCCI_RECIPE})" diff --git a/tests/discourse/recipe_meta.py b/tests/discourse/recipe_meta.py index 2505c45..bfe2280 100644 --- a/tests/discourse/recipe_meta.py +++ b/tests/discourse/recipe_meta.py @@ -9,27 +9,30 @@ HEALTH_OK = (200,) DEPLOY_TIMEOUT = 2400 # slow Rails cold boot (15-25min); matches the EXTRA_ENV TIMEOUT below HTTP_TIMEOUT = 1200 -# Slow-cold-boot handling via a LITERAL recipe-PR start_period bump, NOT a cc-ci compose overlay -# (plan.md §9 anti-drift guardrail). discourse's 15-25min Rails cold boot exceeds the recipe -# healthcheck's default start_period (5m) + grace, so swarm would kill the still-booting app and the -# deploy never converges. §9 pt1 prefers exposing such a value as an env var — but abra REJECTS -# env-interpolation in healthcheck `start_period` (`FATA ...Does not match format 'duration'` for both -# `${VAR}` and quoted `"${VAR:-5m}"`; it validates the literal compose duration before substitution, -# and no catalogue recipe env-interpolates start_period). So the §9-compliant fix is a LITERAL bump in -# the recipe-PR (recipe-maintainers/discourse#1): `start_period: 20m` on the app healthcheck — a change -# to the recipe EVERYONE runs (not a cc-ci fork), and strictly safer (start_period only widens the -# startup grace; a healthy check still marks healthy immediately, so fast hosts are unaffected). -# Precedent: the lasuite-drive collabora start_period recipe-PR. (See DECISIONS.md 2026-05-30.) +# Slow-cold-boot handling: the recipe-PR (recipe-maintainers/discourse#1) bumps the app healthcheck +# `start_period` to a LITERAL 20m for the HEAD. 
discourse's 15-25min Rails cold boot (DB migrate + +# asset precompile) exceeds the published 5m start_period → swarm would kill the still-booting app. +# start_period CANNOT be an env var (abra validates the literal compose 'duration' BEFORE substitution +# → `FATA ...Does not match format 'duration'`; Adversary-reproduced, REVIEW-2 4b862f6), so a literal +# recipe-PR bump is the only §9-compliant way to widen it. start_period is grace-only (a healthy check +# still marks healthy immediately → fast hosts unaffected). Precedent: lasuite-drive collabora PR. # TIMEOUT (abra's internal convergence wait) is raised to outlast the boot. +# +# UPGRADE-tier BASE (compose.ccci.yml + UPGRADE_BASE_VERSION): upgrade-to-latest must ALWAYS run +# (plan-ccci-compose-overlay-policy.md §1). The from-version is the latest published 0.7.0+3.3.1 +# (UPGRADE_BASE_VERSION below; the PR head is 0.7.0-based, so 0.7.0 is the true predecessor — not the +# default [-2]=0.6.3). The published 0.7.0 has TWO blockers, both resolved by the policy-blessed +# minimal base overlay compose.ccci.yml (see its header), neither weakening a test: +# (1) it pins the Docker-Hub-removed `bitnami/discourse:3.3.1` (404) → overlay re-pins app+sidekiq to +# `bitnamilegacy/discourse:3.3.1` (namespace-only, identical image), the same re-pin the PR makes; +# (2) its 5m start_period is too tight for the 15-25min Rails boot → overlay widens it to 20m (grace). +# install_steps.sh provides the overlay; CHAOS_BASE_DEPLOY skips the clean-tree gate on the untracked +# overlay; it persists across the head checkout (idempotent — the PR head already re-pins + ships 20m). +# Upgrade crossover: 0.7.0 (re-pinned base) → PR head; full assertions run on the HEAD. The 0.7.0 +# *custom* tests are not separately run (custom tier runs once, on the head — policy §1 allows skip+record). 
+CHAOS_BASE_DEPLOY = True +UPGRADE_BASE_VERSION = "0.7.0+3.3.1" EXTRA_ENV = { "TIMEOUT": "2400", + "COMPOSE_FILE": "compose.yml:compose.ccci.yml", } - -# Upgrade tier — N/A (declared NOT-TESTABLE under cc-ci; Adversary §7.1 sign-off GRANTED, REVIEW-2 -# efe3790). Both published predecessor versions pin Docker-Hub-removed images: -# 0.7.0+3.3.1 → bitnami/discourse:3.3.1 (404), 0.6.3+3.1.2 → bitnami/discourse:3.1.2 (404). -# The recipe-PR re-pins the HEAD to bitnamilegacy/discourse:3.3.1 (a legit upstream fix), but per -# plan.md §9 / plan-prefer-env-over-compose-overlay.md pt2 we declare an old base whose image is gone -# NOT-TESTABLE rather than authoring an image-repin compose overlay to resurrect it. So no honest -# prev→head crossover is deployable here → the upgrade tier is omitted (run STAGES without `upgrade`). -# (P1 coverage is the maximal subset install+backup+restore+custom; P4 restore-hook is the headline.)