From 0f2cc2d704816fd5eabbcc3c11d305b792d28109 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Sat, 30 May 2026 17:20:20 +0100 Subject: [PATCH] =?UTF-8?q?feat(2):=20ghost=20F2-14b=20overlay=20migration?= =?UTF-8?q?=20=E2=80=94=20start=5Fperiod=20bump=20moved=20to=20recipe-PR?= =?UTF-8?q?=20(ghost#1=20head=20ae43ffe,=20literal=2015m=20on=20app=20heal?= =?UTF-8?q?thcheck);=20DELETE=20cc-ci=20compose.ccci-health.yml=20+=20inst?= =?UTF-8?q?all=5Fsteps.sh=20+=20COMPOSE=5FFILE/CHAOS=5FBASE=5FDEPLOY.=20An?= =?UTF-8?q?ti-drift=20(plan=20=C2=A79):=20recipe-as-tested=20=3D=3D=20reci?= =?UTF-8?q?pe-as-published.=20env-var=20start=5Fperiod=20impossible=20(abr?= =?UTF-8?q?a=20pre-subst=20duration=20validation,=20Adversary-reproduced?= =?UTF-8?q?=204b862f6).=20Next:=20run=20ghost=20on=20ae43ffe=20head.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/ghost/compose.ccci-health.yml | 18 ------------------ tests/ghost/install_steps.sh | 26 -------------------------- tests/ghost/recipe_meta.py | 28 +++++++++++++++++----------- 3 files changed, 17 insertions(+), 55 deletions(-) delete mode 100644 tests/ghost/compose.ccci-health.yml delete mode 100755 tests/ghost/install_steps.sh diff --git a/tests/ghost/compose.ccci-health.yml b/tests/ghost/compose.ccci-health.yml deleted file mode 100644 index 7ec2b1a..0000000 --- a/tests/ghost/compose.ccci-health.yml +++ /dev/null @@ -1,18 +0,0 @@ -# cc-ci deploy overlay (NOT a recipe change) — raises ONLY the app healthcheck start_period. -# -# Ghost's first-boot runs a full schema migration (dozens of CREATE TABLEs, each a separate MySQL -# round-trip → ~6-9min on cc-ci) against the fresh `ghost` DB. The upstream recipe healthcheck uses -# `start_period: 1m` (+ 10×30s retries ≈ 6min grace); on cc-ci the migration regularly exceeds that, -# so swarm marks the still-migrating task unhealthy and KILLS it mid-migration — which leaves a stale -# `migrations_lock` row, and every later task then refuses to boot (`MigrationsAreLockedError` -# deadlock). This is round-trip-bound, so more vCPU does not close the gap. -# -# Raising the START_PERIOD (failures ignored during it; a PASS still marks healthy immediately) lets -# the fresh migration finish + release the lock, after which Ghost serves and the (unchanged) check -# passes. This is DEPLOY/infra tuning, not a test change — no assertion is weakened, and the app's -# real healthcheck still gates readiness. Applied via recipe_meta COMPOSE_FILE; only the install -# tier's fresh migration needs it (the upgrade redeploy boots on the already-populated DB → fast). -services: - app: - healthcheck: - start_period: 900s diff --git a/tests/ghost/install_steps.sh b/tests/ghost/install_steps.sh deleted file mode 100755 index d816bce..0000000 --- a/tests/ghost/install_steps.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash -# ghost — INSTALL-TIME hook (Phase 2 Q4.4). Runs during the install tier AFTER `abra app new` + -# EXTRA_ENV + `abra app secret generate` and BEFORE the single `abra app deploy` -# (lifecycle.py::_run_install_steps), with CCCI_RECIPE / CCCI_APP_DOMAIN / CCCI_APP_ENV in env. -# -# Purpose: provide the cc-ci deploy overlay `compose.ccci-health.yml` (app healthcheck start_period -# bump) into the recipe checkout so recipe_meta's COMPOSE_FILE (compose.yml:compose.ccci-health.yml) -# resolves. Without the larger start_period, Ghost's ~6-9min fresh-DB migration is killed mid-flight -# by the recipe's 1m-start_period healthcheck, leaving a stale migrations_lock → deadlock (see the -# overlay file header). The overlay is an UNTRACKED file in the recipe repo, so `git checkout -f` -# (the upgrade tier's re-checkout to PR head) preserves it — COMPOSE_FILE keeps resolving across -# install AND upgrade deploys. CHAOS_BASE_DEPLOY=True (recipe_meta) lets the pinned base deploy -# proceed despite this untracked file (abra's clean-tree check would otherwise FATA). -set -euo pipefail - -: "${CCCI_RECIPE:?missing CCCI_RECIPE}" -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -RECIPE_DIR="${HOME}/.abra/recipes/${CCCI_RECIPE}" - -if [ ! -d "$RECIPE_DIR" ]; then - echo " ghost install_steps: recipe dir $RECIPE_DIR missing — cannot provide health overlay" >&2 - exit 1 -fi - -cp "$SCRIPT_DIR/compose.ccci-health.yml" "$RECIPE_DIR/compose.ccci-health.yml" -echo " ghost install_steps: provided compose.ccci-health.yml (healthcheck start_period bump) to ${CCCI_RECIPE}" diff --git a/tests/ghost/recipe_meta.py b/tests/ghost/recipe_meta.py index e007239..e55e339 100644 --- a/tests/ghost/recipe_meta.py +++ b/tests/ghost/recipe_meta.py @@ -12,17 +12,23 @@ DEPLOY_TIMEOUT = 1200 # subprocess timeout for `abra app deploy` HTTP_TIMEOUT = 900 # Ghost's fresh-DB first boot runs a full schema migration (dozens of CREATE TABLEs, each a separate -# MySQL round-trip → ~6-9min on cc-ci, round-trip-bound so more vCPU doesn't help). The upstream -# recipe healthcheck (`start_period: 1m` + 10×30s ≈ 6min grace) is too tight: swarm kills the still- -# migrating task, leaving a stale `migrations_lock` → every later task deadlocks -# (`MigrationsAreLockedError`). cc-ci provides a DEPLOY overlay `compose.ccci-health.yml` (raises the -# app healthcheck start_period to 900s; failures ignored during it, a PASS still marks healthy at -# once) via COMPOSE_FILE + install_steps.sh, so the fresh migration finishes + releases the lock. -# This is infra/deploy tuning — NO test/assertion is weakened. CHAOS_BASE_DEPLOY lets the pinned base -# deploy proceed with the untracked overlay present. TIMEOUT 1200s = migration (≤9min) + convergence, -# bounded so a genuine failure still fails (not a long blackout). See DECISIONS (ghost MySQL cold-boot). -CHAOS_BASE_DEPLOY = True +# MySQL round-trip → ~6-9min on cc-ci, round-trip-bound so more vCPU doesn't help). The published +# recipe healthcheck used `start_period: 1m` (+10×30s ≈ 6min grace) — too tight on cc-ci: swarm kills +# the still-migrating task, leaving a stale `migrations_lock` → every later task deadlocks +# (`MigrationsAreLockedError`). +# +# FIXED IN THE RECIPE-PR (recipe-maintainers/ghost#1, branch ci/mysql-backup): the app-service +# healthcheck `start_period` is bumped to a literal 15m in the recipe itself — the real recipe +# everyone runs, NOT a cc-ci compose fork. This is the plan §9 / plan-prefer-env-over-compose-overlay.md +# anti-drift path: start_period CANNOT be expressed as an env var (abra validates the literal compose +# 'duration' format BEFORE env substitution — `${VAR}` / `"${VAR:-1m}"` → FATA 'Does not match format +# duration'; reproduced by the Adversary, REVIEW-2 4b862f6), so a literal recipe-PR bump is the only +# §9-compliant way to widen it. Precedent: discourse + lasuite-drive collabora start_period recipe-PRs. +# start_period only widens the startup grace window (a healthy check still marks healthy at once → fast +# hosts unaffected); NO test/assertion is weakened. With the bump in the recipe, the former cc-ci +# DEPLOY overlay (`compose.ccci-health.yml` + `install_steps.sh` + COMPOSE_FILE + CHAOS_BASE_DEPLOY) +# is DELETED. TIMEOUT 1200s = migration (≤9min) + convergence, bounded so a genuine failure still +# fails (not a long blackout). See DECISIONS (ghost MySQL cold-boot / start_period recipe-PR). EXTRA_ENV = { "TIMEOUT": "1200", - "COMPOSE_FILE": "compose.yml:compose.ccci-health.yml", }