From bdaeb41496320b62cd8df6da60ba4e4abbcde453 Mon Sep 17 00:00:00 2001
From: autonomic-bot <maxf.account@proton.me>
Date: Sat, 30 May 2026 04:41:59 +0100
Subject: [PATCH] =?UTF-8?q?fix(2):=20ghost=20DEPLOY=5FTIMEOUT/TIMEOUT=2012?=
 =?UTF-8?q?00->2400=20=E2=80=94=20MySQL=20cold-boot=20migration=20+=20heal?=
 =?UTF-8?q?thcheck-kill+retry=20needs=20>20min=20on=20slow=20node=20(insta?=
 =?UTF-8?q?ll=20timed=20out=20as=20it=20converged)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/ghost/recipe_meta.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/ghost/recipe_meta.py b/tests/ghost/recipe_meta.py
index 8236224..dccdac5 100644
--- a/tests/ghost/recipe_meta.py
+++ b/tests/ghost/recipe_meta.py
@@ -8,10 +8,15 @@
 # mysqldump pre-hook; P4 (ops.py + test_{backup,restore,upgrade}.py) seeds a `ci_marker` row there.
 HEALTH_PATH = "/"  # Ghost serves a themed site HTML at root (200)
 HEALTH_OK = (200,)
-DEPLOY_TIMEOUT = 1200  # subprocess timeout for `abra app deploy` (cold-start ghost ~15-20min)
+DEPLOY_TIMEOUT = 2400  # subprocess timeout for `abra app deploy`
 HTTP_TIMEOUT = 900
 
-# Ghost's first-boot does theme + DB migrations against a fresh MySQL `ghost` DB; default TIMEOUT=300
-# (abra's internal convergence wait) is too tight on cc-ci's single node. Bump to 1200s, matched
-# to DEPLOY_TIMEOUT so abra finishes its convergence wait before the Python subprocess timeout.
-EXTRA_ENV = {"TIMEOUT": "1200"}
+# Ghost's first boot runs a full schema migration (dozens of tables) against a fresh MySQL `ghost`
+# DB. On cc-ci's slow single node this takes ~6min, during which the recipe healthcheck
+# (start_period 1m → ~5min grace) marks the still-booting task unhealthy and swarm kills it; the NEXT
+# task finds the schema already created, boots fast and converges. But the first task's migration
+# plus the early app restarts while MySQL is not ready (`exit 2`) can eat ~18min, so the previous
+# 1200s convergence wait timed out just as the deploy was converging. Bump to 2400s (matched to
+# DEPLOY_TIMEOUT) so the post-migration fast-boot task has room to converge within one deploy (the
+# volume persists across in-deploy task restarts). See DECISIONS (heavy-recipe cold-boot fragility).
+EXTRA_ENV = {"TIMEOUT": "2400"}