fix(2): ghost DEPLOY_TIMEOUT/TIMEOUT 1200->2400 — MySQL cold-boot migration + healthcheck-kill+retry needs >20min on slow node (install timed out as it converged)

This commit is contained in:
2026-05-30 04:41:59 +01:00
parent fca4866ea1
commit bdaeb41496

View File

@@ -8,10 +8,15 @@
# mysqldump pre-hook; P4 (ops.py + test_{backup,restore,upgrade}.py) seeds a `ci_marker` row there.
HEALTH_PATH = "/" # Ghost serves a themed site HTML at root (200)
HEALTH_OK = (200,)
DEPLOY_TIMEOUT = 1200 # subprocess timeout for `abra app deploy` (cold-start ghost ~15-20min)
DEPLOY_TIMEOUT = 2400 # subprocess timeout for `abra app deploy`
HTTP_TIMEOUT = 900
# Ghost's first-boot does theme + DB migrations against a fresh MySQL `ghost` DB; default TIMEOUT=300
# (abra's internal convergence wait) is too tight on cc-ci's single node. Bump to 1200s, matched
# to DEPLOY_TIMEOUT so abra finishes its convergence wait before the Python subprocess timeout.
EXTRA_ENV = {"TIMEOUT": "1200"}
# Ghost's first-boot does a full schema migration (dozens of tables) against a fresh MySQL `ghost`
# DB. On cc-ci's slow single node this takes ~6min, during which the recipe healthcheck
# (start_period 1m → ~5min grace) marks the still-booting task unhealthy and swarm kills it; the
# NEXT task finds the schema already created and boots fast → converges. But the first task's
# migration + the early MySQL-not-ready (`exit 2`) app restarts can eat ~18min, so the previous 1200s
# convergence wait timed out right as it was converging. Bump to 2400s (matched to DEPLOY_TIMEOUT) so
# the post-migration fast-boot task has room to converge within one deploy (the volume persists
# across the in-deploy task restarts). Documented as heavy-recipe cold-boot fragility in DECISIONS.
EXTRA_ENV = {"TIMEOUT": "2400"}