feat(prevb): dynamic upgrade base (last-green→main→skip) + per-recipe previous/ overlay; migrate discourse off static base + leaky overlay
All checks were successful
continuous-integration/drone/push Build is passing

- resolve_upgrade_base: BasePlan(kind=version|ref|skip); last-green (warm canonical) primary,
  main-tip fallback, declared skip else. UPGRADE_BASE_VERSION retained as optional override.
- deploy_app: base_ref path (chaos-deploy a main-tip/last-green commit) + apply_previous wiring.
- lifecycle: previous/ surface (has_previous, previous_target_version, previous_status decision,
  provide/remove overlay, compose_file add/remove, recipe_branch_commit, stack_service_names).
- generic.perform_upgrade: strip previous/ overlay + COMPOSE_FILE entry before head redeploy.
- discourse: compose.ccci.yml now environmental-only (order: stop-first); removed bitnamilegacy
  pins + sidekiq + UPGRADE_BASE_VERSION; test_upgrade.py asserts head image == official 3.5.3 + no sidekiq.
- unit tests: resolve_upgrade_base matrix + previous/ apply/skip/stale + COMPOSE_FILE layering.
This commit is contained in:
autonomic-bot
2026-06-17 00:14:53 +00:00
parent 1090abb97a
commit bb2e3c6b2c
8 changed files with 532 additions and 137 deletions

View File

@ -251,6 +251,16 @@ def perform_upgrade(
before = lifecycle.deployed_identity(domain)
if head_ref:
lifecycle.recipe_checkout_ref(recipe, head_ref)
# Phase prevb: strip the base-only `previous/` overlay before the head redeploy so the PR head
# runs UNMODIFIED (the version-specific repair must never leak onto the head). Delete the copied
# compose.previous.yml and drop it from COMPOSE_FILE in the app .env. No-op when no previous/ was
# applied (the file/entry are absent). The environmental compose.ccci.yml stays (all deploys).
lifecycle.remove_previous_overlay(recipe)
cf_now = abra.env_get(domain, "COMPOSE_FILE")
if cf_now and lifecycle.PREVIOUS_COMPOSE in cf_now:
cf_stripped = lifecycle.compose_file_remove(cf_now, lifecycle.PREVIOUS_COMPOSE)
abra.env_set(domain, "COMPOSE_FILE", cf_stripped)
print(f" previous-overlay: COMPOSE_FILE for head redeploy = {cf_stripped}", flush=True)
# UPGRADE_EXTRA_ENV (F2-14c): a recipe may need different app .env for the upgrade-TARGET deploy
# than for the base — e.g. mumble's `compose.host-ports.yml` overlay exists ONLY in the newer
# (target) version, so the base deploys minimally WITHOUT it and the upgrade adds it to COMPOSE_FILE

View File

@ -154,6 +154,145 @@ def provide_ccci_overlay(recipe: str) -> None:
)
# ---------------------------------------------------------------------------------------------
# Phase prevb: dynamic upgrade base + per-recipe `previous/` overlay.
#
# `previous/` holds the MINIMAL config needed to deploy the *previous (last-green) version* when it
# can't deploy as-published (e.g. an image relocation). It is applied ONLY to the base deploy and
# ONLY when the resolved base is that exact published version; NEVER to the PR head; on a main-tip
# base or version mismatch it is skipped + flagged stale. Mechanism mirrors the environmental
# compose.ccci.yml overlay (copied untracked into the checkout, referenced via COMPOSE_FILE), but is
# stripped before the head redeploy so the head runs unmodified. (plan-phase-prevb §2.)
# ---------------------------------------------------------------------------------------------
# File name of the base-only overlay: looked up inside the recipe's `previous/` folder, copied under
# the same name into the recipe checkout, and used as the COMPOSE_FILE entry that references it.
PREVIOUS_COMPOSE = "compose.previous.yml"
def previous_dir(recipe: str) -> str:
    """Return the path of the recipe's `previous/` folder under the tests directory."""
    tests_root = meta_mod.TESTS_DIR
    return os.path.join(tests_root, recipe, "previous")
def has_previous(recipe: str) -> bool:
    """Report whether the recipe's `previous/` folder contains a compose.previous.yml file."""
    overlay_path = os.path.join(previous_dir(recipe), PREVIOUS_COMPOSE)
    return os.path.isfile(overlay_path)
def previous_target_version(recipe: str) -> str | None:
    """Read the version that the recipe's `previous/` folder declares it targets.

    The declaration lives in `previous/VERSION`: the first line that is neither blank nor a
    `#` comment (surrounding whitespace stripped) is the version. Returns None when the marker
    cannot be opened/read or holds no such line. Callers compare the result with the resolved
    base version to decide whether `previous/` applies or is stale.
    """
    marker = os.path.join(previous_dir(recipe), "VERSION")
    try:
        with open(marker) as fh:
            stripped = (raw.strip() for raw in fh)
            # First meaningful line wins; None when the file has only blanks/comments.
            return next((s for s in stripped if s and not s.startswith("#")), None)
    except OSError:
        return None
def previous_status(recipe: str, base_kind: str, base_version: str | None) -> dict:
    """Decide whether the recipe's `previous/` overlay applies to the resolved upgrade base.

    Returns a dict with keys `apply`, `stale` and `reason`, chosen by the first matching row:

    - no `previous/` overlay file               -> apply=False, stale=False (nothing to do)
    - base_kind != "version" or no base_version -> apply=False, stale=True  (previous/ only
                                                   repairs a pinned published base)
    - no `previous/VERSION` marker              -> apply=False, stale=True  (cannot version-guard)
    - marker != base_version                    -> apply=False, stale=True  (targets another version)
    - marker == base_version                    -> apply=True,  stale=False

    `reason` is a human-readable explanation for the stale rows and "" otherwise.
    """

    def _stale(why: str) -> dict:
        # Every "present but not applicable" outcome has the same shape.
        return {"apply": False, "stale": True, "reason": why}

    if not has_previous(recipe):
        return {"apply": False, "stale": False, "reason": ""}

    if not (base_kind == "version" and base_version):
        return _stale(
            f"previous/ present but the resolved base is not a pinned published version "
            f"(base_kind={base_kind}) — previous/ only repairs a published base; not applied"
        )

    target = previous_target_version(recipe)
    if not target:
        return _stale("previous/ has no VERSION marker — cannot version-guard; not applied")

    if target == base_version:
        return {"apply": True, "stale": False, "reason": ""}

    return _stale(f"previous/ targets {target}, base is {base_version} — stale, remove it")
def provide_previous_overlay(recipe: str) -> None:
    """Copy the recipe's compose.previous.yml from `previous/` into its recipe checkout.

    This makes a COMPOSE_FILE entry naming the overlay resolvable for the base deploy. Does
    nothing when the recipe has no overlay file.

    Raises:
        RuntimeError: the recipe checkout directory does not exist.
    """
    overlay_src = os.path.join(previous_dir(recipe), PREVIOUS_COMPOSE)
    if os.path.isfile(overlay_src):
        checkout = abra.recipe_dir(recipe)
        if not os.path.isdir(checkout):
            raise RuntimeError(f"recipe checkout missing for {recipe}: {checkout}")
        overlay_dest = os.path.join(checkout, PREVIOUS_COMPOSE)
        shutil.copy(overlay_src, overlay_dest)
        notice = (
            f" previous-overlay: provided {PREVIOUS_COMPOSE} to the {recipe} base checkout "
            "(base-only; stripped before the head redeploy)"
        )
        print(notice, flush=True)
def remove_previous_overlay(recipe: str) -> None:
    """Delete compose.previous.yml from the recipe checkout, ahead of the head redeploy.

    No-op when the file is absent. Removal stays best-effort (never raises), but a failed
    delete is reported instead of being swallowed silently: a leftover overlay in the head
    checkout is exactly what this step exists to prevent, so it must be visible in the log.
    """
    overlay_path = os.path.join(abra.recipe_dir(recipe), PREVIOUS_COMPOSE)
    if not os.path.isfile(overlay_path):
        return
    try:
        os.remove(overlay_path)
    except FileNotFoundError:
        # Vanished between the check and the delete: same outcome as "absent".
        return
    except OSError as exc:
        print(
            f"!! previous-overlay: could not remove {PREVIOUS_COMPOSE} from the head checkout "
            f"({exc})",
            flush=True,
        )
        return
    print(f" previous-overlay: removed {PREVIOUS_COMPOSE} from the head checkout", flush=True)
def compose_file_add(compose_file: str, overlay: str) -> str:
    """Return the ':'-separated COMPOSE_FILE value with `overlay` appended if not already listed.

    Pure function. Empty segments are dropped; a missing or empty value is treated as
    `compose.yml` before the overlay is added.
    """
    entries = list(filter(None, (compose_file or "").split(":")))
    if not entries:
        entries = ["compose.yml"]
    if overlay in entries:
        return ":".join(entries)
    return ":".join([*entries, overlay])
def compose_file_remove(compose_file: str, overlay: str) -> str:
    """Return the ':'-separated COMPOSE_FILE value with every `overlay` entry removed.

    Pure function. Remaining entries keep their order, empty segments are dropped, and a value
    left with no entries becomes `compose.yml`.
    """
    kept = []
    for entry in (compose_file or "").split(":"):
        if entry and entry != overlay:
            kept.append(entry)
    if not kept:
        return "compose.yml"
    return ":".join(kept)
def recipe_branch_commit(recipe: str, branch: str = "main") -> str | None:
    """Resolve the tip of the recipe repo's target branch to a commit SHA.

    Used for the main-tip fallback of the dynamic upgrade base. Asks git, inside the recipe
    checkout, to resolve `origin/<branch>` first and `origin/master` second; the first ref that
    resolves wins. Returns None when neither resolves.
    """
    checkout = abra.recipe_dir(recipe)
    candidates = (f"origin/{branch}", "origin/master")
    for candidate in candidates:
        result = subprocess.run(
            ["git", "-C", checkout, "rev-parse", "--verify", "--quiet", candidate],
            capture_output=True,
            text=True,
        )
        sha = result.stdout.strip()
        if result.returncode == 0 and sha:
            return sha
    return None
def _run_install_steps(hook: tuple[str, str], recipe: str, domain: str) -> None:
"""Run a recipe's custom install-steps hook (install_steps.sh) during the install tier — after
`abra app new` + env defaults + secret generate, before deploy (Phase 1d DG5). The hook gets the
@ -234,6 +373,8 @@ def deploy_app(
recipe: str,
domain: str,
version: str | None = None,
base_ref: str | None = None,
apply_previous: bool = False,
secrets: bool = True,
install_steps_hook: tuple[str, str] | None = None,
deploy_timeout: int = 900,
@ -273,7 +414,18 @@ def deploy_app(
# Adversary F1d-2). Chaos is correct ONLY for the version=None case (deploy the current PR-head
# checkout). Order matters: checkout before secret_generate (-C) so secrets match the pinned tree.
chaos = version is None
if version:
if base_ref:
# Dynamic upgrade base = target-branch (main) tip, or a last-green commit (phase prevb): an
# arbitrary git ref, not a published tag. Check it out and deploy via chaos — same mechanism
# as the head deploy (a non-tag ref would FATA abra's pinned-deploy lint/clean-tree gate).
recipe_checkout_ref(recipe, base_ref)
chaos = True
print(
f" deploy_app({recipe}): base = main-tip/ref {base_ref[:12]} → chaos deploy of the "
"checked-out ref (the PR's true predecessor; not a published pin)",
flush=True,
)
elif version:
abra.recipe_checkout(recipe, version)
# A pinned (non-chaos) deploy runs `abra recipe lint`, which FATAs R014 ('only annotated
# tags') if the upstream recipe ships a stray lightweight version tag (e.g. lasuite-meet's
@ -309,8 +461,19 @@ def deploy_app(
# it ourselves is recipe-agnostic and canonical (the run domain IS the app's domain).
abra.env_set(domain, "DOMAIN", domain)
abra.env_set(domain, "LETS_ENCRYPT_ENV", "")
for k, v in meta_mod.extra_env(meta, meta_mod.hook_ctx(domain, meta)).items():
extra = meta_mod.extra_env(meta, meta_mod.hook_ctx(domain, meta))
for k, v in extra.items():
abra.env_set(domain, k, v)
# Per-recipe `previous/` overlay (phase prevb): version-specific repair to deploy the *previous
# (last-green) version*, applied to the BASE deploy ONLY (the caller resolved apply=True only when
# the resolved base equals previous/VERSION). Appended on top of the recipe's COMPOSE_FILE (which
# may already include the environmental compose.ccci.yml). Stripped before the head redeploy
# (generic.perform_upgrade) so the PR head runs unmodified.
if apply_previous:
cf = compose_file_add(extra.get("COMPOSE_FILE", "compose.yml"), PREVIOUS_COMPOSE)
abra.env_set(domain, "COMPOSE_FILE", cf)
provide_previous_overlay(recipe)
print(f" previous-overlay: COMPOSE_FILE for base deploy = {cf}", flush=True)
if secrets:
abra.secret_generate(domain)
if install_steps_hook:
@ -332,6 +495,24 @@ def _stack_name(domain: str) -> str:
return domain.replace(".", "_")
def stack_service_names(domain: str) -> list[str]:
    """List the short service names of this app's swarm stack.

    Runs `docker stack services` for the stack derived from `domain` and strips the
    `<stack>_` prefix from each reported name; names without that prefix are kept as-is and
    blank output lines are ignored. Recipe assertions use this to check which services exist
    after an upgrade.
    """
    stack = _stack_name(domain)
    prefix = stack + "_"
    listing = subprocess.run(
        ["docker", "stack", "services", stack, "--format", "{{.Name}}"],
        capture_output=True,
        text=True,
    ).stdout
    cleaned = (raw.strip() for raw in listing.split("\n"))
    return [name.removeprefix(prefix) for name in cleaned if name]
def services_converged(domain: str) -> bool:
"""True when every service in the stack reports replicas N/N (N>0) AND no service is
mid-rolling-update (swarm UpdateStatus settled)."""

View File

@ -38,6 +38,7 @@ import subprocess
import sys
import tempfile
import time
from typing import NamedTuple
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(ROOT, "runner"))
@ -88,36 +89,66 @@ def sso_dep_unverified(declared, deps_ready: bool, requires_deps_skipped: int) -
return bool(declared) and not deps_ready and requires_deps_skipped > 0
def upgrade_base(stages, meta, recipe: str) -> str | None:
"""Deploy-once base version decision (pure given meta + the published-version lookup):
previous published version when the upgrade tier will run and one exists (so upgrade goes
previous→target in place), else None (the caller falls back to the target / PR head).
(DECISIONS.)
class BasePlan(NamedTuple):
"""Resolved upgrade-base decision (phase prevb). `kind`:
- "version" → deploy a pinned published version (`version`): an explicit UPGRADE_BASE_VERSION
override, or last-green (warm-canonical) version. `previous/` may apply (version-guarded).
- "ref" → deploy the target-branch (main) tip at commit `ref` (chaos): the true predecessor
the PR merges onto, used when there is no last-green. `previous/` never applies to a ref base.
- "skip" → no upgrade base; the single deploy is the PR head and the upgrade tier records a
declared skip with `reason` (upgrade∉stages / EXPECTED_NA / new recipe / head==main tip)."""
A recipe may override the base via recipe_meta UPGRADE_BASE_VERSION when the harness default
(recipe_versions[-2]) is NOT the PR's true predecessor — e.g. a PR that adds a version ABOVE the
newest published tag, where the correct base is [-1] (the newest published), not [-2]. The
override must be an exact published version tag (deployed as a pinned base). (Adversary §7.1.)
kind: str
version: str | None
ref: str | None
reason: str
A recipe that declares the upgrade rung in EXPECTED_NA gets NO base: published versions may
exist yet be genuinely undeployable — e.g. bluesky-pds, where every published tag pins the
moving image tag `:0.4` that upstream republished with incompatible main builds, so no
published version can come up as an upgrade base (phase bsky, DECISIONS). Deploying one would
fail the INSTALL tier before the PR-head code is ever exercised. With no base, the single
deploy is the PR head itself and the upgrade tier records "skip", which derive_rungs
classifies as the DECLARED intentional skip (reason from EXPECTED_NA — visible in
results.json `skips.intentional`, never reported as a pass)."""
@property
def runs(self) -> bool:
return self.kind in ("version", "ref")
def resolve_upgrade_base(stages, meta, recipe: str, head_ref: str | None = None) -> BasePlan:
    """Resolve the upgrade base dynamically (phase prevb); always returns a BasePlan.

    Resolution order, first match wins:

    1. "upgrade" not in `stages`            -> skip.
    2. `meta.EXPECTED_NA["upgrade"]` is set -> declared skip carrying that reason. Published
       versions may exist yet be undeployable (e.g. bluesky-pds, whose published tags pin a
       moving image tag that upstream republished incompatibly); deploying one would fail the
       install tier before the PR head is exercised.
    3. `meta.UPGRADE_BASE_VERSION` is set   -> that pinned version. Optional explicit override
       for the rare PR that adds a version above the newest published tag.
    4. last-green registry record           -> its version (the PRIMARY base: the version cc-ci
       last recorded green for this recipe, the warm-canonical record).
    5. target-branch (main) tip             -> that commit as a "ref" base (the FALLBACK: the
       predecessor the PR merges onto), unless it equals `head_ref`, which is a skip.
    6. otherwise                            -> skip (new recipe / no predecessor).

    Every skip carries a recorded reason, so it is a declared skip rather than a silent pass.
    """
    if "upgrade" not in stages:
        return BasePlan("skip", None, None, "upgrade tier not in requested stages")

    declared = (meta.EXPECTED_NA or {}).get("upgrade")
    if declared:
        print(
            f"== upgrade tier: declared EXPECTED_NA['upgrade'] — single deploy is the PR head. "
            f"Reason: {declared}",
            flush=True,
        )
        return BasePlan("skip", None, None, f"declared EXPECTED_NA[upgrade]: {declared}")

    override = getattr(meta, "UPGRADE_BASE_VERSION", None)
    if override:
        return BasePlan("version", override, None, "explicit UPGRADE_BASE_VERSION override")

    rec = canonical.read_registry(recipe)
    if rec and rec.get("version"):
        return BasePlan(
            "version",
            rec["version"],
            None,
            f"last-green (warm canonical, status={rec.get('status')})",
        )

    main_tip = lifecycle.recipe_branch_commit(recipe, "main")
    if not main_tip:
        return BasePlan(
            "skip", None, None, "no last-green and no main tip (new recipe / no predecessor)"
        )
    # NOTE(review): this is a plain string comparison; it assumes head_ref is a full commit SHA
    # like main_tip — confirm against the caller.
    if main_tip == head_ref:
        return BasePlan("skip", None, None, "head == main tip (no predecessor delta)")
    return BasePlan("ref", None, main_tip, "target-branch (main) tip")
def _truthy(v: str | None) -> bool:
@ -952,8 +983,25 @@ def main() -> int:
domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)
prev = upgrade_base(stages, meta, recipe)
base = prev or target
base_plan = resolve_upgrade_base(stages, meta, recipe, head_ref=head_ref)
prev = base_plan.runs # gates the upgrade tier
# base deploy target: a pinned published version (kind=version) or main-tip commit (kind=ref);
# on skip fall back to the run's VERSION/head (target=None → chaos head deploy, as before).
base = base_plan.version or target
base_ref = base_plan.ref
prev_status = lifecycle.previous_status(recipe, base_plan.kind, base_plan.version)
print(
f"== upgrade base: kind={base_plan.kind} "
f"{('version=' + base) if base_plan.kind == 'version' else ''}"
f"{('ref=' + (base_ref or '')[:12]) if base_plan.kind == 'ref' else ''}"
f"{(' SKIP: ' + base_plan.reason) if base_plan.kind == 'skip' else ''} "
f"({base_plan.reason if base_plan.kind != 'skip' else ''})",
flush=True,
)
if prev_status["stale"]:
print(f"!! previous/ STALE — {prev_status['reason']}", flush=True)
elif prev_status["apply"]:
print(f"== previous/ applies to the base deploy (targets {base})", flush=True)
backup_cap = generic.backup_capable(recipe, meta)
hook = discovery.install_steps(recipe, repo_local)
@ -1051,6 +1099,8 @@ def main() -> int:
recipe,
domain,
version=base,
base_ref=base_ref,
apply_previous=prev_status["apply"],
secrets=True,
install_steps_hook=hook,
deploy_timeout=int(meta.DEPLOY_TIMEOUT),
@ -1129,7 +1179,7 @@ def main() -> int:
junit_dir=junit_dir,
)
if prev
else "skip" # no upgrade base: single published version, or declared EXPECTED_NA
else "skip" # base_plan.kind == "skip": no predecessor / EXPECTED_NA / head==main
)
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
if "backup" in stages: