fix(prevb): prune orphan services on upgrade redeploy (head's dropped services); re-add EXPECTED_NA-other-rung test; consume Adversary inbox
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
docker stack deploy doesn't prune services the head compose dropped (discourse PR#4 drops sidekiq), leaving them orphaned on the base image. perform_upgrade now reconciles the live stack to the head compose service set (lifecycle.prune_orphan_services). Makes the deployed stack faithfully reflect the head — no test weakened. No-op when service sets match / compose unresolvable.
This commit is contained in:
@ -303,6 +303,11 @@ def perform_upgrade(
|
||||
http_timeout=int(meta.HTTP_TIMEOUT),
|
||||
)
|
||||
lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="upgrade")
|
||||
# Reconcile the live stack to the head compose's service set: `docker stack deploy` doesn't prune
|
||||
# services the head dropped (e.g. discourse PR#4 deletes sidekiq), leaving them orphaned on the
|
||||
# base's old image. Remove them so the deployed stack faithfully reflects the head under test
|
||||
# (phase prevb). No-op when service sets match / compose can't be resolved.
|
||||
lifecycle.prune_orphan_services(domain, recipe)
|
||||
after = lifecycle.deployed_identity(domain)
|
||||
# Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the
|
||||
# PR-head we checked out — proving the upgrade deployed the code under test, not a published tag.
|
||||
|
||||
@ -513,6 +513,63 @@ def stack_service_names(domain: str) -> list[str]:
|
||||
return names
|
||||
|
||||
|
||||
def compose_services(recipe: str, domain: str) -> set[str]:
|
||||
"""Service names defined by the app's CURRENT COMPOSE_FILE (resolved the way abra/prepull do —
|
||||
`docker compose --env-file <.env> -f <each> config --services`). Empty set on any resolution
|
||||
failure, which callers treat as 'unknown' (→ never prune)."""
|
||||
recipe_dir = abra.recipe_dir(recipe)
|
||||
env_path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env")
|
||||
if not os.path.isdir(recipe_dir) or not os.path.isfile(env_path):
|
||||
return set()
|
||||
cf = subprocess.run(
|
||||
["bash", "-c", f'set -a; . "{env_path}"; printf "%s" "${{COMPOSE_FILE:-compose.yml}}"'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
).stdout.strip()
|
||||
files = [f for f in cf.split(":") if f] or ["compose.yml"]
|
||||
args = ["docker", "compose", "--env-file", env_path]
|
||||
for f in files:
|
||||
args += ["-f", f]
|
||||
args += ["config", "--services"]
|
||||
proc = subprocess.run(args, cwd=recipe_dir, capture_output=True, text=True)
|
||||
if proc.returncode != 0:
|
||||
return set()
|
||||
return {ln.strip() for ln in proc.stdout.splitlines() if ln.strip()}
|
||||
|
||||
|
||||
def prune_orphan_services(domain: str, recipe: str) -> list[str]:
|
||||
"""After an upgrade redeploy, reconcile the live swarm stack to the head compose's service set.
|
||||
|
||||
`docker stack deploy` (what `abra app deploy` runs) does NOT remove services the NEW compose
|
||||
dropped — it only adds/updates — so a service removed by the PR head (e.g. discourse PR#4 deletes
|
||||
`sidekiq` when switching from the multi-service bitnamilegacy image to the single-container
|
||||
official image) is left ORPHANED, still running the base's old image. Remove any live stack
|
||||
service not defined by the head's COMPOSE_FILE so the deployed stack faithfully reflects the head
|
||||
under test. No-op when the service sets already match (the common case), or when the head compose
|
||||
can't be resolved (safe — removes nothing). Returns the short names removed."""
|
||||
defined = compose_services(recipe, domain)
|
||||
if not defined:
|
||||
print(
|
||||
" prune-orphans: head compose services unresolved — skipping (removes nothing)",
|
||||
flush=True,
|
||||
)
|
||||
return []
|
||||
stack = _stack_name(domain)
|
||||
removed = []
|
||||
for s in stack_service_names(domain):
|
||||
if s not in defined:
|
||||
subprocess.run(
|
||||
["docker", "service", "rm", f"{stack}_{s}"], capture_output=True, text=True
|
||||
)
|
||||
removed.append(s)
|
||||
print(
|
||||
f" prune-orphans: removed '{s}' — dropped by the head compose (was an orphan left "
|
||||
"by `docker stack deploy` no-prune)",
|
||||
flush=True,
|
||||
)
|
||||
return removed
|
||||
|
||||
|
||||
def services_converged(domain: str) -> bool:
|
||||
"""True when every service in the stack reports replicas N/N (N>0) AND no service is
|
||||
mid-rolling-update (swarm UpdateStatus settled)."""
|
||||
|
||||
Reference in New Issue
Block a user