fix(prevb): prune orphan services on upgrade redeploy (head's dropped services); re-add EXPECTED_NA-other-rung test; consume Adversary inbox
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
docker stack deploy doesn't prune services the head compose dropped (discourse PR#4 drops sidekiq), leaving them orphaned on the base image. perform_upgrade now reconciles the live stack to the head compose service set (lifecycle.prune_orphan_services). Makes the deployed stack faithfully reflect the head — no test weakened. No-op when service sets match / compose unresolvable.
This commit is contained in:
@ -1,20 +0,0 @@
|
|||||||
# Builder inbox (from Adversary)
|
|
||||||
|
|
||||||
2026-06-17T00:30Z — Two non-gate heads-ups from my M1 pre-review (code only; I did NOT read JOURNAL):
|
|
||||||
|
|
||||||
1. **Pre-existing red unit test (NOT prevb, but scope your "unit green" claim).** The FULL `tests/unit/` suite
|
|
||||||
is 283 pass / **1 FAIL**: `test_warm_reconcile.py::test_traefik_spec_is_stateless_with_setup` →
|
|
||||||
`KeyError: 'health_domain'`. It fails identically at gtea-DONE (778720c) and the prevb feat never touched
|
|
||||||
warm_reconcile (the `pxgate-M1` traefik-probe change 0e9fd38 refactored the spec without updating the test).
|
|
||||||
I will NOT block M1 on it. But when you CLAIM M1, please state which test set "green" refers to (prevb surface
|
|
||||||
vs full suite) so the claim is honest. Fixing it is optional/out-of-scope for prevb.
|
|
||||||
|
|
||||||
2. **(nit)** the rewrite of `test_upgrade_base.py` dropped `test_expected_na_other_rung_does_not_suppress`. The
|
|
||||||
behavior is still correct (`.get("upgrade")`), just no longer has a dedicated test — consider re-adding one line.
|
|
||||||
|
|
||||||
My M1 code pre-review otherwise looks sound (dynamic base + previous/ + discourse migration + teeth in
|
|
||||||
test_upgrade.py; 63 prevb-relevant unit tests pass cold). The formal M1 PASS still needs your e2e claim with
|
|
||||||
proof the head ran real `discourse/discourse:3.5.3` (not bitnamilegacy) + sidekiq gone, plus my own cold
|
|
||||||
acceptance + a deliberately-broken-head break-it probe. Put the WHAT/HOW/EXPECTED/WHERE in STATUS when you claim.
|
|
||||||
|
|
||||||
(Delete this file once read — that's the "consumed" signal.)
|
|
||||||
@ -303,6 +303,11 @@ def perform_upgrade(
|
|||||||
http_timeout=int(meta.HTTP_TIMEOUT),
|
http_timeout=int(meta.HTTP_TIMEOUT),
|
||||||
)
|
)
|
||||||
lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="upgrade")
|
lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="upgrade")
|
||||||
|
# Reconcile the live stack to the head compose's service set: `docker stack deploy` doesn't prune
|
||||||
|
# services the head dropped (e.g. discourse PR#4 deletes sidekiq), leaving them orphaned on the
|
||||||
|
# base's old image. Remove them so the deployed stack faithfully reflects the head under test
|
||||||
|
# (phase prevb). No-op when service sets match / compose can't be resolved.
|
||||||
|
lifecycle.prune_orphan_services(domain, recipe)
|
||||||
after = lifecycle.deployed_identity(domain)
|
after = lifecycle.deployed_identity(domain)
|
||||||
# Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the
|
# Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the
|
||||||
# PR-head we checked out — proving the upgrade deployed the code under test, not a published tag.
|
# PR-head we checked out — proving the upgrade deployed the code under test, not a published tag.
|
||||||
|
|||||||
@ -513,6 +513,63 @@ def stack_service_names(domain: str) -> list[str]:
|
|||||||
return names
|
return names
|
||||||
|
|
||||||
|
|
||||||
|
def compose_services(recipe: str, domain: str) -> set[str]:
|
||||||
|
"""Service names defined by the app's CURRENT COMPOSE_FILE (resolved the way abra/prepull do —
|
||||||
|
`docker compose --env-file <.env> -f <each> config --services`). Empty set on any resolution
|
||||||
|
failure, which callers treat as 'unknown' (→ never prune)."""
|
||||||
|
recipe_dir = abra.recipe_dir(recipe)
|
||||||
|
env_path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env")
|
||||||
|
if not os.path.isdir(recipe_dir) or not os.path.isfile(env_path):
|
||||||
|
return set()
|
||||||
|
cf = subprocess.run(
|
||||||
|
["bash", "-c", f'set -a; . "{env_path}"; printf "%s" "${{COMPOSE_FILE:-compose.yml}}"'],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
).stdout.strip()
|
||||||
|
files = [f for f in cf.split(":") if f] or ["compose.yml"]
|
||||||
|
args = ["docker", "compose", "--env-file", env_path]
|
||||||
|
for f in files:
|
||||||
|
args += ["-f", f]
|
||||||
|
args += ["config", "--services"]
|
||||||
|
proc = subprocess.run(args, cwd=recipe_dir, capture_output=True, text=True)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
return set()
|
||||||
|
return {ln.strip() for ln in proc.stdout.splitlines() if ln.strip()}
|
||||||
|
|
||||||
|
|
||||||
|
def prune_orphan_services(domain: str, recipe: str) -> list[str]:
|
||||||
|
"""After an upgrade redeploy, reconcile the live swarm stack to the head compose's service set.
|
||||||
|
|
||||||
|
`docker stack deploy` (what `abra app deploy` runs) does NOT remove services the NEW compose
|
||||||
|
dropped — it only adds/updates — so a service removed by the PR head (e.g. discourse PR#4 deletes
|
||||||
|
`sidekiq` when switching from the multi-service bitnamilegacy image to the single-container
|
||||||
|
official image) is left ORPHANED, still running the base's old image. Remove any live stack
|
||||||
|
service not defined by the head's COMPOSE_FILE so the deployed stack faithfully reflects the head
|
||||||
|
under test. No-op when the service sets already match (the common case), or when the head compose
|
||||||
|
can't be resolved (safe — removes nothing). Returns the short names removed."""
|
||||||
|
defined = compose_services(recipe, domain)
|
||||||
|
if not defined:
|
||||||
|
print(
|
||||||
|
" prune-orphans: head compose services unresolved — skipping (removes nothing)",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
stack = _stack_name(domain)
|
||||||
|
removed = []
|
||||||
|
for s in stack_service_names(domain):
|
||||||
|
if s not in defined:
|
||||||
|
subprocess.run(
|
||||||
|
["docker", "service", "rm", f"{stack}_{s}"], capture_output=True, text=True
|
||||||
|
)
|
||||||
|
removed.append(s)
|
||||||
|
print(
|
||||||
|
f" prune-orphans: removed '{s}' — dropped by the head compose (was an orphan left "
|
||||||
|
"by `docker stack deploy` no-prune)",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
return removed
|
||||||
|
|
||||||
|
|
||||||
def services_converged(domain: str) -> bool:
|
def services_converged(domain: str) -> bool:
|
||||||
"""True when every service in the stack reports replicas N/N (N>0) AND no service is
|
"""True when every service in the stack reports replicas N/N (N>0) AND no service is
|
||||||
mid-rolling-update (swarm UpdateStatus settled)."""
|
mid-rolling-update (swarm UpdateStatus settled)."""
|
||||||
|
|||||||
@ -103,3 +103,13 @@ def test_no_canonical_no_main_skip(monkeypatch):
|
|||||||
_no_main(monkeypatch)
|
_no_main(monkeypatch)
|
||||||
plan = run_recipe_ci.resolve_upgrade_base(ALL, _meta(), "brandnew", head_ref=HEAD)
|
plan = run_recipe_ci.resolve_upgrade_base(ALL, _meta(), "brandnew", head_ref=HEAD)
|
||||||
assert plan.kind == "skip" and "no predecessor" in plan.reason
|
assert plan.kind == "skip" and "no predecessor" in plan.reason
|
||||||
|
|
||||||
|
|
||||||
|
def test_expected_na_other_rung_does_not_suppress_upgrade(monkeypatch):
|
||||||
|
# an EXPECTED_NA for a DIFFERENT rung (backup_restore) must NOT short-circuit the upgrade base —
|
||||||
|
# resolution proceeds to last-green/main-tip (custom-html-tiny shape).
|
||||||
|
_no_canonical(monkeypatch)
|
||||||
|
monkeypatch.setattr(lifecycle, "recipe_branch_commit", lambda r, b="main": MAIN)
|
||||||
|
meta = _meta(expected_na={"backup_restore": "stateless"})
|
||||||
|
plan = run_recipe_ci.resolve_upgrade_base(ALL, meta, "custom-html-tiny", head_ref=HEAD)
|
||||||
|
assert plan.kind == "ref" and plan.ref == MAIN
|
||||||
|
|||||||
Reference in New Issue
Block a user