fix(2w): WC1.1 reconcile rolls back on deploy FAILURE too (not just unhealthy)
A broken 'latest' can fail during abra's converge (deploy_version raises) instead of deploying and then turning out unhealthy. Wrap the upgrade deploy in a try/except so that BOTH failure paths trigger the snapshot-restore rollback instead of crashing the reconcile unit.
This commit is contained in:
@@ -334,8 +334,17 @@ def reconcile(app: str) -> str:
|
|||||||
abra.undeploy(domain)
|
abra.undeploy(domain)
|
||||||
warmsnap.snapshot(recipe, domain, version=last_good)
|
warmsnap.snapshot(recipe, domain, version=last_good)
|
||||||
# snapshot requires undeployed; now bring up latest.
|
# snapshot requires undeployed; now bring up latest.
|
||||||
deploy_version(recipe, domain, latest, dt)
|
# A broken "latest" can fail in two ways: deploy_version raises (abra converge times out on a
|
||||||
if wait_healthy(spec):
|
# crash-looping task) OR it deploys but never becomes healthy. BOTH must roll back, so treat a
|
||||||
|
# deploy exception the same as an unhealthy result.
|
||||||
|
upgrade_ok = False
|
||||||
|
try:
|
||||||
|
deploy_version(recipe, domain, latest, dt)
|
||||||
|
upgrade_ok = wait_healthy(spec)
|
||||||
|
except Exception as e: # noqa: BLE001 — a broken release must trigger rollback, not crash the unit
|
||||||
|
print(f"[{app}] deploy of latest {latest} failed: {e}", flush=True)
|
||||||
|
upgrade_ok = False
|
||||||
|
if upgrade_ok:
|
||||||
write_last_good(recipe, latest)
|
write_last_good(recipe, latest)
|
||||||
print(f"[{app}] upgrade healthy → committed last-good={latest}", flush=True)
|
print(f"[{app}] upgrade healthy → committed last-good={latest}", flush=True)
|
||||||
return f"upgraded:{last_good}->{latest}"
|
return f"upgraded:{last_good}->{latest}"
|
||||||
|
|||||||
Reference in New Issue
Block a user