From 575efb5054e38be2044a524b5f4fae645e2d374f Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Wed, 27 May 2026 11:34:59 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20abra=20app=20upgrade=20-c=20(no-converge?= =?UTF-8?q?-checks)=20=E2=80=94=20abra=20false-fails=20slow=20heavy=20roll?= =?UTF-8?q?ing=20upgrades?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Diagnosed via instrumented diag: lasuite-docs upgrade reported 'FATA deploy failed' while all 9 services converged 1/1 — abra's convergence poll gives up too early on the slow stop-first roll (pulling new images). Disable abra's check; the harness wait_healthy + data-survival assertion is the real, more-patient gate (a genuine failure still fails the test: app never gets healthy). Co-Authored-By: Claude Opus 4.7 (1M context) --- runner/harness/abra.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/runner/harness/abra.py b/runner/harness/abra.py index 4c1a4f5..ff6e2b8 100644 --- a/runner/harness/abra.py +++ b/runner/harness/abra.py @@ -101,10 +101,14 @@ def upgrade(domain: str, version: Optional[str] = None, timeout: int = 900) -> N args = ["app", "upgrade", domain] if version: args.append(version) - # -f no prompt, -D skip public-DNS checks (our per-run domains route via the gateway), -o offline - # (use local tags — incl. the upstream tags fetched at clone — and DON'T fetch from the private - # mirror origin, which 401s). upgrade has no --chaos flag. - args += ["-f", "-D", "-n", "-o"] + # -f no prompt, -D skip public-DNS checks, -o offline (local tags, no private-origin 401), + # -c no-converge-checks: abra's convergence poll gives up too early on a slow heavy rolling + # upgrade (e.g. lasuite-docs' 9-service stop-first roll while pulling new images) and reports a + # FALSE "deploy failed" even though all services do converge. 
We disable abra's check and rely on + # the harness's own wait_healthy and data-survival assertion (more patient, and the real test) to gate + # the upgrade. A genuinely-failed upgrade still fails the test (app never gets healthy). -n is abra's + # non-interactive (--no-input) switch; upgrade has no --chaos flag. + args += ["-f", "-D", "-n", "-o", "-c"] _run(args, timeout=timeout)