fix(2): F2-3 systemic — harness.browser.goto_with_retry; applied to all install overlays
Phase 2 lesson from F2-3 (n8n install Playwright flake on net::ERR_NETWORK_CHANGED): every install overlay that does page.goto needs the same try/except PlaywrightError + status retry. Centralize in runner/harness/browser.py::goto_with_retry; apply to ALL install overlays. - runner/harness/browser.py: shared helper. Polls page.goto until status in accept_statuses; catches PlaywrightError (net::ERR_*) as a retryable signal, not a failure. Raises AssertionError with last_status + last_err diagnostic only on deadline expiry. - tests/custom-html/test_install.py: now uses goto_with_retry (200 only, wait_until=load). - tests/custom-html/playwright/test_browser_smoke.py: same. - tests/n8n/test_install.py: replaced inline retry loop with goto_with_retry (200, 304). - tests/keycloak/test_install.py: goto_with_retry for admin console (200, 302, 303; 45s goto). - tests/cryptpad/test_install.py: goto_with_retry (200, 304; 60s goto, wait_until=load). - tests/lasuite-docs/test_install.py: goto_with_retry (200, 301, 302; 60s goto). Cold-verifiable: ssh cc-ci 'RECIPE=custom-html cc-ci-run runner/run_recipe_ci.py' all 5 stages PASS (including the install overlay that flaked in the deps_smoke run), deploy-count=1, head_ref=8a026066==chaos-version=8a026066 (HC1 non-vacuous). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
56
runner/harness/browser.py
Normal file
56
runner/harness/browser.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""Playwright helpers for Phase-2 recipe tests (plan §4.2).
|
||||
|
||||
Centralizes the `page.goto(...)` retry loop that absorbs transient network errors (F2-3 / F2-5):
|
||||
Playwright's `page.goto` raises `PlaywrightError` on transport-level failures (`net::ERR_*`,
|
||||
connection resets, CDP target gone) — those escape a naive loop that only retries on status
|
||||
mismatches. Wrap every install-overlay `page.goto` in this helper so transient errors retry
|
||||
without weakening the underlying assertion (same pattern as F1e-1's `exec_in_app` poll+raise
|
||||
hardening).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
|
||||
|
||||
def goto_with_retry(page, url, *, deadline_seconds: int = 120, accept_statuses=(200, 304),
|
||||
goto_timeout_ms: int = 30_000, wait_until: str = "domcontentloaded"):
|
||||
"""Poll `page.goto(url)` until status is in `accept_statuses` OR the deadline expires.
|
||||
|
||||
Returns the final Playwright response. Raises AssertionError if the deadline expires without
|
||||
a successful status. Each iteration catches `PlaywrightError` (and any other exception) so
|
||||
transient network failures retry rather than fail the test.
|
||||
|
||||
Use case: recipe install overlays where the app's HTTP layer may be up (status 200 to
|
||||
/healthz or generic readiness) but the requested route is still registering (404), or
|
||||
Playwright's CDP connection transiently flakes (`net::ERR_NETWORK_CHANGED`).
|
||||
"""
|
||||
# Imported lazily so this module can be imported without playwright at unit-test time.
|
||||
try:
|
||||
from playwright.sync_api import Error as PlaywrightError
|
||||
except ImportError: # pragma: no cover — playwright is always installed in cc-ci-run
|
||||
PlaywrightError = Exception # noqa: N806
|
||||
|
||||
deadline = time.time() + deadline_seconds
|
||||
resp = None
|
||||
last_status = 0
|
||||
last_err = ""
|
||||
attempts = 0
|
||||
while time.time() < deadline:
|
||||
attempts += 1
|
||||
try:
|
||||
resp = page.goto(url, wait_until=wait_until, timeout=goto_timeout_ms)
|
||||
except PlaywrightError as e:
|
||||
last_err = str(e)
|
||||
resp = None
|
||||
last_status = 0
|
||||
else:
|
||||
last_status = resp.status if resp is not None else 0
|
||||
if last_status in accept_statuses:
|
||||
return resp
|
||||
time.sleep(3)
|
||||
raise AssertionError(
|
||||
f"page.goto({url}) never returned a status in {accept_statuses} after "
|
||||
f"{attempts} attempts ({deadline_seconds}s); last status={last_status}, "
|
||||
f"last error={last_err or 'none'}"
|
||||
)
|
||||
@ -8,7 +8,7 @@ import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import browser as harness_browser, generic, lifecycle # noqa: E402
|
||||
|
||||
|
||||
def test_serving_and_content(live_app, meta):
|
||||
@ -29,7 +29,10 @@ def test_serving_and_content(live_app, meta):
|
||||
try:
|
||||
ctx = browser.new_context(ignore_https_errors=True)
|
||||
page = ctx.new_page()
|
||||
resp = page.goto(url, wait_until="load", timeout=60000)
|
||||
# F2-3 hardening centralized in harness.browser
|
||||
resp = harness_browser.goto_with_retry(
|
||||
page, url, accept_statuses=(200, 304), goto_timeout_ms=60_000, wait_until="load"
|
||||
)
|
||||
assert resp is not None and resp.status in (
|
||||
200,
|
||||
304,
|
||||
|
||||
@ -10,6 +10,12 @@ later non-lifecycle browser flow (e.g. a content-management UI) has its home alr
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "runner"))
|
||||
from harness import browser as harness_browser # noqa: E402
|
||||
|
||||
|
||||
def test_browser_renders_html(live_app):
|
||||
"""Browser-render the served root page and assert the HTML loads with no console errors."""
|
||||
@ -26,7 +32,10 @@ def test_browser_renders_html(live_app):
|
||||
"console",
|
||||
lambda msg: console_errors.append(msg.text) if msg.type == "error" else None,
|
||||
)
|
||||
resp = page.goto(url, wait_until="load", timeout=30_000)
|
||||
# F2-3 hardening (status mismatch + PlaywrightError retries)
|
||||
resp = harness_browser.goto_with_retry(
|
||||
page, url, accept_statuses=(200,), wait_until="load"
|
||||
)
|
||||
assert resp is not None and resp.status == 200, f"page status {resp and resp.status}"
|
||||
html = page.content()
|
||||
assert "<html" in html.lower(), "page did not render an HTML document"
|
||||
|
||||
@ -9,7 +9,7 @@ import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic # noqa: E402
|
||||
from harness import browser as harness_browser, generic # noqa: E402
|
||||
|
||||
|
||||
def test_serving_and_content(live_app, meta):
|
||||
@ -23,7 +23,11 @@ def test_serving_and_content(live_app, meta):
|
||||
browser = p.chromium.launch(args=["--no-sandbox"])
|
||||
try:
|
||||
page = browser.new_context(ignore_https_errors=True).new_page()
|
||||
resp = page.goto(url, wait_until="load", timeout=30000)
|
||||
# F2-3-style hardening (centralized in harness.browser.goto_with_retry): retry through
|
||||
# transient PlaywrightError (net::ERR_*) and status mismatches.
|
||||
resp = harness_browser.goto_with_retry(
|
||||
page, url, accept_statuses=(200,), wait_until="load"
|
||||
)
|
||||
assert resp is not None and resp.status == 200, f"page status {resp and resp.status}"
|
||||
body = page.content()
|
||||
assert "nginx" in body.lower() or "<html" in body.lower(), "no served HTML content"
|
||||
|
||||
@ -8,7 +8,7 @@ import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import browser as harness_browser, generic, lifecycle # noqa: E402
|
||||
|
||||
|
||||
def test_serving_and_admin_console(live_app, meta):
|
||||
@ -27,7 +27,11 @@ def test_serving_and_admin_console(live_app, meta):
|
||||
browser = p.chromium.launch(args=["--no-sandbox"])
|
||||
try:
|
||||
page = browser.new_context(ignore_https_errors=True).new_page()
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=45000)
|
||||
# F2-3 hardening: harness.browser handles status-mismatch + PlaywrightError retries.
|
||||
# Accept 200 or 302 — the admin console redirects to the login form.
|
||||
harness_browser.goto_with_retry(
|
||||
page, url, accept_statuses=(200, 302, 303), goto_timeout_ms=45_000
|
||||
)
|
||||
# admin console redirects to the login form; wait for a username field to render
|
||||
page.wait_for_selector("input#username, input[name='username']", timeout=30000)
|
||||
assert (
|
||||
|
||||
@ -10,7 +10,7 @@ import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import browser as harness_browser, generic, lifecycle # noqa: E402
|
||||
|
||||
|
||||
def test_serving_and_frontend(live_app, meta):
|
||||
@ -30,7 +30,10 @@ def test_serving_and_frontend(live_app, meta):
|
||||
try:
|
||||
ctx = browser.new_context(ignore_https_errors=True)
|
||||
page = ctx.new_page()
|
||||
resp = page.goto(url, wait_until="domcontentloaded", timeout=60000)
|
||||
# F2-3 hardening centralized in harness.browser
|
||||
resp = harness_browser.goto_with_retry(
|
||||
page, url, accept_statuses=(200, 301, 302), goto_timeout_ms=60_000
|
||||
)
|
||||
assert resp is not None and resp.status in (
|
||||
200,
|
||||
301,
|
||||
|
||||
@ -8,7 +8,7 @@ import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||
from harness import generic, lifecycle # noqa: E402
|
||||
from harness import browser as harness_browser, generic, lifecycle # noqa: E402
|
||||
|
||||
|
||||
def test_serving_and_editor(live_app, meta):
|
||||
@ -21,9 +21,9 @@ def test_serving_and_editor(live_app, meta):
|
||||
|
||||
# A real browser loads the live n8n editor SPA over HTTPS.
|
||||
# n8n's boot is staged: /healthz returns 200 before / route is registered. So we may briefly
|
||||
# get 404 from /, then a 200 with the "starting up" placeholder, then the actual SPA. Poll
|
||||
# page.goto until status==200 (route registered) — Phase 1e exec_in_app pattern: bounded poll,
|
||||
# no bare sleep, raise on persistent failure.
|
||||
# get 404 from /, then a 200 with the "starting up" placeholder, then the actual SPA. The
|
||||
# centralized harness.browser.goto_with_retry helper handles both the status-mismatch retry
|
||||
# AND transient PlaywrightError (net::ERR_*) — F2-3 hardening.
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
url = f"https://{live_app}/"
|
||||
@ -32,29 +32,9 @@ def test_serving_and_editor(live_app, meta):
|
||||
try:
|
||||
ctx = browser.new_context(ignore_https_errors=True)
|
||||
page = ctx.new_page()
|
||||
import time
|
||||
|
||||
from playwright.sync_api import Error as PlaywrightError
|
||||
|
||||
deadline = time.time() + 120
|
||||
resp = None
|
||||
last_status = 0
|
||||
last_err = ""
|
||||
while time.time() < deadline:
|
||||
try:
|
||||
resp = page.goto(url, wait_until="domcontentloaded", timeout=30000)
|
||||
except PlaywrightError as e: # net::ERR_*, transient navigation, etc. (F2-3)
|
||||
last_err = str(e)
|
||||
resp = None
|
||||
last_status = 0
|
||||
else:
|
||||
last_status = resp.status if resp is not None else 0
|
||||
if last_status in (200, 304):
|
||||
break
|
||||
time.sleep(3)
|
||||
assert resp is not None and last_status in (200, 304), (
|
||||
f"page status {last_status} after polling — n8n route never came up "
|
||||
f"(last error: {last_err or 'none'})"
|
||||
resp = harness_browser.goto_with_retry(page, url, accept_statuses=(200, 304))
|
||||
assert resp is not None and resp.status in (200, 304), (
|
||||
f"page status {resp and resp.status}"
|
||||
)
|
||||
body = page.content().lower()
|
||||
assert "n8n" in body or "<html" in body, "no n8n content served"
|
||||
|
||||
Reference in New Issue
Block a user