fix(2): matrix register test — bounded readiness-retry on transient post-restore 5xx (synapse re-establishing DB pool after restore-tier DROP DATABASE); assertion unchanged, RAISEs on persistent failure
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -80,29 +80,65 @@ def _container_curl(domain: str, method: str, path: str, body: dict | None = Non
|
||||
|
||||
|
||||
def _admin_register(
    domain: str,
    secret: str,
    username: str,
    password: str,
    admin: bool,
    *,
    timeout: float = 90.0,
    retry_interval: float = 5.0,
) -> dict:
    """Register a user via the shared-secret admin endpoint, called from inside the container.

    Readiness-robust: in the FULL lifecycle the custom tier runs right after the restore tier,
    which `DROP DATABASE … WITH (FORCE)` + recreates synapse's postgres DB (pg_backup.sh restore).
    Synapse is still re-establishing its DB connection pool in that window, so a registration
    (a DB *write*) can transiently return HTTP 500 M_UNKNOWN even though HTTP health (a read) is
    already green.

    The function therefore polls: on a transient status (500/502/503/504, or 0 — presumably how
    `_container_curl` reports a transport failure; confirm against that helper) from either the
    nonce GET or the register POST, it waits and starts over with a fresh nonce. Any other
    non-200 status (e.g. a 4xx: bad MAC, user exists, policy) is a real rejection and fails
    fast without retrying. The requirement is unchanged — registration must succeed; only the
    post-restore recovery window is tolerated.

    Args:
        domain: Passed through to `_container_curl` to select the target.
        secret: Registration shared secret used as the HMAC key.
        username: Localpart to register.
        password: Password for the new account.
        admin: Whether the new account should be a server admin.
        timeout: Upper bound, in seconds, on the total recovery window.
        retry_interval: Pause, in seconds, between attempts (clamped to the time remaining).

    Returns:
        The parsed JSON body of the successful register response (``{}`` if it was empty).

    Raises:
        AssertionError: on a non-transient failure of the nonce GET, a missing nonce, a
            non-transient rejection of the POST, or if no attempt succeeds within `timeout`.
    """
    import time

    # Statuses treated as "synapse still recovering" and therefore retried.
    transient = (0, 500, 502, 503, 504)
    admin_flag = "admin" if admin else "notadmin"
    deadline = time.monotonic() + timeout  # bounded recovery window
    attempt = 0
    last = {"status": 0, "body": None, "raw": ""}

    def _pause() -> None:
        # Never sleep past the deadline: the window is a hard upper bound.
        remaining = deadline - time.monotonic()
        if remaining > 0:
            time.sleep(min(retry_interval, remaining))

    while time.monotonic() < deadline:
        attempt += 1

        # Step 1: GET a fresh nonce (single-use; re-fetch each attempt)
        r = _container_curl(domain, "GET", "/_synapse/admin/v1/register")
        if r["status"] in transient:
            last = r
            print(f" [register] {username}: nonce GET transient {r['status']} "
                  f"(attempt {attempt}, synapse recovering) — retrying", flush=True)
            _pause()
            continue
        # Explicit raises (not `assert`) so the checks survive `python -O`.
        if r["status"] != 200:
            raise AssertionError(
                f"nonce GET failed: status={r['status']} raw={r['raw'][:200]!r}"
            )
        nonce = (r["body"] or {}).get("nonce")
        if not nonce:
            raise AssertionError(f"no nonce in response: {r['body']!r}")

        # Step 2: HMAC and POST
        # MAC covers nonce, username, password and admin flag, NUL-separated, keyed by the secret.
        msg = f"{nonce}\0{username}\0{password}\0{admin_flag}".encode()
        mac = hmac.new(secret.encode(), msg, hashlib.sha1).hexdigest()
        payload = {
            "nonce": nonce,
            "username": username,
            "password": password,
            "mac": mac,
            "admin": admin,
        }
        r = _container_curl(domain, "POST", "/_synapse/admin/v1/register", body=payload)
        if r["status"] == 200:
            if attempt > 1:
                print(f" [register] {username}: succeeded on attempt {attempt} "
                      f"(synapse recovered)", flush=True)
            return r["body"] or {}
        if r["status"] in transient:
            last = r
            print(f" [register] {username}: POST transient {r['status']} "
                  f"(attempt {attempt}, synapse recovering) — retrying", flush=True)
            _pause()
            continue
        # a 4xx is a real rejection — fail fast, do not retry
        raise AssertionError(
            f"register {username!r} rejected: status={r['status']} body={r['body']!r}"
        )

    raise AssertionError(
        f"register {username!r} never succeeded within the post-restore recovery window "
        f"({attempt} attempts, {timeout:g}s): last status={last['status']} body={last['body']!r}"
    )
|
||||
|
||||
|
||||
def _login(domain: str, username: str, password: str) -> str:
|
||||
|
||||
Reference in New Issue
Block a user