fix(1e): F1e-1 exec_in_app race + HC1 head_ref/move hardening
F1e-1 (Adversary): exec_in_app silently returned '' on a failed docker exec, flipping a healthy recipe RED under opt-out (post-backup container cycle, no readiness buffer). It now polls (re-resolving the container and re-running the exec) until rc==0 or 90s have elapsed, then RAISES — it never masks an exec failure as empty data. No assertion weakened. Verified: opt-out install, backup, and restore on custom-html now PASS.

HC1: head_ref = ref or recipe_head_commit (prefer the explicit PR head sha $REF — robust, no git race; production !testme always sets REF). assert_upgraded, when head_ref is known, REQUIRES the deployed chaos-version commit to MATCH head_ref (a direct, non-vacuous proof that the PR-head code was deployed; a stale prev-checkout chaos redeploy fails). Otherwise it falls back to the version/image/chaos move check.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -252,11 +252,11 @@ def deployed_identity(domain: str, service: str = "app") -> dict[str, str | None
|
||||
|
||||
- `version` = the `coop-cloud.<stack>.version` label (bumped per published recipe version).
|
||||
- `image` = the running container image (usually bumps with a published version).
|
||||
- `chaos` = the chaos label value (a chaos deploy stamps the recipe git commit/dirty state here)
|
||||
— present after `abra app deploy --chaos`, absent on a clean pinned-tag deploy. For prev→PR-head
|
||||
this is THE proof PR-head was deployed even when the version label is unbumped (HC1). The exact
|
||||
chaos label key varies by abra version, so we capture any `coop-cloud.<stack>.*` label whose key
|
||||
contains "chaos"."""
|
||||
- `chaos` = the chaos deploy's recipe git commit. abra stamps `coop-cloud.<stack>.chaos-version`
|
||||
= the deployed recipe commit (e.g. "91b27ceb") + `coop-cloud.<stack>.chaos`="true" on a
|
||||
`--chaos` deploy; both are absent on a clean pinned-tag deploy. We prefer the `.chaos-version`
|
||||
commit — for prev→PR-head it IS the proof the PR-head code under test was deployed even when the
|
||||
version label is unbumped (HC1); fall back to the `.chaos` flag if no commit is present."""
|
||||
name = f"{_stack_name(domain)}_{service}"
|
||||
proc = subprocess.run(
|
||||
[
|
||||
@ -274,22 +274,43 @@ def deployed_identity(domain: str, service: str = "app") -> dict[str, str | None
|
||||
if "|" not in out:
|
||||
return {"version": None, "image": None, "chaos": None}
|
||||
labels_json, _, image = out.partition("|")
|
||||
ver = chaos = None
|
||||
ver = chaos = chaos_flag = None
|
||||
with contextlib.suppress(ValueError, json.JSONDecodeError):
|
||||
for k, v in json.loads(labels_json).items():
|
||||
if not k.startswith("coop-cloud."):
|
||||
continue
|
||||
if k.endswith(".version"):
|
||||
ver = v
|
||||
elif "chaos" in k:
|
||||
chaos = v
|
||||
return {"version": ver, "image": image.strip() or None, "chaos": chaos}
|
||||
elif k.endswith(".chaos-version"):
|
||||
chaos = v # the deployed recipe commit — the strongest signal
|
||||
elif k.endswith(".chaos"):
|
||||
chaos_flag = v
|
||||
return {"version": ver, "image": image.strip() or None, "chaos": chaos or chaos_flag}
|
||||
|
||||
|
||||
def upgrade_app(domain: str, version: str | None = None) -> None:
    """Upgrade the app at `domain` by delegating to `abra.upgrade`.

    `version` is forwarded unchanged as a keyword argument; it defaults to None.
    """
    abra.upgrade(domain, version=version)
|
||||
|
||||
|
||||
def recipe_head_commit(recipe: str) -> str | None:
    """Return the recipe checkout's current HEAD commit, as reported by `abra.recipe_head_commit`.

    Intended to be captured right after fetch, before any version-tag checkout, so the upgrade
    tier can re-checkout the PR head for the chaos redeploy (HC1).
    """
    head = abra.recipe_head_commit(recipe)
    return head
|
||||
|
||||
|
||||
def recipe_checkout_ref(recipe: str, ref: str) -> None:
    """Check the recipe out at an arbitrary git ref/commit via `abra.recipe_checkout`.

    HC1: used to restore the PR-head checkout before the chaos upgrade, because the prev-tag
    base deploy reset the checkout to the published tag.
    """
    abra.recipe_checkout(recipe, ref)
|
||||
|
||||
|
||||
def chaos_redeploy(domain: str) -> None:
    """Redeploy the running app in place with `abra app deploy --chaos`.

    The deploy uses the CURRENT recipe checkout (HC1: the PR-head code under test). This is the
    upgrade operation rather than a fresh install: it bypasses deploy_app, so the deploy-count
    guard (DG4.1) is not incremented.
    """
    abra.deploy(domain, chaos=True)
|
||||
|
||||
|
||||
def backup_app(domain: str) -> str:
    """Create a backup of the app at `domain` and return the abra/restic output.

    The returned text carries the produced snapshot_id.
    """
    output = abra.backup_create(domain)
    return output
|
||||
@ -326,10 +347,26 @@ def _app_container(domain: str, service: str = "app", timeout: int = 60) -> str:
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
def exec_in_app(domain: str, cmd: list[str], service: str = "app", timeout: int = 90) -> str:
    """Run `docker exec` in the app's container and return its stdout.

    Hardened (Adversary F1e-1): a lifecycle op (backup/restore) cycles the container, so a
    freshly-resolved container can be mid-transition and `docker exec` FAILS. Poll, re-resolving
    the container on each attempt, until the exec succeeds (returncode 0) or `timeout` elapses,
    then RAISE. Never silently return '' on a failed exec: that masked a container-cycle race as
    empty data, flipping a healthy recipe RED under opt-out (no accidental generic-pytest timing
    buffer) — and could mask a real failure as a pass elsewhere.

    Args:
        domain: App domain whose container is targeted.
        cmd: Command and arguments passed to `docker exec` after the container id.
        service: Service whose container is resolved (default "app").
        timeout: Seconds to keep retrying a failing exec before giving up.

    Returns:
        The stdout of the first attempt that exits with returncode 0.

    Raises:
        RuntimeError: If no attempt exits 0 before the deadline. The message carries the last
            returncode and the last stderr (or stdout when stderr is empty).
    """
    # Monotonic clock: the deadline must not shift if the wall clock is adjusted mid-poll.
    deadline = time.monotonic() + timeout
    while True:
        # Resolve the container again on every attempt rather than reusing an earlier id.
        cid = _app_container(domain, service)
        proc = subprocess.run(["docker", "exec", cid, *cmd], capture_output=True, text=True)
        if proc.returncode == 0:
            return proc.stdout
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            detail = (proc.stderr or proc.stdout).strip()
            raise RuntimeError(
                f"docker exec in {domain}/{service} failed (rc={proc.returncode}) after {timeout}s: {detail}"
            )
        # Back off, but never sleep past the deadline.
        time.sleep(min(3, remaining))
|
||||
|
||||
|
||||
def http_body(domain: str, path: str = "/", timeout: int = 15) -> str:
|
||||
|
||||
Reference in New Issue
Block a user