fix(1e): F1e-1 exec_in_app race + HC1 head_ref/move hardening
F1e-1 (Adversary): exec_in_app silently returned '' on a failed docker exec, flipping a healthy recipe RED under opt-out (post-backup container cycle, no readiness buffer). It now polls (re-resolving the container and re-running the exec) until rc==0 or 90s have elapsed, then RAISES — it never masks an exec failure as empty data. No assertion weakened. Verified: opt-out install, backup, restore on custom-html now PASS.

HC1: head_ref = ref or recipe_head_commit (prefer the explicit PR head sha $REF — robust, no git race; production !testme always sets REF). assert_upgraded, when head_ref is known, REQUIRES the deployed chaos-version commit to MATCH head_ref (direct, non-vacuous proof that the PR-head code was deployed; a stale prev-checkout chaos redeploy fails). Otherwise it falls back to the version/image/chaos move check.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -142,24 +142,43 @@ def op_state() -> dict:
|
||||
|
||||
|
||||
def assert_upgraded(domain: str, meta: dict) -> None:
    """Assert that the already-performed UPGRADE op deployed the PR-head code under test (HC1).

    Generic post-op assertion: the orchestrator already performed the upgrade once via
    `abra app deploy --chaos` of the PR-head checkout; nothing is deployed here. The app must have
    reconverged and still serve, AND the deployment must genuinely be the code under test —
    non-vacuously (guarding F1d-2).

    The chaos deploy stamps `coop-cloud.<stack>.chaos-version` = the deployed recipe commit. When
    the intended PR-head commit is known (`head_ref` in the recorded upgrade state), the deployed
    chaos commit must MATCH it: a stale prev-checkout chaos redeploy would stamp prev's commit, not
    head_ref, and fail here. When head_ref is unknown, fall back to requiring a move versus the
    pre-upgrade state (version, image, or chaos label changed).

    Args:
        domain: Domain of the deployed app; passed to `assert_serving` and
            `lifecycle.deployed_identity`, and used as the prefix of every failure message.
        meta: Recipe metadata, forwarded unchanged to `assert_serving`.

    Raises:
        AssertionError: If the app is not serving, if head_ref is known but the chaos label is
            missing or does not match it, or (head_ref unknown) if nothing moved.
    """
    st = op_state().get("upgrade", {})
    before = st.get("before") or {}
    head_ref = st.get("head_ref")

    assert_serving(domain, meta)
    after = lifecycle.deployed_identity(domain)
    chaos = after.get("chaos")

    if head_ref:
        assert chaos, (
            f"{domain}: upgrade left no chaos label — `abra app deploy --chaos` did not deploy the "
            "PR-head checkout (the code under test was not exercised by the upgrade)"
        )
        # chaos-version is an abbreviated commit (e.g. '8a026066'); head_ref may be full or short.
        # Comparing prefixes in both directions accepts whichever of the two is the shorter form.
        assert head_ref.startswith(chaos) or chaos.startswith(head_ref), (
            f"{domain}: upgrade deployed chaos commit {chaos!r}, not the intended PR-head "
            f"{head_ref[:12]!r} — the re-checkout to the code under test failed, so the upgrade is "
            "not exercising the PR's changes (HC1)"
        )
        return

    # head_ref unknown: any one of these differences counts as a real move. Version and image only
    # count when both the before and after values are present, so a missing value is not a "change".
    moved = (
        (before.get("version") and after.get("version") and before["version"] != after["version"])
        or (before.get("image") and after.get("image") and before["image"] != after["image"])
        or (chaos and chaos != before.get("chaos"))
    )
    assert moved, (
        f"{domain}: upgrade did not move the deployment "
        f"(version {before.get('version')}->{after.get('version')}, "
        f"image {before.get('image')}->{after.get('image')}, "
        f"chaos {before.get('chaos')}->{chaos}) — "
        "not a real upgrade to the code under test (HC1/DG2 must be non-vacuous)"
    )
|
||||
|
||||
@@ -195,12 +214,25 @@ def assert_restore_healthy(domain: str, meta: dict) -> None:
|
||||
# ---- Op primitives (orchestrator-only; perform the op once, never assert) --------------------
|
||||
|
||||
|
||||
def perform_upgrade(domain: str, recipe: str, head_ref: str | None) -> dict[str, str | None]:
    """Perform the UPGRADE op once, in place, to the PR-HEAD code under test (HC1).

    Re-checks out the PR head (the prev-tag base deploy reset the recipe working tree), then runs
    `abra app deploy --chaos` to redeploy the running app at that checkout. This is the real
    upgrade the PR's changes are exercised by (vs the old 'upgrade to newest published tag', which
    never deployed PR-head code). After the chaos deploy the `chaos`(-version) label carries the
    deployed recipe commit, which is what `assert_upgraded` checks.

    This is an orchestrator-only primitive: it performs the op and prints evidence, but never
    asserts.

    Args:
        domain: Domain of the running app to redeploy.
        recipe: Recipe whose working tree is checked out at `head_ref`.
        head_ref: PR-head commit to check out before redeploying; when falsy the checkout step is
            skipped.

    Returns:
        The deployed identity captured BEFORE the redeploy, so the orchestrator can record it for
        `assert_upgraded`'s move check.
    """
    before = lifecycle.deployed_identity(domain)

    if head_ref:
        lifecycle.recipe_checkout_ref(recipe, head_ref)
    # NOTE(review): the original indentation is not recoverable from the diff rendering this was
    # taken from. The redeploy is placed outside the `if` (always runs), following the described
    # sequence "re-checkout the PR head, then deploy" — confirm against the repository.
    lifecycle.chaos_redeploy(domain)

    after = lifecycle.deployed_identity(domain)
    # Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the
    # PR-head we checked out — proving the upgrade deployed the code under test, not a published tag.
    print(
        f" upgrade→PR-head: head_ref={(head_ref or '')[:8] or None} "
        f"chaos-version={after.get('chaos')} version={before.get('version')}→{after.get('version')}",
        flush=True,
    )
    return before
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user