fix(1e): F1e-1 exec_in_app race + HC1 head_ref/move hardening
F1e-1 (Adversary): exec_in_app silently returned '' when a docker exec failed, which flipped a healthy recipe RED under opt-out (the container is cycled after backup and there was no readiness buffer). It now polls (re-resolving the container and re-running the exec) until rc == 0 or 90 s have elapsed, and RAISES if the exec still fails — an exec failure is never masked as empty data. No assertion was weakened. Verified: opt-out install, backup and restore on custom-html now PASS.

HC1: head_ref = ref or recipe_head_commit (prefer the explicit PR head sha $REF — robust, no git race; production !testme always sets REF). When head_ref is known, assert_upgraded REQUIRES the deployed chaos-version commit to MATCH head_ref (direct, non-vacuous proof that the PR-head code was deployed; a chaos redeploy of a stale previous checkout fails). Otherwise it falls back to the version/image/chaos move check.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -211,13 +211,14 @@ def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, met
|
||||
sys.path.remove(d)
|
||||
|
||||
|
||||
def _perform_op(op: str, domain: str, target: str | None, op_state: dict) -> None:
|
||||
def _perform_op(op: str, domain: str, recipe: str, head_ref: str | None, op_state: dict) -> None:
|
||||
"""Perform the single mutating op ONCE (the harness owns the op, HC3). install has no op. Records
|
||||
what the assertions need (pre-upgrade identity, backup snapshot_id) into op_state. None of these
|
||||
call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place upgrade is not a new
|
||||
install (HC1 reconciliation)."""
|
||||
call deploy_app, so the deploy-count guard (DG4.1) stays 1 — the in-place chaos upgrade is not a
|
||||
new install (HC1 reconciliation)."""
|
||||
if op == "upgrade":
|
||||
op_state["upgrade"] = {"before": generic.perform_upgrade(domain, target)}
|
||||
before = generic.perform_upgrade(domain, recipe, head_ref)
|
||||
op_state["upgrade"] = {"before": before, "head_ref": head_ref}
|
||||
elif op == "backup":
|
||||
op_state["backup"] = {"snapshot_id": generic.perform_backup(domain)}
|
||||
elif op == "restore":
|
||||
@@ -231,12 +232,13 @@ def run_lifecycle_tier(
|
||||
repo_local: str | None,
|
||||
domain: str,
|
||||
meta: dict,
|
||||
target: str | None,
|
||||
head_ref: str | None,
|
||||
op_state: dict,
|
||||
) -> str:
|
||||
"""Additive lifecycle tier (HC3): seed (pre-op hook) → perform the op ONCE → run the generic
|
||||
assertion file (unless opted out) AND the overlay assertion file, both against the shared post-op
|
||||
deployment. Returns 'pass' | 'fail' | 'skip'."""
|
||||
deployment. The upgrade op redeploys the PR head (head_ref) via chaos (HC1). Returns
|
||||
'pass' | 'fail' | 'skip'."""
|
||||
overlay = discovery.resolve_overlay_op(recipe, op, repo_local)
|
||||
skip_gen = _skip_generic(op, meta)
|
||||
files: list[tuple[str, str]] = []
|
||||
@@ -257,7 +259,7 @@ def run_lifecycle_tier(
|
||||
# 1) pre-op seed hook + 2) the op ONCE (harness-owned). A failure here is an op failure → tier fail.
|
||||
try:
|
||||
_run_pre_hook(recipe, op, repo_local, domain, meta)
|
||||
_perform_op(op, domain, target, op_state)
|
||||
_perform_op(op, domain, recipe, head_ref, op_state)
|
||||
with open(os.environ["CCCI_OP_STATE_FILE"], "w") as f:
|
||||
json.dump(op_state, f)
|
||||
except Exception as e: # noqa: BLE001 — a failed op is a reported tier failure, not a crash
|
||||
@@ -312,6 +314,10 @@ def main() -> int:
|
||||
f"== cc-ci run: recipe={recipe} ref={ref} pr={os.environ.get('PR', '0')} stages={sorted(stages)}"
|
||||
)
|
||||
fetch_recipe(recipe, ref, src)
|
||||
# The PR-head commit the upgrade tier re-checks out for the chaos redeploy to the code under test
|
||||
# (HC1). Prefer the explicit PR head sha ($REF) — robust + exact; fall back to the recipe checkout
|
||||
# HEAD (the catalogue current) for a non-PR `!testme`. Captured before any version-tag checkout.
|
||||
head_ref = ref or lifecycle.recipe_head_commit(recipe)
|
||||
repo_local = snapshot_recipe_tests(recipe)
|
||||
meta = _load_meta(recipe)
|
||||
domain = naming.app_domain(recipe, os.environ.get("PR", "0"), ref)
|
||||
@@ -361,7 +367,7 @@ def main() -> int:
|
||||
# ---- INSTALL tier (always; additive generic + overlay, no op) ----
|
||||
if "install" in stages:
|
||||
results["install"] = (
|
||||
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, target, op_state)
|
||||
run_lifecycle_tier(recipe, "install", repo_local, domain, meta, head_ref, op_state)
|
||||
if deploy_ok
|
||||
else "fail"
|
||||
)
|
||||
@@ -379,7 +385,9 @@ def main() -> int:
|
||||
# ---- BACKUP + RESTORE tiers (backup-capable only; else clean N/A) ----
|
||||
if "backup" in stages:
|
||||
results["backup"] = (
|
||||
run_lifecycle_tier(recipe, "backup", repo_local, domain, meta, target, op_state)
|
||||
run_lifecycle_tier(
|
||||
recipe, "backup", repo_local, domain, meta, head_ref, op_state
|
||||
)
|
||||
if backup_cap
|
||||
else "skip"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user