fix(2w): W2 --quick live-proof fixes (time import + stale-TYPE reset)

3 bugs found by the live PASS+FAIL proof on the custom-html canonical:
- add the missing `import time`: run_quick._wait_undeployed uses it, so the FAIL rollback
  crashed with NameError before restore ran.
- canonical.deploy_canonical now resets .env TYPE=<recipe>:<version> before
  redeploy, so a stale TYPE left by a prior --quick upgrade (pointing at a
  since-removed broken PR commit) can't FATAL abra 'unable to resolve <commit>'.
- run_quick FAIL rollback now resets TYPE to known-good after restore, so the idle .env
  agrees with the registry.

LIVE PROOF (custom-html canonical), ALL PASS: (A) PASS quick run → undeploy
keep-volume, known-good UNCHANGED, marker intact; (B) FAIL quick run (broken
image) → 'rolling back' → 'restored known-good data; canonical idle' → exit 1,
known-good UNCHANGED, DATA RESTORED. Canonical left clean (idle, 1.11.0+1.29.0).
61 unit pass; cold path untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-29 03:05:39 +01:00
parent f68e9d463f
commit 191ebde466
2 changed files with 16 additions and 1 deletions

View File

@ -100,7 +100,11 @@ def deploy_canonical(recipe: str, timeout: int = 900) -> None:
if not rec:
raise RuntimeError(f"no canonical registry for {recipe} — seed one first (cold run)")
domain, version = rec["domain"], rec["version"]
# The .env + retained volume already exist; redeploy the recorded version (idempotent with -f).
# The .env + retained volume already exist; redeploy the recorded known-good version. Reset the
# recorded TYPE=<recipe>:<version> FIRST so abra can resolve the "current deployment" even if a
# prior --quick upgrade left TYPE pointing at a since-removed/broken PR commit (otherwise abra
# FATALs "unable to resolve <commit>"). Then checkout the tag + idempotent (-f) redeploy.
abra.env_set(domain, "TYPE", f"{recipe}:{version}")
abra.recipe_checkout(recipe, version)
r = subprocess.run(
["abra", "app", "deploy", domain, version, "-o", "-n", "-f"],

View File

@ -37,6 +37,7 @@ import shutil
import subprocess
import sys
import tempfile
import time
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(ROOT, "runner"))
@ -132,6 +133,12 @@ def fetch_recipe(recipe: str, ref: str | None, src: str | None) -> None:
recipes_dir = os.path.expanduser("~/.abra/recipes")
os.makedirs(recipes_dir, exist_ok=True)
dest = os.path.join(recipes_dir, recipe)
# CCCI_SKIP_FETCH=1: use the local recipe clone as-is (lets a test/Adversary stage a fake/broken
# ref — e.g. a simulated broken PR head for the --quick rollback proof — without it being clobbered
# by a re-fetch). Never set in production CI.
if os.environ.get("CCCI_SKIP_FETCH") == "1":
print(f"[fetch] CCCI_SKIP_FETCH=1 — using local {recipe} recipe clone as-is", flush=True)
return
if src and ref:
url = f"https://git.autonomic.zone/{src}.git"
git = ["git"]
@ -549,6 +556,10 @@ def run_quick(recipe: str, ref: str | None, head_ref: str | None, repo_local: st
abra.undeploy(domain)
_wait_undeployed(domain)
warmsnap.restore(recipe, domain)
# reset recorded version to the known-good (the failed upgrade set TYPE to the broken
# PR commit) so the idle canonical's .env agrees with the registry + re-warms cleanly.
if reg.get("version"):
abra.env_set(domain, "TYPE", f"{recipe}:{reg['version']}")
canonical._set_status(recipe, "idle") # noqa: SLF001
rolled_back = True
print(" quick FAIL → restored known-good data; canonical idle (NOT promoted)",