fix(canon): promote does a FAITHFUL warm install (clean tree + deps + install_steps)
All checks were successful
continuous-integration/drone/push Build is passing

M2 finding (Adversary-flagged): promote_canonical did a bare `abra app deploy` that lacked the
cold install's wiring, so recipes that passed the cold test still failed to promote:
- ghost: `abra app new` fails with FATA 'locally unstaged changes' because the CCCI_SKIP_FETCH
  per-run tree was left dirty by the tier suite. Fix: force a re-checkout of the tag and run
  `git clean -fd` before deploy.
- bluesky-pds: missing pds_plc_rotation_key (the secret is #generate=false, so only
  install_steps inserts it).
- custom-html-tiny: returns 404 because install_steps, which seeds index.html, never ran.
  Fix (for both recipes): run install_steps_hook in promote.
- OIDC recipes would miss their realm. Fix: provision DEPS in promote like the cold install.
promote_canonical now runs: clean tree → provision deps → deploy_app with install_steps_hook +
overlay + ready-probes, then snapshot. Also: the sweep result label is now derived from whether
the canonical was actually written (promote is non-fatal, so rc==0 does not imply a promotion) —
this fixes the misleading 'PASS (promoted)' label.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-06-17 08:50:59 +00:00
parent 4cf1b32f4c
commit f94de22234
2 changed files with 51 additions and 13 deletions

View File

@ -116,7 +116,17 @@ def sweep() -> int:
continue continue
print(f"sweep: {r} RUN — {reason}; cold-testing tagged release {latest}", flush=True) print(f"sweep: {r} RUN — {reason}; cold-testing tagged release {latest}", flush=True)
rc = run_on_tag(r, latest) rc = run_on_tag(r, latest)
results[r] = "PASS (promoted)" if rc == 0 else "FAIL (canonical unchanged)" # Trustworthy label (canon M2): promote_canonical is non-fatal, so rc==0 does NOT imply a
# canonical was written. Derive the result from whether the registry now records `latest`.
rec = canonical.read_registry(r) or {}
if rc != 0:
results[r] = "FAIL (red; canonical unchanged)"
elif rec.get("version") == latest:
results[r] = f"PASS (promoted {latest})"
else:
results[r] = (
f"GREEN-BUT-PROMOTE-FAILED (canonical={rec.get('version') or 'none'}, expected {latest})"
)
print(f"sweep: {r} rc={rc} ({results[r]})", flush=True) print(f"sweep: {r} rc={rc} ({results[r]})", flush=True)
# WC8 disk hygiene: drop warm data for de-enrolled canonicals; log the disk budget. # WC8 disk hygiene: drop warm data for de-enrolled canonicals; log the disk budget.
pruned = canonical.prune_stale() pruned = canonical.prune_stale()

View File

@ -921,37 +921,64 @@ def should_promote_canonical(
return canonical.is_enrolled(recipe) and overall == 0 and not quick and not ref and tagged return canonical.is_enrolled(recipe) and overall == 0 and not quick and not ref and tagged
def promote_canonical(recipe: str, head_ref: str | None, version: str | None) -> None: def promote_canonical(
recipe: str, head_ref: str | None, version: str | None, repo_local: str | None = None
) -> None:
"""canon §2.A / WC5: (re)seed the canonical at the GREEN-VERIFIED TESTED RELEASE `version` — the """canon §2.A / WC5: (re)seed the canonical at the GREEN-VERIFIED TESTED RELEASE `version` — the
exact version under test (head_version), which the should_promote tagged-gate guarantees is a exact version under test (head_version), which the should_promote tagged-gate guarantees is a
published release tag. Deploy `warm-<recipe>` at that version (reattaching the retained canonical published release tag. Deploy `warm-<recipe>` at that version as a FAITHFUL install (the same
volume if one exists — an in-place version bump — else a fresh install), wait healthy, undeploy, wiring the cold install used — deps + install_steps + overlay + secrets), wait healthy, undeploy,
snapshot + record the registry (atomic replace of the last-known-good). snapshot + record the registry (atomic replace of the last-known-good).
Promotes EXACTLY the tested version — it no longer re-derives `latest_version(recipe_tags)`, which The warm deploy must reproduce the cold install, not a bare `abra app deploy` (canon M2 finding):
could differ from the version actually exercised by the run (e.g. a manual `RECIPE=<r>` run whose - CLEAN the recipe tree first. The sweep's run_on_tag sets CCCI_SKIP_FETCH=1 so the cold run
`main` checkout sits on a tag older than the newest published tag): the canonical must record the stages the tag; by promote time that per-run tree was mutated by the tier suite (chaos head
version the tier suite proved green, not a never-tested newer tag. The OLD known-good is replaced checkout + the untracked compose.ccci.yml overlay), which makes `abra app new` FATA "locally
ONLY here, after green (never lost on a red run).""" unstaged changes". A forced re-checkout of the tag + `git clean -fd` restores a pristine tree.
import warm_reconcile as wr - PROVISION DEPS (OIDC realms) + run INSTALL_STEPS, exactly like the cold install. Without these,
recipes whose healthy state depends on them fail the warm deploy though the cold test was green
— e.g. bluesky-pds (install_steps inserts the non-generatable pds_plc_rotation_key),
custom-html-tiny (install_steps seeds index.html), and any DEPS recipe (OIDC env).
Promotes EXACTLY the tested version (never re-derives `latest_version`). The OLD known-good is
replaced ONLY here, after green (never lost on a red run)."""
domain = canonical.canonical_domain(recipe) domain = canonical.canonical_domain(recipe)
if not version: if not version:
print(f"WC5 promote: no tested release version for {recipe} — skip", flush=True) print(f"WC5 promote: no tested release version for {recipe} — skip", flush=True)
return return
wr.fetch_recipe(recipe) # ensure the release tag is present locally for the pinned checkout warm_reconcile.fetch_recipe(
recipe
) # no-op under CCCI_SKIP_FETCH; real fetch on the manual path
meta = meta_mod.load(recipe) meta = meta_mod.load(recipe)
# The cold run's deploy-count was already asserted + the countfile removed; don't perturb it. # The cold run's deploy-count was already asserted + the countfile removed; don't perturb it.
os.environ.pop("CCCI_DEPLOY_COUNT_FILE", None) os.environ.pop("CCCI_DEPLOY_COUNT_FILE", None)
# Pristine tree at the tag: discard the cold run's tier mutations + untracked overlay so the
# pinned `abra app new` clean-tree gate passes (deploy_app re-applies the overlay + auto-chaos).
abra.recipe_checkout(recipe, version)
subprocess.run(
["git", "-C", abra.recipe_dir(recipe), "clean", "-fd"], capture_output=True, text=True
)
print( print(
f"\n===== WC5 promote-on-green-cold: (re)seed canonical {recipe} @ {version} =====", f"\n===== WC5 promote-on-green-cold: (re)seed canonical {recipe} @ {version} =====",
flush=True, flush=True,
) )
# Faithful install wiring: deps (OIDC) then install_steps (via deploy_app's hook), same as cold.
declared = list(meta.DEPS)
if declared:
try:
_provision_deps(recipe, domain, None, declared)
print(f" WC5 promote: provisioned deps {declared} for warm {domain}", flush=True)
except Exception as e: # noqa: BLE001 — log; deploy may still come up for non-blocking deps
print(
f" WC5 promote: dep provisioning failed ({_scrub(str(e))}) — deploying anyway",
flush=True,
)
hook = discovery.install_steps(recipe, repo_local)
lifecycle.deploy_app( lifecycle.deploy_app(
recipe, recipe,
domain, domain,
version=version, version=version,
secrets=True, secrets=True,
install_steps_hook=hook,
deploy_timeout=int(meta.DEPLOY_TIMEOUT), deploy_timeout=int(meta.DEPLOY_TIMEOUT),
meta=meta, meta=meta,
) )
@ -962,6 +989,7 @@ def promote_canonical(recipe: str, head_ref: str | None, version: str | None) ->
deploy_timeout=meta.DEPLOY_TIMEOUT, deploy_timeout=meta.DEPLOY_TIMEOUT,
http_timeout=meta.HTTP_TIMEOUT, http_timeout=meta.HTTP_TIMEOUT,
) )
lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="install")
abra.undeploy(domain) abra.undeploy(domain)
_wait_undeployed(domain) _wait_undeployed(domain)
canonical.seed_canonical(recipe, version, commit=head_ref) canonical.seed_canonical(recipe, version, commit=head_ref)
@ -1500,7 +1528,7 @@ def main() -> int:
tagged = warm_reconcile.is_released_version(recipe, head_version) tagged = warm_reconcile.is_released_version(recipe, head_version)
if should_promote_canonical(recipe, ref, overall, quick=False, tagged=tagged): if should_promote_canonical(recipe, ref, overall, quick=False, tagged=tagged):
try: try:
promote_canonical(recipe, head_ref, head_version) promote_canonical(recipe, head_ref, head_version, repo_local)
except Exception as e: # noqa: BLE001 — promote is a post-green bonus; never fail a green run except Exception as e: # noqa: BLE001 — promote is a post-green bonus; never fail a green run
print( print(
f"!! WC5 promote failed (non-fatal; known-good unchanged): {_scrub(str(e))}", f"!! WC5 promote failed (non-fatal; known-good unchanged): {_scrub(str(e))}",