feat(canon): M1.2 release-tag trigger + faithful mirror-sync in the weekly sweep (§2.C/§2.D)
All checks were successful
continuous-integration/drone/push Build is passing

- warm_reconcile.sweep_decision(latest_tag, canon_version): pure new-release-tag trigger
  keyed on version_key (NOT commit) — latest tag > canonical → run; latest tag equal to or older
  than canonical → skip (no-new-version), even when main has untagged commits; no tag at all →
  skip (never-released); no canonical yet → run (seed at the latest tag). Unit-tested.
- scripts/recipe-mirror-sync.sh: faithful mirror sync (adapted from open-recipe-pr.sh
  --reconcile-only) — explicit coopcloud `upstream` remote (robust to inconsistent clone
  remotes), syncs main+TAGS, closes merged-upstream PRs, leaves unrelated PRs, bot-token auth.
- nightly_sweep rewritten: per enrolled recipe → mirror_sync → fetch → sweep_decision →
  run_on_tag (checkout the release tag + CCCI_SKIP_FETCH=1 so head IS the tag → tagged-promote
  gate passes, REF empty → promote allowed). Skips logged; run-twice → skip-all determinism.
- smoke-tested recipe-mirror-sync.sh live on custom-html: faithful no-op main/tags push,
  closed merged-upstream PR #2, left pending PR #5.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-06-17 06:45:37 +00:00
parent f089c30040
commit a20890a363
4 changed files with 234 additions and 29 deletions

View File

@ -1,18 +1,25 @@
#!/usr/bin/env python3
"""Nightly full-cold sweep (Phase 2w / WC6).
"""Weekly canonical sweep (Phase 2w / WC6 + phase canon).
Invoked by the `nightly-sweep` systemd timer (nix/modules/nightly-sweep.nix). Order (plan WC6):
Invoked by the `nightly-sweep` systemd timer (nix/modules/nightly-sweep.nix), weekly. Order:
1. Roll warm/infra to latest, HEALTH-GATED (WC1.1): re-run the keycloak + traefik reconcilers
(warm_reconcile.py <app> — fetch latest recipe → deploy → health-gate → commit/rollback+alert).
This is the health-gated "warm/infra → latest" step; a full operator `nixos-rebuild switch` is
the config-deploy path, not the autonomous nightly's job (DECISIONS Phase-2w WC6).
2. FULL-COLD sweep across enrolled (WARM_CANONICAL) recipes, SERIAL (MAX_TESTS honored — one at a
time), each `RECIPE=<r> run_recipe_ci.py` on LATEST (no REF) → a green run promotes/refreshes
that recipe's canonical (WC5). Serves as the daily authoritative regression.
the config-deploy path, not the autonomous sweep's job (DECISIONS Phase-2w WC6).
2. Per ENROLLED (WARM_CANONICAL) recipe, SERIAL (one at a time):
(C) faithfully mirror-sync the recipe to coopcloud upstream (main+tags, close merged-upstream
PRs) via scripts/recipe-mirror-sync.sh — so the sweep measures TRUE upstream tags/latest.
(D) NEW-RELEASE-TAG trigger (canon §2.D): compare the latest release tag to the recipe's
canonical version (NOT commit). No new tag → SKIP (even if `main` has new untagged commits).
New tag → cold-test that TAGGED version (run_on_tag) and, on green, promote the canonical to
it (run_recipe_ci.promote_canonical, gated on green+cold+latest+enrolled+TAGGED, canon §2.A).
Run-twice determinism (canon M2): a second immediate sweep finds latest tag == canonical for every
recipe → SKIPs all (clean no-op, no CI rerun).
MUST NOT run while a test/Drone build is in flight: if a `run_recipe_ci.py` is already active, skip
this nightly (defer to the next) rather than pile on the single node. Bounded + serial. Exit 0 even
if some recipes fail (logs per-recipe results; a red recipe just doesn't advance its canonical).
this sweep (defer) rather than pile on the single node. Bounded + serial. Exit 0 even if some recipes
fail (logs per-recipe results; a red recipe just doesn't advance its canonical). NO AI at runtime —
pure script + systemd timer.
"""
from __future__ import annotations
@ -26,9 +33,11 @@ import sys
# against $CCCI_REPO (default /root/cc-ci) — the same checkout run_recipe_ci already runs from.
REPO = os.environ.get("CCCI_REPO", "/root/cc-ci")
sys.path.insert(0, os.path.join(REPO, "runner"))
from harness import canonical # noqa: E402
import warm_reconcile as wr # noqa: E402
from harness import abra, canonical # noqa: E402
WARM_APPS = ["keycloak", "traefik"] # the live-warm/infra reconcilers to roll first (health-gated)
MIRROR_SYNC = os.path.join(REPO, "scripts", "recipe-mirror-sync.sh")
def _here() -> str:
@ -53,33 +62,71 @@ def roll_warm_infra() -> None:
print(f"nightly: reconcile {app} rc={rc}", flush=True)
def sweep() -> int:
recipes = canonical.enrolled_recipes()
print(f"\n===== nightly cold sweep: enrolled canonicals = {recipes} =====", flush=True)
results: dict[str, int] = {}
for r in recipes:
print(f"\n===== nightly: full-cold {r} (latest) =====", flush=True)
env = dict(os.environ, RECIPE=r)
env.pop("REF", None) # latest, not a PR head
env.pop("CCCI_QUICK", None)
env.pop("MODE", None)
rc = subprocess.run(
[sys.executable, os.path.join(_here(), "run_recipe_ci.py")], env=env
).returncode
results[r] = rc
def mirror_sync(recipe: str) -> int:
    """Best-effort mirror reconcile for one recipe (canon §2.C).

    Runs the mirror-sync script (``MIRROR_SYNC``) under ``bash`` with the recipe name as its only
    argument. A non-zero exit is logged as non-fatal and handed back to the caller, who decides
    whether to care. When the script file is absent the sync is skipped, a notice is printed, and
    0 is returned.

    Returns:
        The script's exit code, or 0 if the script is missing.
    """
    if os.path.isfile(MIRROR_SYNC):
        command = ["bash", MIRROR_SYNC, recipe]
        exit_code = subprocess.run(command).returncode
        if exit_code:
            print(
                f"sweep: mirror-sync {recipe} rc={exit_code} (non-fatal — continuing)",
                flush=True,
            )
        return exit_code

    # No script on disk: nothing to run, report it and carry on.
    print(
        f"sweep: mirror-sync script missing ({MIRROR_SYNC}) — skipping sync for {recipe}",
        flush=True,
    )
    return 0
def run_on_tag(recipe: str, tag: str) -> int:
    """Cold-test `recipe` at the published release tag `tag` (canon §2.D).

    The tag is checked out via ``abra.recipe_checkout`` and ``run_recipe_ci.py`` (located next to
    this script) is then launched as a child process with:

    - ``RECIPE`` set to the recipe and ``CCCI_SKIP_FETCH=1``, so the run is meant to test the
      checkout staged here rather than re-fetching;
    - ``REF``, ``CCCI_QUICK``, ``MODE`` and ``VERSION`` removed from the inherited environment
      (no PR head, full mode, head = the staged tag checkout).

    Returns:
        The exit code of the ``run_recipe_ci.py`` child process.
    """
    # Stage the release tag before building the child's environment.
    abra.recipe_checkout(recipe, tag)

    dropped = {"REF", "CCCI_QUICK", "MODE", "VERSION"}
    child_env = {name: value for name, value in os.environ.items() if name not in dropped}
    child_env["RECIPE"] = recipe
    child_env["CCCI_SKIP_FETCH"] = "1"

    ci_script = os.path.join(_here(), "run_recipe_ci.py")
    completed = subprocess.run([sys.executable, ci_script], env=child_env)
    return completed.returncode
def _sweep_recipe(r: str) -> str:
    """Run the mirror-sync → fetch → trigger → cold-test steps for one enrolled recipe.

    Returns the status string recorded in the sweep summary: ``SKIP (<reason>)``,
    ``PASS (promoted)`` or ``FAIL (canonical unchanged)``. Exceptions other than a fetch failure
    propagate to the caller, which is expected to contain them per recipe.
    """
    # C. faithful mirror-sync to upstream (best-effort) so we measure true upstream tags/latest.
    mirror_sync(r)
    # Ensure the local recipe clone reflects upstream tags for the trigger computation.
    try:
        wr.fetch_recipe(r)
    except Exception as e:  # noqa: BLE001 — a fetch failure is logged; trigger uses what's local
        print(f"sweep: {r} fetch_recipe failed (non-fatal): {e}", flush=True)
    # D. new-release-tag trigger: latest release tag vs canonical version (NOT commit).
    latest = wr.latest_version(wr.recipe_tags(r))
    canon = (canonical.read_registry(r) or {}).get("version")
    action, reason = wr.sweep_decision(latest, canon)
    if action == "skip":
        print(f"sweep: {r} SKIP — {reason}", flush=True)
        return f"SKIP ({reason})"
    print(f"sweep: {r} RUN — {reason}; cold-testing tagged release {latest}", flush=True)
    rc = run_on_tag(r, latest)
    status = "PASS (promoted)" if rc == 0 else "FAIL (canonical unchanged)"
    print(f"sweep: {r} rc={rc} ({status})", flush=True)
    return status


def sweep() -> int:
    """Walk every enrolled recipe serially, then prune stale warm data and print a summary.

    Each recipe is handled by ``_sweep_recipe``. Any exception raised while handling one recipe
    (tag listing, registry read, tag checkout, ...) is logged and recorded as an ``ERROR`` status
    for that recipe only, so one broken recipe cannot abort the sweep for the remaining ones or
    suppress the summary.

    Returns:
        Always 0 — per-recipe failures are reported in the summary, not treated as fatal.
    """
    recipes = canonical.enrolled_recipes()
    print(f"\n===== weekly canonical sweep: enrolled = {recipes} =====", flush=True)
    results: dict[str, str] = {}
    for r in recipes:
        print(f"\n===== sweep: {r} =====", flush=True)
        try:
            results[r] = _sweep_recipe(r)
        except Exception as e:  # noqa: BLE001 — one recipe's failure must not stop the sweep
            results[r] = f"ERROR ({type(e).__name__}: {e})"
            print(f"sweep: {r} ERROR — {results[r]} (non-fatal — continuing)", flush=True)
    # WC8 disk hygiene: drop warm data for de-enrolled canonicals; log the disk budget.
    pruned = canonical.prune_stale()
    if pruned:
        print(f"sweep: pruned stale warm data for de-enrolled canonicals: {pruned}", flush=True)
    df = subprocess.run(["df", "-h", "/"], capture_output=True, text=True)
    print(f"sweep: disk / →\n{df.stdout.strip()}", flush=True)
    print("\n===== weekly sweep summary =====", flush=True)
    for r, status in results.items():
        print(f"  {r}: {status}", flush=True)
    return 0  # the sweep itself succeeds; per-recipe reds are reported, not fatal

View File

@ -185,6 +185,29 @@ def latest_version(tags) -> str | None:
return s[-1] if s else None
def sweep_decision(latest_tag: str | None, canon_version: str | None) -> tuple[str, str]:
    """Decide whether the weekly sweep should cold-test a recipe (phase canon §2.D).

    Pure function: the decision depends only on the latest RELEASE TAG and the recipe's canonical
    version, compared through ``version_key`` — never on commits. Untagged commits on `main`
    therefore cannot trigger a run.

    Decision table, evaluated top to bottom:
      - `latest_tag` empty/None              → ("skip", never-released)
      - `canon_version` empty/None           → ("run", seed the canonical at `latest_tag`)
      - key(latest_tag) <= key(canon)        → ("skip", no-new-version)
      - otherwise (latest_tag is newer)      → ("run", new release)

    Once a green run has promoted the canonical to `latest_tag`, an immediate second call with the
    same tag lands on the "no-new-version" row, which is what makes a repeated sweep a no-op.

    Returns:
        An ``(action, reason)`` pair where action is ``"run"`` or ``"skip"`` and reason is a
        human-readable explanation for the log.
    """
    if not latest_tag:
        action, reason = "skip", "never-released (no release tag)"
    elif not canon_version:
        action, reason = "run", f"no canonical yet → seed at {latest_tag}"
    elif version_key(latest_tag) <= version_key(canon_version):
        action = "skip"
        reason = f"no-new-version (latest release {latest_tag} <= canonical {canon_version})"
    else:
        action, reason = "run", f"new release {latest_tag} > canonical {canon_version}"
    return (action, reason)
def is_released_version(recipe: str, version: str | None) -> bool:
"""True iff `version` corresponds to a PUBLISHED RELEASE TAG of the recipe (phase canon §2.A:
the canonical may only ever advance to a real release — never an arbitrary untagged `main`