diff --git a/runner/adv_check4.py b/runner/adv_check4.py deleted file mode 100644 index 4e6808b..0000000 --- a/runner/adv_check4.py +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY check4 (WC1 concurrency + reaping, deploy-free). Cold-run from my own clone. -Asserts: realm_for distinct per run-hex; realms create on live warm kc + oidc_password_grant returns -a JWT each; reap_orphaned_realms keeps the live hex and deletes the orphans. Leaves kc clean.""" -import sys, os -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import warm, sso - -D = warm.warm_domain("keycloak") -assert D == "warm-keycloak.ci.commoninternet.net", D -fails = [] - -# 1) realm_for distinct per run-hex (two concurrent same-recipe runs never collide) -r_a = warm.realm_for("lasuite-docs", "lasu-aaa111.ci.commoninternet.net") -r_b = warm.realm_for("lasuite-docs", "lasu-bbb222.ci.commoninternet.net") -print(f"realm_for aaa111={r_a!r} bbb222={r_b!r}") -if r_a != "lasuite-docs-aaa111": fails.append(f"realm_for aaa111 -> {r_a}") -if r_b != "lasuite-docs-bbb222": fails.append(f"realm_for bbb222 -> {r_b}") -if r_a == r_b: fails.append("realm_for collision") - -admin = sso.admin_password_inside(D) -before = sorted(sso.list_realms(D, admin)) -print(f"realms BEFORE: {before}") - -# 2) create three realms; each must yield a working password-grant JWT -hexes = ["aaa111", "bbb222", "ccc333"] -created = [] -for h in hexes: - realm = f"advchk-{h}" - creds = sso.setup_keycloak_realm(D, realm, f"client-{h}", redirect_uris=["*"], web_origins=["*"]) - created.append(realm) - tok = sso.oidc_password_grant(creds) - ok = isinstance(tok, str) and tok.count(".") == 2 and len(tok) > 40 - print(f" {realm}: JWT={'OK' if ok else 'BAD'} (len={len(tok)}, dots={tok.count('.')})") - if not ok: fails.append(f"{realm} no/!JWT") - # confirm discovery issuer too (independent re-check) - disc = sso.assert_discovery_endpoint(creds) - if disc.get("issuer") != f"https://{D}/realms/{realm}": fails.append(f"{realm} issuer") - -mid = sorted(sso.list_realms(D, admin)) -print(f"realms AFTER CREATE: {mid}") -for realm in created: - if realm not in mid: fails.append(f"{realm} not present after create") - -# 3) reap with live_hexes={aaa111}: must delete bbb222+ccc333, KEEP aaa111 -reaped = sorted(sso.reap_orphaned_realms(D, live_hexes={"aaa111"})) -print(f"REAPED (live=aaa111): {reaped}") -if reaped != ["advchk-bbb222", "advchk-ccc333"]: fails.append(f"reaped set wrong: {reaped}") -after = sorted(sso.list_realms(D, admin)) -print(f"realms AFTER REAP: {after}") -if "advchk-aaa111" not in after: fails.append("aaa111 wrongly reaped (live run would lose its realm)") -if "advchk-bbb222" in after or "advchk-ccc333" in after: fails.append("orphan not reaped") - -# cleanup: remove aaa111 too; leave kc with only master (+ any pre-existing non-advchk realms) -sso.delete_keycloak_realm(D, "advchk-aaa111", admin) -final = sorted(sso.list_realms(D, admin)) -print(f"realms FINAL (after cleanup): {final}") -leftover = [r for r in final if r.startswith("advchk-")] -if leftover: fails.append(f"leftover advchk realms: {leftover}") - -print("\nRESULT:", "FAIL " + "; ".join(fails) if fails else "PASS — all check4 assertions hold") -sys.exit(1 if fails else 0) diff --git a/runner/adv_check5.py b/runner/adv_check5.py deleted file mode 100644 index 045e02a..0000000 --- a/runner/adv_check5.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY check5 — WC1.1 MARQUEE health-gated rollback with data integrity (live, cold). -Run via cc-ci-run from /root/cc-ci-adv-verify on cc-ci (PATH has abra/docker/git). - -Independent reproduce (does NOT trust the Builder's run): - A. plant a MARKER realm on warm kc (the data whose survival proves integrity) - B. stage fake tag 10.7.9+26.6.2 at the good commit -> reconcile -> expect HEALTHY upgrade, - last_good advances to 10.7.9, marker preserved - C. stage broken commit (KC_HOSTNAME=:::bad-host:::) tagged 10.7.10+26.6.2 -> reconcile -> - expect ROLLBACK to 10.7.9, kc HEALTHY, marker INTACT, last_good NOT advanced, rollback alert - D. cleanup: delete fake tags + broken commit, reconcile back to canonical 10.7.1+26.6.2, delete marker -""" -import json, os, subprocess, sys, time -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import sso -import warm_reconcile as wr - -RECIPE, APP = "keycloak", "keycloak" -D = "warm-keycloak.ci.commoninternet.net" -RDIR = os.path.expanduser("~/.abra/recipes/keycloak") -GOOD = "04400df" # HEAD = chore: upgrade to 10.7.1+26.6.2 -CANON = "10.7.1+26.6.2" -T_GOOD = "10.7.9+26.6.2" # fake, points at good commit -T_BAD = "10.7.10+26.6.2" # fake, points at broken-KC_HOSTNAME commit -MARKER = "advmarker-rollback" -ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts") -fails = [] - -def git(*a, check=True): - return subprocess.run(["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv", - *a], capture_output=True, text=True, check=check) - -def reconcile(): - """Run the reconciler exactly as the unit would, but CCCI_SKIP_FETCH so my staged tags stand.""" - env = {**os.environ, "CCCI_SKIP_FETCH": "1"} - r = subprocess.run(["python3", os.path.join(os.path.dirname(__file__), "warm_reconcile.py"), APP], - capture_output=True, text=True, env=env, timeout=1800) - print(r.stdout[-1500:]); print(r.stderr[-600:], file=sys.stderr) - for line in r.stdout.splitlines(): - if line.startswith("RECONCILE RESULT:"): - return line.split(":", 1)[1].strip() - return f"" - -def health(): - return wr.health_code(wr.SPECS[APP]) - -def realms(): - return sorted(sso.list_realms(D)) - -def last_good(): - return wr.read_last_good(RECIPE) - -def type_env(): - return wr.current_version(D) - -print(f"=== START: TYPE={type_env()} last_good={last_good()} health={health()} realms={realms()}") - -# ---- A. plant marker realm (data) ---- -sso.setup_keycloak_realm(D, MARKER, "marker-client", redirect_uris=["*"], web_origins=["*"]) -assert MARKER in realms(), "marker realm not created" -print(f"[A] marker realm planted: {MARKER in realms()}") - -# ---- B. healthy upgrade to fake 10.7.9 ---- -git("tag", "-a", "-m", "adv", T_GOOD, GOOD + "^{commit}", check=False) -wr.write_last_good(RECIPE, CANON) # baseline last_good = canonical -print(f"[B] staged {T_GOOD}@good; reconcile #1 (expect upgrade->{T_GOOD})...") -res1 = reconcile() -print(f"[B] result={res1!r} last_good={last_good()} health={health()} markerIntact={MARKER in realms()}") -if not res1.startswith("upgraded:"): fails.append(f"B not upgraded: {res1}") -if last_good() != T_GOOD: fails.append(f"B last_good={last_good()} != {T_GOOD}") -if health() != 200: fails.append(f"B health={health()}") -if MARKER not in realms(): fails.append("B marker lost on healthy upgrade") - -# ---- C. broken latest 10.7.10 -> rollback ---- -import shutil -compose = os.path.join(RDIR, "compose.yml") -bak = compose + ".advbak"; shutil.copy(compose, bak) -txt = open(compose).read().replace("KC_HOSTNAME=https://${DOMAIN}", "KC_HOSTNAME=:::bad-host:::") -open(compose, "w").write(txt) -git("commit", "-am", "adv broken KC_HOSTNAME") -broken_sha = git("rev-parse", "HEAD").stdout.strip() -git("tag", "-a", "-m", "adv", T_BAD, broken_sha) -git("reset", "--hard", GOOD) # branch back to good; tag keeps the broken commit alive -shutil.copy(bak, compose); os.remove(bak) -alerts_before = set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set() -print(f"[C] staged broken {T_BAD}@{broken_sha[:7]}; reconcile #2 (expect rollback->{T_GOOD})... (broken deploy may take minutes)") -res2 = reconcile() -alerts_after = set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set() -new_alerts = sorted(alerts_after - alerts_before) -print(f"[C] result={res2!r} last_good={last_good()} health={health()} markerIntact={MARKER in realms()} newAlerts={new_alerts}") -if not res2.startswith("rolled-back:"): fails.append(f"C not rolled-back: {res2}") -if health() != 200: fails.append(f"C kc unhealthy after rollback: {health()}") -if MARKER not in realms(): fails.append("C MARKER LOST — data integrity FAILED on rollback") -if last_good() != T_GOOD: fails.append(f"C last_good advanced to {last_good()} (should stay {T_GOOD})") -rb = [a for a in new_alerts if "rollback" in a] -if not rb: fails.append("C no rollback alert written") -else: - rec = json.load(open(os.path.join(ALERTS, rb[0]))) - print(f"[C] rollback alert: {rec}") - if rec.get("attempted") != T_BAD: fails.append(f"alert attempted={rec.get('attempted')}") - if rec.get("last_good") != T_GOOD: fails.append(f"alert last_good={rec.get('last_good')}") - if rec.get("recovered") is not True: fails.append(f"alert recovered={rec.get('recovered')}") - -# ---- D. cleanup + restore canonical ---- -print("[D] cleanup: delete fake tags, reconcile back to canonical, delete marker...") -git("tag", "-d", T_GOOD, check=False); git("tag", "-d", T_BAD, check=False) -git("reset", "--hard", GOOD) -res3 = reconcile() # latest now = real CANON; current(env)=10.7.9 -> redeploys to CANON -print(f"[D] result={res3!r} TYPE={type_env()} last_good={last_good()} health={health()}") -sso.delete_keycloak_realm(D, MARKER) -if type_env() != CANON: fails.append(f"D not restored to canonical TYPE: {type_env()}") -if health() != 200: fails.append(f"D final health={health()}") -fin = realms() -if MARKER in fin: fails.append("D marker not cleaned") -print(f"=== END: TYPE={type_env()} last_good={last_good()} health={health()} realms={fin}") - -print("\nRESULT:", "FAIL: " + "; ".join(fails) if fails else "PASS — WC1.1 marquee: healthy upgrade commits, broken latest rolls back with marker realm (data) INTACT, last_good not advanced, alert correct, canonical restored") -sys.exit(1 if fails else 0) diff --git a/runner/adv_check6.py b/runner/adv_check6.py deleted file mode 100644 index aca1bb8..0000000 --- a/runner/adv_check6.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY check6 — WC1.2 pre-deploy SAFETY gate (live, cold). A hold must do NO deploy. - (a) MAJOR fake tag 11.0.0+27.0.0 -> held-major, alert, kc untouched (TYPE same, 200) - (b) minor tag 10.7.2+26.6.3 + releaseNotes flagging manual migration -> held-manual-migration, - alert CARRIES the notes, kc untouched -Leaves the recipe + kc exactly as found (canonical, no fake tags/notes).""" -import json, os, subprocess, sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import warm_reconcile as wr - -D = "warm-keycloak.ci.commoninternet.net" -RDIR = os.path.expanduser("~/.abra/recipes/keycloak") -GOOD = "04400df"; CANON = "10.7.1+26.6.2" -T_MAJOR = "11.0.0+27.0.0"; T_MINOR = "10.7.2+26.6.3" -NOTES = os.path.join(RDIR, "releaseNotes", T_MINOR + ".md") -ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts") -fails = [] - -def git(*a, check=True): - return subprocess.run(["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv", *a], - capture_output=True, text=True, check=check) -def reconcile(): - env = {**os.environ, "CCCI_SKIP_FETCH": "1"} - r = subprocess.run(["python3", os.path.join(os.path.dirname(__file__), "warm_reconcile.py"), "keycloak"], - capture_output=True, text=True, env=env, timeout=300) - for line in r.stdout.splitlines(): - if line.startswith("RECONCILE RESULT:"): return line.split(":", 1)[1].strip() - return f" {r.stdout[-300:]} {r.stderr[-300:]}" -def alerts_now(): return set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set() - -type0, lg0 = wr.current_version(D), wr.read_last_good("keycloak") -print(f"START TYPE={type0} last_good={lg0} health={wr.health_code(wr.SPECS['keycloak'])}") -assert type0 == CANON, f"precond: kc not canonical ({type0})" - -# (a) MAJOR -> held-major, no deploy -git("reset", "--hard", GOOD) -git("tag", "-a", "-m", "adv", T_MAJOR, GOOD + "^{commit}", check=False) -a0 = alerts_now() -res_a = reconcile() -new_a = sorted(alerts_now() - a0) -print(f"(a) MAJOR {T_MAJOR}: result={res_a!r} TYPE={wr.current_version(D)} new_alerts={new_a}") -if not res_a.startswith("held-major"): fails.append(f"(a) not held-major: {res_a}") -if wr.current_version(D) != CANON: fails.append(f"(a) kc TYPE changed to {wr.current_version(D)}") -hm = [x for x in new_a if "held-major" in x] -if not hm: fails.append("(a) no held-major alert") -else: - rec = json.load(open(os.path.join(ALERTS, hm[0]))) - if rec.get("latest") != T_MAJOR: fails.append(f"(a) alert latest={rec.get('latest')}") - if "release_notes" not in rec: fails.append("(a) alert missing release_notes field") -git("tag", "-d", T_MAJOR, check=False) - -# (b) minor + manual-migration notes -> held-manual-migration, no deploy, alert carries notes -git("tag", "-a", "-m", "adv", T_MINOR, GOOD + "^{commit}", check=False) -os.makedirs(os.path.dirname(NOTES), exist_ok=True) -open(NOTES, "w").write("# 10.7.2\n\nThis release requires a **manual migration**: run the DB upgrade by hand.\n") -b0 = alerts_now() -res_b = reconcile() -new_b = sorted(alerts_now() - b0) -print(f"(b) MINOR+migration {T_MINOR}: result={res_b!r} TYPE={wr.current_version(D)} new_alerts={new_b}") -if not res_b.startswith("held-manual-migration"): fails.append(f"(b) not held-manual-migration: {res_b}") -if wr.current_version(D) != CANON: fails.append(f"(b) kc TYPE changed to {wr.current_version(D)}") -hmm = [x for x in new_b if "manual-migration" in x] -if not hmm: fails.append("(b) no held-manual-migration alert") -else: - rec = json.load(open(os.path.join(ALERTS, hmm[0]))) - if "manual migration" not in (rec.get("release_notes") or "").lower(): - fails.append(f"(b) alert release_notes lacks the notes: {rec.get('release_notes')!r}") - -# cleanup -git("tag", "-d", T_MINOR, check=False) -if os.path.exists(NOTES): os.remove(NOTES) -git("reset", "--hard", GOOD) -faketags = [t for t in git("tag").stdout.split() if t in (T_MAJOR, T_MINOR)] -print(f"END TYPE={wr.current_version(D)} last_good={wr.read_last_good('keycloak')} " - f"health={wr.health_code(wr.SPECS['keycloak'])} faketags={faketags} notes_exists={os.path.exists(NOTES)}") -if wr.current_version(D) != CANON: fails.append(f"END not canonical: {wr.current_version(D)}") -if faketags: fails.append(f"leftover fake tags {faketags}") -if wr.read_last_good("keycloak") != CANON: fails.append(f"last_good moved to {wr.read_last_good('keycloak')}") - -print("\nRESULT:", "FAIL: " + "; ".join(fails) if fails else "PASS — WC1.2 holds major + manual-migration with notes-carrying alert; kc untouched (no deploy/last_good churn)") -sys.exit(1 if fails else 0) diff --git a/runner/adv_check_wc2.py b/runner/adv_check_wc2.py deleted file mode 100644 index 48aba1c..0000000 --- a/runner/adv_check_wc2.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY WC2+WC3 cold reproduce (data-warm canonical round-trip). Run via cc-ci-run from -/root/cc-ci-adv-verify. Drives the cycle MYSELF (does not trust the Builder's single run): - 1. deploy_canonical -> reattach the retained volume; confirm the Builder's known-good marker is served - 2. WC2: write MY OWN marker -> undeploy_keep_volume (assert app DOWN + volume RETAINED) -> - deploy_canonical -> MY marker SURVIVES (data-warm reattach) - 3. WC3: mutate (delete the known-good marker) -> undeploy -> warmsnap.restore -> deploy -> - known-good marker BACK and my marker GONE (restore round-trips the exact known-good) - 4. leave it idle (as found): undeploy_keep_volume; content == known-good -""" -import os, subprocess, sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import canonical, lifecycle, warmsnap - -R = "custom-html" -D = "warm-custom-html.ci.commoninternet.net" -HTML = "/usr/share/nginx/html" -KG = "WC2-DATA-MARKER-7f3a9c" # the Builder's known-good marker string -A = "ADV-OWN-MARKER-a1b2c3" # my own marker -fails = [] - -def sh(*cmd): return lifecycle.exec_in_app(D, list(cmd), service="app") -def has_service(): - return bool(subprocess.run(["docker","service","ls","--format","{{.Name}}"], - capture_output=True,text=True).stdout and - any("custom-html" in n for n in subprocess.run(["docker","service","ls","--format","{{.Name}}"], - capture_output=True,text=True).stdout.split())) -def has_volume(): - out = subprocess.run(["docker","volume","ls","--format","{{.Name}}"],capture_output=True,text=True).stdout - return any("warm-custom-html" in n and n.endswith("_content") for n in out.split()) -def serving(): - r = subprocess.run(["curl","-sk","--resolve",f"{D}:443:127.0.0.1","-o","/dev/null", - "-w","%{http_code}","--max-time","10",f"https://{D}/"],capture_output=True,text=True) - return r.stdout.strip() - -print(f"START canonical={canonical.read_registry(R)} has_canonical={canonical.has_canonical(R)} " - f"has_snapshot={warmsnap.has_snapshot(R)} service={has_service()} volume={has_volume()}") -if not warmsnap.has_snapshot(R): fails.append("no snapshot present at start") - -# 1. reattach + confirm Builder's known-good marker survived their run -canonical.deploy_canonical(R); -listing = sh("ls", HTML) -kg_files = sh("grep","-rl",KG,HTML).split() -print(f"[1] deployed; serving={serving()} html={listing.split()} kg_marker_files={kg_files}") -if not kg_files: fails.append("Builder known-good marker not found after reattach") -kg_file = kg_files[0] if kg_files else None -if serving() != "200": fails.append(f"[1] not serving 200: {serving()}") - -# 2. WC2: my own marker through undeploy-keep-volume -> redeploy -sh("sh","-c",f"echo {A} > {HTML}/adv_own.txt") -got = sh("cat",f"{HTML}/adv_own.txt").strip() -print(f"[2] wrote my marker: {got!r}") -canonical.undeploy_keep_volume(R) -svc_down, vol_kept = not has_service(), has_volume() -print(f"[2] after undeploy_keep_volume: service_down={svc_down} volume_retained={vol_kept} " - f"registry_status={ (canonical.read_registry(R) or {}).get('status') }") -if not svc_down: fails.append("[2] app still has a service after undeploy_keep_volume") -if not vol_kept: fails.append("[2] content volume NOT retained (data-warm broken)") -canonical.deploy_canonical(R) -survived = sh("cat",f"{HTML}/adv_own.txt").strip() -print(f"[2] after redeploy: my marker={survived!r}") -if survived != A: fails.append(f"[2] my marker did NOT survive data-warm round-trip: {survived!r}") - -# 3. WC3: mutate (delete known-good marker) -> undeploy -> restore -> deploy -> known-good BACK -if kg_file: - sh("rm", "-f", kg_file) - mutated_gone = not sh("grep", "-rl", KG, HTML).split() - print(f"[3] mutated: deleted known-good marker file {kg_file}; gone_now={mutated_gone}") -canonical.undeploy_keep_volume(R) -warmsnap.restore(R, D) -canonical.deploy_canonical(R) -kg_back = bool(sh("grep", "-rl", KG, HTML).split()) -a_present = "adv_own.txt" in sh("ls", HTML).split() -print(f"[3] after restore+deploy: known_good_back={kg_back} my_marker_still_there={a_present}") -if not kg_back: fails.append("[3] known-good marker NOT restored (WC3 restore failed)") -if a_present: fails.append("[3] my marker still present after restore — restore not exact known-good") - -# 4. leave idle as found -canonical.undeploy_keep_volume(R) -print(f"END registry={canonical.read_registry(R)} service={has_service()} volume={has_volume()}") -if has_service(): fails.append("[4] left a running service (should be idle)") -if not has_volume(): fails.append("[4] volume not retained at end") - -print("\nRESULT:", "FAIL: "+"; ".join(fails) if fails else - "PASS — WC2 data-warm round-trip (my own marker survives undeploy-keep-volume+reattach) + WC3 " - "restore round-trips the exact known-good; left idle with volume retained") -sys.exit(1 if fails else 0) diff --git a/runner/adv_check_wc3.py b/runner/adv_check_wc3.py deleted file mode 100644 index 69acd6d..0000000 --- a/runner/adv_check_wc3.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY WC3 restore round-trip + recover custom-html canonical to known-good idle. -State on entry (left by adv_check_wc2 crash): app UP (warm), known-good marker file deleted, my -marker adv_own.txt present. This driver: restore the known-good snapshot -> known-good marker BACK, -my marker GONE (restore = exact known-good) -> leave idle. Also diagnoses HTTPS serving.""" -import os, subprocess, sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import canonical, lifecycle, warmsnap - -R = "custom-html"; D = "warm-custom-html.ci.commoninternet.net"; HTML = "/usr/share/nginx/html" -KG_FILE = "wc2-marker.txt"; KG_STR = "WC2-DATA-MARKER-7f3a9c"; A_FILE = "adv_own.txt" -fails = [] -def sh(*c): return lifecycle.exec_in_app(D, list(c), service="app") -def ls(): return sh("ls", HTML).split() -def has_service(): - out = subprocess.run(["docker","service","ls","--format","{{.Name}}"],capture_output=True,text=True).stdout - return any("custom-html" in n for n in out.split()) -def has_volume(): - out = subprocess.run(["docker","volume","ls","--format","{{.Name}}"],capture_output=True,text=True).stdout - return any("warm-custom-html" in n and n.endswith("_content") for n in out.split()) -def code(path): - return subprocess.run(["curl","-sk","--resolve",f"{D}:443:127.0.0.1","-o","/dev/null","-w","%{http_code}", - "--max-time","10",f"https://{D}{path}"],capture_output=True,text=True).stdout.strip() - -# ensure app is up to inspect entry state (it should be, from the crash) -if not has_service(): - canonical.deploy_canonical(R) -entry = ls() -print(f"ENTRY html={entry} (expect adv_own.txt present, {KG_FILE} deleted)") -# serving diagnosis -print(f"SERVING: /={code('/')} /index.html={code('/index.html')} /{KG_FILE}={code('/'+KG_FILE)} /{A_FILE}={code('/'+A_FILE)}") - -# WC3 restore round-trip -canonical.undeploy_keep_volume(R) -warmsnap.restore(R, D) -canonical.deploy_canonical(R) -after = ls() -kg_back = KG_FILE in after -a_gone = A_FILE not in after -kg_content = sh("cat", f"{HTML}/{KG_FILE}").strip() if kg_back else "" -print(f"AFTER RESTORE html={after} kg_back={kg_back} kg_content={kg_content!r} my_marker_gone={a_gone}") -if not kg_back: fails.append("WC3: known-good marker NOT restored") -if KG_STR not in kg_content: fails.append(f"WC3: restored marker content wrong: {kg_content!r}") -if not a_gone: fails.append("WC3: my marker still present — restore not exact known-good") - -# leave idle as found -canonical.undeploy_keep_volume(R) -fin = canonical.read_registry(R) -print(f"END registry_status={fin.get('status')} version={fin.get('version')} service={has_service()} " - f"volume={has_volume()} snapshot={warmsnap.has_snapshot(R)}") -if has_service(): fails.append("END: service still running (should be idle)") -if not has_volume(): fails.append("END: volume not retained") -if fin.get("status") != "idle": fails.append(f"END: status={fin.get('status')} (want idle)") - -print("\nRESULT:", "FAIL: "+"; ".join(fails) if fails else - "PASS — WC3 restore round-trips the EXACT known-good (marker back, content correct, my mutation gone); canonical left idle+retained") -sys.exit(1 if fails else 0) diff --git a/runner/adv_quickfail_verify.py b/runner/adv_quickfail_verify.py deleted file mode 100644 index 0e7d584..0000000 --- a/runner/adv_quickfail_verify.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY: after the --quick FAIL run, independently verify the rollback restored the EXACT -known-good (data + healthy app), the known-good was NOT promoted, then leave idle.""" -import os, subprocess, sys, hashlib -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import canonical, lifecycle, warmsnap - -R="custom-html"; D="warm-custom-html.ci.commoninternet.net"; HTML="/usr/share/nginx/html" -KG_STR="WC2-DATA-MARKER-7f3a9c"; BASE_SNAP_SHA="9ef62bdf11c6060c" -fails=[] -def sh(*c): return lifecycle.exec_in_app(D, list(c), service="app") -def snap_sha(): - import glob - t=glob.glob(f"/var/lib/ci-warm/{R}/snapshot/volumes/*.tar") - return hashlib.sha256(open(t[0],"rb").read()).hexdigest()[:16] if t else "NONE" -def code(p): - return subprocess.run(["curl","-sk","--resolve",f"{D}:443:127.0.0.1","-o","/dev/null","-w","%{http_code}", - "--max-time","10",f"https://{D}{p}"],capture_output=True,text=True).stdout.strip() - -reg=canonical.read_registry(R) or {} -ssha=snap_sha() -print(f"registry version={reg.get('version')} status={reg.get('status')} snapshot_sha={ssha} (baseline {BASE_SNAP_SHA})") -if reg.get('version')!="1.11.0+1.29.0": fails.append(f"known-good promoted/changed: {reg.get('version')}") -if reg.get('status')!="idle": fails.append(f"not idle: {reg.get('status')}") -if ssha!=BASE_SNAP_SHA: fails.append(f"snapshot changed: {ssha} != {BASE_SNAP_SHA}") - -# bring canonical up, confirm restored data + healthy + non-broken image -canonical.deploy_canonical(R) -lifecycle.wait_healthy(D, ok_codes=(200,), path="/", deploy_timeout=300, http_timeout=20) -html=sh("ls",HTML).split() -kg_files=[f for f in html if f=="wc2-marker.txt"] -kg_content=sh("cat",f"{HTML}/wc2-marker.txt").strip() if kg_files else "" -img=subprocess.run(["docker","service","ls","--format","{{.Name}} {{.Image}}"],capture_output=True,text=True).stdout -serving=code("/") -print(f"AFTER deploy: html={html} kg_content={kg_content!r} serving/={serving}") -print(f"image: {[l for l in img.splitlines() if 'custom-html' in l]}") -if not kg_files: fails.append("rollback did NOT restore known-good marker file") -if KG_STR not in kg_content: fails.append(f"restored marker content wrong: {kg_content!r}") -if serving!="200": fails.append(f"rolled-back app not serving 200: {serving}") -if "99.99.99-doesnotexist" in img: fails.append("BROKEN image still deployed after rollback") -if "nginx:1.29.0" not in img: fails.append(f"canonical not on known-good image: {img!r}") - -# leave idle -canonical.undeploy_keep_volume(R) -print(f"END status={(canonical.read_registry(R) or {}).get('status')} " - f"service={'custom-html' in subprocess.run(['docker','service','ls','--format','{{.Name}}'],capture_output=True,text=True).stdout}") -print("\nRESULT:", "FAIL: "+"; ".join(fails) if fails else - "PASS — --quick FAIL rolled back to EXACT known-good (marker+content restored, app healthy on nginx:1.29.0, broken image gone), known-good UNCHANGED+snapshot byte-identical (never promoted); left idle") -sys.exit(1 if fails else 0) diff --git a/runner/adv_recover.py b/runner/adv_recover.py deleted file mode 100644 index ef4ffa4..0000000 --- a/runner/adv_recover.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python3 -"""Recover warm keycloak to canonical 10.7.1+26.6.2 healthy after adv_check5's cleanup-script bug -left it undeployed on TYPE=10.7.9 (a tag I deleted). NOT a reconciler defect — my test's fault. -Steps: recreate 10.7.9 tag (so abra can resolve the current from-version) -> deploy 10.7.9 (kc back -up, marker present) -> delete marker realm -> deploy canonical 10.7.1 -> set last_good -> drop 10.7.9 -tag -> verify clean.""" -import os, subprocess, sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from harness import sso -import warm_reconcile as wr - -D = "warm-keycloak.ci.commoninternet.net" -RDIR = os.path.expanduser("~/.abra/recipes/keycloak") -GOOD = "04400df"; CANON = "10.7.1+26.6.2"; T9 = "10.7.9+26.6.2"; MARKER = "advmarker-rollback" - -def git(*a, check=True): - return subprocess.run(["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv", *a], - capture_output=True, text=True, check=check) - -print(f"START TYPE={wr.current_version(D)} health={wr.health_code(wr.SPECS['keycloak'])}") -git("reset", "--hard", GOOD) -git("tag", "-a", "-m", "adv", T9, GOOD + "^{commit}", check=False) -print("recreated 10.7.9 tag; deploying 10.7.9 to bring kc back...") -wr.deploy_version("keycloak", D, T9, 900) -assert wr.wait_healthy(wr.SPECS["keycloak"]), "kc not healthy on 10.7.9" -realms = sorted(sso.list_realms(D)) -print(f"kc healthy on 10.7.9; realms={realms}") -if MARKER in realms: - sso.delete_keycloak_realm(D, MARKER); print("deleted marker realm") -print("deploying canonical 10.7.1...") -wr.deploy_version("keycloak", D, CANON, 900) -assert wr.wait_healthy(wr.SPECS["keycloak"]), "kc not healthy on canonical" -wr.write_last_good("keycloak", CANON) -git("tag", "-d", T9, check=False) -git("reset", "--hard", GOOD) -final_realms = sorted(sso.list_realms(D)) -faketags = [t for t in git("tag").stdout.split() if t in (T9, "10.7.10+26.6.2")] -print(f"DONE TYPE={wr.current_version(D)} last_good={wr.read_last_good('keycloak')} " - f"health={wr.health_code(wr.SPECS['keycloak'])} realms={final_realms} faketags={faketags}") -ok = (wr.current_version(D) == CANON and wr.read_last_good("keycloak") == CANON - and wr.health_code(wr.SPECS["keycloak"]) == 200 and final_realms == ["master"] and not faketags) -print("RECOVER:", "OK" if ok else "INCOMPLETE") -sys.exit(0 if ok else 1) diff --git a/runner/adv_traefik_rollback.py b/runner/adv_traefik_rollback.py deleted file mode 100644 index 8d4aead..0000000 --- a/runner/adv_traefik_rollback.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -"""ADVERSARY traefik WC1.1 destructive rollback cold proof (LOW TLS risk). -Stage a fake NEWER traefik tag whose compose fails abra LINT (a bare-int env entry → "must be a -string"), so the broken deploy is REJECTED before the running proxy is touched. The reconciler then -exercises the STATELESS rollback path: deploy(latest=broken) fails → redeploy last_good 5.1.1+v3.6.15 -(no snapshot — traefik is stateless) → healthy → rollback alert. Asserts traefik stays serving -(ci.commoninternet.net=200) + keycloak-through-traefik=200 throughout/after, last_good unchanged, a -*-rollback.json alert. DEFENSIVE: finally always restores traefik to 5.1.1+v3.6.15 healthy + cleans -the fake tag. Manual recovery if needed: abra app deploy traefik.ci.commoninternet.net 5.1.1+v3.6.15 -o -n -f""" -import os, subprocess, sys -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -import warm_reconcile as wr - -RDIR = os.path.expanduser("~/.abra/recipes/traefik") -DOMAIN = "traefik.ci.commoninternet.net"; GOOD = "5.1.1+v3.6.15"; FAKE = "5.2.0+v3.6.15" -ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts") -fails = [] -def git(*a, check=True): - return subprocess.run(["git","-C",RDIR,"-c","user.email=adv@cc-ci","-c","user.name=adv",*a], - capture_output=True, text=True, check=check) -def routed(host="ci.commoninternet.net", path="/"): - return subprocess.run(["curl","-sk","--resolve",f"{host}:443:127.0.0.1","-o","/dev/null", - "-w","%{http_code}","--max-time","10",f"https://{host}{path}"],capture_output=True,text=True).stdout.strip() -def reconcile(): - env={**os.environ,"CCCI_SKIP_FETCH":"1"} - r=subprocess.run(["python3",os.path.join(os.path.dirname(__file__),"warm_reconcile.py"),"traefik"], - capture_output=True,text=True,env=env,timeout=1200) - print(r.stdout[-2000:]); print(r.stderr[-500:],file=sys.stderr) - for line in r.stdout.splitlines(): - if line.startswith("RECONCILE RESULT:"): return line.split(":",1)[1].strip() - return f"" - -orig_head = git("rev-parse","HEAD").stdout.strip() -print(f"START traefik TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} " - f"ci={routed()} kc-through={routed('warm-keycloak.ci.commoninternet.net','/realms/master')} orig_head={orig_head[:8]}") -try: - # stage fake NEWER tag with a lint-breaking env (bare int → not a string) - git("checkout","-fq",GOOD) - import re - cf=os.path.join(RDIR,"compose.yml"); txt=open(cf).read() - # add a bare-integer entry to the app service environment list (first 'environment:' block) - txt=txt.replace(" environment:\n - DASHBOARD_ENABLED", - " environment:\n - {advbad: brokenmapping}\n - DASHBOARD_ENABLED",1) - open(cf,"w").write(txt) - git("commit","-aqm","adv: lint-breaking env for traefik rollback proof") - broken=git("rev-parse","HEAD").stdout.strip() - git("tag","-a","-m","adv",FAKE,broken) - git("checkout","-fq",orig_head) # leave working tree on the good HEAD; tag keeps broken commit - print(f"staged fake {FAKE}@{broken[:8]} (lint-breaking); reconcile (expect rollback->{GOOD})...") - a0=set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set() - res=reconcile() - new=sorted((set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set())-a0) - ci, kc = routed(), routed("warm-keycloak.ci.commoninternet.net","/realms/master") - print(f"RESULT={res!r} TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={ci} kc-through={kc} new_alerts={new}") - if not res.startswith("rolled-back:"): fails.append(f"not rolled-back: {res}") - if wr.read_last_good("traefik")!=GOOD: fails.append(f"last_good changed: {wr.read_last_good('traefik')}") - if ci!="200": fails.append(f"traefik not serving after rollback: ci={ci}") - if kc!="200": fails.append(f"keycloak-through-traefik not 200: {kc}") - rb=[a for a in new if "rollback" in a] - if not rb: fails.append("no rollback alert") - else: - import json; rec=json.load(open(os.path.join(ALERTS,rb[0]))) - print(f"rollback alert: {rec}") - if rec.get("attempted")!=FAKE: fails.append(f"alert attempted={rec.get('attempted')}") - if rec.get("last_good")!=GOOD: fails.append(f"alert last_good={rec.get('last_good')}") - if rec.get("recovered") is not True: fails.append(f"alert recovered={rec.get('recovered')}") -finally: - # DEFENSIVE recovery: delete fake tag, restore recipe HEAD, ensure traefik on GOOD + healthy - git("tag","-d",FAKE,check=False); git("checkout","-fq",orig_head) - if wr.current_version(DOMAIN)!=GOOD or routed()!="200": - print("!! defensive recovery: redeploying traefik GOOD", flush=True) - try: wr.deploy_version("traefik",DOMAIN,GOOD,600); wr.wait_healthy(wr.SPECS["traefik"]) - except Exception as e: print(f"!! recovery deploy error: {e}") - fin_ci=routed(); fin_kc=routed("warm-keycloak.ci.commoninternet.net","/realms/master") - fake_left=[t for t in git("tag").stdout.split() if t==FAKE] - print(f"END TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={fin_ci} kc-through={fin_kc} fake_tag_left={fake_left}") - if fin_ci!="200": fails.append(f"FINAL traefik not serving: {fin_ci}") - if fake_left: fails.append("fake tag not cleaned") -print("\nRESULT:", "FAIL: "+"; ".join(fails) if fails else - "PASS — traefik WC1.1 stateless rollback: broken-latest deploy rejected → rolled back to last_good 5.1.1+v3.6.15, traefik+routes healthy (no TLS outage), alert written, cert/config preserved") -sys.exit(1 if fails else 0)