revert(2): drop adversary scratch probe scripts accidentally staged by git add -A (runner/adv_*.py are local-only adversary scratch, not Builder code)
This commit is contained in:
@ -1,62 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY check4 (WC1 concurrency + reaping, deploy-free). Cold-run from my own clone.
|
||||
Asserts: realm_for distinct per run-hex; realms create on live warm kc + oidc_password_grant returns
|
||||
a JWT each; reap_orphaned_realms keeps the live hex and deletes the orphans. Leaves kc clean."""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import warm, sso
|
||||
|
||||
# Resolve the warm keycloak domain once; the assert pins the host every later call targets.
D = warm.warm_domain("keycloak")
assert D == "warm-keycloak.ci.commoninternet.net", D
fails = []  # failure messages collected across all sections; empty at the end means PASS

# 1) realm_for distinct per run-hex (two concurrent same-recipe runs never collide)
r_a, r_b = (
    warm.realm_for("lasuite-docs", f"lasu-{run_hex}.ci.commoninternet.net")
    for run_hex in ("aaa111", "bbb222")
)
print(f"realm_for aaa111={r_a!r} bbb222={r_b!r}")
for run_hex, got in (("aaa111", r_a), ("bbb222", r_b)):
    # Each realm name is expected to be the recipe name suffixed with the run hex.
    if got != f"lasuite-docs-{run_hex}":
        fails.append(f"realm_for {run_hex} -> {got}")
if r_a == r_b:
    fails.append("realm_for collision")
|
||||
|
||||
admin = sso.admin_password_inside(D)
before = sorted(sso.list_realms(D, admin))
print(f"realms BEFORE: {before}")

# 2) create three realms; each must yield a working password-grant JWT
hexes = ["aaa111", "bbb222", "ccc333"]
created = []
for h in hexes:
    realm = f"advchk-{h}"
    creds = sso.setup_keycloak_realm(D, realm, f"client-{h}", redirect_uris=["*"], web_origins=["*"])
    created.append(realm)
    tok = sso.oidc_password_grant(creds)
    # A JWT is three dot-separated segments; the length floor rejects trivially short strings.
    is_text = isinstance(tok, str)
    ok = is_text and tok.count(".") == 2 and len(tok) > 40
    # Only measure the token when it really is a string: a non-string result must be recorded
    # as a failure below, not crash the diagnostic print with a TypeError/AttributeError.
    if is_text:
        detail = f"len={len(tok)}, dots={tok.count('.')}"
    else:
        detail = f"type={type(tok).__name__}"
    print(f" {realm}: JWT={'OK' if ok else 'BAD'} ({detail})")
    if not ok:
        fails.append(f"{realm} no/!JWT")
    # confirm discovery issuer too (independent re-check)
    disc = sso.assert_discovery_endpoint(creds)
    if disc.get("issuer") != f"https://{D}/realms/{realm}":
        fails.append(f"{realm} issuer")
|
||||
|
||||
# Every realm created above must be listed before any reaping happens.
mid = sorted(sso.list_realms(D, admin))
print(f"realms AFTER CREATE: {mid}")
fails.extend(f"{name} not present after create" for name in created if name not in mid)

# 3) reap with live_hexes={aaa111}: must delete bbb222+ccc333, KEEP aaa111
reaped = sorted(sso.reap_orphaned_realms(D, live_hexes={"aaa111"}))
print(f"REAPED (live=aaa111): {reaped}")
if reaped != ["advchk-bbb222", "advchk-ccc333"]:
    fails.append(f"reaped set wrong: {reaped}")

after = sorted(sso.list_realms(D, admin))
print(f"realms AFTER REAP: {after}")
if "advchk-aaa111" not in after:
    fails.append("aaa111 wrongly reaped (live run would lose its realm)")
if any(orphan in after for orphan in ("advchk-bbb222", "advchk-ccc333")):
    fails.append("orphan not reaped")
|
||||
|
||||
# cleanup: remove aaa111 too; leave kc with only master (+ any pre-existing non-advchk realms)
sso.delete_keycloak_realm(D, "advchk-aaa111", admin)
final = sorted(sso.list_realms(D, admin))
print(f"realms FINAL (after cleanup): {final}")
leftover = [name for name in final if name.startswith("advchk-")]
if leftover:
    fails.append(f"leftover advchk realms: {leftover}")

# Exit status mirrors the verdict: non-zero when any assertion above failed.
if fails:
    verdict = "FAIL " + "; ".join(fails)
else:
    verdict = "PASS — all check4 assertions hold"
print("\nRESULT:", verdict)
sys.exit(1 if fails else 0)
|
||||
@ -1,118 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY check5 — WC1.1 MARQUEE health-gated rollback with data integrity (live, cold).
|
||||
Run via cc-ci-run from /root/cc-ci-adv-verify on cc-ci (PATH has abra/docker/git).
|
||||
|
||||
Independent reproduce (does NOT trust the Builder's run):
|
||||
A. plant a MARKER realm on warm kc (the data whose survival proves integrity)
|
||||
B. stage fake tag 10.7.9+26.6.2 at the good commit -> reconcile -> expect HEALTHY upgrade,
|
||||
last_good advances to 10.7.9, marker preserved
|
||||
C. stage broken commit (KC_HOSTNAME=:::bad-host:::) tagged 10.7.10+26.6.2 -> reconcile ->
|
||||
expect ROLLBACK to 10.7.9, kc HEALTHY, marker INTACT, last_good NOT advanced, rollback alert
|
||||
D. cleanup: delete fake tags + broken commit, reconcile back to canonical 10.7.1+26.6.2, delete marker
|
||||
"""
|
||||
import json, os, subprocess, sys, time
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import sso
|
||||
import warm_reconcile as wr
|
||||
|
||||
RECIPE, APP = "keycloak", "keycloak"
|
||||
D = "warm-keycloak.ci.commoninternet.net"
|
||||
RDIR = os.path.expanduser("~/.abra/recipes/keycloak")
|
||||
GOOD = "04400df" # HEAD = chore: upgrade to 10.7.1+26.6.2
|
||||
CANON = "10.7.1+26.6.2"
|
||||
T_GOOD = "10.7.9+26.6.2" # fake, points at good commit
|
||||
T_BAD = "10.7.10+26.6.2" # fake, points at broken-KC_HOSTNAME commit
|
||||
MARKER = "advmarker-rollback"
|
||||
ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts")
|
||||
fails = []
|
||||
|
||||
def git(*a, check=True):
    """Run a git command inside the keycloak recipe checkout (RDIR).

    A fixed adversary identity is supplied via ``-c`` options on every call.

    Args:
        *a: git sub-command and its arguments.
        check: forwarded to ``subprocess.run``; when true a non-zero exit raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured as text.
    """
    argv = ["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv"]
    argv.extend(a)
    return subprocess.run(argv, capture_output=True, text=True, check=check)
|
||||
|
||||
def reconcile():
    """Run warm_reconcile.py for APP in a subprocess and return its reported result.

    The child inherits the current environment plus ``CCCI_SKIP_FETCH=1`` (per the original
    note: so the locally staged tags stand). The tails of its stdout/stderr are echoed.

    Returns:
        The text after ``RECONCILE RESULT:`` on the first stdout line that starts with it,
        or ``"<no result rc=N>"`` when no such line exists.

    Raises:
        subprocess.TimeoutExpired: if the child runs longer than 1800 seconds.
    """
    script = os.path.join(os.path.dirname(__file__), "warm_reconcile.py")
    child_env = dict(os.environ)
    child_env["CCCI_SKIP_FETCH"] = "1"
    proc = subprocess.run(["python3", script, APP],
                          capture_output=True, text=True, env=child_env, timeout=1800)
    print(proc.stdout[-1500:])
    print(proc.stderr[-600:], file=sys.stderr)
    prefix = "RECONCILE RESULT:"
    hit = next((ln for ln in proc.stdout.splitlines() if ln.startswith(prefix)), None)
    if hit is None:
        return f"<no result rc={proc.returncode}>"
    return hit.split(":", 1)[1].strip()
|
||||
|
||||
def health():
    """Return ``wr.health_code`` for APP's entry in ``wr.SPECS`` (compared against 200 by callers)."""
    spec = wr.SPECS[APP]
    return wr.health_code(spec)
|
||||
|
||||
def realms():
    """Return the realm names currently listed on D, sorted."""
    names = list(sso.list_realms(D))
    names.sort()
    return names
|
||||
|
||||
def last_good():
    """Return the last-good version the reconciler has recorded for RECIPE."""
    recorded = wr.read_last_good(RECIPE)
    return recorded
|
||||
|
||||
def type_env():
    """Return ``wr.current_version`` for D (printed as TYPE in this script's logs)."""
    version = wr.current_version(D)
    return version
|
||||
|
||||
# Starting state, gathered in the same order it is printed.
start_type = type_env()
start_last_good = last_good()
start_health = health()
start_realms = realms()
print(f"=== START: TYPE={start_type} last_good={start_last_good} health={start_health} realms={start_realms}")

# ---- A. plant marker realm (data) ----
# The marker realm is the data whose survival later proves rollback integrity.
sso.setup_keycloak_realm(D, MARKER, "marker-client", redirect_uris=["*"], web_origins=["*"])
assert MARKER in realms(), "marker realm not created"
planted = MARKER in realms()
print(f"[A] marker realm planted: {planted}")
|
||||
|
||||
# ---- B. healthy upgrade to fake 10.7.9 ----
# Fake tag on the good commit; check=False so an already-existing tag is tolerated.
git("tag", "-a", "-m", "adv", T_GOOD, f"{GOOD}^{{commit}}", check=False)
wr.write_last_good(RECIPE, CANON)  # baseline last_good = canonical
print(f"[B] staged {T_GOOD}@good; reconcile #1 (expect upgrade->{T_GOOD})...")
res1 = reconcile()
print(f"[B] result={res1!r} last_good={last_good()} health={health()} markerIntact={MARKER in realms()}")

upgraded = res1.startswith("upgraded:")
if not upgraded:
    fails.append(f"B not upgraded: {res1}")
if last_good() != T_GOOD:
    fails.append(f"B last_good={last_good()} != {T_GOOD}")
if health() != 200:
    fails.append(f"B health={health()}")
if MARKER not in realms():
    fails.append("B marker lost on healthy upgrade")
|
||||
|
||||
# ---- C. broken latest 10.7.10 -> rollback ----
import shutil

compose = os.path.join(RDIR, "compose.yml")
bak = compose + ".advbak"
shutil.copy(compose, bak)
try:
    with open(compose) as fh:
        pristine = fh.read()
    txt = pristine.replace("KC_HOSTNAME=https://${DOMAIN}", "KC_HOSTNAME=:::bad-host:::")
    if txt == pristine:
        # Without this guard the commit below would fail with an opaque "nothing to commit".
        raise RuntimeError(f"KC_HOSTNAME line not found in {compose}; cannot stage the broken commit")
    with open(compose, "w") as fh:
        fh.write(txt)
    git("commit", "-am", "adv broken KC_HOSTNAME")
    broken_sha = git("rev-parse", "HEAD").stdout.strip()
    git("tag", "-a", "-m", "adv", T_BAD, broken_sha)
    git("reset", "--hard", GOOD)  # branch back to good; tag keeps the broken commit alive
finally:
    # Always put the pristine compose file back and drop the backup, even if staging failed.
    shutil.copy(bak, compose)
    os.remove(bak)

# Alerts are detected as files that appear in ALERTS across the reconcile run.
alerts_before = set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set()
print(f"[C] staged broken {T_BAD}@{broken_sha[:7]}; reconcile #2 (expect rollback->{T_GOOD})... (broken deploy may take minutes)")
res2 = reconcile()
alerts_after = set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set()
new_alerts = sorted(alerts_after - alerts_before)
print(f"[C] result={res2!r} last_good={last_good()} health={health()} markerIntact={MARKER in realms()} newAlerts={new_alerts}")

if not res2.startswith("rolled-back:"):
    fails.append(f"C not rolled-back: {res2}")
if health() != 200:
    fails.append(f"C kc unhealthy after rollback: {health()}")
if MARKER not in realms():
    fails.append("C MARKER LOST — data integrity FAILED on rollback")
if last_good() != T_GOOD:
    fails.append(f"C last_good advanced to {last_good()} (should stay {T_GOOD})")

rb = [a for a in new_alerts if "rollback" in a]
if not rb:
    fails.append("C no rollback alert written")
else:
    # Inspect the first (sorted) new rollback alert; close the handle deterministically.
    with open(os.path.join(ALERTS, rb[0])) as fh:
        rec = json.load(fh)
    print(f"[C] rollback alert: {rec}")
    if rec.get("attempted") != T_BAD:
        fails.append(f"alert attempted={rec.get('attempted')}")
    if rec.get("last_good") != T_GOOD:
        fails.append(f"alert last_good={rec.get('last_good')}")
    if rec.get("recovered") is not True:
        fails.append(f"alert recovered={rec.get('recovered')}")
|
||||
|
||||
# ---- D. cleanup + restore canonical ----
print("[D] cleanup: delete fake tags, reconcile back to canonical, delete marker...")
git("tag", "-d", T_BAD, check=False)
git("reset", "--hard", GOOD)
# The app is still recorded on T_GOOD at this point. Deleting that tag first leaves the
# current version unresolvable and the app stranded, so move to canonical explicitly while
# the tag still exists and only then drop it.
# NOTE(review): the 4th deploy_version argument (900) is presumably a timeout — confirm.
wr.deploy_version(RECIPE, D, CANON, 900)
if wr.wait_healthy(wr.SPECS[APP]):
    wr.write_last_good(RECIPE, CANON)
else:
    fails.append("D kc not healthy after redeploy to canonical")
git("tag", "-d", T_GOOD, check=False)
res3 = reconcile()  # fake tags gone and app on CANON: expected to find nothing to upgrade
print(f"[D] result={res3!r} TYPE={type_env()} last_good={last_good()} health={health()}")
sso.delete_keycloak_realm(D, MARKER)
if type_env() != CANON:
    fails.append(f"D not restored to canonical TYPE: {type_env()}")
if health() != 200:
    fails.append(f"D final health={health()}")
fin = realms()
if MARKER in fin:
    fails.append("D marker not cleaned")
print(f"=== END: TYPE={type_env()} last_good={last_good()} health={health()} realms={fin}")

print("\nRESULT:", "FAIL: " + "; ".join(fails) if fails else "PASS — WC1.1 marquee: healthy upgrade commits, broken latest rolls back with marker realm (data) INTACT, last_good not advanced, alert correct, canonical restored")
sys.exit(1 if fails else 0)
|
||||
@ -1,81 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY check6 — WC1.2 pre-deploy SAFETY gate (live, cold). A hold must do NO deploy.
|
||||
(a) MAJOR fake tag 11.0.0+27.0.0 -> held-major, alert, kc untouched (TYPE same, 200)
|
||||
(b) minor tag 10.7.2+26.6.3 + releaseNotes flagging manual migration -> held-manual-migration,
|
||||
alert CARRIES the notes, kc untouched
|
||||
Leaves the recipe + kc exactly as found (canonical, no fake tags/notes)."""
|
||||
import json, os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import warm_reconcile as wr
|
||||
|
||||
D = "warm-keycloak.ci.commoninternet.net"
|
||||
RDIR = os.path.expanduser("~/.abra/recipes/keycloak")
|
||||
GOOD = "04400df"; CANON = "10.7.1+26.6.2"
|
||||
T_MAJOR = "11.0.0+27.0.0"; T_MINOR = "10.7.2+26.6.3"
|
||||
NOTES = os.path.join(RDIR, "releaseNotes", T_MINOR + ".md")
|
||||
ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts")
|
||||
fails = []
|
||||
|
||||
def git(*a, check=True):
    """Invoke git in the keycloak recipe directory (RDIR) under a fixed adversary identity.

    Args:
        *a: git sub-command and arguments.
        check: passed through to ``subprocess.run``; true means a non-zero exit raises
            ``subprocess.CalledProcessError``.

    Returns:
        ``subprocess.CompletedProcess`` with text stdout/stderr captured.
    """
    identity = ["-c", "user.email=adv@cc-ci", "-c", "user.name=adv"]
    command = ["git", "-C", RDIR] + identity + list(a)
    return subprocess.run(command, capture_output=True, text=True, check=check)
|
||||
def reconcile():
    """Run warm_reconcile.py for keycloak in a subprocess and return its reported result.

    The child runs with ``CCCI_SKIP_FETCH=1`` added to the inherited environment and a
    300-second timeout.

    Returns:
        The text after ``RECONCILE RESULT:`` from the first matching stdout line; otherwise a
        ``<no result rc=N>`` string followed by the last 300 characters of stdout and stderr.
    """
    script = os.path.join(os.path.dirname(__file__), "warm_reconcile.py")
    child_env = dict(os.environ)
    child_env["CCCI_SKIP_FETCH"] = "1"
    proc = subprocess.run(["python3", script, "keycloak"],
                          capture_output=True, text=True, env=child_env, timeout=300)
    matches = (ln for ln in proc.stdout.splitlines() if ln.startswith("RECONCILE RESULT:"))
    first = next(matches, None)
    if first is not None:
        return first.split(":", 1)[1].strip()
    return f"<no result rc={proc.returncode}> {proc.stdout[-300:]} {proc.stderr[-300:]}"
|
||||
def alerts_now():
    """Return the set of file names currently in ALERTS (empty set if the directory is absent)."""
    if not os.path.isdir(ALERTS):
        return set()
    return set(os.listdir(ALERTS))
|
||||
|
||||
# Record the starting version and last_good; the test only makes sense from canonical.
type0 = wr.current_version(D)
lg0 = wr.read_last_good("keycloak")
start_health = wr.health_code(wr.SPECS["keycloak"])
print(f"START TYPE={type0} last_good={lg0} health={start_health}")
assert type0 == CANON, f"precond: kc not canonical ({type0})"
|
||||
|
||||
# (a) MAJOR -> held-major, no deploy
git("reset", "--hard", GOOD)
git("tag", "-a", "-m", "adv", T_MAJOR, GOOD + "^{commit}", check=False)
try:
    a0 = alerts_now()
    res_a = reconcile()
    new_a = sorted(alerts_now() - a0)
    print(f"(a) MAJOR {T_MAJOR}: result={res_a!r} TYPE={wr.current_version(D)} new_alerts={new_a}")
    if not res_a.startswith("held-major"):
        fails.append(f"(a) not held-major: {res_a}")
    # A hold must not deploy: the running version has to stay canonical.
    if wr.current_version(D) != CANON:
        fails.append(f"(a) kc TYPE changed to {wr.current_version(D)}")
    hm = [x for x in new_a if "held-major" in x]
    if not hm:
        fails.append("(a) no held-major alert")
    else:
        with open(os.path.join(ALERTS, hm[0])) as fh:
            rec = json.load(fh)
        if rec.get("latest") != T_MAJOR:
            fails.append(f"(a) alert latest={rec.get('latest')}")
        if "release_notes" not in rec:
            fails.append("(a) alert missing release_notes field")
finally:
    # Remove the fake major tag even if reconcile() raised (e.g. on timeout), so it cannot
    # linger in the recipe checkout.
    git("tag", "-d", T_MAJOR, check=False)
|
||||
|
||||
# (b) minor + manual-migration notes -> held-manual-migration, no deploy, alert carries notes
git("tag", "-a", "-m", "adv", T_MINOR, GOOD + "^{commit}", check=False)
os.makedirs(os.path.dirname(NOTES), exist_ok=True)
# Write through a context manager so the notes are flushed and closed before the reconciler
# subprocess reads them.
with open(NOTES, "w") as fh:
    fh.write("# 10.7.2\n\nThis release requires a **manual migration**: run the DB upgrade by hand.\n")
b0 = alerts_now()
res_b = reconcile()
new_b = sorted(alerts_now() - b0)
print(f"(b) MINOR+migration {T_MINOR}: result={res_b!r} TYPE={wr.current_version(D)} new_alerts={new_b}")
if not res_b.startswith("held-manual-migration"):
    fails.append(f"(b) not held-manual-migration: {res_b}")
if wr.current_version(D) != CANON:
    fails.append(f"(b) kc TYPE changed to {wr.current_version(D)}")
hmm = [x for x in new_b if "manual-migration" in x]
if not hmm:
    fails.append("(b) no held-manual-migration alert")
else:
    with open(os.path.join(ALERTS, hmm[0])) as fh:
        rec = json.load(fh)
    # The alert must carry the release notes text, not merely a release_notes key.
    if "manual migration" not in (rec.get("release_notes") or "").lower():
        fails.append(f"(b) alert release_notes lacks the notes: {rec.get('release_notes')!r}")
|
||||
|
||||
# cleanup: drop the fake minor tag and its notes file, then hard-reset the checkout to GOOD
git("tag", "-d", T_MINOR, check=False)
if os.path.exists(NOTES):
    os.remove(NOTES)
git("reset", "--hard", GOOD)

fake_names = (T_MAJOR, T_MINOR)
faketags = [tag for tag in git("tag").stdout.split() if tag in fake_names]
print(f"END TYPE={wr.current_version(D)} last_good={wr.read_last_good('keycloak')} "
      f"health={wr.health_code(wr.SPECS['keycloak'])} faketags={faketags} notes_exists={os.path.exists(NOTES)}")
if wr.current_version(D) != CANON:
    fails.append(f"END not canonical: {wr.current_version(D)}")
if faketags:
    fails.append(f"leftover fake tags {faketags}")
if wr.read_last_good("keycloak") != CANON:
    fails.append(f"last_good moved to {wr.read_last_good('keycloak')}")

if fails:
    verdict = "FAIL: " + "; ".join(fails)
else:
    verdict = "PASS — WC1.2 holds major + manual-migration with notes-carrying alert; kc untouched (no deploy/last_good churn)"
print("\nRESULT:", verdict)
sys.exit(1 if fails else 0)
|
||||
@ -1,87 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY WC2+WC3 cold reproduce (data-warm canonical round-trip). Run via cc-ci-run from
|
||||
/root/cc-ci-adv-verify. Drives the cycle MYSELF (does not trust the Builder's single run):
|
||||
1. deploy_canonical -> reattach the retained volume; confirm the Builder's known-good marker is served
|
||||
2. WC2: write MY OWN marker -> undeploy_keep_volume (assert app DOWN + volume RETAINED) ->
|
||||
deploy_canonical -> MY marker SURVIVES (data-warm reattach)
|
||||
3. WC3: mutate (delete the known-good marker) -> undeploy -> warmsnap.restore -> deploy ->
|
||||
known-good marker BACK and my marker GONE (restore round-trips the exact known-good)
|
||||
4. leave it idle (as found): undeploy_keep_volume; content == known-good
|
||||
"""
|
||||
import os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import canonical, lifecycle, warmsnap
|
||||
|
||||
R = "custom-html"
|
||||
D = "warm-custom-html.ci.commoninternet.net"
|
||||
HTML = "/usr/share/nginx/html"
|
||||
KG = "WC2-DATA-MARKER-7f3a9c" # the Builder's known-good marker string
|
||||
A = "ADV-OWN-MARKER-a1b2c3" # my own marker
|
||||
fails = []
|
||||
|
||||
def sh(*cmd):
    """Run ``cmd`` in the ``app`` service of D via ``lifecycle.exec_in_app`` and return its result."""
    argv = list(cmd)
    return lifecycle.exec_in_app(D, argv, service="app")
|
||||
def has_service():
    """Return True when any name printed by ``docker service ls`` contains "custom-html".

    The listing is fetched once: ``any()`` over an empty listing is already False, so a second
    ``docker service ls`` call adds nothing and could observe a different state.
    """
    out = subprocess.run(["docker", "service", "ls", "--format", "{{.Name}}"],
                         capture_output=True, text=True).stdout
    return any("custom-html" in name for name in out.split())
|
||||
def has_volume():
    """Return True when a docker volume name contains "warm-custom-html" and ends in "_content"."""
    listing = subprocess.run(["docker", "volume", "ls", "--format", "{{.Name}}"],
                             capture_output=True, text=True)
    for name in listing.stdout.split():
        if "warm-custom-html" in name and name.endswith("_content"):
            return True
    return False
|
||||
def serving():
    """Return the HTTP status code (as a string) curl reports for ``https://D/``.

    ``--resolve`` maps D:443 to 127.0.0.1, ``-k`` skips certificate verification, and the
    request is capped at 10 seconds.
    """
    cmd = ["curl", "-sk", "--resolve", f"{D}:443:127.0.0.1", "-o", "/dev/null",
           "-w", "%{http_code}", "--max-time", "10", f"https://{D}/"]
    probe = subprocess.run(cmd, capture_output=True, text=True)
    return probe.stdout.strip()
|
||||
|
||||
print(f"START canonical={canonical.read_registry(R)} has_canonical={canonical.has_canonical(R)} "
      f"has_snapshot={warmsnap.has_snapshot(R)} service={has_service()} volume={has_volume()}")
if not warmsnap.has_snapshot(R):
    fails.append("no snapshot present at start")

# 1. reattach + confirm Builder's known-good marker survived their run
canonical.deploy_canonical(R)
listing = sh("ls", HTML)
# grep exits 1 when nothing matches; `|| true` keeps the "marker missing" case on the
# reporting path below regardless of how exec_in_app treats a non-zero exit.
# NOTE(review): presumably exec_in_app raises on non-zero exit — confirm.
kg_files = sh("sh", "-c", f"grep -rl {KG} {HTML} || true").split()
print(f"[1] deployed; serving={serving()} html={listing.split()} kg_marker_files={kg_files}")
if not kg_files:
    fails.append("Builder known-good marker not found after reattach")
kg_file = kg_files[0] if kg_files else None
if serving() != "200":
    fails.append(f"[1] not serving 200: {serving()}")
|
||||
|
||||
# 2. WC2: my own marker through undeploy-keep-volume -> redeploy
own_marker_path = f"{HTML}/adv_own.txt"
sh("sh", "-c", f"echo {A} > {HTML}/adv_own.txt")
got = sh("cat", own_marker_path).strip()
print(f"[2] wrote my marker: {got!r}")

canonical.undeploy_keep_volume(R)
svc_down = not has_service()
vol_kept = has_volume()
registry_status = (canonical.read_registry(R) or {}).get("status")
print(f"[2] after undeploy_keep_volume: service_down={svc_down} volume_retained={vol_kept} "
      f"registry_status={registry_status}")
if not svc_down:
    fails.append("[2] app still has a service after undeploy_keep_volume")
if not vol_kept:
    fails.append("[2] content volume NOT retained (data-warm broken)")

# Redeploy and read the marker back: it must have survived on the retained volume.
canonical.deploy_canonical(R)
survived = sh("cat", own_marker_path).strip()
print(f"[2] after redeploy: my marker={survived!r}")
if survived != A:
    fails.append(f"[2] my marker did NOT survive data-warm round-trip: {survived!r}")
|
||||
|
||||
# 3. WC3: mutate (delete known-good marker) -> undeploy -> restore -> deploy -> known-good BACK
def _kg_hits():
    """Return the files under HTML that contain the known-good marker string KG.

    grep exits 1 when nothing matches, which is the expected outcome right after the marker
    is deleted; `|| true` turns that into an empty listing instead of a command failure.
    NOTE(review): presumably exec_in_app raises on non-zero exit — confirm.
    """
    return sh("sh", "-c", f"grep -rl {KG} {HTML} || true").split()

if kg_file:
    sh("rm", "-f", kg_file)
mutated_gone = not _kg_hits()
print(f"[3] mutated: deleted known-good marker file {kg_file}; gone_now={mutated_gone}")
canonical.undeploy_keep_volume(R)
warmsnap.restore(R, D)
canonical.deploy_canonical(R)
kg_back = bool(_kg_hits())
a_present = "adv_own.txt" in sh("ls", HTML).split()
print(f"[3] after restore+deploy: known_good_back={kg_back} my_marker_still_there={a_present}")
if not kg_back:
    fails.append("[3] known-good marker NOT restored (WC3 restore failed)")
# The restore must reproduce the snapshot exactly, so the post-snapshot marker must be gone.
if a_present:
    fails.append("[3] my marker still present after restore — restore not exact known-good")
|
||||
|
||||
# 4. leave idle as found
canonical.undeploy_keep_volume(R)
print(f"END registry={canonical.read_registry(R)} service={has_service()} volume={has_volume()}")
if has_service():
    fails.append("[4] left a running service (should be idle)")
if not has_volume():
    fails.append("[4] volume not retained at end")

if fails:
    verdict = "FAIL: " + "; ".join(fails)
else:
    verdict = ("PASS — WC2 data-warm round-trip (my own marker survives undeploy-keep-volume+reattach) + WC3 "
               "restore round-trips the exact known-good; left idle with volume retained")
print("\nRESULT:", verdict)
sys.exit(1 if fails else 0)
|
||||
@ -1,57 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY WC3 restore round-trip + recover custom-html canonical to known-good idle.
|
||||
State on entry (left by adv_check_wc2 crash): app UP (warm), known-good marker file deleted, my
|
||||
marker adv_own.txt present. This driver: restore the known-good snapshot -> known-good marker BACK,
|
||||
my marker GONE (restore = exact known-good) -> leave idle. Also diagnoses HTTPS serving."""
|
||||
import os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import canonical, lifecycle, warmsnap
|
||||
|
||||
R = "custom-html"; D = "warm-custom-html.ci.commoninternet.net"; HTML = "/usr/share/nginx/html"
|
||||
KG_FILE = "wc2-marker.txt"; KG_STR = "WC2-DATA-MARKER-7f3a9c"; A_FILE = "adv_own.txt"
|
||||
fails = []
|
||||
def sh(*c):
    """Run the command ``c`` in the ``app`` service of D via ``lifecycle.exec_in_app``."""
    command = list(c)
    return lifecycle.exec_in_app(D, command, service="app")
|
||||
def ls():
    """Return the whitespace-split output of ``ls HTML`` run inside the app."""
    raw = sh("ls", HTML)
    return raw.split()
|
||||
def has_service():
    """Return True when any name printed by ``docker service ls`` contains "custom-html"."""
    listing = subprocess.run(["docker", "service", "ls", "--format", "{{.Name}}"],
                             capture_output=True, text=True)
    for name in listing.stdout.split():
        if "custom-html" in name:
            return True
    return False
|
||||
def has_volume():
    """Return True when a docker volume name contains "warm-custom-html" and ends in "_content"."""
    listing = subprocess.run(["docker", "volume", "ls", "--format", "{{.Name}}"],
                             capture_output=True, text=True)
    names = listing.stdout.split()
    matching = [n for n in names if "warm-custom-html" in n and n.endswith("_content")]
    return bool(matching)
|
||||
def code(path):
    """Return the HTTP status code (as a string) curl reports for ``https://D<path>``.

    D:443 is resolved to 127.0.0.1, certificate verification is skipped, and the request is
    capped at 10 seconds.
    """
    cmd = ["curl", "-sk", "--resolve", f"{D}:443:127.0.0.1", "-o", "/dev/null", "-w", "%{http_code}",
           "--max-time", "10", f"https://{D}{path}"]
    probe = subprocess.run(cmd, capture_output=True, text=True)
    return probe.stdout.strip()
|
||||
|
||||
# ensure app is up to inspect entry state (it should be, from the crash)
already_up = has_service()
if not already_up:
    canonical.deploy_canonical(R)
entry = ls()
print(f"ENTRY html={entry} (expect adv_own.txt present, {KG_FILE} deleted)")

# serving diagnosis: probe the root, the index and both marker files, in that order
root_code = code("/")
index_code = code("/index.html")
kg_code = code("/" + KG_FILE)
own_code = code("/" + A_FILE)
print(f"SERVING: /={root_code} /index.html={index_code} /{KG_FILE}={kg_code} /{A_FILE}={own_code}")
|
||||
|
||||
# WC3 restore round-trip: undeploy (volume kept), restore the snapshot, deploy again
canonical.undeploy_keep_volume(R)
warmsnap.restore(R, D)
canonical.deploy_canonical(R)

after = ls()
kg_back = KG_FILE in after
a_gone = A_FILE not in after
# Only read the marker when it exists; otherwise compare against an empty string.
if kg_back:
    kg_content = sh("cat", f"{HTML}/{KG_FILE}").strip()
else:
    kg_content = ""
print(f"AFTER RESTORE html={after} kg_back={kg_back} kg_content={kg_content!r} my_marker_gone={a_gone}")

if not kg_back:
    fails.append("WC3: known-good marker NOT restored")
if KG_STR not in kg_content:
    fails.append(f"WC3: restored marker content wrong: {kg_content!r}")
if not a_gone:
    fails.append("WC3: my marker still present — restore not exact known-good")
|
||||
|
||||
# leave idle as found
canonical.undeploy_keep_volume(R)
# Fall back to an empty record when the registry has no entry, so a missing entry is
# reported through `fails` (status=None) rather than raising AttributeError on None.
fin = canonical.read_registry(R) or {}
print(f"END registry_status={fin.get('status')} version={fin.get('version')} service={has_service()} "
      f"volume={has_volume()} snapshot={warmsnap.has_snapshot(R)}")
if has_service():
    fails.append("END: service still running (should be idle)")
if not has_volume():
    fails.append("END: volume not retained")
if fin.get("status") != "idle":
    fails.append(f"END: status={fin.get('status')} (want idle)")

print("\nRESULT:", "FAIL: "+"; ".join(fails) if fails else
      "PASS — WC3 restore round-trips the EXACT known-good (marker back, content correct, my mutation gone); canonical left idle+retained")
sys.exit(1 if fails else 0)
|
||||
@ -1,49 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY: after the --quick FAIL run, independently verify the rollback restored the EXACT
|
||||
known-good (data + healthy app), the known-good was NOT promoted, then leave idle."""
|
||||
import os, subprocess, sys, hashlib
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import canonical, lifecycle, warmsnap
|
||||
|
||||
R="custom-html"; D="warm-custom-html.ci.commoninternet.net"; HTML="/usr/share/nginx/html"
|
||||
KG_STR="WC2-DATA-MARKER-7f3a9c"; BASE_SNAP_SHA="9ef62bdf11c6060c"
|
||||
fails=[]
|
||||
def sh(*c):
    """Run the command ``c`` in the ``app`` service of D via ``lifecycle.exec_in_app``."""
    args = list(c)
    return lifecycle.exec_in_app(D, args, service="app")
|
||||
def snap_sha(root="/var/lib/ci-warm", recipe=None):
    """Return the first 16 hex chars of the SHA-256 of a recipe's snapshot volume tar.

    Args:
        root: directory holding the per-recipe warm state; defaults to the path the script
            has always used.
        recipe: recipe name; defaults to the module-level ``R``.

    Returns:
        The truncated digest of the lexicographically first ``*.tar`` under
        ``<root>/<recipe>/snapshot/volumes``, or ``"NONE"`` when there is no tar.
    """
    import glob

    if recipe is None:
        recipe = R
    # Sort so the chosen tar is deterministic; glob makes no ordering promise.
    tars = sorted(glob.glob(f"{root}/{recipe}/snapshot/volumes/*.tar"))
    if not tars:
        return "NONE"
    digest = hashlib.sha256()
    # Hash in chunks and close the handle: snapshot tars may be large.
    with open(tars[0], "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()[:16]
|
||||
def code(p):
    """Return the HTTP status code (as a string) curl reports for ``https://D<p>``.

    D:443 is resolved to 127.0.0.1, certificate verification is skipped, and the request is
    capped at 10 seconds.
    """
    url = f"https://{D}{p}"
    cmd = ["curl", "-sk", "--resolve", f"{D}:443:127.0.0.1", "-o", "/dev/null", "-w", "%{http_code}",
           "--max-time", "10", url]
    return subprocess.run(cmd, capture_output=True, text=True).stdout.strip()
|
||||
|
||||
# The registry record and snapshot digest must be unchanged from the baseline.
reg = canonical.read_registry(R) or {}
ssha = snap_sha()
reg_version = reg.get("version")
reg_status = reg.get("status")
print(f"registry version={reg_version} status={reg_status} snapshot_sha={ssha} (baseline {BASE_SNAP_SHA})")
if reg_version != "1.11.0+1.29.0":
    fails.append(f"known-good promoted/changed: {reg_version}")
if reg_status != "idle":
    fails.append(f"not idle: {reg_status}")
if ssha != BASE_SNAP_SHA:
    fails.append(f"snapshot changed: {ssha} != {BASE_SNAP_SHA}")
|
||||
|
||||
# bring canonical up, confirm restored data + healthy + non-broken image
canonical.deploy_canonical(R)
lifecycle.wait_healthy(D, ok_codes=(200,), path="/", deploy_timeout=300, http_timeout=20)

html = sh("ls", HTML).split()
kg_files = [name for name in html if name == "wc2-marker.txt"]
if kg_files:
    kg_content = sh("cat", f"{HTML}/wc2-marker.txt").strip()
else:
    kg_content = ""
img = subprocess.run(["docker", "service", "ls", "--format", "{{.Name}} {{.Image}}"],
                     capture_output=True, text=True).stdout
serving = code("/")
print(f"AFTER deploy: html={html} kg_content={kg_content!r} serving/={serving}")
custom_html_lines = [l for l in img.splitlines() if "custom-html" in l]
print(f"image: {custom_html_lines}")

if not kg_files:
    fails.append("rollback did NOT restore known-good marker file")
if KG_STR not in kg_content:
    fails.append(f"restored marker content wrong: {kg_content!r}")
if serving != "200":
    fails.append(f"rolled-back app not serving 200: {serving}")
# The service listing must show the known-good image and none of the broken tag.
if "99.99.99-doesnotexist" in img:
    fails.append("BROKEN image still deployed after rollback")
if "nginx:1.29.0" not in img:
    fails.append(f"canonical not on known-good image: {img!r}")
|
||||
|
||||
# leave idle
canonical.undeploy_keep_volume(R)
end_status = (canonical.read_registry(R) or {}).get("status")
service_names = subprocess.run(["docker", "service", "ls", "--format", "{{.Name}}"],
                               capture_output=True, text=True).stdout
still_running = "custom-html" in service_names
print(f"END status={end_status} "
      f"service={still_running}")

if fails:
    verdict = "FAIL: " + "; ".join(fails)
else:
    verdict = "PASS — --quick FAIL rolled back to EXACT known-good (marker+content restored, app healthy on nginx:1.29.0, broken image gone), known-good UNCHANGED+snapshot byte-identical (never promoted); left idle"
print("\nRESULT:", verdict)
sys.exit(1 if fails else 0)
|
||||
@ -1,43 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Recover warm keycloak to canonical 10.7.1+26.6.2 healthy after adv_check5's cleanup-script bug
|
||||
left it undeployed on TYPE=10.7.9 (a tag I deleted). NOT a reconciler defect — my test's fault.
|
||||
Steps: recreate 10.7.9 tag (so abra can resolve the current from-version) -> deploy 10.7.9 (kc back
|
||||
up, marker present) -> delete marker realm -> deploy canonical 10.7.1 -> set last_good -> drop 10.7.9
|
||||
tag -> verify clean."""
|
||||
import os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from harness import sso
|
||||
import warm_reconcile as wr
|
||||
|
||||
D = "warm-keycloak.ci.commoninternet.net"
|
||||
RDIR = os.path.expanduser("~/.abra/recipes/keycloak")
|
||||
GOOD = "04400df"; CANON = "10.7.1+26.6.2"; T9 = "10.7.9+26.6.2"; MARKER = "advmarker-rollback"
|
||||
|
||||
def git(*a, check=True):
    """Run git in the keycloak recipe checkout (RDIR) with a fixed adversary identity.

    Args:
        *a: git sub-command and arguments.
        check: forwarded to ``subprocess.run``; true means a non-zero exit raises
            ``subprocess.CalledProcessError``.

    Returns:
        ``subprocess.CompletedProcess`` with text stdout/stderr captured.
    """
    base = ["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv"]
    return subprocess.run(base + list(a), capture_output=True, text=True, check=check)
|
||||
|
||||
print(f"START TYPE={wr.current_version(D)} health={wr.health_code(wr.SPECS['keycloak'])}")

# Step 1: recreate the deleted 10.7.9 tag on the good commit and bring keycloak up on it.
git("reset", "--hard", GOOD)
git("tag", "-a", "-m", "adv", T9, f"{GOOD}^{{commit}}", check=False)
print("recreated 10.7.9 tag; deploying 10.7.9 to bring kc back...")
wr.deploy_version("keycloak", D, T9, 900)
assert wr.wait_healthy(wr.SPECS["keycloak"]), "kc not healthy on 10.7.9"
realms = sorted(sso.list_realms(D))
print(f"kc healthy on 10.7.9; realms={realms}")

# Step 2: remove the leftover marker realm if it is still there.
if MARKER in realms:
    sso.delete_keycloak_realm(D, MARKER)
    print("deleted marker realm")

# Step 3: move to canonical, record it as last_good, then drop the temporary tag.
print("deploying canonical 10.7.1...")
wr.deploy_version("keycloak", D, CANON, 900)
assert wr.wait_healthy(wr.SPECS["keycloak"]), "kc not healthy on canonical"
wr.write_last_good("keycloak", CANON)
git("tag", "-d", T9, check=False)
git("reset", "--hard", GOOD)

# Step 4: verify the end state is clean.
final_realms = sorted(sso.list_realms(D))
fake_names = (T9, "10.7.10+26.6.2")
faketags = [tag for tag in git("tag").stdout.split() if tag in fake_names]
print(f"DONE TYPE={wr.current_version(D)} last_good={wr.read_last_good('keycloak')} "
      f"health={wr.health_code(wr.SPECS['keycloak'])} realms={final_realms} faketags={faketags}")
# Lazily evaluated in order, so a failed check skips the remaining probes.
checks = (
    lambda: wr.current_version(D) == CANON,
    lambda: wr.read_last_good("keycloak") == CANON,
    lambda: wr.health_code(wr.SPECS["keycloak"]) == 200,
    lambda: final_realms == ["master"],
    lambda: not faketags,
)
ok = all(check() for check in checks)
print("RECOVER:", "OK" if ok else "INCOMPLETE")
sys.exit(0 if ok else 1)
|
||||
@ -1,81 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY traefik WC1.1 destructive rollback cold proof (LOW TLS risk).
|
||||
Stage a fake NEWER traefik tag whose compose fails abra LINT (a bare-int env entry → "must be a
|
||||
string"), so the broken deploy is REJECTED before the running proxy is touched. The reconciler then
|
||||
exercises the STATELESS rollback path: deploy(latest=broken) fails → redeploy last_good 5.1.1+v3.6.15
|
||||
(no snapshot — traefik is stateless) → healthy → rollback alert. Asserts traefik stays serving
|
||||
(ci.commoninternet.net=200) + keycloak-through-traefik=200 throughout/after, last_good unchanged, a
|
||||
*-rollback.json alert. DEFENSIVE: finally always restores traefik to 5.1.1+v3.6.15 healthy + cleans
|
||||
the fake tag. Manual recovery if needed: abra app deploy traefik.ci.commoninternet.net 5.1.1+v3.6.15 -o -n -f"""
|
||||
import os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import warm_reconcile as wr
|
||||
|
||||
RDIR = os.path.expanduser("~/.abra/recipes/traefik")
|
||||
DOMAIN = "traefik.ci.commoninternet.net"; GOOD = "5.1.1+v3.6.15"; FAKE = "5.2.0+v3.6.15"
|
||||
ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts")
|
||||
fails = []
|
||||
def git(*a, check=True):
    """Run a git command inside the recipe checkout at ``RDIR``.

    A fixed committer identity is passed via ``-c`` on every call, so the
    commit and annotated tag made by this script do not depend on the host's
    git configuration.

    Args:
        *a: git sub-command and its arguments, e.g. ``("tag", "-d", FAKE)``.
        check: forwarded to ``subprocess.run``; when true a non-zero exit
            raises ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.
    """
    cmd = ["git", "-C", RDIR, "-c", "user.email=adv@cc-ci", "-c", "user.name=adv"]
    cmd.extend(a)
    return subprocess.run(cmd, capture_output=True, text=True, check=check)
|
||||
def routed(host="ci.commoninternet.net", path="/"):
    """Return the HTTP status code curl reports for ``https://{host}{path}``.

    ``--resolve`` pins ``host:443`` to 127.0.0.1, so the request goes to
    whatever listens on local port 443 rather than through DNS. Certificate
    verification is off (``-k``) and the request is capped at 10 seconds.

    Args:
        host: host name used for the URL and the ``--resolve`` entry.
        path: URL path to request.

    Returns:
        curl's ``%{http_code}`` output as a stripped string (e.g. ``"200"``).
        curl's exit status is not checked.
    """
    curl_cmd = [
        "curl", "-sk",
        "--resolve", f"{host}:443:127.0.0.1",
        "-o", "/dev/null",
        "-w", "%{http_code}",
        "--max-time", "10",
        f"https://{host}{path}",
    ]
    proc = subprocess.run(curl_cmd, capture_output=True, text=True)
    return proc.stdout.strip()
|
||||
def reconcile():
    """Run ``warm_reconcile.py traefik`` as a child process and return its result.

    The child inherits the current environment plus ``CCCI_SKIP_FETCH=1``
    (presumably so the reconciler does not fetch and thereby drop the locally
    staged fake tag; confirm against warm_reconcile). The last 2000 characters
    of its stdout and the last 500 of its stderr are echoed for the log.

    Returns:
        The text after ``RECONCILE RESULT:`` on the first stdout line that
        starts with that marker, or ``"<no result rc=N>"`` if there is none.

    Raises:
        subprocess.TimeoutExpired: if the child runs longer than 1200 seconds.
    """
    script = os.path.join(os.path.dirname(__file__), "warm_reconcile.py")
    child_env = dict(os.environ)
    child_env["CCCI_SKIP_FETCH"] = "1"
    proc = subprocess.run(
        ["python3", script, "traefik"],
        capture_output=True,
        text=True,
        env=child_env,
        timeout=1200,
    )
    print(proc.stdout[-2000:])
    print(proc.stderr[-500:], file=sys.stderr)
    results = (
        line.split(":", 1)[1].strip()
        for line in proc.stdout.splitlines()
        if line.startswith("RECONCILE RESULT:")
    )
    return next(results, f"<no result rc={proc.returncode}>")
|
||||
|
||||
def _alert_names():
    """Return the set of file names currently in ALERTS (empty if it does not exist)."""
    return set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set()


# Remember where the recipe checkout was so the finally block can put it back.
orig_head = git("rev-parse", "HEAD").stdout.strip()
print(f"START traefik TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} "
      f"ci={routed()} kc-through={routed('warm-keycloak.ci.commoninternet.net','/realms/master')} orig_head={orig_head[:8]}")
try:
    # Stage a fake NEWER tag whose compose carries a lint-breaking env entry:
    # an inline mapping where a plain string is required.
    git("checkout", "-fq", GOOD)
    cf = os.path.join(RDIR, "compose.yml")
    with open(cf, encoding="utf-8") as fh:
        txt = fh.read()
    # Inject the broken entry into the first 'environment:' block that starts
    # with DASHBOARD_ENABLED.
    patched = txt.replace(" environment:\n - DASHBOARD_ENABLED",
                          " environment:\n - {advbad: brokenmapping}\n - DASHBOARD_ENABLED", 1)
    if patched == txt:
        # Without this guard the failure would only surface later as an opaque
        # "nothing to commit" CalledProcessError from git commit.
        raise RuntimeError(f"environment marker not found in {cf}; cannot stage the broken compose")
    with open(cf, "w", encoding="utf-8") as fh:
        fh.write(patched)
    git("commit", "-aqm", "adv: lint-breaking env for traefik rollback proof")
    broken = git("rev-parse", "HEAD").stdout.strip()
    git("tag", "-a", "-m", "adv", FAKE, broken)
    git("checkout", "-fq", orig_head)  # leave working tree on the good HEAD; tag keeps broken commit
    print(f"staged fake {FAKE}@{broken[:8]} (lint-breaking); reconcile (expect rollback->{GOOD})...")

    # Diff the alerts directory around the reconcile to find what it wrote.
    a0 = _alert_names()
    res = reconcile()
    new = sorted(_alert_names() - a0)
    ci, kc = routed(), routed("warm-keycloak.ci.commoninternet.net", "/realms/master")
    print(f"RESULT={res!r} TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={ci} kc-through={kc} new_alerts={new}")

    if not res.startswith("rolled-back:"):
        fails.append(f"not rolled-back: {res}")
    last_good = wr.read_last_good("traefik")
    if last_good != GOOD:
        fails.append(f"last_good changed: {last_good}")
    if ci != "200":
        fails.append(f"traefik not serving after rollback: ci={ci}")
    if kc != "200":
        fails.append(f"keycloak-through-traefik not 200: {kc}")

    rb = [a for a in new if "rollback" in a]
    if not rb:
        fails.append("no rollback alert")
    else:
        import json
        with open(os.path.join(ALERTS, rb[0]), encoding="utf-8") as fh:
            rec = json.load(fh)
        print(f"rollback alert: {rec}")
        if rec.get("attempted") != FAKE:
            fails.append(f"alert attempted={rec.get('attempted')}")
        if rec.get("last_good") != GOOD:
            fails.append(f"alert last_good={rec.get('last_good')}")
        if rec.get("recovered") is not True:
            fails.append(f"alert recovered={rec.get('recovered')}")
finally:
    # DEFENSIVE recovery: delete fake tag, restore recipe HEAD, ensure traefik on GOOD + healthy.
    # Both git steps are best-effort: a failing checkout must not raise here,
    # otherwise the traefik redeploy below would be skipped.
    git("tag", "-d", FAKE, check=False)
    restored = git("checkout", "-fq", orig_head, check=False)
    if restored.returncode != 0:
        print(f"!! could not restore recipe HEAD {orig_head[:8]}: {restored.stderr.strip()}",
              file=sys.stderr, flush=True)
        fails.append(f"recipe HEAD not restored to {orig_head[:8]}")
    if wr.current_version(DOMAIN) != GOOD or routed() != "200":
        print("!! defensive recovery: redeploying traefik GOOD", flush=True)
        try:
            wr.deploy_version("traefik", DOMAIN, GOOD, 600)
            wr.wait_healthy(wr.SPECS["traefik"])
        except Exception as e:
            # Best-effort: report and fall through to the final-state checks.
            print(f"!! recovery deploy error: {e}")
|
||||
# Final-state verification: traefik must be serving and the fake tag must be gone.
fin_ci = routed()
fin_kc = routed("warm-keycloak.ci.commoninternet.net", "/realms/master")
fake_left = [tag for tag in git("tag").stdout.split() if tag == FAKE]
print(f"END TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={fin_ci} kc-through={fin_kc} fake_tag_left={fake_left}")
if fin_ci != "200":
    fails.append(f"FINAL traefik not serving: {fin_ci}")
if fake_left:
    fails.append("fake tag not cleaned")

# Summarise: any collected failure turns the run into FAIL with exit status 1.
if fails:
    verdict = "FAIL: " + "; ".join(fails)
else:
    verdict = "PASS — traefik WC1.1 stateless rollback: broken-latest deploy rejected → rolled back to last_good 5.1.1+v3.6.15, traefik+routes healthy (no TLS outage), alert written, cert/config preserved"
print("\nRESULT:", verdict)
sys.exit(1 if fails else 0)
|
||||
Reference in New Issue
Block a user