review(2): resume checkpoint — no gate pending; drone block genuine (/etc/timezone still absent on host); leftover drone smoke stack flagged (housekeeping); immich P4-restore still OPEN, unsigned
This commit is contained in:
81
runner/adv_traefik_rollback.py
Normal file
81
runner/adv_traefik_rollback.py
Normal file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env python3
|
||||
"""ADVERSARY traefik WC1.1 destructive rollback cold proof (LOW TLS risk).
|
||||
Stage a fake NEWER traefik tag whose compose fails abra LINT (a non-string mapping env entry → "must be a
|
||||
string"), so the broken deploy is REJECTED before the running proxy is touched. The reconciler then
|
||||
exercises the STATELESS rollback path: deploy(latest=broken) fails → redeploy last_good 5.1.1+v3.6.15
|
||||
(no snapshot — traefik is stateless) → healthy → rollback alert. Asserts traefik stays serving
|
||||
(ci.commoninternet.net=200) + keycloak-through-traefik=200 throughout/after, last_good unchanged, a
|
||||
*-rollback.json alert. DEFENSIVE: finally always restores traefik to 5.1.1+v3.6.15 healthy + cleans
|
||||
the fake tag. Manual recovery if needed: abra app deploy traefik.ci.commoninternet.net 5.1.1+v3.6.15 -o -n -f"""
|
||||
import os, subprocess, sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import warm_reconcile as wr
|
||||
|
||||
RDIR = os.path.expanduser("~/.abra/recipes/traefik")
|
||||
DOMAIN = "traefik.ci.commoninternet.net"; GOOD = "5.1.1+v3.6.15"; FAKE = "5.2.0+v3.6.15"
|
||||
ALERTS = os.path.join(wr.warmsnap.DEFAULT_WARM_ROOT, "alerts")
|
||||
fails = []
|
||||
def git(*a, check=True):
    """Run a git subcommand inside the traefik recipe checkout (``RDIR``).

    A fixed committer identity is injected with ``-c`` so commits and annotated
    tags can be created without relying on any git config.

    Args:
        *a: git arguments placed after the identity options (e.g. ``"rev-parse", "HEAD"``).
        check: forwarded to ``subprocess.run``; when true, a non-zero git exit
            raises ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` with stdout/stderr captured as text.
    """
    identity = ["-c", "user.email=adv@cc-ci", "-c", "user.name=adv"]
    command = ["git", "-C", RDIR] + identity + list(a)
    return subprocess.run(command, capture_output=True, text=True, check=check)
|
||||
def routed(host="ci.commoninternet.net", path="/"):
    """Probe ``https://{host}{path}`` with curl, pinned to 127.0.0.1:443.

    ``--resolve`` pins ``host`` to the loopback address for port 443, and ``-k``
    lets curl proceed without verifying the certificate. The response body is
    discarded; only the ``%{http_code}`` write-out is kept.

    Args:
        host: hostname used for the URL and the ``--resolve`` pin.
        path: URL path appended to the host.

    Returns:
        curl's captured stdout, stripped (the status code as a string, e.g. ``"200"``).
    """
    pin = f"{host}:443:127.0.0.1"
    url = f"https://{host}{path}"
    curl_cmd = [
        "curl", "-sk",
        "--resolve", pin,
        "-o", "/dev/null",
        "-w", "%{http_code}",
        "--max-time", "10",
        url,
    ]
    probe = subprocess.run(curl_cmd, capture_output=True, text=True)
    return probe.stdout.strip()
|
||||
def reconcile():
    """Run ``warm_reconcile.py traefik`` as a child process and return its verdict.

    The child inherits the current environment plus ``CCCI_SKIP_FETCH=1`` and is
    given 1200 seconds to finish. The tail of its stdout (last 2000 chars) and
    stderr (last 500 chars) are echoed for the log.

    Returns:
        The text after ``RECONCILE RESULT:`` on the first stdout line starting
        with that marker, stripped; or ``"<no result rc=N>"`` when no such line
        exists (N is the child's return code).
    """
    marker = "RECONCILE RESULT:"
    script = os.path.join(os.path.dirname(__file__), "warm_reconcile.py")

    child_env = dict(os.environ)
    child_env["CCCI_SKIP_FETCH"] = "1"

    proc = subprocess.run(
        ["python3", script, "traefik"],
        capture_output=True,
        text=True,
        env=child_env,
        timeout=1200,
    )

    # Echo only the tails so the proof log stays readable.
    print(proc.stdout[-2000:])
    print(proc.stderr[-500:], file=sys.stderr)

    verdict = next(
        (ln for ln in proc.stdout.splitlines() if ln.startswith(marker)),
        None,
    )
    if verdict is None:
        return f"<no result rc={proc.returncode}>"
    return verdict.split(":", 1)[1].strip()
|
||||
|
||||
# ---------------------------------------------------------------------------
# Proof driver (runs at import/execution time).
#
# 1. Stage a fake newer tag (FAKE) whose compose.yml carries a non-string entry
#    in the environment list, so the deploy is expected to be rejected.
# 2. Run the reconciler and assert it rolled back to GOOD, traefik and the
#    keycloak route still answer 200, last_good is unchanged and a
#    *rollback* alert with the expected fields was written.
# 3. Always clean up: delete the fake tag, restore the recipe HEAD and, if
#    traefik is not on GOOD / not serving, redeploy GOOD.
# Failures are accumulated in the module-level ``fails`` list; the final
# "RESULT:" line and the exit status are derived from it.
# ---------------------------------------------------------------------------
import traceback  # local to the driver: used to log an aborted proof

# Remember where the recipe checkout was so it can be restored afterwards.
orig_head = git("rev-parse", "HEAD").stdout.strip()
print(f"START traefik TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} "
      f"ci={routed()} kc-through={routed('warm-keycloak.ci.commoninternet.net','/realms/master')} orig_head={orig_head[:8]}")
try:
    # Stage the fake NEWER tag with a lint-breaking env entry (a mapping, i.e. not a string).
    git("checkout", "-fq", GOOD)
    import re  # retained from the original script (currently unused in this block)
    cf = os.path.join(RDIR, "compose.yml")
    with open(cf) as fh:
        txt = fh.read()
    # Insert a non-string (flow-mapping) entry ahead of DASHBOARD_ENABLED in the
    # first matching 'environment:' list.
    # NOTE(review): the anchor must match compose.yml's exact whitespace — confirm.
    patched = txt.replace(" environment:\n - DASHBOARD_ENABLED",
                          " environment:\n - {advbad: brokenmapping}\n - DASHBOARD_ENABLED", 1)
    if patched == txt:
        # Without this guard the failure would only surface later as an opaque
        # "nothing to commit" CalledProcessError from `git commit`.
        raise RuntimeError(f"environment anchor not found in {cf}; cannot stage lint-breaking compose")
    with open(cf, "w") as fh:
        fh.write(patched)
    git("commit", "-aqm", "adv: lint-breaking env for traefik rollback proof")
    broken = git("rev-parse", "HEAD").stdout.strip()
    git("tag", "-a", "-m", "adv", FAKE, broken)
    git("checkout", "-fq", orig_head)  # leave working tree on the good HEAD; tag keeps broken commit
    print(f"staged fake {FAKE}@{broken[:8]} (lint-breaking); reconcile (expect rollback->{GOOD})...")

    # Snapshot the alert directory so only alerts created by this run are inspected.
    a0 = set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set()
    res = reconcile()
    new = sorted((set(os.listdir(ALERTS)) if os.path.isdir(ALERTS) else set()) - a0)
    ci, kc = routed(), routed("warm-keycloak.ci.commoninternet.net", "/realms/master")
    print(f"RESULT={res!r} TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={ci} kc-through={kc} new_alerts={new}")

    if not res.startswith("rolled-back:"):
        fails.append(f"not rolled-back: {res}")
    if wr.read_last_good("traefik") != GOOD:
        fails.append(f"last_good changed: {wr.read_last_good('traefik')}")
    if ci != "200":
        fails.append(f"traefik not serving after rollback: ci={ci}")
    if kc != "200":
        fails.append(f"keycloak-through-traefik not 200: {kc}")

    rb = [a for a in new if "rollback" in a]
    if not rb:
        fails.append("no rollback alert")
    else:
        import json
        with open(os.path.join(ALERTS, rb[0])) as fh:
            rec = json.load(fh)
        print(f"rollback alert: {rec}")
        if rec.get("attempted") != FAKE:
            fails.append(f"alert attempted={rec.get('attempted')}")
        if rec.get("last_good") != GOOD:
            fails.append(f"alert last_good={rec.get('last_good')}")
        if rec.get("recovered") is not True:
            fails.append(f"alert recovered={rec.get('recovered')}")
except Exception as e:
    # Record the abort instead of letting it bypass the final verdict line;
    # the traceback is still logged and the exit status stays non-zero.
    traceback.print_exc()
    fails.append(f"proof aborted: {type(e).__name__}: {e}")
finally:
    # DEFENSIVE recovery: delete fake tag, restore recipe HEAD, ensure traefik on GOOD + healthy.
    # Neither git step may raise here: an exception inside `finally` would skip
    # the traefik redeploy below, which is the part that matters most.
    git("tag", "-d", FAKE, check=False)
    restore = git("checkout", "-fq", orig_head, check=False)
    if restore.returncode != 0:
        fails.append(f"could not restore recipe HEAD {orig_head[:8]}: {restore.stderr.strip()}")
    if wr.current_version(DOMAIN) != GOOD or routed() != "200":
        print("!! defensive recovery: redeploying traefik GOOD", flush=True)
        try:
            wr.deploy_version("traefik", DOMAIN, GOOD, 600)
            wr.wait_healthy(wr.SPECS["traefik"])
        except Exception as e:
            # Best effort: the final route probe below decides pass/fail.
            print(f"!! recovery deploy error: {e}")

# Final state report and verdict.
fin_ci = routed()
fin_kc = routed("warm-keycloak.ci.commoninternet.net", "/realms/master")
fake_left = [t for t in git("tag").stdout.split() if t == FAKE]
print(f"END TYPE={wr.current_version(DOMAIN)} last_good={wr.read_last_good('traefik')} ci={fin_ci} kc-through={fin_kc} fake_tag_left={fake_left}")
if fin_ci != "200":
    fails.append(f"FINAL traefik not serving: {fin_ci}")
if fake_left:
    fails.append("fake tag not cleaned")
print("\nRESULT:", "FAIL: " + "; ".join(fails) if fails else
      "PASS — traefik WC1.1 stateless rollback: broken-latest deploy rejected → rolled back to last_good 5.1.1+v3.6.15, traefik+routes healthy (no TLS outage), alert written, cert/config preserved")
sys.exit(1 if fails else 0)
|
||||
Reference in New Issue
Block a user