claim(2w): W0.10a traefik WC1.1 migrated onto shared health-gated reconciler — no-op converge proven; destructive rollback = Adversary cold proof

warm_reconcile.py: per-spec setup hook + health_domain; SPECS[traefik]
(stateful=False, version-rollback-only, _traefik_setup preserves wildcard-cert/
file-provider config, health on routed dashboard host). keycloak path unchanged.
proxy.nix: deploy-proxy.service now execs warm_reconcile.py traefik. ZERO-disruption
migration (traefik already at latest 5.1.1+v3.6.15; pre-seeded TYPE+last_good →
clean no-op converge; traefik 200 + keycloak-through-traefik 200 + 0 failed).
65 unit pass. Per operator out: code+converge delivered; destructive rollback
(brief TLS blip) = Adversary's required cold proof. Closes the W0.10a tracked-open.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-29 03:50:32 +01:00
parent aec6911c68
commit e678d2e006
5 changed files with 145 additions and 45 deletions

View File

@ -36,6 +36,38 @@ from harness import abra, lifecycle, warmsnap # noqa: E402
# --------------------------------------------------------------------------- specs
def _traefik_setup(recipe: str, domain: str, version: str) -> None:
    """Prepare the traefik app's env file and TLS secrets before a reconcile.

    Mirrors the configuration the earlier proxy.nix bash reconcile applied:
    wildcard / file-provider mode, with the pre-issued certificate inserted as
    the ``ssl_cert`` / ``ssl_key`` secrets and ``LETS_ENCRYPT_ENV`` left empty.
    Env values are written through ``abra.env_set`` rather than a shell helper.

    Args:
        recipe: Recipe name handed to ``abra app new`` when the app has no env file yet.
        domain: App domain; identifies the env file, the stack and the secrets.
        version: Recipe version handed to ``abra app new``.

    Raises:
        RuntimeError: If either wildcard certificate file is missing on disk.
    """
    cert_dir = "/var/lib/ci-certs/live"
    fullchain = f"{cert_dir}/fullchain.pem"
    privkey = f"{cert_dir}/privkey.pem"

    # Fail early: without both PEM files there is nothing to insert as a secret.
    if not os.path.isfile(fullchain) or not os.path.isfile(privkey):
        raise RuntimeError(f"FATAL: wildcard cert missing at {cert_dir} (sops decrypt broken?)")

    # Create the app config only when its env file does not exist yet.
    if not os.path.isfile(env_file(domain)):
        _run(
            ["abra", "app", "new", recipe, "-s", "default", "-D", domain, version, "-o", "-n"],
            timeout=120,
            check=True,
        )

    # Env keys are (re)written on every run, in this fixed order.
    env_values = (
        ("DOMAIN", domain),
        ("LETS_ENCRYPT_ENV", ""),
        ("WILDCARDS_ENABLED", "1"),
        ("SECRET_WILDCARD_CERT_VERSION", "v1"),
        ("SECRET_WILDCARD_KEY_VERSION", "v1"),
        ("COMPOSE_FILE", '"compose.yml:compose.wildcard.yml"'),
    )
    for key, value in env_values:
        abra.env_set(domain, key, value)

    # Snapshot the stack's existing secret names once; both checks below use it.
    stack = lifecycle._stack_name(domain)  # noqa: SLF001
    existing = set(lifecycle._docker_names("secret", stack))  # noqa: SLF001

    # Insert each v1 secret only if no existing secret name ends in `_<name>_v1`.
    for secret_name, pem_path in (("ssl_cert", fullchain), ("ssl_key", privkey)):
        suffix = f"_{secret_name}_v1"
        if any(found.endswith(suffix) for found in existing):
            continue
        _run(
            ["abra", "app", "secret", "insert", domain, secret_name, "v1", pem_path, "-f", "-n"],
            timeout=120,
            check=True,
        )
SPECS: dict[str, dict] = {
"keycloak": {
"recipe": "keycloak",
@ -46,6 +78,20 @@ SPECS: dict[str, dict] = {
"deploy_timeout": 900,
"health_timeout": 900,
},
# traefik = the reverse proxy: STATELESS (version-rollback-only, NO snapshot). Health is probed
# on a ROUTED host (the dashboard) since traefik's own domain has no route. `setup` preserves the
# wildcard cert / file-provider config.
"traefik": {
"recipe": "traefik",
"domain": "traefik.ci.commoninternet.net",
"health_domain": "ci.commoninternet.net",
"health_path": "/",
"health_ok": (200,),
"stateful": False,
"deploy_timeout": 600,
"health_timeout": 300,
"setup": _traefik_setup,
},
}
ALERTS_DIR = os.path.join(warmsnap.DEFAULT_WARM_ROOT, "alerts")
@ -166,7 +212,10 @@ def is_deployed(domain: str) -> bool:
def health_code(spec: dict) -> int:
domain = spec["domain"]
# health is probed on `health_domain` (defaults to the app domain). For traefik the app domain
# (traefik.ci…) has no route of its own — health is a ROUTED host (e.g. the dashboard
# ci.commoninternet.net), so a 200 proves traefik is up + routing + TLS-terminating.
domain = spec.get("health_domain", spec["domain"])
r = _run(
[
"curl", "-sk", "-o", "/dev/null", "-w", "%{http_code}", "--max-time", "10",
@ -300,8 +349,14 @@ def reconcile(app: str) -> str:
latest = latest_version(tags)
if not latest:
raise RuntimeError(f"no version tags for {recipe}")
ensure_app_config(recipe, domain, latest)
ensure_secrets(domain)
# Per-app config/secrets: a spec may provide its own `setup` (traefik's cert/file-provider wiring);
# otherwise the default keycloak-shaped path (app new + DOMAIN/LETS_ENCRYPT + generate secrets).
setup = spec.get("setup")
if setup:
setup(recipe, domain, latest)
else:
ensure_app_config(recipe, domain, latest)
ensure_secrets(domain)
current = current_version(domain)
deployed = is_deployed(domain)