claim(2w): W0.10a traefik WC1.1 migrated onto shared health-gated reconciler — no-op converge proven; destructive rollback = Adversary cold proof
warm_reconcile.py: per-spec setup hook + health_domain; SPECS[traefik] (stateful=False, version-rollback-only, _traefik_setup preserves wildcard-cert/ file-provider config, health on routed dashboard host). keycloak path unchanged. proxy.nix: deploy-proxy.service now execs warm_reconcile.py traefik. ZERO-disruption migration (traefik already at latest 5.1.1+v3.6.15; pre-seeded TYPE+last_good → clean no-op converge; traefik 200 + keycloak-through-traefik 200 + 0 failed). 65 unit pass. Per operator out: code+converge delivered; destructive rollback (brief TLS blip) = Adversary's required cold proof. Closes the W0.10a tracked-open. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@ -36,6 +36,38 @@ from harness import abra, lifecycle, warmsnap # noqa: E402
|
||||
|
||||
# --------------------------------------------------------------------------- specs
|
||||
|
||||
|
||||
def _traefik_setup(recipe: str, domain: str, version: str) -> None:
    """Apply the traefik-specific app config and TLS secrets ahead of a reconcile.

    Keeps the behaviour of the earlier proxy.nix bash reconcile: wildcard / file-provider
    mode, with the pre-issued certificate supplied as the ``ssl_cert`` / ``ssl_key`` swarm
    secrets and NO ACME. Env values go through ``abra.env_set`` (the proven, newline-safe
    path, unlike the bash ``set_env`` that bit keycloak).

    Args:
        recipe: recipe name handed to ``abra app new``.
        domain: app domain; also written as the ``DOMAIN`` env value.
        version: recipe version handed to ``abra app new``.

    Raises:
        RuntimeError: when ``fullchain.pem`` or ``privkey.pem`` is absent from the cert dir.
    """
    live_dir = "/var/lib/ci-certs/live"
    fullchain = f"{live_dir}/fullchain.pem"
    privkey = f"{live_dir}/privkey.pem"

    # Fail fast: without both PEM files there is nothing to insert as secrets later.
    if not all(os.path.isfile(pem) for pem in (fullchain, privkey)):
        raise RuntimeError(f"FATAL: wildcard cert missing at {live_dir} (sops decrypt broken?)")

    # Only create the app when it has no env file yet.
    if not os.path.isfile(env_file(domain)):
        create_cmd = [
            "abra", "app", "new", recipe, "-s", "default", "-D", domain, version, "-o", "-n",
        ]
        _run(create_cmd, timeout=120, check=True)

    # NOTE(review): these keys are assumed to be (re)asserted on every run, not only right
    # after `abra app new` — confirm against the intended nesting.
    env_values = (
        ("DOMAIN", domain),
        ("LETS_ENCRYPT_ENV", ""),
        ("WILDCARDS_ENABLED", "1"),
        ("SECRET_WILDCARD_CERT_VERSION", "v1"),
        ("SECRET_WILDCARD_KEY_VERSION", "v1"),
        ("COMPOSE_FILE", '"compose.yml:compose.wildcard.yml"'),
    )
    for key, value in env_values:
        abra.env_set(domain, key, value)

    stack = lifecycle._stack_name(domain)  # noqa: SLF001
    existing = set(lifecycle._docker_names("secret", stack))  # noqa: SLF001

    # Insert each v1 secret only if no existing secret name ends in `_<secret>_v1`.
    # `existing` is read once up front, so the second check does not see the first insert.
    for secret, pem in (("ssl_cert", fullchain), ("ssl_key", privkey)):
        if any(entry.endswith(f"_{secret}_v1") for entry in existing):
            continue
        _run(
            ["abra", "app", "secret", "insert", domain, secret, "v1", pem, "-f", "-n"],
            timeout=120,
            check=True,
        )
|
||||
|
||||
|
||||
SPECS: dict[str, dict] = {
|
||||
"keycloak": {
|
||||
"recipe": "keycloak",
|
||||
@ -46,6 +78,20 @@ SPECS: dict[str, dict] = {
|
||||
"deploy_timeout": 900,
|
||||
"health_timeout": 900,
|
||||
},
|
||||
# traefik = the reverse proxy: STATELESS (version-rollback-only, NO snapshot). Health is probed
|
||||
# on a ROUTED host (the dashboard) since traefik's own domain has no route. `setup` preserves the
|
||||
# wildcard cert / file-provider config.
|
||||
"traefik": {
|
||||
"recipe": "traefik",
|
||||
"domain": "traefik.ci.commoninternet.net",
|
||||
"health_domain": "ci.commoninternet.net",
|
||||
"health_path": "/",
|
||||
"health_ok": (200,),
|
||||
"stateful": False,
|
||||
"deploy_timeout": 600,
|
||||
"health_timeout": 300,
|
||||
"setup": _traefik_setup,
|
||||
},
|
||||
}
|
||||
|
||||
ALERTS_DIR = os.path.join(warmsnap.DEFAULT_WARM_ROOT, "alerts")
|
||||
@ -166,7 +212,10 @@ def is_deployed(domain: str) -> bool:
|
||||
|
||||
|
||||
def health_code(spec: dict) -> int:
|
||||
domain = spec["domain"]
|
||||
# health is probed on `health_domain` (defaults to the app domain). For traefik the app domain
|
||||
# (traefik.ci…) has no route of its own — health is a ROUTED host (e.g. the dashboard
|
||||
# ci.commoninternet.net), so a 200 proves traefik is up + routing + TLS-terminating.
|
||||
domain = spec.get("health_domain", spec["domain"])
|
||||
r = _run(
|
||||
[
|
||||
"curl", "-sk", "-o", "/dev/null", "-w", "%{http_code}", "--max-time", "10",
|
||||
@ -300,8 +349,14 @@ def reconcile(app: str) -> str:
|
||||
latest = latest_version(tags)
|
||||
if not latest:
|
||||
raise RuntimeError(f"no version tags for {recipe}")
|
||||
ensure_app_config(recipe, domain, latest)
|
||||
ensure_secrets(domain)
|
||||
# Per-app config/secrets: a spec may provide its own `setup` (traefik's cert/file-provider wiring);
|
||||
# otherwise the default keycloak-shaped path (app new + DOMAIN/LETS_ENCRYPT + generate secrets).
|
||||
setup = spec.get("setup")
|
||||
if setup:
|
||||
setup(recipe, domain, latest)
|
||||
else:
|
||||
ensure_app_config(recipe, domain, latest)
|
||||
ensure_secrets(domain)
|
||||
|
||||
current = current_version(domain)
|
||||
deployed = is_deployed(domain)
|
||||
|
||||
Reference in New Issue
Block a user