All checks were successful
continuous-integration/drone/push Build is passing
3-stage run green (install/upgrade/backup), clean teardown. backupbot deployed via reconcile oneshot; PTY (script) for abra backup/restore; -m for secret generate (no value leak). M5 CLAIMED. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
151 lines
5.4 KiB
Python
151 lines
5.4 KiB
Python
"""App lifecycle for the CI harness: deploy, wait-healthy, teardown, janitor (plan §4.3).

The teardown guarantee is sacred: a failed test must never leak an app/volume/secret into the
next run. Callers wrap deploy_app()/teardown_app() in try/finally (or a pytest finalizer).
"""
|
|
from __future__ import annotations
|
|
|
|
import ssl
|
|
import subprocess
|
|
import time
|
|
import urllib.request
|
|
|
|
from . import abra
|
|
|
|
# Public address of the gateway in front of cc-ci. None of the functions visible in this
# module read it: they connect by hostname so that SNI stays intact.
GATEWAY_IP = "143.244.213.108" # *.ci.commoninternet.net -> gateway (TLS passthrough to cc-ci)
|
|
|
|
|
|
def deploy_app(recipe: str, domain: str, version: str | None = None, secrets: bool = True) -> None:
    """Create, configure and deploy one instance of ``recipe`` at ``domain``.

    Steps, in order: remove any leftover app config for ``domain``, create the app, blank out
    ``LETS_ENCRYPT_ENV``, generate secrets (only when ``secrets`` is true), deploy.

    ``LETS_ENCRYPT_ENV`` is forced to '' so traefik serves the wildcard cert via the file
    provider and NEVER attempts ACME (adversary finding A1).

    Args:
        recipe: recipe name handed to ``abra.app_new``.
        domain: app domain; identifies the app in every abra call below.
        version: forwarded to ``abra.app_new`` unchanged (``None`` is resolved there).
        secrets: forwarded to ``abra.app_new`` and also gates ``abra.secret_generate``.
    """
    # A prior crashed run may have left a stale .env behind; start from a clean slate.
    abra.app_config_remove(domain)

    creation_options = {"version": version, "secrets": secrets}
    abra.app_new(recipe, domain, **creation_options)

    # Must happen before deploy so the very first traefik config already has ACME disabled.
    abra.env_set(domain, "LETS_ENCRYPT_ENV", "")

    if secrets:
        abra.secret_generate(domain)

    abra.deploy(domain)
|
|
|
|
|
|
def _stack_name(domain: str) -> str:
    """Return the swarm stack name that corresponds to ``domain``.

    abra derives the stack name from the domain by replacing dots with underscores and
    KEEPING hyphens (e.g. custom-html-x.ci.commoninternet.net -> custom-html-x_ci_...).
    """
    # NOTE(review): abra may additionally shorten very long stack names — confirm before
    # relying on this for unusually long domains.
    labels = domain.split(".")
    return "_".join(labels)
|
|
|
|
|
|
def services_converged(domain: str) -> bool:
    """Report whether every service of the app's swarm stack is fully replicated.

    Runs ``docker stack services <stack> --format {{.Replicas}}`` and returns True only when
    at least one service is listed and each one reports running == desired with desired > 0.

    Returns False when docker prints nothing on stdout (for example because the stack does
    not exist yet); the command's exit status is not inspected.
    """
    proc = subprocess.run(
        ["docker", "stack", "services", _stack_name(domain), "--format", "{{.Replicas}}"],
        capture_output=True, text=True,
    )
    return _replicas_converged(proc.stdout)


def _replicas_converged(output: str) -> bool:
    """Return True when every non-blank row of ``{{.Replicas}}`` output is N/N with N > 0."""
    rows = [line.split() for line in output.splitlines() if line.strip()]
    if not rows:
        return False
    for fields in rows:
        # docker can append a qualifier after the counts, e.g. "1/1 (max 1 per node)".
        # Only the first whitespace-delimited token carries running/desired.
        running, _, desired = fields[0].partition("/")
        if not desired or running != desired or desired == "0":
            return False
    return True
|
|
|
|
|
|
def http_get(domain: str, path: str = "/", timeout: int = 15) -> int:
    """HTTPS GET ``https://<domain><path>`` and return the HTTP status code.

    The app is addressed by its real hostname. On cc-ci the *.ci.commoninternet.net wildcard
    resolves (public DNS) to the gateway, which SNI-passthroughs to cc-ci's traefik — so using
    the real URL keeps SNI correct (connecting to the bare IP would drop SNI and fail to route).

    Certificate and hostname verification are switched off for this request.

    Args:
        domain: host part of the URL.
        path: path (and query) appended verbatim after the host.
        timeout: seconds passed to ``urlopen``.

    Returns:
        The response status; the status of an HTTP error response when urllib raises
        ``HTTPError``; or 0 when no HTTP response was obtained at all.
    """
    # Imported explicitly: the module only imports urllib.request, and HTTPError lives in
    # urllib.error.
    from urllib.error import HTTPError

    ctx = ssl.create_default_context()
    # check_hostname has to be disabled before verify_mode may be set to CERT_NONE.
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    req = urllib.request.Request(f"https://{domain}{path}", method="GET")
    # NOTE(review): urlopen follows redirects by default, so a 301/302 is normally reported
    # as the status of the final response — confirm this is what callers expect.
    try:
        with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
            return resp.status
    except HTTPError as e:
        # HTTPError doubles as the response object. Close it so that polling an app which
        # answers 4xx/5xx does not leave one open connection behind per attempt.
        code = e.code
        e.close()
        return code
    except Exception:
        # Deliberately broad: callers poll this while the app is still starting, so DNS,
        # connect, TLS and timeout failures all just mean "not up yet".
        return 0
|
|
|
|
|
|
def wait_healthy(domain: str, ok_codes=(200, 301, 302), deploy_timeout: int = 600,
                 http_timeout: int = 300) -> None:
    """Wait for stack services converged, then for the app to answer over HTTPS.

    Phase 1 polls ``services_converged(domain)`` every 5 seconds for up to
    ``deploy_timeout`` seconds. Phase 2 then polls ``http_get(domain)`` every 5 seconds for
    up to ``http_timeout`` seconds until the returned status is in ``ok_codes``.

    Args:
        domain: app domain to wait for.
        ok_codes: container of HTTP status codes that count as healthy.
        deploy_timeout: budget in seconds for the convergence phase.
        http_timeout: budget in seconds for the HTTPS phase (starts after convergence).

    Raises:
        TimeoutError: when either phase exhausts its budget.
    """
    poll_interval = 5

    # Deadlines use the monotonic clock: a wall-clock step (e.g. an NTP correction on a
    # freshly booted CI host) must neither cut the wait short nor extend it.
    deadline = time.monotonic() + deploy_timeout
    while time.monotonic() < deadline:
        if services_converged(domain):
            break
        time.sleep(poll_interval)
    else:
        # Loop ran out of time without hitting the break above.
        raise TimeoutError(f"{domain}: services did not converge in {deploy_timeout}s")

    deadline = time.monotonic() + http_timeout
    last = 0  # 0 is also what http_get reports when no HTTP response was obtained
    while time.monotonic() < deadline:
        last = http_get(domain)
        if last in ok_codes:
            return
        time.sleep(poll_interval)
    raise TimeoutError(f"{domain}: not healthy over HTTPS (last status {last})")
|
|
|
|
|
|
def upgrade_app(domain: str, version: str | None = None) -> None:
    """Upgrade the app at ``domain`` by delegating to ``abra.upgrade``.

    ``version`` is forwarded unchanged as a keyword argument; what ``None`` selects is
    decided by the abra wrapper (presumably the newest version — confirm there).
    """
    abra.upgrade(domain, version=version)
|
|
|
|
|
|
def backup_app(domain: str) -> None:
    """Create a backup of the app at ``domain`` by delegating to ``abra.backup_create``."""
    abra.backup_create(domain)
|
|
|
|
|
|
def restore_app(domain: str) -> None:
    """Restore the app at ``domain`` by delegating to ``abra.restore``."""
    abra.restore(domain)
|
|
|
|
|
|
def previous_version(recipe: str) -> str | None:
    """Return the second-newest published version of ``recipe``, or None if there is none.

    This is the version to deploy before upgrading to latest. It is taken as the
    second-to-last entry of ``abra.recipe_versions(recipe)``.
    """
    # NOTE(review): assumes recipe_versions() is ordered oldest -> newest — confirm in abra.
    published = abra.recipe_versions(recipe)
    if len(published) < 2:
        return None
    return published[-2]
|
|
|
|
|
|
def _app_container(domain: str, service: str = "app") -> str:
    """Return the id of a running container of swarm service ``<stack>_<service>``.

    Args:
        domain: app domain; mapped to the stack name via ``_stack_name``.
        service: service name inside the stack.

    Returns:
        The first container id that ``docker ps`` lists for that service.

    Raises:
        RuntimeError: when docker lists no running container for the service.
    """
    name = f"{_stack_name(domain)}_{service}"
    # Match on the swarm service-name label instead of ``--filter name=``: the name filter is
    # a substring match, so "html-pr1_..._app" would also select containers belonging to
    # "custom-html-pr1_..._app" or to a sibling service such as "..._app-worker".
    proc = subprocess.run(
        [
            "docker", "ps",
            "--filter", f"label=com.docker.swarm.service.name={name}",
            "--format", "{{.ID}}",
        ],
        capture_output=True, text=True,
    )
    container_ids = proc.stdout.split()
    if not container_ids:
        raise RuntimeError(f"no running container for {name}")
    return container_ids[0]
|
|
|
|
|
|
def exec_in_app(domain: str, cmd: list[str], service: str = "app") -> str:
    """Run ``cmd`` inside the app's running container via ``docker exec`` and return stdout.

    The container is resolved with ``_app_container(domain, service)``, which raises
    RuntimeError when none is running.
    """
    argv = ["docker", "exec", _app_container(domain, service)]
    argv.extend(cmd)
    # NOTE(review): exit status and stderr are not inspected, so a failing command simply
    # yields whatever it wrote to stdout (possibly nothing).
    result = subprocess.run(argv, capture_output=True, text=True)
    return result.stdout
|
|
|
|
|
|
def http_body(domain: str, path: str = "/", timeout: int = 15) -> str:
    """HTTPS GET ``https://<domain><path>`` and return the response body as text.

    Certificate and hostname verification are switched off. The body is decoded with the
    default codec, replacing undecodable bytes. Errors raised by ``urlopen`` (including HTTP
    error statuses) are not caught here and propagate to the caller.
    """
    insecure = ssl.create_default_context()
    insecure.check_hostname = False
    insecure.verify_mode = ssl.CERT_NONE

    url = f"https://{domain}{path}"
    request = urllib.request.Request(url, method="GET")
    with urllib.request.urlopen(request, timeout=timeout, context=insecure) as response:
        payload = response.read()
    return payload.decode(errors="replace")
|
|
|
|
|
|
def teardown_app(domain: str) -> None:
    """Idempotent, best-effort full teardown. Never raises (finalizer-safe).

    Runs, in order: undeploy, volume removal, secret removal, app-config removal. Each step
    is attempted even when an earlier one failed, so a single error cannot leave the
    remaining resources behind; failures are logged with their traceback and swallowed.
    """
    import logging

    log = logging.getLogger(__name__)

    # Lambdas defer the abra lookup and call into the guarded loop below.
    steps = (
        ("undeploy", lambda: abra.undeploy(domain)),
        ("volume_remove", lambda: abra.volume_remove(domain)),
        ("secret_remove_all", lambda: abra.secret_remove_all(domain)),
        ("app_config_remove", lambda: abra.app_config_remove(domain)),
    )
    for label, step in steps:
        try:
            step()
        except Exception:
            # Keep going: leaking a volume/secret into the next run is worse than a noisy log.
            log.exception("%s: teardown step %s failed; continuing", domain, label)
|
|
|
|
|
|
def janitor(max_age_hours: int = 6) -> None:
    """Remove orphaned *-pr* apps left by crashed runs.

    Every app returned by ``abra.app_ls()`` whose name (``appName``, else ``domain``)
    contains both "-pr" and ".ci.commoninternet.net" is torn down.

    NOTE: ``max_age_hours`` is accepted but NOT enforced — deployed-status/age detail varies
    by abra version, so matching apps are removed regardless of age. Do not run this while
    another CI job may still be using a matching app.

    A teardown that fails for one app is logged and does not stop the sweep.
    """
    import logging

    log = logging.getLogger(__name__)

    for app in abra.app_ls():
        name = app.get("appName") or app.get("domain") or ""
        if "-pr" not in name or ".ci.commoninternet.net" not in name:
            continue
        try:
            teardown_app(name)
        except Exception:
            # best-effort: one stuck app must not shield the remaining orphans from cleanup
            log.exception("janitor: teardown of %s failed; continuing", name)
|