diff --git a/runner/harness/generic.py b/runner/harness/generic.py
index fc18f3f..60a0057 100644
--- a/runner/harness/generic.py
+++ b/runner/harness/generic.py
@@ -237,6 +237,9 @@ def perform_upgrade(
     before = lifecycle.deployed_identity(domain)
     if head_ref:
         lifecycle.recipe_checkout_ref(recipe, head_ref)
+    # HQ1: warm the NEW-version image set before the chaos redeploy (the head_ref checkout's pinned
+    # tags) so a pull failure is a clear pre-deploy error and convergence isn't pull-bound.
+    lifecycle.prepull_images(recipe, domain)
     lifecycle.chaos_redeploy(domain, deploy_timeout=deploy_timeout, no_converge_checks=True)
     # Own the convergence verification (abra's monitor was skipped via -c).
     lifecycle.wait_healthy(
diff --git a/runner/harness/lifecycle.py b/runner/harness/lifecycle.py
index dc06299..0867a6d 100644
--- a/runner/harness/lifecycle.py
+++ b/runner/harness/lifecycle.py
@@ -122,6 +122,85 @@ def _run_install_steps(hook: tuple[str, str], recipe: str, domain: str) -> None:
     )
 
 
+def prepull_images(recipe: str, domain: str) -> None:
+    """HQ1 (plan-prepull-images.md): pre-pull a recipe's images into the local store BEFORE the deploy.
+
+    A pull failure (rate-limit / bad tag) then fails FAST as a CLEAR pull error here, instead of
+    surfacing later as a murky 'not converged' deploy timeout (the F2-12-class confusion); and
+    images-already-local lets the deploy converge within abra's native window. Resolves images via
+    `docker compose config --images` using abra's COMPOSE_FILE from the app .env (handles $VERSION
+    interpolation + multi-compose recipes — a naive `grep image:` misses both), then `docker pull`
+    each, SKIP-IF-PRESENT (zero network for already-cached pinned tags). The deploy itself stays
+    UNCHANGED (real `abra app deploy`) — this only warms the local store. Removes PULL time, NOT
+    app-INIT time (slow-init apps like collabora/immich still need their recipe healthcheck/READY_PROBE).
+    No timeout is applied to `docker pull`, so a slow pull delays this step rather than failing it.
+
+    Args:
+        recipe: recipe name; its checkout is looked up at ``~/.abra/recipes/RECIPE``.
+        domain: app domain; its env file is looked up at ``~/.abra/servers/default/DOMAIN.env``.
+
+    Raises:
+        RuntimeError: a `docker pull` exited non-zero (HARD-fail — a real pull error is not masked).
+
+    Best-effort on resolution failure (recipe dir/.env missing, `bash`/`docker` not executable, or
+    no images resolved): logs the reason and returns, letting the deploy pull as usual.
+    """
+    import os
+
+    recipe_dir = os.path.expanduser(f"~/.abra/recipes/{recipe}")
+    env_path = os.path.expanduser(f"~/.abra/servers/default/{domain}.env")
+    if not os.path.isdir(recipe_dir) or not os.path.isfile(env_path):
+        print(f" prepull: recipe dir or .env missing for {recipe} — skipping", flush=True)
+        return
+    # COMPOSE_FILE is a shell-style ':'-separated list (may self-reference $COMPOSE_FILE for
+    # multi-compose); evaluate it the way abra does, then pass each file to docker compose. The
+    # .env path is handed to bash as "$1" instead of being spliced into the script text, so quotes,
+    # `$` or backticks in the path cannot change what the shell executes.
+    source_env = 'set -a; . "$1"; printf "%s" "${COMPOSE_FILE:-compose.yml}"'
+    try:
+        cf = subprocess.run(
+            ["bash", "-c", source_env, "bash", env_path],
+            capture_output=True, text=True,
+        ).stdout.strip()
+        files = [f for f in cf.split(":") if f] or ["compose.yml"]
+        # --env-file supplies $VERSION-style interpolation so pinned tags resolve correctly.
+        args = ["docker", "compose", "--env-file", env_path]
+        for f in files:
+            args += ["-f", f]
+        args += ["config", "--images"]
+        proc = subprocess.run(args, cwd=recipe_dir, capture_output=True, text=True)
+    except OSError as exc:
+        # bash/docker missing or not executable is a resolution failure, not a pull failure:
+        # honour the best-effort contract instead of crashing the deploy with a traceback.
+        print(
+            f" prepull: cannot resolve images for {recipe} ({exc}) — "
+            f"skipping (deploy will pull as usual)",
+            flush=True,
+        )
+        return
+    # `config --images` prints one image ref per line to stdout (warnings go to stderr).
+    images = sorted({ln.strip() for ln in proc.stdout.splitlines() if ln.strip()})
+    if not images:
+        print(
+            f" prepull: no images resolved for {recipe} (config --images rc={proc.returncode}) — "
+            f"skipping (deploy will pull as usual). stderr: {proc.stderr.strip()[-160:]}",
+            flush=True,
+        )
+        return
+    for img in images:
+        if subprocess.run(["docker", "image", "inspect", img], capture_output=True).returncode == 0:
+            print(f" prepull: present {img}", flush=True)
+            continue
+        print(f" prepull: pulling {img} …", flush=True)
+        r = subprocess.run(["docker", "pull", img], capture_output=True, text=True)
+        if r.returncode != 0:
+            raise RuntimeError(
+                f"prepull: `docker pull {img}` failed (rc={r.returncode}) — clear pull error BEFORE "
+                f"deploy: {r.stderr.strip()[-300:] or r.stdout.strip()[-300:]}"
+            )
+    print(f" prepull: {len(images)} image(s) present/pulled for {recipe}", flush=True)
+
+
 def deploy_app(
     recipe: str,
     domain: str,
@@ -173,6 +252,8 @@ def deploy_app(
         abra.secret_generate(domain)
     if install_steps_hook:
         _run_install_steps(install_steps_hook, recipe, domain)
+    # HQ1: warm the local image store before the (real, unchanged) abra deploy.
+    prepull_images(recipe, domain)
     abra.deploy(domain, chaos=chaos, timeout=deploy_timeout)
 
 
diff --git a/tests/unit/test_prepull.py b/tests/unit/test_prepull.py
new file mode 100644
index 0000000..8103610
--- /dev/null
+++ b/tests/unit/test_prepull.py
@@ -0,0 +1,102 @@
+"""Unit tests for HQ1 image pre-pull (lifecycle.prepull_images) — deterministic, mocked docker.
+
+Proves the pre-pull is non-vacuous (the Adversary's criteria, REVIEW-2 754f508):
+- present image → SKIP (no `docker pull`, zero network — the warm-cache property).
+- missing image → `docker pull` it.
+- a pull FAILURE → RAISE a clear pull error (so a bad tag fails fast PRE-deploy, not as a converge
+  timeout). NOT vacuous.
+- no images resolved → best-effort skip (deploy pulls as usual), no raise.
+- bash/docker not executable → best-effort skip as well, no raise.
+And that resolution uses the recipe's COMPOSE_FILE via `docker compose config --images` (not grep),
+with the .env path passed to bash as an argument rather than spliced into the script.
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
+from harness import lifecycle as lc  # noqa: E402
+
+
+class _R:
+    def __init__(self, stdout="", stderr="", returncode=0):
+        self.stdout, self.stderr, self.returncode = stdout, stderr, returncode
+
+
+def _patch_paths(monkeypatch):
+    monkeypatch.setattr(os.path, "isdir", lambda p: True)
+    monkeypatch.setattr(os.path, "isfile", lambda p: True)
+
+
+def _runner(monkeypatch, *, images="img-a:1\nimg-b:2\n", present=(), pull_rc=0, pull_err=""):
+    """Install a fake subprocess.run; record calls; return the calls list."""
+    calls: list[list[str]] = []
+
+    def fake_run(args, **kw):
+        calls.append(list(args))
+        if args[0] == "bash":
+            return _R(stdout="compose.yml")  # COMPOSE_FILE eval
+        if "config" in args and "--images" in args:
+            return _R(stdout=images)
+        if args[:3] == ["docker", "image", "inspect"]:
+            return _R(returncode=0 if args[3] in present else 1)
+        if args[:2] == ["docker", "pull"]:
+            return _R(returncode=pull_rc, stderr=pull_err)
+        return _R()
+
+    monkeypatch.setattr(lc.subprocess, "run", fake_run)
+    return calls
+
+
+def test_prepull_skips_present_images_zero_network(monkeypatch):
+    _patch_paths(monkeypatch)
+    calls = _runner(monkeypatch, present=("img-a:1", "img-b:2"))
+    lc.prepull_images("r", "d")  # both present → no pull
+    assert not any(c[:2] == ["docker", "pull"] for c in calls), "must NOT pull a present image"
+    # it DID resolve via `docker compose ... config --images`
+    assert any("config" in c and "--images" in c for c in calls)
+
+
+def test_prepull_pulls_missing_image(monkeypatch):
+    _patch_paths(monkeypatch)
+    calls = _runner(monkeypatch, present=("img-a:1",))  # img-b:2 missing
+    lc.prepull_images("r", "d")
+    pulled = [c[2] for c in calls if c[:2] == ["docker", "pull"]]
+    assert pulled == ["img-b:2"], f"should pull only the missing image; pulled={pulled}"
+
+
+def test_prepull_raises_clear_error_on_pull_failure(monkeypatch):
+    _patch_paths(monkeypatch)
+    _runner(monkeypatch, present=(), pull_rc=1, pull_err="manifest unknown: bad tag")
+    with pytest.raises(RuntimeError, match="clear pull error BEFORE deploy"):
+        lc.prepull_images("r", "d")
+
+
+def test_prepull_skips_when_no_images_resolved(monkeypatch):
+    _patch_paths(monkeypatch)
+    calls = _runner(monkeypatch, images="")  # config --images returns nothing
+    lc.prepull_images("r", "d")  # no raise
+    assert not any(c[:2] == ["docker", "pull"] for c in calls)
+
+
+def test_prepull_skips_when_tooling_not_executable(monkeypatch):
+    _patch_paths(monkeypatch)
+
+    def missing_binary(args, **kw):
+        raise FileNotFoundError(2, "No such file or directory", args[0])
+
+    monkeypatch.setattr(lc.subprocess, "run", missing_binary)
+    lc.prepull_images("r", "d")  # resolution failure is best-effort → no raise
+
+
+def test_prepull_passes_env_path_to_bash_as_argument(monkeypatch):
+    _patch_paths(monkeypatch)
+    calls = _runner(monkeypatch, present=("img-a:1", "img-b:2"))
+    lc.prepull_images("r", "d")
+    script, env_arg = next((c[2], c[-1]) for c in calls if c[0] == "bash")
+    assert env_arg.endswith("d.env"), "the .env path must be a positional bash argument"
+    assert "d.env" not in script, "the .env path must not be interpolated into the script"