git checkout <head_ref> aborted on the untracked install_steps-provided compose.host-ports.yml (which head_ref tracks). Force-checkout yields the exact ref tree. Also fixes the mumble restore tier: backup labels exist only in 1.0.0+, so backup/restore are meaningful only after the (now-working) upgrade moves the app to head_ref. DECISIONS.md updated. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
251 lines
11 KiB
Python
251 lines
11 KiB
Python
"""Thin, robust wrappers around the `abra` CLI for the CI harness (plan §4.3).
|
|
|
|
Bakes in the known abra gotchas (re-verify per installed abra version, currently 0.13.0-beta):
|
|
- `abra app undeploy` / `abra app volume remove` do NOT accept `--chaos` → never pass it.
|
|
- plumb a `timeout` through secret generate/insert/remove calls.
|
|
- `abra app ls -S -m` returns nested {server: {apps: [...]}} — parse the inner structure.
|
|
- run non-interactively with `-n` (`--no-input`) everywhere.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import subprocess
|
|
|
|
# Executable name used as argv[0] for the abra invocations made by `_run`.
ABRA = "abra"
|
|
|
|
|
|
class AbraError(RuntimeError):
    """Raised by the abra runners when a checked abra command exits with a non-zero status."""
|
|
|
|
|
|
def _run_pty(
    args: list[str], timeout: int = 900, check: bool = True
) -> subprocess.CompletedProcess:
    """Run abra under a pseudo-TTY (via util-linux `script`).

    Needed for commands that exec into a container interactively (backup create / restore:
    'the input device is not a TTY').

    `script -c` takes ONE command string and runs it through a shell, so each argument is
    shell-quoted before joining; otherwise an argument containing whitespace or shell
    metacharacters would be re-split or interpreted by that shell.

    Args:
        args: abra arguments (without the leading executable name).
        timeout: seconds passed to `subprocess.run` as its timeout.
        check: when true, a non-zero exit status raises `AbraError`.

    Returns:
        The completed process, with stdout/stderr captured as text.

    Raises:
        AbraError: if `check` is true and the command exits non-zero.
    """
    import shlex

    cmd = " ".join(shlex.quote(part) for part in [ABRA, *args])
    proc = subprocess.run(
        ["script", "-qec", cmd, "/dev/null"],
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if check and proc.returncode != 0:
        raise AbraError(f"[pty] {cmd} failed ({proc.returncode}):\n{proc.stdout}\n{proc.stderr}")
    return proc
|
|
|
|
|
|
def _run(args: list[str], timeout: int = 300, check: bool = True) -> subprocess.CompletedProcess:
    """Invoke abra directly (no shell, no TTY) and capture its output as text.

    Args:
        args: abra arguments (without the leading executable name).
        timeout: seconds passed to `subprocess.run` as its timeout.
        check: when true, a non-zero exit status raises `AbraError`.

    Returns:
        The completed process, with stdout/stderr captured.

    Raises:
        AbraError: if `check` is true and abra exits non-zero; the message carries the
            exit status plus the captured stdout and stderr.
    """
    argv = [ABRA] + list(args)
    result = subprocess.run(argv, capture_output=True, text=True, timeout=timeout)
    succeeded = result.returncode == 0
    if succeeded or not check:
        return result
    raise AbraError(
        f"abra {' '.join(args)} failed ({result.returncode}):\n{result.stdout}\n{result.stderr}"
    )
|
|
|
|
|
|
def app_new(
    recipe: str,
    domain: str,
    server: str = "default",
    version: str | None = None,
    secrets: bool = False,
) -> None:
    """Create a new app config via `abra app new`, non-interactively (`-n`) and offline (`-o`).

    Args:
        recipe: recipe name handed to `abra app new`.
        domain: value passed with `-D`.
        server: value passed with `-s`.
        version: when given, pin to this published version tag (e.g. the upgrade tier's
            previous-version deploy). A pin is a clean tag checkout, which is incompatible
            with chaos, so `-C` is NOT passed in that case.
        secrets: when true, `-S` is appended.

    Raises:
        AbraError: if abra exits non-zero (propagated from `_run`).
    """
    # Without a pin, -C (chaos) deploys the recipe AT THE CURRENT CHECKOUT (the PR head
    # under test); with a pin, the version tag takes that slot instead.
    pin_or_chaos = version if version else "-C"
    argv = ["app", "new", recipe, "-s", server, "-D", domain, "-o", "-n", pin_or_chaos]
    if secrets:
        argv.append("-S")
    _run(argv)
|
|
|
|
|
|
def recipe_checkout(recipe: str, version: str) -> None:
    """Force-checkout the recipe's local git clone to `version` so the on-disk tree matches the pin.

    Why this exists: `abra app new <recipe> <version>` records ENV VERSION but does NOT reliably
    check out the tag, and a chaos (`-C`) deploy ignores ENV VERSION and uses the current
    checkout. Together that silently deployed LATEST for a 'previous-version' base, making the
    upgrade a no-op (Adversary F1d-2). This checkout plus a non-chaos deploy makes a pinned
    deploy genuinely deploy that version.

    Raises:
        subprocess.CalledProcessError: if `git checkout` exits non-zero (`check=True`).
    """
    import os

    repo_dir = os.path.expanduser(f"~/.abra/recipes/{recipe}")
    # -f (force) is required so the result is the EXACT ref tree. A cc-ci install_steps-provided
    # overlay (e.g. mumble's compose.host-ports.yml, copied into a version that predates it) is
    # an UNTRACKED file that collides with the same path TRACKED in a later ref; a plain
    # `git checkout <ref>` then aborts ("untracked working tree files would be overwritten").
    # Forcing writes the ref's tracked version instead. This is safe because local recipe-tree
    # state is never meant to survive a version switch (chaos deploys re-provide the overlay via
    # install_steps when needed).
    checkout_cmd = ["git", "-C", repo_dir, "checkout", "-f", "--quiet", version]
    subprocess.run(checkout_cmd, check=True)
|
|
|
|
|
|
def has_lightweight_version_tags(recipe: str) -> bool:
    """True if the recipe's local checkout has any LIGHTWEIGHT (non-annotated) version tag.

    Some upstream coop-cloud recipes ship a stray lightweight tag (e.g. lasuite-meet's
    `0.3.0+v1.16.0`). abra's pinned (non-chaos) deploy runs `abra recipe lint`, which FATAs R014
    ('only annotated tags used for recipe version') for the WHOLE recipe — blocking the upgrade
    tier's prev-version base deploy. (Re-annotating locally doesn't help: abra force-fetches tags
    from origin before linting and reverts it; repointing origin to a local mirror tripped a
    go-git 'reference not found'.) The caller (deploy_app) uses this to fall back to a chaos base
    deploy (which skips lint and deploys the explicitly-checked-out pinned version — see
    lifecycle.deploy_app).

    Read-only: a single `git for-each-ref` over refs/tags; no fetch/mutation, so it can't trigger
    abra's revert. One git process reports the object type of every tag, instead of one
    `cat-file` process per tag.

    Returns:
        True if at least one tag ref points directly at a commit; False otherwise, including
        when git produces no output (e.g. the recipe directory is missing).
    """
    import os

    path = os.path.expanduser(f"~/.abra/recipes/{recipe}")
    listing = subprocess.run(
        ["git", "-C", path, "for-each-ref", "--format=%(objecttype)", "refs/tags"],
        capture_output=True,
        text=True,
    ).stdout
    # A lightweight tag ref resolves straight to a "commit"; an annotated tag ref resolves to a
    # "tag" object.
    return any(objtype.strip() == "commit" for objtype in listing.splitlines())
|
|
|
|
|
|
def env_set(domain: str, key: str, value: str, server: str = "default") -> None:
    """Set a key in the app's .env (abra has no setter; edit the file directly).

    Every line that assigns `key` — active or commented out (`# KEY=...`) — is rewritten to
    `KEY=value`. If no such line exists, the assignment is appended. All other lines are kept
    verbatim, and the file is written back with a trailing newline.

    Args:
        domain: app domain; the file edited is `~/.abra/servers/<server>/<domain>.env`.
        key: variable name (matched literally, not as a regex).
        value: new value, written as-is after `=`.
        server: server directory holding the app config. Defaults to "default", matching
            `app_new` and `app_config_remove`.

    Raises:
        FileNotFoundError: if the .env file does not exist.
    """
    import os
    import re

    path = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
    assignment = f"{key}={value}"
    # Optional leading whitespace and a single optional '#', so a commented-out default is
    # activated in place rather than shadowed by an appended line.
    pat = re.compile(rf"^\s*#?\s*{re.escape(key)}=")
    with open(path, encoding="utf-8") as fh:
        lines = fh.read().splitlines()
    hits = [bool(pat.match(ln)) for ln in lines]
    out = [assignment if hit else ln for hit, ln in zip(hits, lines)]
    if not any(hits):
        out.append(assignment)
    with open(path, "w", encoding="utf-8") as fh:
        fh.write("\n".join(out) + "\n")
|
|
|
|
|
|
def secret_generate(domain: str, timeout: int = 300) -> None:
    """Generate all of the app's secrets via `abra app secret generate --all`.

    Flag rationale:
      - `-m` avoids the TTY/table (ioctl) path. The output, which contains the generated
        values, is captured by `_run` and never logged.
      - `-C -o` keep the recipe at the PR checkout; without `-o` abra re-resolves to a version
        tag, dropping the PR's files incl. tests/.

    The call is unchecked (`check=False`) because recipes with no secrets are a no-op.
    """
    argv = ["app", "secret", "generate", domain, "--all", "-m", "-C", "-o", "-n"]
    _run(argv, timeout=timeout, check=False)
|
|
|
|
|
|
def deploy(
    domain: str,
    chaos: bool = True,
    timeout: int = 900,
    no_converge_checks: bool = False,
) -> None:
    """Deploy the app with `abra app deploy <domain> -o -n`, plus optional `-C` / `-c`.

    Args:
        domain: app domain to deploy.
        chaos: when true, pass `-C`.
        timeout: seconds allowed for the abra call.
        no_converge_checks: when true, pass `-c` to skip abra's own post-deploy convergence
            monitor. Used by the upgrade chaos redeploy of heavy stacks (lasuite-drive): abra's
            monitor FATAs while a slow service (collabora's new-version jail/config init) is
            still becoming healthy, even though it converges given time. The caller then
            performs its OWN, stricter convergence+health wait (services N/N + app health +
            recipe READY_PROBE) with a generous deadline — see lifecycle.chaos_redeploy.

    Raises:
        AbraError: if abra exits non-zero (propagated from `_run`).
    """
    flags = ["-o", "-n"]
    flags += ["-C"] if chaos else []
    flags += ["-c"] if no_converge_checks else []
    _run(["app", "deploy", domain, *flags], timeout=timeout)
|
|
|
|
|
|
def upgrade(domain: str, version: str | None = None, timeout: int = 900) -> None:
    """Upgrade the app with `abra app upgrade`, optionally to an explicit `version`.

    Fixed flags:
      - `-f` no prompt, `-D` skip public-DNS checks, `-n` no input,
      - `-o` offline (local tags, no private-origin 401),
      - `-c` no-converge-checks: abra's convergence poll gives up too early on a slow heavy
        rolling upgrade (e.g. lasuite-docs' 9-service stop-first roll while pulling new images)
        and reports a FALSE "deploy failed" even though all services do converge. abra's check
        is disabled and the harness's own wait_healthy + data-survival assertion (more patient,
        and the real test) gates the upgrade. A genuinely-failed upgrade still fails the test
        (app never gets healthy).

    `abra app upgrade` has no --chaos flag, so none is passed.

    Raises:
        AbraError: if abra exits non-zero (propagated from `_run`).
    """
    target = [version] if version else []
    _run(["app", "upgrade", domain, *target, "-f", "-D", "-n", "-o", "-c"], timeout=timeout)
|
|
|
|
|
|
def backup_create(domain: str, timeout: int = 900) -> str:
    """Create a backup via `abra app backup create` under a pseudo-TTY and return its stdout.

    `-C -o` use the current recipe checkout with no remote fetch — like every other
    recipe-touching call (DECISIONS.md). Without `-o`, abra tries to fetch recipe tags from the
    (possibly private) remote and fails "authentication required: Unauthorized".

    Returns:
        The captured output, whose restic JSON summary line carries the produced "snapshot_id"
        (the backup artifact, DG3). The create output is read because
        `abra app backup snapshots` needs a TTY and is awkward to script.

    Raises:
        AbraError: if abra exits non-zero (propagated from `_run_pty`).
    """
    argv = ["app", "backup", "create", domain, "-n", "-C", "-o"]
    completed = _run_pty(argv, timeout=timeout)
    return completed.stdout
|
|
|
|
|
|
def restore(domain: str, timeout: int = 900) -> None:
    """Run `abra app restore <domain> -n -C -o` under a pseudo-TTY.

    Raises:
        AbraError: if abra exits non-zero (propagated from `_run_pty`).
    """
    argv = ["app", "restore", domain, "-n", "-C", "-o"]
    _run_pty(argv, timeout=timeout)
|
|
|
|
|
|
def recipe_head_commit(recipe: str) -> str | None:
|
|
"""The current HEAD commit of the recipe checkout — captured right after fetch (the PR head, or
|
|
the catalogue current) so the upgrade tier can re-checkout it for the chaos redeploy after the
|
|
prev-tag base deploy reset the working tree (HC1)."""
|
|
import os
|
|
|
|
path = os.path.expanduser(f"~/.abra/recipes/{recipe}")
|
|
proc = subprocess.run(["git", "-C", path, "rev-parse", "HEAD"], capture_output=True, text=True)
|
|
out = proc.stdout.strip()
|
|
return out or None
|
|
|
|
|
|
def recipe_versions(recipe: str) -> list[str]:
    """List the recipe's git tags as printed by `git tag --sort=creatordate` (oldest→newest).

    Returns:
        One entry per non-blank output line; an empty list when git prints nothing (e.g. the
        recipe directory is missing).
    """
    import os

    repo_dir = os.path.expanduser(f"~/.abra/recipes/{recipe}")
    tag_cmd = ["git", "-C", repo_dir, "tag", "--sort=creatordate"]
    listing = subprocess.run(tag_cmd, capture_output=True, text=True).stdout
    versions = []
    for line in listing.split("\n"):
        if line.strip():
            versions.append(line)
    return versions
|
|
|
|
|
|
def undeploy(domain: str, timeout: int = 600) -> None:
    """Undeploy the app with `abra app undeploy <domain> -n`; a non-zero exit is ignored.

    `--chaos` is deliberately never passed: `abra app undeploy` does not accept it.
    """
    argv = ["app", "undeploy", domain, "-n"]
    _run(argv, timeout=timeout, check=False)
|
|
|
|
|
|
def volume_remove(domain: str, timeout: int = 300) -> None:
    """Remove the app's volumes with `abra app volume remove`; a non-zero exit is ignored.

    `-f` skips prompts. `--chaos` is deliberately never passed: the command does not accept it.
    """
    argv = ["app", "volume", "remove", domain, "-f", "-n"]
    _run(argv, timeout=timeout, check=False)
|
|
|
|
|
|
def secret_remove_all(domain: str, timeout: int = 300) -> None:
    """Run `abra app secret remove <domain> --all -n`; a non-zero exit is ignored."""
    argv = ["app", "secret", "remove", domain, "--all", "-n"]
    _run(argv, timeout=timeout, check=False)
|
|
|
|
|
|
def app_config_remove(domain: str, server: str = "default") -> None:
    """Delete `~/.abra/servers/<server>/<domain>.env` so a re-run can recreate it.

    Part of teardown completeness. A file that is already absent is not an error.
    """
    import os

    env_file = os.path.expanduser(f"~/.abra/servers/{server}/{domain}.env")
    try:
        os.remove(env_file)
    except FileNotFoundError:
        # Already gone: nothing left to clean up.
        pass
|
|
|
|
|
|
def app_ls(server: str = "default") -> list[dict]:
    """Return the apps listed for `server` by `abra app ls -S -m`.

    The command's JSON output is nested as {server: {apps: [...]}}; this digs out the inner
    list. The abra call is unchecked, and any output that does not have that shape yields an
    empty list rather than an exception: unparseable or empty output, a top-level value that
    is not an object, a missing/non-object server entry, or an `apps` value that is not a list.

    Args:
        server: top-level key to look up in the parsed output.

    Returns:
        The `apps` list for that server, or [] when it is unavailable.
    """
    proc = _run(["app", "ls", "-S", "-m", "-n"], check=False)
    try:
        data = json.loads(proc.stdout)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return []
    if not isinstance(data, dict):
        return []
    node = data.get(server)
    if not isinstance(node, dict):
        return []
    apps = node.get("apps")
    return apps if isinstance(apps, list) else []
|