fix(1d): F1d-2 — pinned base deploys the pinned version; upgrade is non-vacuous

- deploy_app: checkout the pinned tag + deploy NON-chaos when a version is pinned (chaos only for
  version=None / PR-head). Was always -C, which ignored the pin and deployed LATEST -> upgrade no-op.
- do_upgrade: assert the deployment actually MOVED (coop-cloud version label and/or image changed)
  via lifecycle.deployed_identity -> a vacuous no-op upgrade can no longer pass (DG2).
- G2: migrate custom-html overlays to the assertion-only contract (override + extend-by-composition
  + data-continuity; split backup/restore). tests/unit/test_discovery.py proves precedence (5/5).

Probe (Adversary's F1d-2 test): hedgedoc deploy-prev=1.10.7 -> upgrade=1.10.8, CHANGED=True.
hedgedoc full generic lifecycle green (install/upgrade/backup/restore, deploy-count=1).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 00:02:59 +01:00
parent 1aea1541a7
commit 81e26a1bdc
9 changed files with 204 additions and 59 deletions

View File

@ -71,6 +71,18 @@ def app_new(
_run(args)
def recipe_checkout(recipe: str, version: str) -> None:
    """Check the local clone of `recipe` out at `version` (a published version tag).

    Why this exists: `abra app new <recipe> <version>` records ENV VERSION but does NOT reliably
    check out the tag, and a chaos (`-C`) deploy ignores ENV VERSION and uses whatever is currently
    checked out. Together those silently deployed LATEST for a 'previous-version' base, turning the
    upgrade into a no-op (Adversary F1d-2). Checking the tag out here and then deploying non-chaos
    makes a pinned deploy genuinely deploy that version, with on-disk compose/.env matching the pin.

    Raises:
        subprocess.CalledProcessError: if `git checkout` exits non-zero (check=True).
    """
    import os

    # Recipes are looked up under ~/.abra/recipes/<recipe>.
    recipe_dir = os.path.expanduser(f"~/.abra/recipes/{recipe}")
    git_cmd = ["git", "-C", recipe_dir, "checkout", "--quiet", version]
    subprocess.run(git_cmd, check=True)
def env_set(domain: str, key: str, value: str) -> None:
"""Set a key in the app's .env (abra has no setter; edit the file directly)."""
import os

View File

@ -120,9 +120,21 @@ def assert_serving(domain: str, meta: dict) -> None:
def do_upgrade(domain: str, target: str | None, meta: dict) -> None:
    """UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then assert it
    reconverges + still serves AND that the deployment actually MOVED (version label and/or image
    changed). The move assertion guards against a vacuous no-op upgrade silently passing — the exact
    F1d-2 failure where a mis-pinned base deployed LATEST so 'upgrade to latest' changed nothing.

    Args:
        domain: domain of the deployment to upgrade.
        target: version passed to the upgrade as `version=`; may be None.
        meta: passed through unchanged to `assert_serving`.

    Raises:
        AssertionError: if the deployed identity could not be read on either side of the upgrade,
            or if neither the version label nor the image changed.
    """
    before = lifecycle.deployed_identity(domain)
    lifecycle.upgrade_app(domain, version=target)
    assert_serving(domain, meta)
    after = lifecycle.deployed_identity(domain)

    # An unreadable identity (both fields None) can never prove movement. Report it as its own
    # failure instead of a misleading "None->None did not move" message. Checked only after the
    # upgrade so the order of side effects is unchanged.
    assert any(before) and any(after), (
        f"{domain}: could not read the deployed identity "
        f"(before={before}, after={after}) — cannot prove the upgrade moved the deployment"
    )

    # A field counts as moved only when it is known both before and after AND differs.
    moved = any(old and new and old != new for old, new in zip(before, after))
    assert moved, (
        f"{domain}: upgrade did not move the deployment "
        f"(version {before[0]}->{after[0]}, image {before[1]}->{after[1]}) — "
        "not a real previous->target upgrade (DG2 must be non-vacuous)"
    )
_SNAPSHOT_ID_RE = re.compile(r'"snapshot_id"\s*:\s*"([0-9a-f]{8,})"')

View File

@ -8,6 +8,7 @@ from __future__ import annotations
import contextlib
import datetime
import json
import os
import re
import ssl
@ -134,6 +135,12 @@ def deploy_app(
_record_deploy()
abra.app_config_remove(domain) # clear any stale .env from a prior crashed run
abra.app_new(recipe, domain, version=version, secrets=secrets)
# A pinned version must actually deploy that version: check the recipe out to the tag so the
# on-disk compose/.env match, and deploy NON-chaos below (chaos ignores the pin → deployed LATEST,
# Adversary F1d-2). Chaos is correct ONLY for the version=None case (deploy the current PR-head
# checkout). Order matters: checkout before secret_generate (-C) so secrets match the pinned tree.
if version:
abra.recipe_checkout(recipe, version)
# Pin DOMAIN to the run domain explicitly. `abra app new -D` fills it for recipes whose
# .env.sample uses a literal placeholder, but NOT for ones using a `{{ .Domain }}` Go-template
# (this abra version leaves it unexpanded → deploy fails "can't evaluate field Domain"). Setting
@ -146,7 +153,7 @@ def deploy_app(
abra.secret_generate(domain)
if install_steps_hook:
_run_install_steps(install_steps_hook, recipe, domain)
abra.deploy(domain)
abra.deploy(domain, chaos=(version is None))
def _stack_name(domain: str) -> str:
@ -238,6 +245,37 @@ def wait_healthy(
raise TimeoutError(f"{domain}: not healthy over HTTPS {path} (last status {last})")
def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, str | None]:
    """Return (coop-cloud version label, image) of the running app service.

    Used to prove an upgrade actually MOVED the deployment prev→target (not a vacuous no-op —
    Adversary F1d-2). The version label (`coop-cloud.<stack>.version`) is bumped per published
    recipe version; the image usually bumps too. Either changing proves the upgrade did something.

    Args:
        domain: domain of the deployment; the service name is `<_stack_name(domain)>_<service>`.
        service: service suffix within the stack.

    Returns:
        `(version, image)`. Either element is None when it could not be determined; both are None
        when `docker service inspect` produced no usable output (e.g. the service does not exist).
    """
    name = f"{_stack_name(domain)}_{service}"
    proc = subprocess.run(
        [
            "docker",
            "service",
            "inspect",
            name,
            "--format",
            "{{json .Spec.Labels}}|{{.Spec.TaskTemplate.ContainerSpec.Image}}",
        ],
        capture_output=True,
        text=True,
    )
    out = proc.stdout.strip()
    if "|" not in out:
        return (None, None)
    # Split on the LAST "|": label values (e.g. router rules using "||") may themselves contain
    # "|", whereas an image reference cannot. Splitting on the first one would truncate the JSON
    # and silently lose the version label.
    labels_json, _, image = out.rpartition("|")
    ver = None
    # json.JSONDecodeError is a ValueError subclass; malformed labels just leave ver as None.
    with contextlib.suppress(ValueError):
        labels = json.loads(labels_json)
        # A service without labels may render as JSON null rather than an object.
        if isinstance(labels, dict):
            for k, v in labels.items():
                if k.startswith("coop-cloud.") and k.endswith(".version"):
                    ver = v
                    break
    return (ver, image.strip() or None)
def upgrade_app(domain: str, version: str | None = None) -> None:
    """Upgrade the app at `domain` by delegating to `abra.upgrade`, forwarding `version` as-is."""
    abra.upgrade(domain, version=version)