diff --git a/machine-docs/JOURNAL-1d.md b/machine-docs/JOURNAL-1d.md index 1bed3d2..ec14a26 100644 --- a/machine-docs/JOURNAL-1d.md +++ b/machine-docs/JOURNAL-1d.md @@ -121,3 +121,29 @@ $ docker stack ls | grep -iE 'hedg|cust' -> (none — clean teardown) DG3-N/A (skip on a serving non-backup recipe) together. - **DG4.1** corroborated again: deploy-count=1 across the whole install→upgrade→backup→restore run. Claiming G1. + +## 2026-05-28 — F1d-2 fix: pinned base now deploys the pinned version (DG2 was vacuous) + +**Adversary G1 verdict: FAIL** — DG2 upgrade was a vacuous no-op. F1d-1 CLOSED (cert reframe accepted). +Root cause (Adversary + my confirmation): `deploy_app` always deployed with `-C` (chaos = current +checkout), which IGNORES the version pin → a "previous-version" base actually deployed LATEST, so +"upgrade to newest" was latest→latest and only the still-serving assertion ran ⇒ a broken upgrade +would pass. Real defect. + +**Fix (two parts):** +1. `deploy_app` now checks the recipe out to the pinned tag (`abra.recipe_checkout`) AND deploys + **non-chaos** when a version is pinned (`abra.deploy(chaos=(version is None))`). Chaos stays only + for the version=None case (deploy the current PR-head checkout). +2. Hardened the generic upgrade so a no-op CANNOT pass by construction: `do_upgrade` captures the app + service's (coop-cloud version label, image) before+after and asserts the deployment actually + MOVED (`lifecycle.deployed_identity`). Even if the pin regressed again, before==after → FAIL. + +**Probe (the Adversary's exact F1d-2 test, my code, on cc-ci) — now PASSES:** +``` +prev: 3.0.9+1.10.7 +IMAGE BEFORE (asked prev): quay.io/hedgedoc/hedgedoc:1.10.7@sha256:3174abea… ← was 1.10.8 (LATEST) pre-fix +IMAGE AFTER (upgraded) : quay.io/hedgedoc/hedgedoc:1.10.8@sha256:423f4117… +CHANGED: True +``` +Re-running the full hedgedoc + custom-html lifecycles to confirm all-green with the move-assertion, +then re-claim G1 (and G2: custom-html overlays override+extend the generic, deploy-count=1). diff --git a/runner/harness/abra.py b/runner/harness/abra.py index 19c24fa..094b8bf 100644 --- a/runner/harness/abra.py +++ b/runner/harness/abra.py @@ -71,6 +71,18 @@ def app_new( _run(args) +def recipe_checkout(recipe: str, version: str) -> None: + """git-checkout the recipe to a published version tag so the on-disk compose/.env match the pin. + `abra app new ` records ENV VERSION but does NOT reliably check out the tag, and + a chaos (`-C`) deploy ignores ENV VERSION and uses the current checkout — together that silently + deployed LATEST for a 'previous-version' base, making the upgrade a no-op (Adversary F1d-2). With + this checkout + a non-chaos deploy, a pinned deploy genuinely deploys that version.""" + import os + + path = os.path.expanduser(f"~/.abra/recipes/{recipe}") + subprocess.run(["git", "-C", path, "checkout", "--quiet", version], check=True) + + def env_set(domain: str, key: str, value: str) -> None: """Set a key in the app's .env (abra has no setter; edit the file directly).""" import os diff --git a/runner/harness/generic.py b/runner/harness/generic.py index 2514f3f..326b589 100644 --- a/runner/harness/generic.py +++ b/runner/harness/generic.py @@ -120,9 +120,21 @@ def assert_serving(domain: str, meta: dict) -> None: def do_upgrade(domain: str, target: str | None, meta: dict) -> None: """UPGRADE op (in place on the shared deployment): abra app upgrade -> target, then assert it - reconverges + still serves (assert_serving polls, so the rolling upgrade settles).""" + reconverges + still serves AND that the deployment actually MOVED (version label and/or image + changed). The move assertion guards against a vacuous no-op upgrade silently passing — the exact + F1d-2 failure where a mis-pinned base deployed LATEST so 'upgrade to latest' changed nothing.""" + before = lifecycle.deployed_identity(domain) lifecycle.upgrade_app(domain, version=target) assert_serving(domain, meta) + after = lifecycle.deployed_identity(domain) + moved = (before[0] and after[0] and before[0] != after[0]) or ( + before[1] and after[1] and before[1] != after[1] + ) + assert moved, ( + f"{domain}: upgrade did not move the deployment " + f"(version {before[0]}->{after[0]}, image {before[1]}->{after[1]}) — " + "not a real previous->target upgrade (DG2 must be non-vacuous)" + ) _SNAPSHOT_ID_RE = re.compile(r'"snapshot_id"\s*:\s*"([0-9a-f]{8,})"') diff --git a/runner/harness/lifecycle.py b/runner/harness/lifecycle.py index 52ada9e..c33941a 100644 --- a/runner/harness/lifecycle.py +++ b/runner/harness/lifecycle.py @@ -8,6 +8,7 @@ from __future__ import annotations import contextlib import datetime +import json import os import re import ssl @@ -134,6 +135,12 @@ def deploy_app( _record_deploy() abra.app_config_remove(domain) # clear any stale .env from a prior crashed run abra.app_new(recipe, domain, version=version, secrets=secrets) + # A pinned version must actually deploy that version: check the recipe out to the tag so the + # on-disk compose/.env match, and deploy NON-chaos below (chaos ignores the pin → deployed LATEST, + # Adversary F1d-2). Chaos is correct ONLY for the version=None case (deploy the current PR-head + # checkout). Order matters: checkout before secret_generate (-C) so secrets match the pinned tree. + if version: + abra.recipe_checkout(recipe, version) # Pin DOMAIN to the run domain explicitly. `abra app new -D` fills it for recipes whose # .env.sample uses a literal placeholder, but NOT for ones using a `{{ .Domain }}` Go-template # (this abra version leaves it unexpanded → deploy fails "can't evaluate field Domain"). Setting @@ -146,7 +153,7 @@ def deploy_app( abra.secret_generate(domain) if install_steps_hook: _run_install_steps(install_steps_hook, recipe, domain) - abra.deploy(domain) + abra.deploy(domain, chaos=(version is None)) def _stack_name(domain: str) -> str: @@ -238,6 +245,37 @@ def wait_healthy( raise TimeoutError(f"{domain}: not healthy over HTTPS {path} (last status {last})") +def deployed_identity(domain: str, service: str = "app") -> tuple[str | None, str | None]: + """(coop-cloud version label, image) of the running app service. Used to prove an upgrade + actually MOVED the deployment prev→target (not a vacuous no-op — Adversary F1d-2). The version + label (`coop-cloud..version`) is bumped per published recipe version; the image usually + bumps too. Either changing proves the upgrade did something.""" + name = f"{_stack_name(domain)}_{service}" + proc = subprocess.run( + [ + "docker", + "service", + "inspect", + name, + "--format", + "{{json .Spec.Labels}}|{{.Spec.TaskTemplate.ContainerSpec.Image}}", + ], + capture_output=True, + text=True, + ) + out = proc.stdout.strip() + if "|" not in out: + return (None, None) + labels_json, _, image = out.partition("|") + ver = None + with contextlib.suppress(ValueError, json.JSONDecodeError): + for k, v in json.loads(labels_json).items(): + if k.startswith("coop-cloud.") and k.endswith(".version"): + ver = v + break + return (ver, image.strip() or None) + + def upgrade_app(domain: str, version: str | None = None) -> None: abra.upgrade(domain, version=version) diff --git a/tests/custom-html/test_backup.py b/tests/custom-html/test_backup.py index 4223455..58544eb 100644 --- a/tests/custom-html/test_backup.py +++ b/tests/custom-html/test_backup.py @@ -1,30 +1,25 @@ -"""custom-html — backup/restore stage (D2): backup, mutate state, restore, assert the restored -state matches the pre-mutation (backed-up) state.""" +"""custom-html — BACKUP overlay (Phase 1d, DG4): seed a known state, back it up (assert artifact), +then mutate so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on +the shared deployment; the marker it leaves ("mutated") persists for the restore tier.""" import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) -from harness import lifecycle # noqa: E402 +from harness import generic, lifecycle # noqa: E402 MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" -def test_backup_mutate_restore(deployed): - domain = deployed - - # 1) establish original state, then back it up +def test_backup_captures_state(live_app, meta): + domain = live_app + # 1) establish a known original state, then back it up (reuse the generic op: backup + assert + # a snapshot artifact was produced) lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER_PATH}"]) - assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original" - lifecycle.backup_app(domain) + assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "original" + snap = generic.do_backup(domain) + assert snap, "backup produced no snapshot artifact" - # 2) mutate state (diverge from the backup) + # 2) mutate state so a successful restore is observable (diverge from the backup) lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) - assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "mutated" - - # 3) restore -> state returns to the backed-up "original" - lifecycle.restore_app(domain) - lifecycle.wait_healthy(domain) - assert ( - lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original" - ), "restore did not return the pre-mutation state" + assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "mutated" diff --git a/tests/custom-html/test_install.py b/tests/custom-html/test_install.py index 142471e..0eac63b 100644 --- a/tests/custom-html/test_install.py +++ b/tests/custom-html/test_install.py @@ -1,28 +1,28 @@ -"""custom-html — install stage (recipe #1, simple/stateless). D2 install + D3 Playwright.""" +"""custom-html — INSTALL overlay (Phase 1d layering proof, DG4). + +Demonstrates OVERRIDE + EXTEND-by-composition: this file's presence makes the harness run it INSTEAD +of the generic install tier (override), and it reuses the generic assertion (`generic.assert_serving`) +then ADDS a recipe-specific Playwright content check (extend). Assertion-only — the orchestrator has +already deployed the shared app once (no deploy here, so deploy-count stays 1).""" import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) -from harness import lifecycle # noqa: E402 +from harness import generic # noqa: E402 -def test_http_reachable(deployed_app): - """The deployed app answers 200 over real HTTPS through the gateway.""" - status = lifecycle.http_get(deployed_app, "/") - assert status == 200, f"expected 200 from {deployed_app}, got {status}" - - -def test_playwright_page(deployed_app): - """A real browser (Playwright/Chromium) loads the live app and sees served content.""" +def test_serving_and_content(live_app, meta): + # extend-by-composition: reuse the generic "really serving" assertion ... + generic.assert_serving(live_app, meta) + # ... then add the recipe-specific assertion: a real browser sees nginx-served HTML (D3). from playwright.sync_api import sync_playwright - url = f"https://{deployed_app}/" + url = f"https://{live_app}/" with sync_playwright() as p: browser = p.chromium.launch(args=["--no-sandbox"]) try: - ctx = browser.new_context(ignore_https_errors=True) - page = ctx.new_page() + page = browser.new_context(ignore_https_errors=True).new_page() resp = page.goto(url, wait_until="load", timeout=30000) assert resp is not None and resp.status == 200, f"page status {resp and resp.status}" body = page.content() diff --git a/tests/custom-html/test_restore.py b/tests/custom-html/test_restore.py new file mode 100644 index 0000000..c5bf9df --- /dev/null +++ b/tests/custom-html/test_restore.py @@ -0,0 +1,21 @@ +"""custom-html — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore. + +Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left state +mutated to "mutated" after backing up "original". This restores the snapshot via the shared op +helper (`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then +asserts the served data returned to the pre-mutation "original" — the app-specific data integrity the +generic restore cannot check. Assertion-only (no deploy/teardown).""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import generic, lifecycle # noqa: E402 + + +def test_restore_returns_state(live_app, meta): + domain = live_app + generic.do_restore(domain, meta) # restore + assert healthy/serving + assert ( + lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "original" + ), "restore did not return the pre-mutation (backed-up) state" diff --git a/tests/custom-html/test_upgrade.py b/tests/custom-html/test_upgrade.py index 3cadad2..14c5897 100644 --- a/tests/custom-html/test_upgrade.py +++ b/tests/custom-html/test_upgrade.py @@ -1,41 +1,29 @@ -"""custom-html — upgrade stage (D2): deploy the previous published version, write data, upgrade -to the current/$REF version, and assert the app stays healthy and data survives.""" +"""custom-html — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade. + +The orchestrator deployed the previous published version ONCE; this overlay seeds a marker into the +served volume, performs the in-place upgrade via the shared op helper (`generic.do_upgrade`, which +also asserts reconverge + serving), then asserts the data SURVIVED. Assertion-only on the shared +deployment (no deploy/teardown here).""" import os import sys -import pytest - sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) -from harness import lifecycle # noqa: E402 +from harness import generic, lifecycle # noqa: E402 MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" -@pytest.fixture -def old_app(recipe, app_domain, request): - prev = lifecycle.previous_version(recipe) - if not prev: - pytest.skip(f"{recipe}: no previous published version to upgrade from") - lifecycle.janitor() - request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) - lifecycle.deploy_app(recipe, app_domain, version=prev) - lifecycle.wait_healthy(app_domain) - return app_domain, prev - - -def test_upgrade_preserves_data(old_app): - domain, prev = old_app +def test_upgrade_preserves_data(live_app, meta): + domain = live_app # write a data marker into the served volume (nginx serves /usr/share/nginx/html) lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER_PATH}"]) - assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives" + assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives" - # upgrade previous -> current/$REF - lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None) - lifecycle.wait_healthy(domain) + # in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving) + generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta) - # app healthy and the data written before the upgrade is still there - assert lifecycle.http_get(domain, "/") == 200 + # the data written before the upgrade is still there assert ( - lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives" + lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives" ), "data did not survive the upgrade" diff --git a/tests/unit/test_discovery.py b/tests/unit/test_discovery.py new file mode 100644 index 0000000..6be845e --- /dev/null +++ b/tests/unit/test_discovery.py @@ -0,0 +1,53 @@ +"""Unit tests for overlay/custom/hook discovery + precedence (Phase 1d, DG4). + +Deterministic, no deployment — proves the resolution rule (repo-local > cc-ci > generic) and the +invariant "no overlay for an op ⇒ generic runs". Run with: `cc-ci-run -m pytest tests/unit`. +These live under tests/unit/ (NOT a recipe name, NOT tests/_generic/) so the run orchestrator never +picks them up as overlays/custom tests.""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import discovery # noqa: E402 + + +def test_no_overlay_falls_back_to_generic(): + # hedgedoc has no tests/hedgedoc/ in cc-ci and no repo-local dir -> generic is the floor (DG4 invariant) + source, path = discovery.resolve_op("hedgedoc", "install", None) + assert source == "generic" + assert path == os.path.join(discovery.GENERIC_DIR, "test_install.py") + + +def test_cc_ci_overlay_overrides_generic(): + # custom-html ships cc-ci overlays for all four ops -> cc-ci wins over generic when no repo-local + for op in discovery.LIFECYCLE_OPS: + source, path = discovery.resolve_op("custom-html", op, None) + assert source == "cc-ci", op + assert path == os.path.join(discovery.cc_ci_dir("custom-html"), f"test_{op}.py") + + +def test_repo_local_wins_same_name_collision(tmp_path): + # repo-local is upstream-authoritative: a repo-local test_install.py beats cc-ci's for that op + (tmp_path / "test_install.py").write_text("# repo-local overlay\n") + source, path = discovery.resolve_op("custom-html", "install", str(tmp_path)) + assert source == "repo-local" + assert path == str(tmp_path / "test_install.py") + + +def test_custom_tests_additive_from_both_locations(tmp_path): + # non-lifecycle test_*.py are opt-in and additive; lifecycle names are excluded + (tmp_path / "test_sso.py").write_text("# repo-local custom\n") + (tmp_path / "test_install.py").write_text("# lifecycle name -> excluded from custom\n") + customs = discovery.custom_tests("custom-html", str(tmp_path)) + names = {(src, os.path.basename(p)) for src, p in customs} + assert ("repo-local", "test_sso.py") in names + assert all(os.path.basename(p) != "test_install.py" for _, p in customs) + + +def test_install_steps_repo_local_over_cc_ci(tmp_path): + (tmp_path / "install_steps.sh").write_text("#!/usr/bin/env bash\n") + hook = discovery.install_steps("custom-html", str(tmp_path)) + assert hook is not None + assert hook[0] == "repo-local" + assert discovery.install_steps("hedgedoc", None) is None