fix(1d): F1d-2 — pinned base deploys the pinned version; upgrade is non-vacuous

- deploy_app: check out the pinned tag and deploy NON-chaos when a version is pinned (chaos only for
  version=None / PR-head). Previously it always passed -C, which ignored the pin and deployed LATEST, so the upgrade was a no-op.
- do_upgrade: assert the deployment actually MOVED (coop-cloud version label and/or image changed)
  via lifecycle.deployed_identity -> a vacuous no-op upgrade can no longer pass (DG2).
- G2: migrate custom-html overlays to the assertion-only contract (override + extend-by-composition
  + data-continuity; split backup/restore). tests/unit/test_discovery.py proves precedence (5/5).

Probe (Adversary's F1d-2 test): hedgedoc deploy-prev=1.10.7 -> upgrade=1.10.8, CHANGED=True.
hedgedoc full generic lifecycle green (install/upgrade/backup/restore, deploy-count=1).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 00:02:59 +01:00
parent 1aea1541a7
commit 81e26a1bdc
9 changed files with 204 additions and 59 deletions

View File

@ -1,30 +1,25 @@
"""custom-html — backup/restore stage (D2): backup, mutate state, restore, assert the restored
state matches the pre-mutation (backed-up) state."""
"""custom-html — BACKUP overlay (Phase 1d, DG4): seed a known state, back it up (assert artifact),
then mutate so the RESTORE overlay (test_restore.py) can prove the backed-up state returns. Runs on
the shared deployment; the marker it leaves ("mutated") persists for the restore tier."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
from harness import generic, lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
def test_backup_mutate_restore(deployed):
domain = deployed
# 1) establish original state, then back it up
def test_backup_captures_state(live_app, meta):
domain = live_app
# 1) establish a known original state, then back it up (reuse the generic op: backup + assert
# a snapshot artifact was produced)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo original > {MARKER_PATH}"])
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original"
lifecycle.backup_app(domain)
assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "original"
snap = generic.do_backup(domain)
assert snap, "backup produced no snapshot artifact"
# 2) mutate state (diverge from the backup)
# 2) mutate state so a successful restore is observable (diverge from the backup)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"])
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "mutated"
# 3) restore -> state returns to the backed-up "original"
lifecycle.restore_app(domain)
lifecycle.wait_healthy(domain)
assert (
lifecycle.http_body(domain, "/ci-marker.txt").strip() == "original"
), "restore did not return the pre-mutation state"
assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "mutated"

View File

@ -1,28 +1,28 @@
"""custom-html — install stage (recipe #1, simple/stateless). D2 install + D3 Playwright."""
"""custom-html — INSTALL overlay (Phase 1d layering proof, DG4).
Demonstrates OVERRIDE + EXTEND-by-composition: this file's presence makes the harness run it INSTEAD
of the generic install tier (override), and it reuses the generic assertion (`generic.assert_serving`)
then ADDS a recipe-specific Playwright content check (extend). Assertion-only — the orchestrator has
already deployed the shared app once (no deploy here, so deploy-count stays 1)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
from harness import generic # noqa: E402
def test_http_reachable(deployed_app):
"""The deployed app answers 200 over real HTTPS through the gateway."""
status = lifecycle.http_get(deployed_app, "/")
assert status == 200, f"expected 200 from {deployed_app}, got {status}"
def test_playwright_page(deployed_app):
"""A real browser (Playwright/Chromium) loads the live app and sees served content."""
def test_serving_and_content(live_app, meta):
# extend-by-composition: reuse the generic "really serving" assertion ...
generic.assert_serving(live_app, meta)
# ... then add the recipe-specific assertion: a real browser sees nginx-served HTML (D3).
from playwright.sync_api import sync_playwright
url = f"https://{deployed_app}/"
url = f"https://{live_app}/"
with sync_playwright() as p:
browser = p.chromium.launch(args=["--no-sandbox"])
try:
ctx = browser.new_context(ignore_https_errors=True)
page = ctx.new_page()
page = browser.new_context(ignore_https_errors=True).new_page()
resp = page.goto(url, wait_until="load", timeout=30000)
assert resp is not None and resp.status == 200, f"page status {resp and resp.status}"
body = page.content()

View File

@ -0,0 +1,21 @@
"""custom-html — RESTORE overlay (Phase 1d, DG4): data-integrity, extends the generic restore.
Runs after the backup overlay (test_backup.py) on the SAME shared deployment, which left state
mutated to "mutated" after backing up "original". This restores the snapshot via the shared op
helper (`generic.do_restore`, which also asserts the app is healthy + serving afterwards), then
asserts the served data returned to the pre-mutation "original" — the app-specific data integrity the
generic restore cannot check. Assertion-only (no deploy/teardown)."""
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import generic, lifecycle # noqa: E402
def test_restore_returns_state(live_app, meta):
    """Restore the shared deployment and check the served marker is "original" again.

    `live_app` is used as the app domain for both the restore call and the
    HTTP fetch; `meta` is passed straight through to `generic.do_restore`.
    """
    # Per this module's docstring, do_restore also asserts the app is healthy
    # and serving after the restore.
    generic.do_restore(live_app, meta)

    # Index 1 of the fetch result is compared as the response body
    # (presumably a (status, body) pair — confirm against lifecycle.http_fetch).
    fetched = lifecycle.http_fetch(live_app, "/ci-marker.txt")
    marker = fetched[1].strip()
    assert marker == "original", "restore did not return the pre-mutation (backed-up) state"

View File

@ -1,41 +1,29 @@
"""custom-html — upgrade stage (D2): deploy the previous published version, write data, upgrade
to the current/$REF version, and assert the app stays healthy and data survives."""
"""custom-html — UPGRADE overlay (Phase 1d, DG4): data-continuity, extends the generic upgrade.
The orchestrator deployed the previous published version ONCE; this overlay seeds a marker into the
served volume, performs the in-place upgrade via the shared op helper (`generic.do_upgrade`, which
also asserts reconverge + serving), then asserts the data SURVIVED. Assertion-only on the shared
deployment (no deploy/teardown here)."""
import os
import sys
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
from harness import lifecycle # noqa: E402
from harness import generic, lifecycle # noqa: E402
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
@pytest.fixture
def old_app(recipe, app_domain, request):
prev = lifecycle.previous_version(recipe)
if not prev:
pytest.skip(f"{recipe}: no previous published version to upgrade from")
lifecycle.janitor()
request.addfinalizer(lambda: lifecycle.teardown_app(app_domain))
lifecycle.deploy_app(recipe, app_domain, version=prev)
lifecycle.wait_healthy(app_domain)
return app_domain, prev
def test_upgrade_preserves_data(old_app):
domain, prev = old_app
def test_upgrade_preserves_data(live_app, meta):
domain = live_app
# write a data marker into the served volume (nginx serves /usr/share/nginx/html)
lifecycle.exec_in_app(domain, ["sh", "-c", f"echo upgrade-survives > {MARKER_PATH}"])
assert lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives"
assert lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
# upgrade previous -> current/$REF
lifecycle.upgrade_app(domain, version=os.environ.get("VERSION") or None)
lifecycle.wait_healthy(domain)
# in-place upgrade previous -> target (reuses the generic op: upgrade + assert reconverge/serving)
generic.do_upgrade(domain, os.environ.get("VERSION") or None, meta)
# app healthy and the data written before the upgrade is still there
assert lifecycle.http_get(domain, "/") == 200
# the data written before the upgrade is still there
assert (
lifecycle.http_body(domain, "/ci-marker.txt").strip() == "upgrade-survives"
lifecycle.http_fetch(domain, "/ci-marker.txt")[1].strip() == "upgrade-survives"
), "data did not survive the upgrade"