From 73427d6e36805086607104c88b5ce58462429c93 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 2 Jun 2026 03:33:24 +0000 Subject: [PATCH] feat(regression): add E2E canary regression suite (tests/regression/) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seven canaries prove both halves of the server's job: - GREEN: good apps are reported healthy (good-simple + good-significant) - RED: broken apps are caught at intended tier (false-green guard + 4 per-tier) Fixtures: custom-html-bkp-bad (backup tier RED) + custom-html-rst-bad (restore tier RED). All 7 canaries verified on live server (see STATUS-regression.md for artifacts). Not wired to per-commit CI — run on-demand: pytest -m canary tests/regression/ Co-Authored-By: Claude Sonnet 4.6 --- tests/custom-html-bkp-bad/ops.py | 19 ++ tests/custom-html-bkp-bad/recipe_meta.py | 5 + tests/custom-html-bkp-bad/test_backup.py | 28 ++ tests/custom-html-bkp-bad/test_restore.py | 25 ++ tests/custom-html-rst-bad/ops.py | 15 + tests/custom-html-rst-bad/recipe_meta.py | 3 + tests/custom-html-rst-bad/test_restore.py | 23 ++ tests/regression/README.md | 136 +++++++++ tests/regression/conftest.py | 106 +++++++ tests/regression/test_canaries.py | 344 ++++++++++++++++++++++ 10 files changed, 704 insertions(+) create mode 100644 tests/custom-html-bkp-bad/ops.py create mode 100644 tests/custom-html-bkp-bad/recipe_meta.py create mode 100644 tests/custom-html-bkp-bad/test_backup.py create mode 100644 tests/custom-html-bkp-bad/test_restore.py create mode 100644 tests/custom-html-rst-bad/ops.py create mode 100644 tests/custom-html-rst-bad/recipe_meta.py create mode 100644 tests/custom-html-rst-bad/test_restore.py create mode 100644 tests/regression/README.md create mode 100644 tests/regression/conftest.py create mode 100644 tests/regression/test_canaries.py diff --git a/tests/custom-html-bkp-bad/ops.py b/tests/custom-html-bkp-bad/ops.py new file mode 100644 index 0000000..f6db098 --- 
/dev/null +++ b/tests/custom-html-bkp-bad/ops.py @@ -0,0 +1,19 @@ +"""custom-html-bkp-bad — lifecycle ops for bad-backup/bad-restore RED canaries. + +Intentionally has NO pre_backup hook: the marker is never seeded before backup, +so the backup snapshot has no ci-marker.txt. pre_restore writes "mutated" so that if +restore DOES bring back the snapshot, the marker is gone/still-mutated → test fails. +""" + +from __future__ import annotations + +from harness import lifecycle + +MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" + + +def pre_restore(domain: str, meta: dict) -> None: + """Write 'mutated' to the marker before restore runs. If restore brings back the + snapshot (which has no marker — never seeded by pre_backup), the marker ends up + MISSING or 'mutated' after restore → test_restore_returns_state FAILS → restore=RED.""" + lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) diff --git a/tests/custom-html-bkp-bad/recipe_meta.py b/tests/custom-html-bkp-bad/recipe_meta.py new file mode 100644 index 0000000..cff4007 --- /dev/null +++ b/tests/custom-html-bkp-bad/recipe_meta.py @@ -0,0 +1,5 @@ +# custom-html-bkp-bad — regression fixture for bad-backup canary. +# This recipe is custom-html WITHOUT backupbot labels. Setting BACKUP_CAPABLE=True here forces the +# harness to run the backup tier; the recipe itself has no backupbot service, so +# `abra app backup create` produces no snapshot → test_backup_artifact fails → backup tier RED. +BACKUP_CAPABLE = True diff --git a/tests/custom-html-bkp-bad/test_backup.py b/tests/custom-html-bkp-bad/test_backup.py new file mode 100644 index 0000000..373fae9 --- /dev/null +++ b/tests/custom-html-bkp-bad/test_backup.py @@ -0,0 +1,28 @@ +"""custom-html-bkp-bad — BACKUP assertion (bad-backup RED canary). + +This recipe has no ops.py::pre_backup, so ci-marker.txt is NEVER seeded before the backup. 
+Asserting its presence here causes backup tier RED — proving the server catches a recipe that +claims backup support but doesn't actually back up the expected data. +""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import lifecycle # noqa: E402 + +MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" + + +def test_backup_captures_state(live_app): + """Assert the pre-backup marker is present and equals 'original'. + + Since custom-html-bkp-bad has no ops.py::pre_backup to seed the marker, this file does NOT + exist at backup time — exec_in_app returns empty or raises → assertion fails → backup tier RED. + This models a recipe that declares backup capability but omits the data-seeding hook.""" + result = lifecycle.exec_in_app(live_app, ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"]).strip() + assert result == "original", ( + f"backup did not capture the expected marker at {MARKER_PATH}: got {result!r}. " + "Expected 'original' (seeded by pre_backup). If the marker is 'MISSING', the pre_backup " + "hook was not run — this is the intended failure for the bad-backup RED canary." + ) diff --git a/tests/custom-html-bkp-bad/test_restore.py b/tests/custom-html-bkp-bad/test_restore.py new file mode 100644 index 0000000..ce1b924 --- /dev/null +++ b/tests/custom-html-bkp-bad/test_restore.py @@ -0,0 +1,25 @@ +"""custom-html-bkp-bad — RESTORE assertion (bad-restore RED canary). + +pre_restore seeds 'mutated' to ci-marker.txt. The backup snapshot has no ci-marker.txt +(never seeded by pre_backup). After restore, the marker is either MISSING or 'mutated' — +never 'original' — so this assertion FAILS → restore tier RED. 
+""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import lifecycle # noqa: E402 + +MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" + + +def test_restore_returns_state(live_app): + result = lifecycle.exec_in_app( + live_app, ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"] + ).strip() + assert result == "original", ( + f"restore did not return the pre-mutation (backed-up) state: got {result!r}. " + "Expected 'original'. The backup had no marker (not seeded by pre_backup), so " + "restore cannot recover it — this is the intended failure for the bad-restore RED canary." + ) diff --git a/tests/custom-html-rst-bad/ops.py b/tests/custom-html-rst-bad/ops.py new file mode 100644 index 0000000..3f3b920 --- /dev/null +++ b/tests/custom-html-rst-bad/ops.py @@ -0,0 +1,15 @@ +"""custom-html-rst-bad — lifecycle ops for bad-restore RED canary. + +NO pre_backup hook: marker never seeded before backup → snapshot has no ci-marker.txt. +pre_restore writes "mutated". After restore, marker stays "mutated" (not in snapshot) → FAIL. +""" + +from __future__ import annotations + +from harness import lifecycle + +MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" + + +def pre_restore(domain: str, meta: dict) -> None: + lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) diff --git a/tests/custom-html-rst-bad/recipe_meta.py b/tests/custom-html-rst-bad/recipe_meta.py new file mode 100644 index 0000000..7e5bda7 --- /dev/null +++ b/tests/custom-html-rst-bad/recipe_meta.py @@ -0,0 +1,3 @@ +# custom-html-rst-bad — regression fixture for bad-restore canary. +# BACKUP_CAPABLE=True forces the backup tier to run even though the recipe has no backupbot label. 
+BACKUP_CAPABLE = True diff --git a/tests/custom-html-rst-bad/test_restore.py b/tests/custom-html-rst-bad/test_restore.py new file mode 100644 index 0000000..a361d65 --- /dev/null +++ b/tests/custom-html-rst-bad/test_restore.py @@ -0,0 +1,23 @@ +"""custom-html-rst-bad — RESTORE assertion (bad-restore RED canary). + +No pre_backup → backup snapshot has no ci-marker.txt. pre_restore writes "mutated". +After restore: marker is "mutated" (restore can't recover "original" — wasn't backed up) → FAIL. +""" + +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import lifecycle # noqa: E402 + +MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" + + +def test_restore_returns_state(live_app): + result = lifecycle.exec_in_app( + live_app, ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"] + ).strip() + assert result == "original", ( + f"restore did not return the pre-mutation (backed-up) state: got {result!r}. " + "Expected 'original'. The backup had no marker, so restore cannot recover it." + ) diff --git a/tests/regression/README.md b/tests/regression/README.md new file mode 100644 index 0000000..0b02acd --- /dev/null +++ b/tests/regression/README.md @@ -0,0 +1,136 @@ +# Regression canaries — E2E self-tests for the cc-ci server + +A standing pytest suite that drives the **real** cc-ci lifecycle harness against pinned canary +recipes and verifies both halves of the server's job: + +1. **Good canaries** — healthy apps are reported GREEN (install + upgrade + backup/restore pass). +2. **Bad canary** — broken apps are caught RED; a false-green makes the regression test itself fail. + +These tests run the full cold lifecycle on the live cc-ci server. They are **slow** (minutes per +canary) and **opt-in** — kept out of the per-commit fast path by the `canary` marker. 
+ +--- + +## How to run + +Run on the cc-ci server (abra + Docker + Swarm required): + +```bash +ssh cc-ci +cd /root/cc-ci # or wherever the repo is checked out +cc-ci-run python -m pytest tests/regression/ -m canary -v +``` + +Or a single canary: + +```bash +cc-ci-run python -m pytest tests/regression/ -m canary -k good-simple -v +``` + +From the orchestrator: + +```bash +ssh cc-ci "cd /root/cc-ci && cc-ci-run python -m pytest tests/regression/ -m canary -v" +``` + +--- + +## Canaries + +| ID | Recipe | Purpose | Expected verdict | +|----|--------|---------|-----------------| +| `good-simple` | `custom-html-tiny` | Minimal static server — fast signal | GREEN | +| `good-significant` | `lasuite-docs` | Multi-service (backend + Postgres + Collabora + OIDC) | GREEN | +| `bad-false-green` | `custom-html` @ `v5-stale-docroot` | App is UP but serves wrong Content-Type — catches false-green | RED | + +### Why the bad canary exists + +The scariest regression is a **false-green**: the server reports PASS while the app is broken. +We already saw a fabricated full-PASS during the build. The `bad-false-green` canary pins a +known-broken fixture (`v5-stale-docroot`: nginx serves `.txt` as `application/octet-stream`). The +harness's `test_content_type_html_and_txt` catches this and returns RED (build #75 was RED for +exactly this fixture). + +The regression test asserts `rc != 0`. If the harness ever wrongly returns green for this fixture, +that assert fires — false-green is caught before any merge. + +--- + +## What each canary verifies + +### Per-tier semantic assertions (the "teeth") + +The tests assert MORE than the harness exit code: they check that **specific named assertions** +ran and got the expected result. This guards against a different failure mode — a tier that +nominally "passes" because the assertion was silently removed or made vacuous. 
+ +| Stage | Test name | What it proves | +|-------|-----------|---------------| +| install | `test_serving` | Generic HTTP readiness check actually ran | +| install | `test_serving_and_frontend` | Lasuite-docs frontend (SPA shell) actually loaded | +| custom | `test_content_type` | Content-type assertion actually ran (bad canary only) | + +If a tier assertion is removed: the named test disappears from `results.json` → the semantic +check fires → the regression suite catches the removal. + +### Additional structural assertions (good canaries) + +- `install` tier: "pass" (not fail, not skip) +- No tier is "fail" (skips acceptable for recipes without backup/custom tests) +- `flags.clean_teardown = True` (no leftover containers/volumes/secrets) +- `flags.no_secret_leak = True` (no secret value in the results artifact) + +--- + +## Cadence policy + +**Do NOT run on every commit or PR.** These are slow and resource-heavy. Run them: + +- Before a **release** of the cc-ci server (after a batch of server changes). +- As a **polishing pass** or pre-merge check for significant server refactors. +- On-demand when you suspect a regression: `pytest -m canary`. + +They are NOT wired to the per-commit Drone pipeline. If adding a `!testme`-style trigger for the +cc-ci repo, gate it behind a deliberate label (e.g. `run-canaries`) — not an automatic run on +every push. + +--- + +## How to add a canary + +1. Identify a recipe that is already deployable and has pinned version tags. +2. Decide the expected verdict (GREEN or RED) and which tier assertions have teeth. +3. Add an entry to `CANARIES` in `test_canaries.py`: + +```python +{ + "id": "good-myrecipe", + "recipe": "my-recipe", + "src": "recipe-maintainers/my-recipe", + "ref": "", # pin to a specific commit for stability + "expected_green": True, + "stage_pass_checks": [ + ("install", "test_serving"), # verify this named test ran and passed + ], + "stage_fail_checks": [], +} +``` + +4. 
Run the canary once to confirm it passes: + `cc-ci-run python -m pytest tests/regression/ -m canary -k good-myrecipe -v` + +5. Update the pin comment with the date and the recipe version it was pinned at. + +--- + +## Pin maintenance + +Canary refs are pinned to specific SHAs for stability. When a recipe publishes a new release: + +1. Update the `"ref"` SHA in the canary definition (use the new main-branch HEAD). +2. Update the pin comment with the new date/version. +3. Re-run the canary to confirm GREEN before committing the pin update. + +The bad canary (`v5-stale-docroot`) is a stable fixture branch — update only if the branch is +deleted. If deleted, recreate the pattern: an app that is up + passes lifecycle tiers but fails +one functional assertion. diff --git a/tests/regression/conftest.py b/tests/regression/conftest.py new file mode 100644 index 0000000..519d3ee --- /dev/null +++ b/tests/regression/conftest.py @@ -0,0 +1,106 @@ +"""Shared fixtures and helpers for E2E canary regression tests. + +The regression tests call the real cc-ci harness (run_recipe_ci.py) as a subprocess and assert on +its outputs (exit code, results.json). They run ON the cc-ci server, not the orchestrator — abra, +Docker, and Swarm must be present. 
+ +Invoke: cc-ci-run python -m pytest tests/regression/ -m canary -v +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +import time + +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", + "canary: slow E2E canary test — drives the full cold CI lifecycle; run on-demand only.", + ) + config.addinivalue_line( + "markers", + "canary_fast: fast per-tier RED canary (still tagged canary); subset for quick pre-merge checks.", + ) + + +def run_recipe_ci( + recipe: str, + src: str, + ref: str, + pr: str = "0", + stages: str = "install,upgrade,backup,restore,custom", + runs_dir: str | None = None, + run_id_prefix: str = "regression", + timeout: int = 3600, +) -> tuple[int, dict | None, str]: + """Invoke run_recipe_ci.py with the given canary params. + + Returns (rc, results_dict_or_None, run_artifact_dir). + Stdout/stderr stream live so a human can follow progress. 
+ """ + ts = int(time.time()) + run_id = f"{run_id_prefix}-{recipe}-{ref[:12]}-{ts}" + if runs_dir is None: + runs_dir = "/var/lib/cc-ci-runs" + + env = dict(os.environ) + env.update( + { + "RECIPE": recipe, + "REF": ref, + "SRC": src, + "PR": pr, + "STAGES": stages, + "CCCI_RUN_ID": run_id, + "CCCI_RUNS_DIR": runs_dir, + "HOME": "/root", + } + ) + # Keep PLAYWRIGHT env from the outer cc-ci-run wrapper (already in os.environ if running under it) + + script = os.path.join(ROOT, "runner", "run_recipe_ci.py") + result = subprocess.run( + [sys.executable, script], + env=env, + timeout=timeout, + ) + rc = result.returncode + + artifact_dir = os.path.join(runs_dir, run_id) + results_path = os.path.join(artifact_dir, "results.json") + results_data: dict | None = None + if os.path.exists(results_path): + with open(results_path) as f: + results_data = json.load(f) + + return rc, results_data, artifact_dir + + +def find_stage_tests(results: dict, stage_name: str) -> list[dict]: + """Return the per-test list for a named stage from results.json, or [].""" + for stage in results.get("stages", []): + if stage.get("name") == stage_name: + return stage.get("tests", []) + return [] + + +def stage_has_passing_test(results: dict, stage_name: str, test_name_substr: str) -> bool: + """True if the named stage contains a passing test whose name includes test_name_substr.""" + for t in find_stage_tests(results, stage_name): + if test_name_substr in t.get("name", "") and t.get("status") == "pass": + return True + return False + + +def stage_has_failing_test(results: dict, stage_name: str, test_name_substr: str) -> bool: + """True if the named stage contains a failing test whose name includes test_name_substr.""" + for t in find_stage_tests(results, stage_name): + if test_name_substr in t.get("name", "") and t.get("status") in ("fail", "error"): + return True + return False diff --git a/tests/regression/test_canaries.py b/tests/regression/test_canaries.py new file mode 100644 index 
0000000..361fabd --- /dev/null +++ b/tests/regression/test_canaries.py @@ -0,0 +1,344 @@ +"""E2E canary regression tests — the server's standing self-test suite. + +Seven canaries prove both halves of the server's job: + 1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass). + 2. RED canaries — broken apps are caught at the intended tier; a false-green makes THIS test fail. + +Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny — fast +because the recipe deploys in seconds. Run with `-m canary_fast` as a pre-merge quick check. +Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min). + +Run: cc-ci-run python -m pytest tests/regression/ -m canary -v +Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives +the expected verdict. +""" + +from __future__ import annotations + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.dirname(__file__)) +import conftest as _reg # noqa: E402 + +run_recipe_ci = _reg.run_recipe_ci +stage_has_passing_test = _reg.stage_has_passing_test +stage_has_failing_test = _reg.stage_has_failing_test + +# --------------------------------------------------------------------------- +# Canary definitions +# --------------------------------------------------------------------------- + +# Good canary 1: minimal static-file server — fast signal, few deps. +_SIMPLE = { + "id": "good-simple", + "recipe": "custom-html-tiny", + "src": "recipe-maintainers/custom-html-tiny", + # Pin: main @ 2026-06-02 — update if the recipe publishes a new release and pin goes stale. + "ref": "435df8fc98ef7598084fcffcd6225470eca80053", + "expected_green": True, + # Named tests that MUST appear with "pass" in the result — these are the semantic teeth. + # If the generic install assertion is removed/vacated, test_serving disappears → this fails. 
+ "stage_pass_checks": [ + ("install", "test_serving"), + ], + "stage_fail_checks": [], +} + +# Good canary 2: multi-service stack — backend + Postgres + Collabora WOPI + OIDC. +# Exercises real breadth. Slowest canary (~10-20 min full lifecycle). +_SIGNIFICANT = { + "id": "good-significant", + "recipe": "lasuite-docs", + "src": "recipe-maintainers/lasuite-docs", + # Pin: main @ 2026-06-02 + "ref": "290a8ad72d06232f0b3f302d976af14bef0f3c53", + "expected_green": True, + "stage_pass_checks": [ + ("install", "test_serving_and_frontend"), + ], + "stage_fail_checks": [], +} + +# Bad canary: app is UP + passes all lifecycle tiers but the custom functional assertion detects a +# semantic defect (wrong Content-Type for .txt files). The harness MUST report RED. +# If the harness wrongly returns green for this fixture, assert rc != 0 fails → false-green caught. +_BAD = { + "id": "bad-false-green", + "recipe": "custom-html", + "src": "recipe-maintainers/custom-html", + # Pin: v5-stale-docroot @ 71e7326 — serves .txt as application/octet-stream; build #75 was RED. + # Recreate pattern if branch disappears: app up + passes lifecycle, fails one content assertion. + "ref": "71e7326a99bbb69035a046fba8fa51859ca66115", + "expected_green": False, + # The specific test that must have FAILED, proving the content-type assertion has teeth. + # If the assertion is vacated and the test disappears, stage_has_failing_test() returns False + # → the assert below fails → we detect that the guard was removed. + "stage_pass_checks": [], + "stage_fail_checks": [ + ("custom", "test_content_type"), + ], +} + +# --------------------------------------------------------------------------- +# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast) +# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught. +# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support). 
+# --------------------------------------------------------------------------- + +# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub. +# Used for install-RED (STAGES=install → chaos of HEAD with bad image → install=fail) +# and upgrade-RED (STAGES=install,upgrade → prev-version install passes, upgrade chaos fails). +_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3" # regression-bad-image @ 2026-06-02 + +_BAD_INSTALL = { + "id": "bad-install", + "recipe": "custom-html-tiny", + "src": "recipe-maintainers/custom-html-tiny", + "ref": _BAD_IMAGE_REF, + "expected_green": False, + # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails. + "stages": "install", + # Assertions: install must be the failing tier. + "failing_tier": "install", + "passing_tiers_before": [], + "stage_pass_checks": [], + "stage_fail_checks": [], +} + +_BAD_UPGRADE = { + "id": "bad-upgrade", + "recipe": "custom-html-tiny", + "src": "recipe-maintainers/custom-html-tiny", + "ref": _BAD_IMAGE_REF, + "expected_green": False, + # Default stages → prev-version deploy (good image) → install=PASS; upgrade chaos (bad image) → FAIL. + "stages": "install,upgrade,custom", + "failing_tier": "upgrade", + "passing_tiers_before": ["install"], + "stage_pass_checks": [], + "stage_fail_checks": [], +} + +_BAD_BACKUP = { + "id": "bad-backup", + "recipe": "custom-html-bkp-bad", + "src": "recipe-maintainers/custom-html-bkp-bad", + # Pin: custom-html-bkp-bad main @ 2026-06-02 — custom-html WITHOUT backupbot labels. + # cc-ci recipe_meta sets BACKUP_CAPABLE=True → harness runs backup tier. + # No backupbot.backup=true label → backup-bot-two finds no containers → no snapshot. + # parse_snapshot_id returns None → test_backup_artifact fails → backup tier RED. 
+ "ref": "b6fe99de41601f9e51bc7ea5b6072f0c3f56cdc3", + "expected_green": False, + "stages": "install,upgrade,backup", + "failing_tier": "backup", + "passing_tiers_before": ["install"], + "stage_pass_checks": [], + "stage_fail_checks": [], +} + +_BAD_RESTORE = { + "id": "bad-restore", + "recipe": "custom-html-rst-bad", + "src": "recipe-maintainers/custom-html-rst-bad", + # Pin: custom-html-rst-bad main @ 2026-06-02 (9a73a184). + # No pre_backup hook → backup snapshot has no ci-marker.txt. + # pre_restore writes "mutated". After restore: marker stays "mutated" → FAIL → restore=RED. + # install+backup PASS (no test_backup.py in cc-ci dir); upgrade=skip (no version tags). + "ref": "9a73a184e739691bc6a621a5f1e6efc799743c5b", + "expected_green": False, + "stages": "install,backup,restore,custom", + "failing_tier": "restore", + "passing_tiers_before": ["install", "backup"], + "stage_pass_checks": [], + "stage_fail_checks": [ + ("restore", "test_restore_returns_state"), + ], +} + +CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD] +CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE] + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +@pytest.mark.canary +@pytest.mark.parametrize("canary", CANARIES, ids=[c["id"] for c in CANARIES]) +def test_canary(canary, tmp_path): + """Drive the full cold CI lifecycle for a canary recipe and verify the outcome. + + For GREEN canaries: proves the harness correctly reports a healthy app as healthy, and that + the per-tier semantic assertions actually ran (not vacuous). + + For the RED canary: proves the harness catches a broken app — if the harness wrongly returned + green, `assert rc != 0` fails, catching the false-green. 
+ """ + stages = canary.get("stages", "install,upgrade,backup,restore,custom") + rc, results, artifact_dir = run_recipe_ci( + recipe=canary["recipe"], + src=canary["src"], + ref=canary["ref"], + runs_dir=str(tmp_path), + stages=stages, + ) + + _note = f"artifact_dir={artifact_dir}" # visible in -v output via assert messages + + if canary["expected_green"]: + _assert_green(rc, results, canary, _note) + else: + _assert_red(rc, results, canary, _note) + + +@pytest.mark.canary +@pytest.mark.canary_fast +@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST]) +def test_canary_fast(canary, tmp_path): + """Fast per-tier RED canaries: each proves the server catches failure at a specific lifecycle tier. + + Each canary is broken at exactly one tier; the test asserts: + - Overall verdict: RED (rc != 0) + - The intended failing tier has status "fail" + - Tiers BEFORE the intended failure have status "pass" (proving tier-specific detection, not + "fails somewhere") + + These use fast recipes (custom-html-tiny deploys in seconds, custom-html is similarly fast) + and are intended as a pre-merge quick check alongside the full slow suite. + """ + stages = canary.get("stages", "install,upgrade,backup,restore,custom") + rc, results, artifact_dir = run_recipe_ci( + recipe=canary["recipe"], + src=canary["src"], + ref=canary["ref"], + runs_dir=str(tmp_path), + stages=stages, + ) + + _note = f"artifact_dir={artifact_dir}" + _assert_red_at_tier(rc, results, canary, _note) + + +def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None: + """Assert a good-canary run is GREEN with real semantic assertions.""" + + # 1. Harness exit code must be 0 (GREEN). + assert rc == 0, f"[{canary['id']}] harness returned non-zero rc={rc} — expected GREEN. {note}" + + assert ( + results is not None + ), f"[{canary['id']}] results.json not written — harness may have crashed. {note}" + + # 2. Install tier must have passed. 
+ assert results.get("results", {}).get("install") == "pass", ( + f"[{canary['id']}] install tier did not pass: " f"results={results.get('results')}. {note}" + ) + + # 3. No tier may have FAILED (skips are acceptable for recipes without backup or custom tests). + failed_tiers = [t for t, s in results.get("results", {}).items() if s == "fail"] + assert not failed_tiers, f"[{canary['id']}] tiers failed: {failed_tiers}. {note}" + + # 4. Teardown must be clean (no leftover containers/volumes/secrets). + assert ( + results.get("flags", {}).get("clean_teardown") is True + ), f"[{canary['id']}] clean_teardown=False — residual state left on server. {note}" + + # 5. No secret values leaked into the results artifact. + assert ( + results.get("flags", {}).get("no_secret_leak") is True + ), f"[{canary['id']}] no_secret_leak=False — a secret value appeared in results.json. {note}" + + # 6. Semantic stage assertions — TEETH CHECK. + # These verify that specific named tests actually ran and passed in the expected stage. + # If a tier assertion is removed or made vacuous, the named test disappears from results.json + # and this assert fires — proving the regression suite guards against silent test removal. + for stage_name, test_name_substr in canary.get("stage_pass_checks", []): + assert stage_has_passing_test(results, stage_name, test_name_substr), ( + f"[{canary['id']}] expected a passing test containing {test_name_substr!r} in " + f"stage={stage_name!r}, but none found. " + f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}" + ) + + +def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None: + """Assert a bad-canary run is RED (false-green guard). + + The PRIMARY assertion is rc != 0. If the harness wrongly returns 0 (green) for this fixture, + this assert fails → the regression suite catches the false-green. This is the core guard. + """ + + # PRIMARY: harness must return non-zero (RED). 
+    # If the harness returns 0 for a broken app, the regression suite fails here — false-green caught.
+    assert rc != 0, (
+        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture — "
+        f"FALSE-GREEN detected. The harness failed to catch the broken app. {note}"
+    )
+
+    # SECONDARY: results.json must exist (a crash is not a catch) and show the named failing test.
+    # If the content-type assertion is removed/vacated, stage_has_failing_test() returns False here
+    # → this assert fires → we detect that the guard itself was removed (a meta-failure).
+    assert results is not None, f"[{canary['id']}] results.json not written. {note}"
+    for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
+        assert stage_has_failing_test(results, stage_name, test_name_substr), (
+            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
+            f"stage={stage_name!r}, but none found. "
+            f"The guard may have been removed or vacuated. "
+            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
+        )
+
+
+def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
+    """Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.
+
+    Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
+    the tiers before it still PASSED. rc == 0 is caught as a false-green by the primary assert;
+    a missing results.json also fails, because a crashed harness proves nothing about any tier.
+    """
+    failing_tier = canary.get("failing_tier")
+    passing_before = canary.get("passing_tiers_before", [])
+
+    # PRIMARY: harness must return non-zero.
+    assert rc != 0, (
+        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
+        f"{failing_tier!r} — FALSE-GREEN. {note}"
+    )
+
+    # A crash before results.json is written is RED for the wrong reason — fail loudly instead.
+    assert results is not None, f"[{canary['id']}] results.json not written. {note}"
+
+    tier_results = results.get("results", {})
+
+    # The intended failing tier must be "fail".
+    if failing_tier:
+        actual = tier_results.get(failing_tier)
+        assert actual == "fail", (
+            f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
+            f"All tier results: {tier_results}. {note}"
+        )
+
+    # Tiers before the failing tier must have passed (no collateral damage from the fixture).
+    for tier in passing_before:
+        actual = tier_results.get(tier)
+        assert actual == "pass", (
+            f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
+            f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
+        )
+
+    # Optional: specific failing test name (for the restore-RED canary).
+    for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
+        assert stage_has_failing_test(results, stage_name, test_name_substr), (
+            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
+            f"stage={stage_name!r}. "
+            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
+        )
+
+
+def _stage_tests(results: dict, stage_name: str) -> list[dict]:
+    for stage in results.get("stages", []):
+        if stage.get("name") == stage_name:
+            return stage.get("tests", [])
+    return []
-- 
2.49.0