feat(regression): add 4 per-tier RED canaries (DoD#4) + canary_fast marker

Four new per-tier RED canaries prove the server catches failure at every lifecycle tier: - bad-install: custom-html-tiny @ regression-bad-image (4ae88661) nonexistent image → prepull fails → install=fail STAGES=install → no prev-version lookup → chaos deploy of HEAD - bad-upgrade: same branch + SHA, STAGES=install,upgrade install uses prev-version (good image) → PASS upgrade chaos checks out HEAD (bad image) → prepull fails → FAIL - bad-backup: custom-html @ regression-bad-backup (e1e3c5fc) backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad abra app backup create fails → backup=fail - bad-restore: custom-html @ regression-bad-restore (5a481cc1) backup targets .backup-data/ subdir (not where ci-marker.txt lives) backup succeeds; restore puts .backup-data back but NOT the marker marker stays "mutated" → test_restore_returns_state FAILS → restore=fail Each test asserts: rc!=0, failing_tier="fail", prior tiers="pass". Adds @pytest.mark.canary_fast for the fast subset. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-02 01:49:28 +00:00
parent 3dd06ef0ce
commit cf405b4195
2 changed files with 163 additions and 7 deletions
--- a/tests/regression/conftest.py
+++ b/tests/regression/conftest.py
@ -23,6 +23,10 @@ def pytest_configure(config):
        "markers",
        "canary: slow E2E canary test — drives the full cold CI lifecycle; run on-demand only.",
    )
+    config.addinivalue_line(
+        "markers",
+        "canary_fast: fast per-tier RED canary (still tagged canary); subset for quick pre-merge checks.",
+    )


 def run_recipe_ci(
--- a/tests/regression/test_canaries.py
+++ b/tests/regression/test_canaries.py
@ -1,14 +1,16 @@
 """E2E canary regression tests — the server's standing self-test suite.

-Three canaries prove both halves of the server's job:
+Seven canaries prove both halves of the server's job:
  1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
-  2. RED canary    — broken apps are caught; a false-green makes THIS test fail.
+  2. RED canaries   — broken apps are caught at the intended tier; a false-green makes THIS test fail.
+
+Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny and
+custom-html — fast because both recipes deploy quickly. Run with `-m canary_fast` as a pre-merge quick check.
+Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).

 Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
-Slow: each canary drives the full cold lifecycle on the live server (minutes per run).
-
-Pin policy: canary refs are pinned to specific SHAs for stability. Update them when the recipe
-publishes a new release and the pin is stale (re-run to confirm GREEN before updating).
+Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
+the expected verdict.
 """

 from __future__ import annotations
@ -80,11 +82,85 @@ _BAD = {
    ],
 }

+# ---------------------------------------------------------------------------
+# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
+# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
+# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
+# ---------------------------------------------------------------------------
+
+# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
+# Used for install-RED (STAGES=install → chaos of HEAD with bad image → install=fail)
+# and upgrade-RED (STAGES=install,upgrade,custom → prev-version install passes, upgrade chaos fails).
+_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3"  # regression-bad-image @ 2026-06-02
+
+# Install-tier RED fixture: the pinned ref points at an image that cannot be pulled, so the very
+# first tier is expected to fail.
+_BAD_INSTALL = {
+    "id": "bad-install",
+    "recipe": "custom-html-tiny",
+    "src": "recipe-maintainers/custom-html-tiny",
+    "ref": _BAD_IMAGE_REF,
+    # NOTE(review): not read by test_canary_fast / _assert_red_at_tier; presumably kept so the dict
+    # has the same shape as the entries in CANARIES — confirm.
+    "expected_green": False,
+    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
+    "stages": "install",
+    # Assertions: install must be the failing tier.
+    "failing_tier": "install",
+    # Install is the first tier, so there is nothing that must have passed before it.
+    "passing_tiers_before": [],
+    # NOTE(review): stage_pass_checks is not consulted by _assert_red_at_tier — confirm it is
+    # intentionally unused on the fast path.
+    "stage_pass_checks": [],
+    # No specific failing test name is asserted for this fixture.
+    "stage_fail_checks": [],
+}
+
+# Upgrade-tier RED fixture: same bad-image ref as _BAD_INSTALL, but with the upgrade stage enabled
+# so the install tier runs against the previous (good) version first.
+_BAD_UPGRADE = {
+    "id": "bad-upgrade",
+    "recipe": "custom-html-tiny",
+    "src": "recipe-maintainers/custom-html-tiny",
+    "ref": _BAD_IMAGE_REF,
+    # NOTE(review): not read by test_canary_fast / _assert_red_at_tier — confirm it is only kept
+    # for shape parity with the entries in CANARIES.
+    "expected_green": False,
+    # Explicit stages (NOT the test's default, which also includes backup and restore):
+    # prev-version deploy (good image) → install=PASS; upgrade chaos (bad image) → FAIL.
+    "stages": "install,upgrade,custom",
+    "failing_tier": "upgrade",
+    # Install must have passed, proving the failure is specific to the upgrade tier.
+    "passing_tiers_before": ["install"],
+    # NOTE(review): stage_pass_checks is not consulted by _assert_red_at_tier.
+    "stage_pass_checks": [],
+    # No specific failing test name is asserted for this fixture.
+    "stage_fail_checks": [],
+}
+
+# Backup-tier RED fixture: the recipe's backup path label points at a directory that does not
+# exist, so creating the backup is expected to fail.
+_BAD_BACKUP = {
+    "id": "bad-backup",
+    "recipe": "custom-html",
+    "src": "recipe-maintainers/custom-html",
+    # Pin: regression-bad-backup @ 2026-06-02 — backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
+    # `abra app backup create` fails → backup tier RED. install+upgrade still PASS.
+    "ref": "e1e3c5fc5e2bd414600b6d3a9f2266566415ff34",
+    # NOTE(review): not read by test_canary_fast / _assert_red_at_tier.
+    "expected_green": False,
+    "stages": "install,upgrade,backup",
+    "failing_tier": "backup",
+    # NOTE(review): the pin comment above says install AND upgrade pass, but only "install" is
+    # listed here, so the upgrade tier's status is never asserted — confirm whether "upgrade"
+    # should be added.
+    "passing_tiers_before": ["install"],
+    # NOTE(review): stage_pass_checks is not consulted by _assert_red_at_tier.
+    "stage_pass_checks": [],
+    # No specific failing test name is asserted for this fixture.
+    "stage_fail_checks": [],
+}
+
+# Restore-tier RED fixture: the backup succeeds but captures the wrong directory, so the restore
+# cannot bring the marker file back to its pre-mutation state.
+_BAD_RESTORE = {
+    "id": "bad-restore",
+    "recipe": "custom-html",
+    "src": "recipe-maintainers/custom-html",
+    # Pin: regression-bad-restore @ 2026-06-02 — backup captures /usr/share/nginx/html/.backup-data/
+    # (a subdir NOT containing ci-marker.txt). Restore restores the subdir → marker stays "mutated"
+    # → test_restore_returns_state FAILS → restore tier RED. install+upgrade+backup PASS.
+    "ref": "5a481cc1f6b2a46279b8e0eca09ca7cb4dc6f25d",
+    # NOTE(review): not read by test_canary_fast / _assert_red_at_tier.
+    "expected_green": False,
+    "stages": "install,upgrade,backup,restore,custom",
+    "failing_tier": "restore",
+    # NOTE(review): the pin comment above says install, upgrade AND backup pass, but "upgrade" is
+    # not listed here, so its status is never asserted — confirm whether it should be added.
+    "passing_tiers_before": ["install", "backup"],
+    # NOTE(review): stage_pass_checks is not consulted by _assert_red_at_tier.
+    "stage_pass_checks": [],
+    # (stage name, substring of the test name) pairs that must appear as failing tests.
+    "stage_fail_checks": [
+        ("restore", "test_restore_returns_state"),
+    ],
+}
+
 CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
+# Per-tier RED fixtures; this list parametrizes test_canary_fast.
+CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]


 # ---------------------------------------------------------------------------
-# Test
+# Tests
 # ---------------------------------------------------------------------------


@ -99,11 +175,13 @@ def test_canary(canary, tmp_path):
    For the RED canary: proves the harness catches a broken app — if the harness wrongly returned
    green, `assert rc != 0` fails, catching the false-green.
    """
+    stages = canary.get("stages", "install,upgrade,backup,restore,custom")
    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
+        stages=stages,
    )

    _note = f"artifact_dir={artifact_dir}"  # visible in -v output via assert messages
@ -114,6 +192,34 @@ def test_canary(canary, tmp_path):
        _assert_red(rc, results, canary, _note)


+@pytest.mark.canary
+@pytest.mark.canary_fast
+@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST])
+def test_canary_fast(canary, tmp_path):
+    """Run one per-tier RED canary and verify it is caught at its intended lifecycle tier.
+
+    Every fixture in CANARIES_FAST is broken at exactly one tier. The checks, delegated to
+    _assert_red_at_tier, are:
+    - the overall verdict is RED (rc != 0);
+    - the fixture's "failing_tier" is reported as "fail";
+    - every tier in "passing_tiers_before" is reported as "pass", so the failure is shown to be
+      tier-specific rather than "fails somewhere".
+
+    The fixture's "stages" value is forwarded to the harness; when it is absent the full
+    install,upgrade,backup,restore,custom sequence is used.
+    """
+    rc, results, artifact_dir = run_recipe_ci(
+        recipe=canary["recipe"],
+        src=canary["src"],
+        ref=canary["ref"],
+        runs_dir=str(tmp_path),
+        stages=canary.get("stages", "install,upgrade,backup,restore,custom"),
+    )
+
+    # The artifact directory is threaded into every assertion message for post-mortem debugging.
+    _assert_red_at_tier(rc, results, canary, f"artifact_dir={artifact_dir}")
+
+
 def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert a good-canary run is GREEN with real semantic assertions."""

@ -182,6 +288,52 @@ def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
            )


+def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
+    """Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.
+
+    Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
+    the tiers before it still PASSED (no collateral damage from the fixture).
+    If the harness returns 0 for any of these fixtures, false-green is detected at the primary assert.
+
+    Args:
+        rc: exit code returned by the harness run.
+        results: parsed results of the run, or None when none were produced.
+        canary: fixture dict; reads "id", "failing_tier", "passing_tiers_before" and
+            "stage_fail_checks".
+        note: context string appended to every assertion message.
+
+    Raises:
+        AssertionError: if the run is green, if no results are available to verify the tier, if the
+            intended tier is not "fail", if a prior tier is not "pass", or if an expected failing
+            test is missing.
+    """
+    failing_tier = canary.get("failing_tier")
+    passing_before = canary.get("passing_tiers_before", [])
+
+    # PRIMARY: harness must return non-zero.
+    assert rc != 0, (
+        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
+        f"{failing_tier!r} — FALSE-GREEN. {note}"
+    )
+
+    # Without results none of the tier-level checks below can run. Returning silently here would
+    # let the canary pass while proving only "fails somewhere", so treat it as a failure.
+    assert results is not None, (
+        f"[{canary['id']}] harness returned rc={rc} but produced no results; cannot verify that "
+        f"the failure happened at tier {failing_tier!r}. {note}"
+    )
+
+    tier_results = results.get("results", {})
+
+    # The intended failing tier must be "fail".
+    if failing_tier:
+        actual = tier_results.get(failing_tier)
+        assert actual == "fail", (
+            f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
+            f"All tier results: {tier_results}. {note}"
+        )
+
+    # Tiers before the failing tier must have passed (no collateral damage from the fixture).
+    for tier in passing_before:
+        actual = tier_results.get(tier)
+        assert actual == "pass", (
+            f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
+            f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
+        )
+
+    # Optional: specific failing test name (for the restore-RED canary).
+    for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
+        # .get("name") keeps a malformed test entry from raising KeyError while the failure
+        # message is being built, which would hide the real assertion.
+        assert stage_has_failing_test(results, stage_name, test_name_substr), (
+            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
+            f"stage={stage_name!r}. "
+            f"Stage tests: {[t.get('name') for t in _stage_tests(results, stage_name)]}. {note}"
+        )
+
+
 def _stage_tests(results: dict, stage_name: str) -> list[dict]:
    for stage in results.get("stages", []):
        if stage.get("name") == stage_name: