diff --git a/tests/regression/conftest.py b/tests/regression/conftest.py
index 07b86a7..519d3ee 100644
--- a/tests/regression/conftest.py
+++ b/tests/regression/conftest.py
@@ -23,6 +23,10 @@ def pytest_configure(config):
         "markers",
         "canary: slow E2E canary test — drives the full cold CI lifecycle; run on-demand only.",
     )
+    config.addinivalue_line(
+        "markers",
+        "canary_fast: fast per-tier RED canary (still tagged canary); subset for quick pre-merge checks.",
+    )
 
 
 def run_recipe_ci(
diff --git a/tests/regression/test_canaries.py b/tests/regression/test_canaries.py
index 55bc48e..041ae71 100644
--- a/tests/regression/test_canaries.py
+++ b/tests/regression/test_canaries.py
@@ -1,14 +1,16 @@
 """E2E canary regression tests — the server's standing self-test suite.
 
-Three canaries prove both halves of the server's job:
+Seven canaries prove both halves of the server's job:
   1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
-  2. RED canary — broken apps are caught; a false-green makes THIS test fail.
+  2. RED canaries — broken apps are caught at the intended tier; a false-green makes THIS test fail.
+
+Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny and
+custom-html — fast because both recipes deploy quickly. Run with `-m canary_fast` pre-merge.
+Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).
 
 Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
-Slow: each canary drives the full cold lifecycle on the live server (minutes per run).
-
-Pin policy: canary refs are pinned to specific SHAs for stability. Update them when the recipe
-publishes a new release and the pin is stale (re-run to confirm GREEN before updating).
+Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
+the expected verdict.
 """
 
 from __future__ import annotations
@@ -80,11 +82,85 @@ _BAD = {
     ],
 }
 
+# ---------------------------------------------------------------------------
+# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
+# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
+# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
+# ---------------------------------------------------------------------------
+
+# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
+# Used for install-RED (STAGES=install → chaos of HEAD with bad image → install=fail)
+# and upgrade-RED (STAGES=install,upgrade → prev-version install passes, upgrade chaos fails).
+_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3"  # regression-bad-image @ 2026-06-02
+
+_BAD_INSTALL = {
+    "id": "bad-install",
+    "recipe": "custom-html-tiny",
+    "src": "recipe-maintainers/custom-html-tiny",
+    "ref": _BAD_IMAGE_REF,
+    "expected_green": False,
+    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
+    "stages": "install",
+    # Assertions: install must be the failing tier.
+    "failing_tier": "install",
+    "passing_tiers_before": [],
+    "stage_pass_checks": [],
+    "stage_fail_checks": [],
+}
+
+_BAD_UPGRADE = {
+    "id": "bad-upgrade",
+    "recipe": "custom-html-tiny",
+    "src": "recipe-maintainers/custom-html-tiny",
+    "ref": _BAD_IMAGE_REF,
+    "expected_green": False,
+    # Upgrade tier enabled → prev-version deploy (good image) → install=PASS; upgrade chaos (bad image) → FAIL.
+    "stages": "install,upgrade,custom",
+    "failing_tier": "upgrade",
+    "passing_tiers_before": ["install"],
+    "stage_pass_checks": [],
+    "stage_fail_checks": [],
+}
+
+_BAD_BACKUP = {
+    "id": "bad-backup",
+    "recipe": "custom-html",
+    "src": "recipe-maintainers/custom-html",
+    # Pin: regression-bad-backup @ 2026-06-02 — backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
+    # `abra app backup create` fails → backup tier RED. install+upgrade still PASS.
+    "ref": "e1e3c5fc5e2bd414600b6d3a9f2266566415ff34",
+    "expected_green": False,
+    "stages": "install,upgrade,backup",
+    "failing_tier": "backup",
+    "passing_tiers_before": ["install"],  # NOTE(review): "upgrade" is not asserted — confirm intent
+    "stage_pass_checks": [],
+    "stage_fail_checks": [],
+}
+
+_BAD_RESTORE = {
+    "id": "bad-restore",
+    "recipe": "custom-html",
+    "src": "recipe-maintainers/custom-html",
+    # Pin: regression-bad-restore @ 2026-06-02 — backup captures /usr/share/nginx/html/.backup-data/
+    # (a subdir NOT containing ci-marker.txt). Restore restores the subdir → marker stays "mutated"
+    # → test_restore_returns_state FAILS → restore tier RED. install+upgrade+backup PASS.
+    "ref": "5a481cc1f6b2a46279b8e0eca09ca7cb4dc6f25d",
+    "expected_green": False,
+    "stages": "install,upgrade,backup,restore,custom",
+    "failing_tier": "restore",
+    "passing_tiers_before": ["install", "backup"],  # NOTE(review): "upgrade" not asserted — confirm
+    "stage_pass_checks": [],
+    "stage_fail_checks": [
+        ("restore", "test_restore_returns_state"),
+    ],
+}
+
 CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
+CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]
 
 
 # ---------------------------------------------------------------------------
-# Test
+# Tests
 # ---------------------------------------------------------------------------
 
 
@@ -99,11 +175,13 @@ def test_canary(canary, tmp_path):
     For the RED canary: proves the harness catches a broken app — if the harness wrongly returned
     green, `assert rc != 0` fails, catching the false-green.
     """
+    stages = canary.get("stages", "install,upgrade,backup,restore,custom")
     rc, results, artifact_dir = run_recipe_ci(
         recipe=canary["recipe"],
         src=canary["src"],
         ref=canary["ref"],
         runs_dir=str(tmp_path),
+        stages=stages,
     )
 
     _note = f"artifact_dir={artifact_dir}"  # visible in -v output via assert messages
@@ -114,6 +192,34 @@ def test_canary(canary, tmp_path):
         _assert_red(rc, results, canary, _note)
 
 
+@pytest.mark.canary
+@pytest.mark.canary_fast
+@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST])
+def test_canary_fast(canary, tmp_path):
+    """Fast per-tier RED canaries: each proves the server catches failure at a specific lifecycle tier.
+
+    Each canary is broken at exactly one tier; the test asserts:
+      - Overall verdict: RED (rc != 0)
+      - The intended failing tier has status "fail"
+      - Tiers BEFORE the intended failure have status "pass" (proving tier-specific detection, not
+        "fails somewhere")
+
+    These use fast recipes (custom-html-tiny deploys in seconds, custom-html is similarly fast)
+    and are intended as a pre-merge quick check alongside the full slow suite.
+    """
+    stages = canary.get("stages", "install,upgrade,backup,restore,custom")
+    rc, results, artifact_dir = run_recipe_ci(
+        recipe=canary["recipe"],
+        src=canary["src"],
+        ref=canary["ref"],
+        runs_dir=str(tmp_path),
+        stages=stages,
+    )
+
+    _note = f"artifact_dir={artifact_dir}"
+    _assert_red_at_tier(rc, results, canary, _note)
+
+
 def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
     """Assert a good-canary run is GREEN with real semantic assertions."""
 
@@ -182,6 +288,56 @@ def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
     )
 
 
+def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
+    """Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.
+
+    Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
+    the tiers before it still PASSED (no collateral damage from the fixture).
+    If the harness returns 0 for any of these fixtures, false-green is detected at the primary assert.
+    """
+    failing_tier = canary.get("failing_tier")
+    passing_before = canary.get("passing_tiers_before", [])
+
+    # PRIMARY: harness must return non-zero.
+    assert rc != 0, (
+        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
+        f"{failing_tier!r} — FALSE-GREEN. {note}"
+    )
+
+    # Without per-tier results the failing tier cannot be verified; passing silently here would
+    # reduce this canary to "fails somewhere".
+    assert results is not None, (
+        f"[{canary['id']}] harness produced no results; cannot verify failing tier "
+        f"{failing_tier!r}. {note}"
+    )
+
+    tier_results = results.get("results", {})
+
+    # The intended failing tier must be "fail".
+    if failing_tier:
+        actual = tier_results.get(failing_tier)
+        assert actual == "fail", (
+            f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
+            f"All tier results: {tier_results}. {note}"
+        )
+
+    # Tiers before the failing tier must have passed (no collateral damage from the fixture).
+    for tier in passing_before:
+        actual = tier_results.get(tier)
+        assert actual == "pass", (
+            f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
+            f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
+        )
+
+    # Optional: specific failing test name (for the restore-RED canary).
+    for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
+        assert stage_has_failing_test(results, stage_name, test_name_substr), (
+            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
+            f"stage={stage_name!r}. "
+            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
+        )
+
+
 def _stage_tests(results: dict, stage_name: str) -> list[dict]:
     for stage in results.get("stages", []):
         if stage.get("name") == stage_name: