"""E2E canary regression tests — the server's standing self-test suite. Three canaries prove both halves of the server's job: 1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass). 2. RED canary — broken apps are caught; a false-green makes THIS test fail. Run: cc-ci-run python -m pytest tests/regression/ -m canary -v Slow: each canary drives the full cold lifecycle on the live server (minutes per run). Pin policy: canary refs are pinned to specific SHAs for stability. Update them when the recipe publishes a new release and the pin is stale (re-run to confirm GREEN before updating). """ from __future__ import annotations import pytest from .conftest import run_recipe_ci, stage_has_failing_test, stage_has_passing_test # --------------------------------------------------------------------------- # Canary definitions # --------------------------------------------------------------------------- # Good canary 1: minimal static-file server — fast signal, few deps. _SIMPLE = { "id": "good-simple", "recipe": "custom-html-tiny", "src": "recipe-maintainers/custom-html-tiny", # Pin: main @ 2026-06-02 — update if the recipe publishes a new release and pin goes stale. "ref": "435df8fc98ef7598084fcffcd6225470eca80053", "expected_green": True, # Named tests that MUST appear with "pass" in the result — these are the semantic teeth. # If the generic install assertion is removed/vacated, test_serving disappears → this fails. "stage_pass_checks": [ ("install", "test_serving"), ], "stage_fail_checks": [], } # Good canary 2: multi-service stack — backend + Postgres + Collabora WOPI + OIDC. # Exercises real breadth. Slowest canary (~10-20 min full lifecycle). 
_SIGNIFICANT = {
    "id": "good-significant",
    "recipe": "lasuite-docs",
    "src": "recipe-maintainers/lasuite-docs",
    # Pin: main @ 2026-06-02
    "ref": "290a8ad72d06232f0b3f302d976af14bef0f3c53",
    "expected_green": True,
    "stage_pass_checks": [
        ("install", "test_serving_and_frontend"),
    ],
    "stage_fail_checks": [],
}

# Bad canary: app is UP + passes all lifecycle tiers but the custom functional assertion detects a
# semantic defect (wrong Content-Type for .txt files). The harness MUST report RED.
# If the harness wrongly returns green for this fixture, assert rc != 0 fails → false-green caught.
_BAD = {
    "id": "bad-false-green",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: v5-stale-docroot @ 71e7326 — serves .txt as application/octet-stream; build #75 was RED.
    # Recreate pattern if branch disappears: app up + passes lifecycle, fails one content assertion.
    "ref": "71e7326a99bbb69035a046fba8fa51859ca66115",
    "expected_green": False,
    # The specific test that must have FAILED, proving the content-type assertion has teeth.
    # If the assertion is vacated and the test disappears, stage_has_failing_test() returns False
    # → the assert in _assert_red fails → we detect that the guard was removed.
    "stage_pass_checks": [],
    "stage_fail_checks": [
        ("custom", "test_content_type"),
    ],
}

CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]


# ---------------------------------------------------------------------------
# Test
# ---------------------------------------------------------------------------


@pytest.mark.canary
@pytest.mark.parametrize("canary", CANARIES, ids=[c["id"] for c in CANARIES])
def test_canary(canary, tmp_path):
    """Drive the full cold CI lifecycle for a canary recipe and verify the outcome.

    For GREEN canaries: proves the harness correctly reports a healthy app as healthy,
    and that the per-tier semantic assertions actually ran (not vacuous).

    For the RED canary: proves the harness catches a broken app — if the harness wrongly
    returned green, `assert rc != 0` fails, catching the false-green.
    """
    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
    )
    # Appended to every assert message so the artifact location is visible in -v output.
    note = f"artifact_dir={artifact_dir}"

    if canary["expected_green"]:
        _assert_green(rc, results, canary, note)
    else:
        _assert_red(rc, results, canary, note)


def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert a good-canary run is GREEN with real semantic assertions.

    Args:
        rc: Exit code returned by the harness.
        results: Parsed results.json, or None when it was not written.
        canary: Canary definition; "id" and "stage_pass_checks" are read here.
        note: Diagnostic suffix appended to every assertion message.

    Raises:
        AssertionError: On the first check that does not hold.
    """
    # 1. Harness exit code must be 0 (GREEN).
    assert rc == 0, f"[{canary['id']}] harness returned non-zero rc={rc} — expected GREEN. {note}"
    assert (
        results is not None
    ), f"[{canary['id']}] results.json not written — harness may have crashed. {note}"

    # `or {}` keeps a JSON null from turning the checks below into an AttributeError.
    tier_results = results.get("results") or {}
    flags = results.get("flags") or {}

    # 2. Install tier must have passed.
    assert tier_results.get("install") == "pass", (
        f"[{canary['id']}] install tier did not pass: "
        f"results={results.get('results')}. {note}"
    )

    # 3. No tier may have FAILED (skips are acceptable for recipes without backup or custom tests).
    failed_tiers = [tier for tier, status in tier_results.items() if status == "fail"]
    assert not failed_tiers, f"[{canary['id']}] tiers failed: {failed_tiers}. {note}"

    # 4. Teardown must be clean (no leftover containers/volumes/secrets).
    assert (
        flags.get("clean_teardown") is True
    ), f"[{canary['id']}] clean_teardown=False — residual state left on server. {note}"

    # 5. No secret values leaked into the results artifact.
    assert (
        flags.get("no_secret_leak") is True
    ), f"[{canary['id']}] no_secret_leak=False — a secret value appeared in results.json. {note}"

    # 6. Semantic stage assertions — TEETH CHECK.
    # These verify that specific named tests actually ran and passed in the expected stage.
    # If a tier assertion is removed or made vacuous, the named test disappears from results.json
    # and this assert fires — proving the regression suite guards against silent test removal.
    for stage_name, test_name_substr in canary.get("stage_pass_checks", []):
        assert stage_has_passing_test(results, stage_name, test_name_substr), (
            f"[{canary['id']}] expected a passing test containing {test_name_substr!r} in "
            f"stage={stage_name!r}, but none found. "
            f"Stage tests: {_stage_test_names(results, stage_name)}. {note}"
        )


def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert a bad-canary run is RED (false-green guard).

    The PRIMARY assertion is rc != 0. If the harness wrongly returns 0 (green) for this
    fixture, this assert fails → the regression suite catches the false-green.
    This is the core guard.

    When the canary lists "stage_fail_checks", results.json must also exist and contain the
    named failing tests. Otherwise a harness crash (non-zero rc, no results) would pass as a
    successful RED detection without the guard ever having run.

    Args:
        rc: Exit code returned by the harness.
        results: Parsed results.json, or None when it was not written.
        canary: Canary definition; "id" and "stage_fail_checks" are read here.
        note: Diagnostic suffix appended to every assertion message.

    Raises:
        AssertionError: On the first check that does not hold.
    """
    # PRIMARY: harness must return non-zero (RED).
    # If the harness returns 0 for a broken app, the regression suite fails here — false-green caught.
    assert rc != 0, (
        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture — "
        f"FALSE-GREEN detected. The harness failed to catch the broken app. {note}"
    )

    fail_checks = canary.get("stage_fail_checks", [])
    if not fail_checks:
        # Nothing specific to verify beyond the exit code.
        return

    # A non-zero rc with no results.json cannot be told apart from a harness crash,
    # so it must not count as having caught the defect.
    assert results is not None, (
        f"[{canary['id']}] results.json not written — cannot verify the expected failing "
        f"tests {fail_checks}; the non-zero rc may be a harness crash, not a caught defect. {note}"
    )

    # SECONDARY: verify the specific failing test is present in results.json.
    # If the content-type assertion is removed/vacated, stage_has_failing_test() returns False here
    # → this assert fires → we detect that the guard itself was removed (a meta-failure).
    for stage_name, test_name_substr in fail_checks:
        assert stage_has_failing_test(results, stage_name, test_name_substr), (
            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
            f"stage={stage_name!r}, but none found. "
            f"The guard may have been removed or vacuated. "
            f"Stage tests: {_stage_test_names(results, stage_name)}. {note}"
        )


def _stage_test_names(results: dict, stage_name: str) -> list:
    """Return the test names recorded for *stage_name*, for use in assertion messages.

    Uses .get() so that a test entry without a "name" key cannot raise KeyError while the
    failure message is being built, which would hide the assertion that actually failed.
    """
    return [test.get("name", "<unnamed>") for test in _stage_tests(results, stage_name)]


def _stage_tests(results: dict, stage_name: str) -> list[dict]:
    """Return the "tests" list of the first stage named *stage_name*, or [] if absent.

    A missing or null "stages"/"tests" value is treated as empty.
    """
    for stage in results.get("stages") or []:
        if stage.get("name") == stage_name:
            return stage.get("tests") or []
    return []