cc-ci/tests/regression/test_canaries.py

"""E2E canary regression tests — the server's standing self-test suite.

Seven canaries prove both halves of the server's job:
  1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
  2. RED canaries   — broken apps are caught at the intended tier; a false-green makes THIS test fail.

Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny and
custom-html — fast because both recipes deploy in seconds. Run with `-m canary_fast` as a
pre-merge quick check.
Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).

Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
the expected verdict.
"""

from __future__ import annotations

import os
import sys

import pytest

# Put this file's directory first on sys.path so `conftest` can be imported as a plain module;
# the import therefore has to come after the path tweak (hence the E402 suppression).
sys.path.insert(0, os.path.dirname(__file__))
import conftest as _reg  # noqa: E402

# Module-level aliases for the conftest helpers used by the tests and assertions below.
run_recipe_ci = _reg.run_recipe_ci
stage_has_passing_test = _reg.stage_has_passing_test
stage_has_failing_test = _reg.stage_has_failing_test

# ---------------------------------------------------------------------------
# Canary definitions
# ---------------------------------------------------------------------------

# Good canary 1: minimal static-file server — fast signal, few deps.
# No "stages" key, so test_canary falls back to its default stage list.
_SIMPLE = {
    "id": "good-simple",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    # Pin: main @ 2026-06-02 — update if the recipe publishes a new release and pin goes stale.
    "ref": "435df8fc98ef7598084fcffcd6225470eca80053",
    # True → test_canary verifies the run with _assert_green.
    "expected_green": True,
    # (stage, test-name substring) pairs that MUST appear with "pass" in the result — these are
    # the semantic teeth.
    # If the generic install assertion is removed/vacated, test_serving disappears → this fails.
    "stage_pass_checks": [
        ("install", "test_serving"),
    ],
    # Not read by _assert_green; presumably kept so every canary dict has the same keys.
    "stage_fail_checks": [],
}

# Good canary 2: multi-service stack — backend + Postgres + Collabora WOPI + OIDC.
# Exercises real breadth. Slowest canary (~10-20 min full lifecycle).
# No "stages" key, so test_canary falls back to its default stage list.
_SIGNIFICANT = {
    "id": "good-significant",
    "recipe": "lasuite-docs",
    "src": "recipe-maintainers/lasuite-docs",
    # Pin: main @ 2026-06-02
    "ref": "290a8ad72d06232f0b3f302d976af14bef0f3c53",
    # True → test_canary verifies the run with _assert_green.
    "expected_green": True,
    # (stage, test-name substring): a passing test containing the substring must be reported
    # for that stage, otherwise _assert_green fails.
    "stage_pass_checks": [
        ("install", "test_serving_and_frontend"),
    ],
    # Not read by _assert_green; presumably kept so every canary dict has the same keys.
    "stage_fail_checks": [],
}

# Bad canary: app is UP + passes all lifecycle tiers but the custom functional assertion detects a
# semantic defect (wrong Content-Type for .txt files). The harness MUST report RED.
# If the harness wrongly returns green for this fixture, `assert rc != 0` in _assert_red fails
# → false-green caught.
# No "stages" key, so test_canary falls back to its default stage list.
_BAD = {
    "id": "bad-false-green",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: v5-stale-docroot @ 71e7326 — serves .txt as application/octet-stream; build #75 was RED.
    # Recreate pattern if branch disappears: app up + passes lifecycle, fails one content assertion.
    "ref": "71e7326a99bbb69035a046fba8fa51859ca66115",
    # False → test_canary verifies the run with _assert_red.
    "expected_green": False,
    # Not read by _assert_red.
    "stage_pass_checks": [],
    # The specific test that must have FAILED, proving the content-type assertion has teeth.
    # If the assertion is vacated and the test disappears, stage_has_failing_test() returns False
    # → the check in _assert_red fails → we detect that the guard was removed.
    "stage_fail_checks": [
        ("custom", "test_content_type"),
    ],
}

# ---------------------------------------------------------------------------
# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
# ---------------------------------------------------------------------------

# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
# Used for install-RED (stages "install" → chaos of HEAD with bad image → install=fail)
# and upgrade-RED (stages "install,upgrade,custom" → prev-version install passes, upgrade chaos
# fails).
_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3"  # regression-bad-image @ 2026-06-02

# Install-RED canary: the very first tier must be the one reported as failing.
_BAD_INSTALL = {
    "id": "bad-install",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
    "stages": "install",
    # Assertions: install must be the failing tier.
    "failing_tier": "install",
    # Nothing runs before install, so no earlier tier is required to pass.
    "passing_tiers_before": [],
    "stage_pass_checks": [],
    # No specific failing test name is required for this canary.
    "stage_fail_checks": [],
}

# Upgrade-RED canary: same bad-image ref as _BAD_INSTALL, but run with an upgrade tier.
_BAD_UPGRADE = {
    "id": "bad-upgrade",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # Explicit stage list (not the test's default, which also includes backup and restore):
    # prev-version deploy (good image) → install=PASS; upgrade chaos (bad image) → FAIL.
    "stages": "install,upgrade,custom",
    # The tier whose status must be "fail".
    "failing_tier": "upgrade",
    # Tiers whose status must be "pass", proving the failure is specific to upgrade.
    "passing_tiers_before": ["install"],
    "stage_pass_checks": [],
    # No specific failing test name is required for this canary.
    "stage_fail_checks": [],
}

# Backup-RED canary.
_BAD_BACKUP = {
    "id": "bad-backup",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-backup @ 2026-06-02 — backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
    # `abra app backup create` fails → backup tier RED. install+upgrade still PASS.
    "ref": "e1e3c5fc5e2bd414600b6d3a9f2266566415ff34",
    "expected_green": False,
    "stages": "install,upgrade,backup",
    # The tier whose status must be "fail".
    "failing_tier": "backup",
    # Only install is asserted to be "pass"; the upgrade tier's status is not checked here.
    "passing_tiers_before": ["install"],
    "stage_pass_checks": [],
    # No specific failing test name is required for this canary.
    "stage_fail_checks": [],
}

# Restore-RED canary.
_BAD_RESTORE = {
    "id": "bad-restore",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-restore @ 2026-06-02 — backup captures /usr/share/nginx/html/.backup-data/
    # (a subdir NOT containing ci-marker.txt). Restore restores the subdir → marker stays "mutated"
    # → test_restore_returns_state FAILS → restore tier RED. install+upgrade+backup PASS.
    "ref": "5a481cc1f6b2a46279b8e0eca09ca7cb4dc6f25d",
    "expected_green": False,
    "stages": "install,upgrade,backup,restore,custom",
    # The tier whose status must be "fail".
    "failing_tier": "restore",
    # install and backup are asserted to be "pass"; the upgrade tier's status is not checked here.
    "passing_tiers_before": ["install", "backup"],
    "stage_pass_checks": [],
    # (stage, test-name substring): a failing test containing the substring must be reported
    # for that stage.
    "stage_fail_checks": [
        ("restore", "test_restore_returns_state"),
    ],
}

# Parametrization sets: CANARIES drives test_canary (GREEN/RED by "expected_green");
# CANARIES_FAST drives test_canary_fast (per-tier RED checks).
CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


@pytest.mark.canary
@pytest.mark.parametrize("canary", CANARIES, ids=[c["id"] for c in CANARIES])
def test_canary(canary, tmp_path):
    """Run one canary recipe through the CI harness and check the verdict it reports.

    The canary's recipe, source and pinned ref are handed to ``run_recipe_ci`` together with
    ``tmp_path`` as the runs directory. The stage list comes from the canary's optional
    ``"stages"`` entry, defaulting to ``install,upgrade,backup,restore,custom``.

    A canary with ``expected_green`` set is verified by ``_assert_green``; any other canary is
    verified by ``_assert_red``, whose ``rc != 0`` check is what catches a false-green.
    """
    exit_code, report, artifacts = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
        stages=canary.get("stages", "install,upgrade,backup,restore,custom"),
    )

    # The artifact directory is threaded into every assertion message for easier debugging.
    context = f"artifact_dir={artifacts}"

    verify = _assert_green if canary["expected_green"] else _assert_red
    verify(exit_code, report, canary, context)


@pytest.mark.canary
@pytest.mark.canary_fast
@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST])
def test_canary_fast(canary, tmp_path):
    """Run one per-tier RED canary and check that it fails at the tier it was built to break.

    The canary is run through ``run_recipe_ci`` with ``tmp_path`` as the runs directory and
    the canary's ``"stages"`` entry (default ``install,upgrade,backup,restore,custom``). The
    outcome is then handed to ``_assert_red_at_tier``, which checks:

    - the overall verdict is RED (rc != 0);
    - the canary's ``failing_tier`` is reported as "fail";
    - every tier in ``passing_tiers_before`` is reported as "pass", so the failure is
      tier-specific rather than "fails somewhere".

    Selected by the ``canary_fast`` marker; intended as a pre-merge quick check.
    """
    run_kwargs = {
        "recipe": canary["recipe"],
        "src": canary["src"],
        "ref": canary["ref"],
        "runs_dir": str(tmp_path),
        "stages": canary.get("stages", "install,upgrade,backup,restore,custom"),
    }
    exit_code, report, artifacts = run_recipe_ci(**run_kwargs)

    _assert_red_at_tier(exit_code, report, canary, f"artifact_dir={artifacts}")


def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Check that a good-canary run came back GREEN and that its named tests really passed.

    Args:
        rc: Exit code returned by the harness; must be 0.
        results: Parsed results document, or None if it was not written.
        canary: Canary definition; ``id`` is used in messages and ``stage_pass_checks`` lists
            the (stage, test-name substring) pairs that must be reported as passing.
        note: Extra context appended to every failure message.

    Raises:
        AssertionError: On the first check that does not hold.
    """
    # Exit code first: anything non-zero means the harness did not report GREEN.
    assert rc == 0, f"[{canary['id']}] harness returned non-zero rc={rc} — expected GREEN. {note}"

    assert results is not None, (
        f"[{canary['id']}] results.json not written — harness may have crashed. {note}"
    )

    tier_status = results.get("results", {})

    # The install tier has to be an explicit pass.
    assert tier_status.get("install") == "pass", (
        f"[{canary['id']}] install tier did not pass: results={results.get('results')}. {note}"
    )

    # Any other status (e.g. a skip) is tolerated, but no tier may be marked "fail".
    red_tiers = [tier for tier, status in tier_status.items() if status == "fail"]
    assert not red_tiers, f"[{canary['id']}] tiers failed: {red_tiers}. {note}"

    flags = results.get("flags", {})

    # Both flags must be literally True — a missing flag counts as a failure.
    assert flags.get("clean_teardown") is True, (
        f"[{canary['id']}] clean_teardown=False — residual state left on server. {note}"
    )
    assert flags.get("no_secret_leak") is True, (
        f"[{canary['id']}] no_secret_leak=False — a secret value appeared in results.json. {note}"
    )

    # Teeth check: each named test must be reported as passing in its stage, so a removed
    # or emptied-out assertion makes the canary fail instead of passing vacuously.
    for stage, needle in canary.get("stage_pass_checks", []):
        assert stage_has_passing_test(results, stage, needle), (
            f"[{canary['id']}] expected a passing test containing {needle!r} in "
            f"stage={stage!r}, but none found. "
            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage)]}. {note}"
        )


def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert a bad-canary run is RED (false-green guard).

    The PRIMARY assertion is rc != 0. If the harness wrongly returns 0 (green) for this fixture,
    this assert fails → the regression suite catches the false-green. This is the core guard.

    The SECONDARY assertions verify that each test named in the canary's ``stage_fail_checks``
    is reported as failing. When such checks are configured, a missing results document is
    itself a failure: a non-zero exit with no results cannot be told apart from a harness
    crash, so it must not count as "the defect was caught".

    Args:
        rc: Exit code returned by the harness; must be non-zero.
        results: Parsed results document, or None if it was not written.
        canary: Canary definition; ``id`` is used in messages and ``stage_fail_checks`` lists
            the (stage, test-name substring) pairs that must be reported as failing.
        note: Extra context appended to every failure message.

    Raises:
        AssertionError: On the first check that does not hold.
    """
    # PRIMARY: harness must return non-zero (RED).
    # If the harness returns 0 for a broken app, the regression suite fails here — false-green caught.
    assert rc != 0, (
        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture — "
        f"FALSE-GREEN detected. The harness failed to catch the broken app. {note}"
    )

    fail_checks = canary.get("stage_fail_checks", [])
    if not fail_checks:
        # Nothing beyond the exit code is expected of this canary.
        return

    # Without results the named failing tests cannot be verified; silently skipping them
    # would let a harness crash pass as a correctly detected defect.
    assert results is not None, (
        f"[{canary['id']}] results.json not written — cannot verify the expected failing "
        f"test(s) {fail_checks!r}; the harness may have crashed instead of catching the "
        f"defect. {note}"
    )

    # SECONDARY: verify the specific failing test is present in results.json.
    # If the guarding assertion is removed/vacated, stage_has_failing_test() returns False here
    # → this assert fires → we detect that the guard itself was removed (a meta-failure).
    for stage_name, test_name_substr in fail_checks:
        assert stage_has_failing_test(results, stage_name, test_name_substr), (
            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
            f"stage={stage_name!r}, but none found. "
            f"The guard may have been removed or vacuated. "
            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
        )


def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.

    Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
    the tiers before it still PASSED (no collateral damage from the fixture).
    If the harness returns 0 for any of these fixtures, false-green is detected at the primary assert.

    When the canary configures any tier or test expectation, a missing results document is a
    failure: without it the tier-specific claims cannot be verified, and a harness crash would
    otherwise pass as a correctly located failure.

    Args:
        rc: Exit code returned by the harness; must be non-zero.
        results: Parsed results document, or None if it was not written.
        canary: Canary definition; reads ``id``, ``failing_tier``, ``passing_tiers_before``
            and ``stage_fail_checks``.
        note: Extra context appended to every failure message.

    Raises:
        AssertionError: On the first check that does not hold.
    """
    failing_tier = canary.get("failing_tier")
    passing_before = canary.get("passing_tiers_before", [])
    fail_checks = canary.get("stage_fail_checks", [])

    # PRIMARY: harness must return non-zero.
    assert rc != 0, (
        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
        f"{failing_tier!r} — FALSE-GREEN. {note}"
    )

    if not (failing_tier or passing_before or fail_checks):
        # No tier-level expectation configured: the exit code is all there is to check.
        return

    # Everything below reads the results document, so it has to exist.
    assert results is not None, (
        f"[{canary['id']}] results.json not written — cannot confirm the failure happened at "
        f"tier {failing_tier!r}; the harness may have crashed instead of catching it. {note}"
    )

    tier_results = results.get("results", {})

    # The intended failing tier must be "fail".
    if failing_tier:
        actual = tier_results.get(failing_tier)
        assert actual == "fail", (
            f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
            f"All tier results: {tier_results}. {note}"
        )

    # Tiers before the failing tier must have passed (no collateral damage from the fixture).
    for tier in passing_before:
        actual = tier_results.get(tier)
        assert actual == "pass", (
            f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
            f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
        )

    # Optional: specific failing test name (for the restore-RED canary).
    for stage_name, test_name_substr in fail_checks:
        assert stage_has_failing_test(results, stage_name, test_name_substr), (
            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
            f"stage={stage_name!r}. "
            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
        )


def _stage_tests(results: dict, stage_name: str) -> list[dict]:
    """Return the ``tests`` list of the first stage entry named *stage_name*.

    Looks through ``results["stages"]`` in order. Returns an empty list when there is no
    ``stages`` key, when no entry has that name, or when the matching entry has no ``tests``.
    """
    matching = (
        entry.get("tests", [])
        for entry in results.get("stages", [])
        if entry.get("name") == stage_name
    )
    # Only the first match counts; later entries with the same name are ignored.
    return next(matching, [])