Some checks failed
continuous-integration/drone/push Build is failing
Four new per-tier RED canaries prove the server catches failure at every lifecycle tier: - bad-install: custom-html-tiny @ regression-bad-image (4ae88661) nonexistent image → prepull fails → install=fail STAGES=install → no prev-version lookup → chaos deploy of HEAD - bad-upgrade: same branch + SHA, STAGES=install,upgrade install uses prev-version (good image) → PASS upgrade chaos checks out HEAD (bad image) → prepull fails → FAIL - bad-backup: custom-html @ regression-bad-backup (e1e3c5fc) backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad abra app backup create fails → backup=fail - bad-restore: custom-html @ regression-bad-restore (5a481cc1) backup targets .backup-data/ subdir (not where ci-marker.txt lives) backup succeeds; restore puts .backup-data back but NOT the marker marker stays "mutated" → test_restore_returns_state FAILS → restore=fail Each test asserts: rc!=0, failing_tier="fail", prior tiers="pass". Adds @pytest.mark.canary_fast for the fast subset. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
342 lines
15 KiB
Python
342 lines
15 KiB
Python
"""E2E canary regression tests — the server's standing self-test suite.
|
|
|
|
Seven canaries prove both halves of the server's job:
|
|
1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
|
|
2. RED canaries — broken apps are caught at the intended tier; a false-green makes THIS test fail.
|
|
|
|
Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny and
custom-html — fast because both recipes deploy in seconds. Run with `-m canary_fast` as a
pre-merge quick check.
|
|
Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).
|
|
|
|
Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
|
|
Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
|
|
the expected verdict.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import sys
|
|
|
|
import pytest
|
|
|
|
sys.path.insert(0, os.path.dirname(__file__))
|
|
import conftest as _reg # noqa: E402
|
|
|
|
# Bind the helpers from the sibling conftest module to module-level names used by the tests below.
run_recipe_ci = _reg.run_recipe_ci
stage_has_passing_test = _reg.stage_has_passing_test
stage_has_failing_test = _reg.stage_has_failing_test
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Canary definitions
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Good canary 1: minimal static-file server — fast signal, few deps.
_SIMPLE = {
    "id": "good-simple",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    # Pin: main @ 2026-06-02 — update if the recipe publishes a new release and the pin goes stale.
    "ref": "435df8fc98ef7598084fcffcd6225470eca80053",
    # True routes this canary through _assert_green in test_canary.
    "expected_green": True,
    # (stage, test-name substring) pairs that MUST appear with "pass" in the result — these are
    # the semantic teeth. If the generic install assertion is removed/vacated, test_serving
    # disappears from the results and the canary fails.
    "stage_pass_checks": [
        ("install", "test_serving"),
    ],
    "stage_fail_checks": [],
}
|
|
|
|
# Good canary 2: multi-service stack — backend + Postgres + Collabora WOPI + OIDC.
# Exercises real breadth. Slowest canary (~10-20 min full lifecycle).
_SIGNIFICANT = {
    "id": "good-significant",
    "recipe": "lasuite-docs",
    "src": "recipe-maintainers/lasuite-docs",
    # Pin: main @ 2026-06-02
    "ref": "290a8ad72d06232f0b3f302d976af14bef0f3c53",
    # True routes this canary through _assert_green in test_canary.
    "expected_green": True,
    # (stage, test-name substring) pairs that must be reported as passing.
    "stage_pass_checks": [
        ("install", "test_serving_and_frontend"),
    ],
    "stage_fail_checks": [],
}
|
|
|
|
# Bad canary: app is UP + passes all lifecycle tiers but the custom functional assertion detects a
# semantic defect (wrong Content-Type for .txt files). The harness MUST report RED.
# If the harness wrongly returns green for this fixture, `assert rc != 0` in _assert_red fails
# → false-green caught.
_BAD = {
    "id": "bad-false-green",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: v5-stale-docroot @ 71e7326 — serves .txt as application/octet-stream; build #75 was RED.
    # Recreate pattern if branch disappears: app up + passes lifecycle, fails one content assertion.
    "ref": "71e7326a99bbb69035a046fba8fa51859ca66115",
    # False routes this canary through _assert_red in test_canary.
    "expected_green": False,
    "stage_pass_checks": [],
    # The specific test that must have FAILED, proving the content-type assertion has teeth.
    # If the assertion is vacated and the test disappears, stage_has_failing_test() returns False
    # → the matching assert in _assert_red fails → we detect that the guard was removed.
    "stage_fail_checks": [
        ("custom", "test_content_type"),
    ],
}
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
|
|
# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
|
|
# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
# Used by both _BAD_INSTALL and _BAD_UPGRADE:
#   - install-RED (STAGES=install → chaos of HEAD with bad image → install=fail)
#   - upgrade-RED (STAGES includes upgrade → prev-version install passes, upgrade chaos fails).
_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3"  # regression-bad-image @ 2026-06-02
|
|
|
|
# Install-tier RED canary: the pinned ref points at a nonexistent image, so install must fail.
_BAD_INSTALL = {
    "id": "bad-install",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
    "stages": "install",
    # Tier expectations checked by _assert_red_at_tier: install must be the failing tier, and
    # there is no earlier tier that has to pass.
    "failing_tier": "install",
    "passing_tiers_before": [],
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
|
|
|
# Upgrade-tier RED canary: same bad-image ref as _BAD_INSTALL, but with the upgrade tier enabled.
_BAD_UPGRADE = {
    "id": "bad-upgrade",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # Explicit stage list (not the test's default): with the upgrade tier present, install
    # deploys the prev-version (good image) → install=PASS; upgrade chaos (bad image) → FAIL.
    "stages": "install,upgrade,custom",
    # Tier expectations checked by _assert_red_at_tier.
    "failing_tier": "upgrade",
    "passing_tiers_before": ["install"],
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
|
|
|
# Backup-tier RED canary.
_BAD_BACKUP = {
    "id": "bad-backup",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-backup @ 2026-06-02 — backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
    # `abra app backup create` fails → backup tier RED. install+upgrade still PASS.
    "ref": "e1e3c5fc5e2bd414600b6d3a9f2266566415ff34",
    "expected_green": False,
    "stages": "install,upgrade,backup",
    # Tier expectations checked by _assert_red_at_tier.
    "failing_tier": "backup",
    # NOTE(review): only "install" is asserted as passing here, although the pin comment says
    # upgrade passes too — confirm whether "upgrade" should be listed as well.
    "passing_tiers_before": ["install"],
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
|
|
|
# Restore-tier RED canary.
_BAD_RESTORE = {
    "id": "bad-restore",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-restore @ 2026-06-02 — backup captures /usr/share/nginx/html/.backup-data/
    # (a subdir NOT containing ci-marker.txt). Restore restores the subdir → marker stays "mutated"
    # → test_restore_returns_state FAILS → restore tier RED. install+upgrade+backup PASS.
    "ref": "5a481cc1f6b2a46279b8e0eca09ca7cb4dc6f25d",
    "expected_green": False,
    "stages": "install,upgrade,backup,restore,custom",
    # Tier expectations checked by _assert_red_at_tier.
    "failing_tier": "restore",
    # NOTE(review): "upgrade" is not asserted as passing here, although the pin comment says it
    # passes — confirm whether it should be listed as well.
    "passing_tiers_before": ["install", "backup"],
    "stage_pass_checks": [],
    # The named test that must be reported as failing in the restore stage.
    "stage_fail_checks": [
        ("restore", "test_restore_returns_state"),
    ],
}
|
|
|
|
# Parametrization source for test_canary: the two GREEN canaries plus the false-green RED canary.
CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
# Parametrization source for test_canary_fast: one RED canary per lifecycle tier.
CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.canary
@pytest.mark.parametrize("canary", CANARIES, ids=[entry["id"] for entry in CANARIES])
def test_canary(canary, tmp_path):
    """Run one canary recipe through the CI harness and check the verdict matches expectations.

    GREEN canaries (``expected_green`` is true) are verified with ``_assert_green``: the harness
    must report a healthy app as healthy and the named per-tier tests must really have run.

    The RED canary is verified with ``_assert_red``: if the harness wrongly reports green for
    the known-bad fixture, ``assert rc != 0`` fails and the false-green is caught.
    """
    # A canary may narrow the lifecycle; otherwise every tier is requested.
    requested_stages = canary.get("stages", "install,upgrade,backup,restore,custom")

    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
        stages=requested_stages,
    )

    # Appended to every assertion message so the artifacts are easy to find in -v output.
    where = f"artifact_dir={artifact_dir}"

    verify = _assert_green if canary["expected_green"] else _assert_red
    verify(rc, results, canary, where)
|
|
|
|
|
|
@pytest.mark.canary
@pytest.mark.canary_fast
@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[entry["id"] for entry in CANARIES_FAST])
def test_canary_fast(canary, tmp_path):
    """Fast per-tier RED canaries: each one is broken at exactly one lifecycle tier.

    For every canary the run is checked by ``_assert_red_at_tier``:
      - the overall verdict is RED (rc != 0);
      - the intended failing tier has status "fail";
      - the tiers listed as coming BEFORE the failure have status "pass", which shows the
        failure was detected at that specific tier rather than "somewhere".

    The recipes involved (custom-html-tiny, custom-html) are quick to deploy, so this subset is
    intended as a pre-merge quick check alongside the full slow suite.
    """
    # A canary may narrow the lifecycle; otherwise every tier is requested.
    requested_stages = canary.get("stages", "install,upgrade,backup,restore,custom")

    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
        stages=requested_stages,
    )

    where = f"artifact_dir={artifact_dir}"
    _assert_red_at_tier(rc, results, canary, where)
|
|
|
|
|
|
def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
|
"""Assert a good-canary run is GREEN with real semantic assertions."""
|
|
|
|
# 1. Harness exit code must be 0 (GREEN).
|
|
assert rc == 0, f"[{canary['id']}] harness returned non-zero rc={rc} — expected GREEN. {note}"
|
|
|
|
assert (
|
|
results is not None
|
|
), f"[{canary['id']}] results.json not written — harness may have crashed. {note}"
|
|
|
|
# 2. Install tier must have passed.
|
|
assert results.get("results", {}).get("install") == "pass", (
|
|
f"[{canary['id']}] install tier did not pass: " f"results={results.get('results')}. {note}"
|
|
)
|
|
|
|
# 3. No tier may have FAILED (skips are acceptable for recipes without backup or custom tests).
|
|
failed_tiers = [t for t, s in results.get("results", {}).items() if s == "fail"]
|
|
assert not failed_tiers, f"[{canary['id']}] tiers failed: {failed_tiers}. {note}"
|
|
|
|
# 4. Teardown must be clean (no leftover containers/volumes/secrets).
|
|
assert (
|
|
results.get("flags", {}).get("clean_teardown") is True
|
|
), f"[{canary['id']}] clean_teardown=False — residual state left on server. {note}"
|
|
|
|
# 5. No secret values leaked into the results artifact.
|
|
assert (
|
|
results.get("flags", {}).get("no_secret_leak") is True
|
|
), f"[{canary['id']}] no_secret_leak=False — a secret value appeared in results.json. {note}"
|
|
|
|
# 6. Semantic stage assertions — TEETH CHECK.
|
|
# These verify that specific named tests actually ran and passed in the expected stage.
|
|
# If a tier assertion is removed or made vacuous, the named test disappears from results.json
|
|
# and this assert fires — proving the regression suite guards against silent test removal.
|
|
for stage_name, test_name_substr in canary.get("stage_pass_checks", []):
|
|
assert stage_has_passing_test(results, stage_name, test_name_substr), (
|
|
f"[{canary['id']}] expected a passing test containing {test_name_substr!r} in "
|
|
f"stage={stage_name!r}, but none found. "
|
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
|
)
|
|
|
|
|
|
def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
|
"""Assert a bad-canary run is RED (false-green guard).
|
|
|
|
The PRIMARY assertion is rc != 0. If the harness wrongly returns 0 (green) for this fixture,
|
|
this assert fails → the regression suite catches the false-green. This is the core guard.
|
|
"""
|
|
|
|
# PRIMARY: harness must return non-zero (RED).
|
|
# If the harness returns 0 for a broken app, the regression suite fails here — false-green caught.
|
|
assert rc != 0, (
|
|
f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture — "
|
|
f"FALSE-GREEN detected. The harness failed to catch the broken app. {note}"
|
|
)
|
|
|
|
# SECONDARY: verify the specific failing test is present in results.json.
|
|
# If the content-type assertion is removed/vacuated, stage_has_failing_test() returns False here
|
|
# → this assert fires → we detect that the guard itself was removed (a meta-failure).
|
|
if results is not None:
|
|
for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
|
|
assert stage_has_failing_test(results, stage_name, test_name_substr), (
|
|
f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
|
|
f"stage={stage_name!r}, but none found. "
|
|
f"The guard may have been removed or vacuated. "
|
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
|
)
|
|
|
|
|
|
def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
|
"""Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.
|
|
|
|
Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
|
|
the tiers before it still PASSED (no collateral damage from the fixture).
|
|
If the harness returns 0 for any of these fixtures, false-green is detected at the primary assert.
|
|
"""
|
|
failing_tier = canary.get("failing_tier")
|
|
passing_before = canary.get("passing_tiers_before", [])
|
|
|
|
# PRIMARY: harness must return non-zero.
|
|
assert rc != 0, (
|
|
f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
|
|
f"{failing_tier!r} — FALSE-GREEN. {note}"
|
|
)
|
|
|
|
if results is None:
|
|
return
|
|
|
|
tier_results = results.get("results", {})
|
|
|
|
# The intended failing tier must be "fail".
|
|
if failing_tier:
|
|
actual = tier_results.get(failing_tier)
|
|
assert actual == "fail", (
|
|
f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
|
|
f"All tier results: {tier_results}. {note}"
|
|
)
|
|
|
|
# Tiers before the failing tier must have passed (no collateral damage from the fixture).
|
|
for tier in passing_before:
|
|
actual = tier_results.get(tier)
|
|
assert actual == "pass", (
|
|
f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
|
|
f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
|
|
)
|
|
|
|
# Optional: specific failing test name (for the restore-RED canary).
|
|
for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
|
|
assert stage_has_failing_test(results, stage_name, test_name_substr), (
|
|
f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
|
|
f"stage={stage_name!r}. "
|
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
|
)
|
|
|
|
|
|
def _stage_tests(results: dict, stage_name: str) -> list[dict]:
|
|
for stage in results.get("stages", []):
|
|
if stage.get("name") == stage_name:
|
|
return stage.get("tests", [])
|
|
return []
|