feat(regression): add 4 per-tier RED canaries (DoD#4) + canary_fast marker
Some checks failed
continuous-integration/drone/push Build is failing

Four new per-tier RED canaries prove the server catches failure at every
lifecycle tier:

- bad-install: custom-html-tiny @ regression-bad-image (4ae88661)
  nonexistent image → prepull fails → install=fail
  STAGES=install → no prev-version lookup → chaos deploy of HEAD

- bad-upgrade: same branch + SHA, STAGES=install,upgrade
  install uses prev-version (good image) → PASS
  upgrade chaos checks out HEAD (bad image) → prepull fails → FAIL

- bad-backup: custom-html @ regression-bad-backup (e1e3c5fc)
  backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
  abra app backup create fails → backup=fail

- bad-restore: custom-html @ regression-bad-restore (5a481cc1)
  backup targets .backup-data/ subdir (not where ci-marker.txt lives)
  backup succeeds; restore puts .backup-data back but NOT the marker
  marker stays "mutated" → test_restore_returns_state FAILS → restore=fail

Each test asserts: rc!=0, failing_tier="fail", prior tiers="pass".
Adds @pytest.mark.canary_fast for the fast subset.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-06-02 01:49:28 +00:00
parent 3dd06ef0ce
commit cf405b4195
2 changed files with 163 additions and 7 deletions

View File

@ -23,6 +23,10 @@ def pytest_configure(config):
"markers",
"canary: slow E2E canary test — drives the full cold CI lifecycle; run on-demand only.",
)
config.addinivalue_line(
"markers",
"canary_fast: fast per-tier RED canary (still tagged canary); subset for quick pre-merge checks.",
)
def run_recipe_ci(

View File

@ -1,14 +1,16 @@
"""E2E canary regression tests — the server's standing self-test suite.
Three canaries prove both halves of the server's job:
Seven canaries prove both halves of the server's job:
1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
2. RED canary — broken apps are caught; a false-green makes THIS test fail.
2. RED canaries — broken apps are caught at the intended tier; a false-green makes THIS test fail.
Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries on custom-html-tiny and
custom-html — fast because both recipes deploy in seconds. Run with `-m canary_fast` as a pre-merge quick check.
Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).
Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
Slow: each canary drives the full cold lifecycle on the live server (minutes per run).
Pin policy: canary refs are pinned to specific SHAs for stability. Update them when the recipe
publishes a new release and the pin is stale (re-run to confirm GREEN before updating).
Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
the expected verdict.
"""
from __future__ import annotations
@ -80,11 +82,85 @@ _BAD = {
],
}
# ---------------------------------------------------------------------------
# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
# ---------------------------------------------------------------------------
# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
# Used for install-RED (STAGES=install → chaos of HEAD with bad image → install=fail)
# and upgrade-RED (STAGES=install,upgrade,custom → prev-version install passes, upgrade chaos fails).
_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3" # regression-bad-image @ 2026-06-02
# Install-tier RED canary: pinned to the shared bad-image ref, run with the install stage only,
# and expected to report install="fail".
_BAD_INSTALL = {
    "id": "bad-install",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
    "stages": "install",
    # Assertions: install must be the failing tier.
    "failing_tier": "install",
    # Install is the first tier, so no earlier tier has to be asserted as "pass".
    "passing_tiers_before": [],
    # No per-test checks for this canary; the tier-level verdict is the whole assertion.
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
# Upgrade-tier RED canary: same bad-image pin as bad-install, but run with an upgrade stage so the
# failure is expected to surface at upgrade rather than install.
_BAD_UPGRADE = {
    "id": "bad-upgrade",
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    "expected_green": False,
    # Stages are restricted to install,upgrade,custom (NOT the default lifecycle, which also
    # includes backup and restore). Intended flow: prev-version deploy (good image) → install=PASS;
    # upgrade chaos (bad image) → FAIL.
    "stages": "install,upgrade,custom",
    # Assertions: upgrade must be the failing tier, and install must have passed before it.
    "failing_tier": "upgrade",
    "passing_tiers_before": ["install"],
    # No per-test checks for this canary; the tier-level verdicts are the whole assertion.
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
# Backup-tier RED canary: the pinned ref configures a backup path that does not exist, so the
# backup tier is expected to report "fail".
_BAD_BACKUP = {
    "id": "bad-backup",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-backup @ 2026-06-02 — backupbot.backup.path=/nonexistent-path-cc-ci-canary-bad
    # `abra app backup create` fails → backup tier RED. install+upgrade are expected to still PASS.
    "ref": "e1e3c5fc5e2bd414600b6d3a9f2266566415ff34",
    "expected_green": False,
    # Stops after backup: restore and custom are not run for this canary.
    "stages": "install,upgrade,backup",
    "failing_tier": "backup",
    # NOTE(review): only install is asserted as "pass" here. The upgrade stage also runs before
    # backup but is not listed — confirm whether leaving it unasserted is intentional.
    "passing_tiers_before": ["install"],
    # No per-test checks for this canary; the tier-level verdicts are the whole assertion.
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
# Restore-tier RED canary: backup succeeds but does not capture the marker file, so the restore
# check that expects the original state is the one that fails.
_BAD_RESTORE = {
    "id": "bad-restore",
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: regression-bad-restore @ 2026-06-02 — backup captures /usr/share/nginx/html/.backup-data/
    # (a subdir NOT containing ci-marker.txt). Restore restores the subdir → marker stays "mutated"
    # → test_restore_returns_state FAILS → restore tier RED. install+upgrade+backup PASS.
    "ref": "5a481cc1f6b2a46279b8e0eca09ca7cb4dc6f25d",
    "expected_green": False,
    # Full lifecycle, spelled out explicitly rather than relying on the default.
    "stages": "install,upgrade,backup,restore,custom",
    "failing_tier": "restore",
    # NOTE(review): install and backup are asserted as "pass"; the upgrade stage also runs before
    # restore but is not listed — confirm whether leaving it unasserted is intentional.
    "passing_tiers_before": ["install", "backup"],
    "stage_pass_checks": [],
    # (stage name, substring of the test name) pairs that must show up as failing tests.
    "stage_fail_checks": [
        ("restore", "test_restore_returns_state"),
    ],
}
# Original canary set (two GREEN fixtures plus the general RED fixture).
# Presumably the parametrize source for test_canary — its decorator is outside this view; verify.
CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
# Per-tier RED canaries, one per lifecycle tier; parametrizes test_canary_fast.
CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]
# ---------------------------------------------------------------------------
# Test
# Tests
# ---------------------------------------------------------------------------
@ -99,11 +175,13 @@ def test_canary(canary, tmp_path):
For the RED canary: proves the harness catches a broken app — if the harness wrongly returned
green, `assert rc != 0` fails, catching the false-green.
"""
stages = canary.get("stages", "install,upgrade,backup,restore,custom")
rc, results, artifact_dir = run_recipe_ci(
recipe=canary["recipe"],
src=canary["src"],
ref=canary["ref"],
runs_dir=str(tmp_path),
stages=stages,
)
_note = f"artifact_dir={artifact_dir}" # visible in -v output via assert messages
@ -114,6 +192,34 @@ def test_canary(canary, tmp_path):
_assert_red(rc, results, canary, _note)
@pytest.mark.canary
@pytest.mark.canary_fast
@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST])
def test_canary_fast(canary, tmp_path):
    """Run one per-tier RED canary and check that it fails at the tier it was built to break.

    Every fixture in CANARIES_FAST is broken at exactly one lifecycle tier. After driving the
    recipe through the harness, the verdict is handed to ``_assert_red_at_tier``, which checks:
      - the overall result is RED (rc != 0);
      - the intended failing tier has status "fail";
      - the tiers listed as preceding it have status "pass", so the failure is shown to be
        tier-specific rather than "fails somewhere".

    The fixtures use quick-to-deploy recipes (custom-html-tiny and custom-html), which is what
    makes this subset usable as a pre-merge check next to the slow full suite.
    """
    # Collect the harness arguments up front; a canary without its own "stages" entry falls back
    # to the full lifecycle.
    harness_args = {
        "recipe": canary["recipe"],
        "src": canary["src"],
        "ref": canary["ref"],
        "runs_dir": str(tmp_path),
        "stages": canary.get("stages", "install,upgrade,backup,restore,custom"),
    }
    exit_code, report, artifacts = run_recipe_ci(**harness_args)

    # The artifact location is threaded into every assertion message for post-mortem debugging.
    _assert_red_at_tier(exit_code, report, canary, f"artifact_dir={artifacts}")
def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
"""Assert a good-canary run is GREEN with real semantic assertions."""
@ -182,6 +288,52 @@ def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
)
def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
    """Assert that a per-tier RED canary failed at the tier it was built to break.

    Checks, in order:
      1. ``rc`` is non-zero — a zero exit code for a known-bad fixture is a FALSE-GREEN.
      2. ``results`` is present — without it the tier-level checks below cannot run, and
         silently skipping them would let an unrelated crash pass as "caught at the right tier".
      3. ``results["results"][failing_tier]`` equals ``"fail"``.
      4. Every tier in ``canary["passing_tiers_before"]`` equals ``"pass"`` (the fixture caused
         no collateral damage in earlier tiers).
      5. Every ``(stage, name_substring)`` pair in ``canary["stage_fail_checks"]`` matches a
         failing test, as reported by ``stage_has_failing_test``.

    Args:
        rc: Exit code returned by the harness run.
        results: Parsed results of the run, or ``None`` when the harness produced none.
        canary: Canary definition; reads ``id``, ``failing_tier``, ``passing_tiers_before`` and
            ``stage_fail_checks``.
        note: Free-form context (e.g. the artifact directory) appended to every failure message.

    Raises:
        AssertionError: if any of the checks above does not hold.
    """
    failing_tier = canary.get("failing_tier")
    passing_before = canary.get("passing_tiers_before", [])

    # PRIMARY: harness must return non-zero.
    assert rc != 0, (
        f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
        f"{failing_tier!r} — FALSE-GREEN. {note}"
    )

    # A per-tier canary exists to prove WHERE the failure was detected. With no results there is
    # nothing to prove that with, so fail loudly rather than returning as if everything checked out.
    assert results is not None, (
        f"[{canary['id']}] harness returned rc={rc} but produced no results — cannot verify "
        f"that tier {failing_tier!r} is the one that failed. {note}"
    )

    # Treat a missing or null "results" mapping as empty so the assertions below report the
    # mismatch with a readable message instead of raising AttributeError.
    tier_results = results.get("results") or {}

    # The intended failing tier must be "fail".
    if failing_tier:
        actual = tier_results.get(failing_tier)
        assert actual == "fail", (
            f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
            f"All tier results: {tier_results}. {note}"
        )

    # Tiers before the failing tier must have passed (no collateral damage from the fixture).
    for tier in passing_before:
        actual = tier_results.get(tier)
        assert actual == "pass", (
            f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
            f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
        )

    # Optional: specific failing test name (for the restore-RED canary).
    for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
        assert stage_has_failing_test(results, stage_name, test_name_substr), (
            f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
            f"stage={stage_name!r}. "
            f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
        )
def _stage_tests(results: dict, stage_name: str) -> list[dict]:
for stage in results.get("stages", []):
if stage.get("name") == stage_name: