Compare commits
30 Commits
regression
...
feat/expec
| Author | SHA1 | Date | |
|---|---|---|---|
| 46e2cdb93e | |||
| 3980340727 | |||
| d20ad1e989 | |||
| b3ab68a9dd | |||
| d733e2c4ca | |||
| f3a1ad5388 | |||
| 3b0a3d14ea | |||
| f5a6f7196f | |||
| a78ec2de12 | |||
| ef65d898ed | |||
| 0dea3410ee | |||
| 117028ff0a | |||
| c90cf1e1d0 | |||
| 49a56e873e | |||
| f2fa38df6f | |||
| 31b71f9949 | |||
| 9449b22f24 | |||
| 74364d0a46 | |||
| c7ede9cfbb | |||
| 3b7267cbee | |||
| 090724ec80 | |||
| 3859cd7f40 | |||
| cf405b4195 | |||
| 3dd06ef0ce | |||
| b268a14cad | |||
| a2a6eea757 | |||
| 464760ebb7 | |||
| fd3db37c49 | |||
| 91a7088f56 | |||
| f202c5aa7f |
@ -287,15 +287,11 @@ def process_testme(full_name, owner, name, number, user, comment_id, source, qui
|
|||||||
run_url = f"{DRONE_URL}/{CI_REPO}/{num}"
|
run_url = f"{DRONE_URL}/{CI_REPO}/{num}"
|
||||||
post_commit_status(owner, name, head["sha"], "pending", run_url, "cc-ci run in progress")
|
post_commit_status(owner, name, head["sha"], "pending", run_url, "cc-ci run in progress")
|
||||||
mode = " **(--quick: lower-confidence fast lane; does not gate merge)**" if quick else ""
|
mode = " **(--quick: lower-confidence fast lane; does not gate merge)**" if quick else ""
|
||||||
# R2/U3: one comment per PR, updated in place. Reuse the existing marked comment if present
|
# One NEW comment PER `!testme` (operator preference 2026-06-02): post a fresh ⏳ placeholder each
|
||||||
# (re-`!testme` refreshes it back to the ⏳ placeholder), else post a new one.
|
# run so every re-`!testme` is visible in the PR timeline; watch_and_reflect then edits THIS
|
||||||
|
# comment to its result. (Previously a single marked comment was reused/edited in place.)
|
||||||
start_body = start_comment_body(name, head["sha"], run_url, mode)
|
start_body = start_comment_body(name, head["sha"], run_url, mode)
|
||||||
existing = find_existing_comment(full_name, number)
|
cid = post_comment(owner, name, number, start_body)
|
||||||
if existing:
|
|
||||||
edit_comment(owner, name, existing, start_body)
|
|
||||||
cid = existing
|
|
||||||
else:
|
|
||||||
cid = post_comment(owner, name, number, start_body)
|
|
||||||
log(
|
log(
|
||||||
f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} "
|
f"[{source}] triggered build {num} for {name}@{head['sha'][:8]} "
|
||||||
f"(PR #{number}, comment {comment_id}) by {user}"
|
f"(PR #{number}, comment {comment_id}) by {user}"
|
||||||
|
|||||||
131
machine-docs/BACKLOG-regression.md
Normal file
131
machine-docs/BACKLOG-regression.md
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
# BACKLOG — server regression canaries phase
|
||||||
|
|
||||||
|
## Build backlog
|
||||||
|
|
||||||
|
- [x] Create `tests/regression/` suite (conftest + test_canaries + README)
|
||||||
|
- [ ] Run `good-simple` canary (custom-html-tiny main) → confirm GREEN + test_serving passes
|
||||||
|
- [ ] Run `bad-false-green` canary (custom-html v5-stale-docroot) → confirm RED + test_content_type fails
|
||||||
|
- [ ] Run `good-significant` canary (lasuite-docs main) → confirm GREEN + test_serving_and_frontend passes
|
||||||
|
- [ ] Open PR for operator review (DoD item 5: NOT merged)
|
||||||
|
- [ ] Claim gate once all canary runs are GREEN/RED as expected + PR is open
|
||||||
|
|
||||||
|
## Adversary findings
|
||||||
|
|
||||||
|
### A-reg-1 [adversary] CLOSED @2026-06-02T01:46Z — relative import fixed, 3 tests collect
|
||||||
|
**Filed:** 2026-06-02T01:37Z
|
||||||
|
**Severity:** CRITICAL — suite can't run at all until fixed
|
||||||
|
|
||||||
|
Cold-run `cc-ci-run -m pytest tests/regression/ --collect-only` on cc-ci confirms:
|
||||||
|
```
|
||||||
|
ImportError: attempted relative import with no known parent package
|
||||||
|
tests/regression/test_canaries.py:18: from .conftest import run_recipe_ci, ...
|
||||||
|
```
|
||||||
|
No tests collected. 0 canaries can run.
|
||||||
|
|
||||||
|
**Root cause:** `test_canaries.py` uses a relative import (`from .conftest import ...`) which
|
||||||
|
requires the directory to be a Python package. Without `tests/regression/__init__.py` (and
|
||||||
|
`tests/__init__.py`), pytest imports `test_canaries.py` as a top-level module, not a package
|
||||||
|
member. Relative imports fail.
|
||||||
|
|
||||||
|
**Repro:**
|
||||||
|
```bash
|
||||||
|
ssh cc-ci
|
||||||
|
cd /root/builder-clone
|
||||||
|
cc-ci-run -m pytest tests/regression/ --collect-only
|
||||||
|
# → ImportError: attempted relative import with no known parent package
|
||||||
|
```
|
||||||
|
|
||||||
|
**Fix (either approach):**
|
||||||
|
1. Add `tests/__init__.py` and `tests/regression/__init__.py` (makes it a real package)
|
||||||
|
2. OR replace `from .conftest import ...` with an absolute import via sys.path manipulation (like other test
|
||||||
|
files do, e.g. `sys.path.insert(0, ...); import conftest`)
|
||||||
|
|
||||||
|
**Adversary closes:** after re-running `--collect-only` confirms 3+ tests collected, no error.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A-reg-3 [adversary] CLOSED @2026-06-02T02:20Z — fixtures fixed; cold-verified correct tier failures
|
||||||
|
|
||||||
|
**Resolved:** Builder created separate recipes (`custom-html-bkp-bad`, `custom-html-rst-bad`) with
|
||||||
|
correct fixture structure. Cold-verified from cc-ci artifact dirs (no harness re-run needed).
|
||||||
|
|
||||||
|
**Evidence:**
|
||||||
|
- bad-backup-5 (`b6fe99de`, custom-html-bkp-bad): `install=pass, backup=fail` ✓
|
||||||
|
- `test_backup_artifact: pass` (snapshot IS produced)
|
||||||
|
- `test_backup_captures_state: fail` ("MISSING" not "original") ✓ — backup=RED
|
||||||
|
- bad-restore-3 (`9a73a184e739`, custom-html-rst-bad): `install=pass, backup=pass, restore=fail` ✓
|
||||||
|
- `test_restore_returns_state: fail` ("mutated" not "original") ✓ — restore=RED
|
||||||
|
|
||||||
|
### A-reg-3 [adversary] OPEN — CRITICAL: bad-backup and bad-restore fixtures broken (empty compose.yml)
|
||||||
|
**Filed:** 2026-06-02T01:58Z
|
||||||
|
**Severity:** CRITICAL — both fixtures fail at upgrade instead of their intended tier
|
||||||
|
|
||||||
|
Cold-verified by inspecting `regression-bad-backup` and `regression-bad-restore` branches:
|
||||||
|
```bash
|
||||||
|
ssh cc-ci 'cd /root/.abra/recipes/custom-html && git diff origin/main..origin/regression-bad-backup -- compose.yml'
|
||||||
|
```
|
||||||
|
Result: compose.yml is completely empty (entire file deleted, leaving only a blank line). Same
|
||||||
|
for `regression-bad-restore`.
|
||||||
|
|
||||||
|
**Evidence from run artifacts:**
|
||||||
|
- `regression-bad-backup-1`: `results: install=pass, upgrade=fail, backup=skip`
|
||||||
|
- Expected: `install=pass, upgrade=pass, backup=fail`
|
||||||
|
- Actual: upgrade fails because the chaos deploy applies an empty compose.yml → no service → deploy error
|
||||||
|
- `regression-bad-restore-*`: never ran to completion (same root cause blocks it)
|
||||||
|
|
||||||
|
**Impact on regression test assertions:**
|
||||||
|
`_assert_red_at_tier` for bad-backup:
|
||||||
|
- `failing_tier="backup"` → checks `results["backup"]="skip"` → FAIL: "expected 'backup'='fail', got 'skip'"
|
||||||
|
- Test would FAIL with a confusing assertion message instead of passing as expected
|
||||||
|
|
||||||
|
**Fix:** Recreate both fixture branches with correct compose.yml that:
|
||||||
|
- bad-backup: keeps full valid nginx service, only changes `backupbot.backup.path` label to `/nonexistent-cc-ci-canary-bad`
|
||||||
|
- bad-restore: keeps full valid nginx service, changes backup scope to capture a subdir that doesn't contain ci-marker.txt (so restore doesn't recover the marker)
|
||||||
|
|
||||||
|
The compose.yml should be identical to main EXCEPT for the single label/config change.
|
||||||
|
|
||||||
|
**Repro:** `git diff origin/main..origin/regression-bad-backup -- compose.yml` → empty file
|
||||||
|
|
||||||
|
**Adversary closes:** after both fixtures are recreated correctly, runs confirm:
|
||||||
|
- bad-backup: `install=pass, upgrade=pass, backup=fail`
|
||||||
|
- bad-restore: `install=pass, upgrade=pass, backup=pass, restore=fail` with `test_restore_returns_state` FAIL
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### A-reg-2 [adversary] CLOSED @2026-06-02T02:20Z — 4 per-tier RED canaries cold-verified
|
||||||
|
|
||||||
|
**Resolved:** All 4 per-tier RED canaries added, artifacts cold-verified on cc-ci.
|
||||||
|
|
||||||
|
| Canary | Run artifact | failing_tier | passing_before | verdict |
|
||||||
|
|--------|-------------|-------------|---------------|---------|
|
||||||
|
| bad-install | regression-bad-install-v2 | install=fail ✓ | [] | CORRECT ✓ |
|
||||||
|
| bad-upgrade | regression-bad-upgrade-v2 | upgrade=fail ✓ | install=pass ✓ | CORRECT ✓ |
|
||||||
|
| bad-backup | regression-bad-backup-5 | backup=fail ✓ | install=pass ✓ | CORRECT ✓ |
|
||||||
|
| bad-restore | regression-bad-restore-3 | restore=fail ✓ | install=pass, backup=pass ✓ | CORRECT ✓ |
|
||||||
|
|
||||||
|
`@pytest.mark.canary_fast` marker added ✓. 7 tests collect ✓.
|
||||||
|
|
||||||
|
**Note:** bad-backup comment in test_canaries.py says "test_backup_artifact fails" but actual
|
||||||
|
behavior is test_backup_artifact PASSES and test_backup_captures_state FAILS. Functional result
|
||||||
|
(backup=fail) is correct; comment is misleading but non-blocking.
|
||||||
|
|
||||||
|
### A-reg-2 [adversary] OPEN — Plan gap: 4 per-tier RED canaries required by updated DoD
|
||||||
|
**Filed:** 2026-06-02T01:37Z
|
||||||
|
**Severity:** HIGH — DoD#4 unmet; Builder cannot claim DONE without these
|
||||||
|
|
||||||
|
Updated plan (commit 7bdeb74) added DoD#4: four per-tier RED canaries (install/upgrade/backup/
|
||||||
|
restore on `custom-html-tiny`) that prove the server reports RED at EACH tier. Each must:
|
||||||
|
- Assert overall verdict RED at the intended tier
|
||||||
|
- Assert prior tiers PASSED
|
||||||
|
- Have teeth: wrongly-green tier would FAIL the test
|
||||||
|
|
||||||
|
Current suite only has 3 canaries (good-simple, good-significant, bad-false-green). The 4
|
||||||
|
per-tier RED canaries are MISSING. This is a mandatory DoD item.
|
||||||
|
|
||||||
|
These also require:
|
||||||
|
- Fixture branches or SHA-pinned commits where custom-html-tiny is broken at exactly one tier
|
||||||
|
- A `@pytest.mark.canary_fast` sub-marker (plan recommends it for the fast RED subset)
|
||||||
|
- README update to document the fast subset
|
||||||
|
|
||||||
|
**Adversary closes:** after all 4 canaries exist, run, and the Adversary cold-verifies each
|
||||||
|
produces RED at the intended tier with prior tiers PASS.
|
||||||
76
machine-docs/JOURNAL-regression.md
Normal file
76
machine-docs/JOURNAL-regression.md
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
# JOURNAL — server regression canaries phase (Builder)
|
||||||
|
|
||||||
|
**Phase:** server regression canaries
|
||||||
|
**Started:** 2026-06-02
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 0 — phase kickoff and design (2026-06-02)
|
||||||
|
|
||||||
|
**Context:** Mirror phase (plan-mirror-enroll-all-recipes.md) completed DONE at 2026-06-02T01:16Z.
|
||||||
|
Adversary initialized regression phase files in machine-docs/ at commit f202c5a.
|
||||||
|
|
||||||
|
**Decision: run regression tests ON cc-ci, not from the orchestrator**
|
||||||
|
|
||||||
|
The regression tests call `run_recipe_ci.py` which uses abra/docker/swarm — these only exist on
|
||||||
|
cc-ci. The test process runs under `cc-ci-run python -m pytest`, which sets up the right PATH
|
||||||
|
(abra, python3, playwright, etc.). The test then invokes `run_recipe_ci.py` as a subprocess using
|
||||||
|
`sys.executable` (inherits the same python3 from cc-ci-run).
|
||||||
|
|
||||||
|
The README.md documents the `ssh cc-ci "cc-ci-run python -m pytest tests/regression/ -m canary"`
|
||||||
|
invocation pattern.
|
||||||
|
|
||||||
|
**Canary selection:**
|
||||||
|
|
||||||
|
| ID | Recipe | SHA | Rationale |
|
||||||
|
|----|--------|-----|-----------|
|
||||||
|
| good-simple | custom-html-tiny | 435df8fc (main) | Fast, few deps, quick signal |
|
||||||
|
| good-significant | lasuite-docs | 290a8ad7 (main) | Multi-service, exercises real breadth |
|
||||||
|
| bad-false-green | custom-html | 71e7326a (v5-stale-docroot) | Already produced RED build #75; pinned fixture |
|
||||||
|
|
||||||
|
SHAs confirmed from Gitea API on 2026-06-02.
|
||||||
|
|
||||||
|
**Semantic checks ("teeth") design:**
|
||||||
|
|
||||||
|
The regression tests assert BOTH exit code AND named tests in results.json stages. This guards
|
||||||
|
against two failure modes:
|
||||||
|
1. Harness returns wrong exit code (false-green / false-red) → rc assertion catches it
|
||||||
|
2. A specific assertion is silently removed or made vacuous → named test disappears from stages → semantic check catches it
|
||||||
|
|
||||||
|
For custom-html-tiny: `test_serving` (generic install) must appear passing
|
||||||
|
For lasuite-docs: `test_serving_and_frontend` (install overlay) must appear passing
|
||||||
|
For bad canary: `test_content_type` (custom functional) must appear failing
|
||||||
|
|
||||||
|
**File layout:**
|
||||||
|
- `tests/regression/conftest.py` — run_recipe_ci(), stage_has_passing_test(), stage_has_failing_test()
|
||||||
|
- `tests/regression/test_canaries.py` — parametrized @pytest.mark.canary test
|
||||||
|
- `tests/regression/README.md` — cadence policy + how to run + how to add
|
||||||
|
|
||||||
|
**Next step:** commit + push, then run good-simple and bad-false-green canaries to get real output.
|
||||||
|
lasuite-docs is slow (10-20 min) so will run it last.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Step 1 — initial canary runs (2026-06-02 ~01:28-01:40Z)
|
||||||
|
|
||||||
|
### bad-false-green run (regression-bad-canary-1)
|
||||||
|
Command: `RECIPE=custom-html REF=71e7326a... SRC=recipe-maintainers/custom-html cc-ci-run runner/run_recipe_ci.py`
|
||||||
|
Result: RC=1, custom=FAIL
|
||||||
|
Key output:
|
||||||
|
- `test_content_type_html_and_txt` FAILED: `ccci-89273b0b.txt Content-Type='application/octet-stream'`, expected `text/plain`
|
||||||
|
- All other tiers (install/upgrade/backup/restore): PASS
|
||||||
|
- `flags: {clean_teardown: True, no_secret_leak: True}`
|
||||||
|
- Confirms: regression test `assert rc != 0` will PASS ✓
|
||||||
|
- Confirms: `stage_has_failing_test(results, "custom", "test_content_type")` will return True ✓
|
||||||
|
|
||||||
|
### good-simple run (regression-good-simple-1)
|
||||||
|
Command: `RECIPE=custom-html-tiny REF=435df8fc... SRC=recipe-maintainers/custom-html-tiny cc-ci-run runner/run_recipe_ci.py`
|
||||||
|
Result: RC=0, install=pass, upgrade=pass, backup/restore/custom=skip
|
||||||
|
Key output:
|
||||||
|
- `test_serving` in install stage: PASSED ✓
|
||||||
|
- `flags: {clean_teardown: True, no_secret_leak: True}` ✓
|
||||||
|
- Confirms: all regression assertions for good-simple will PASS ✓
|
||||||
|
|
||||||
|
### good-significant run (regression-good-significant-1) [IN PROGRESS]
|
||||||
|
Started ~01:35Z. Multi-service stack (lasuite-docs + keycloak dep). Image pull in progress.
|
||||||
|
Expected: GREEN (install/upgrade pass, keycloak dep provisioned, SSO tests run).
|
||||||
238
machine-docs/REVIEW-regression.md
Normal file
238
machine-docs/REVIEW-regression.md
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
# REVIEW — server regression canaries phase (Adversary ledger)
|
||||||
|
|
||||||
|
**Phase:** server regression canaries (codified E2E self-tests)
|
||||||
|
**SSOT:** `/srv/cc-ci/cc-ci-plan/plan-server-regression-canaries.md`
|
||||||
|
**Adversary loop started:** 2026-06-02T01:15Z
|
||||||
|
**Repo:** git.autonomic.zone/recipe-maintainers/cc-ci
|
||||||
|
**Adversary clone:** /srv/cc-ci/cc-ci-adv
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## D-gate verdicts
|
||||||
|
|
||||||
|
### D-final: PASS @2026-06-02T03:36Z — all 7 canaries cold-verified; PR#5 open; all DoD items met
|
||||||
|
|
||||||
|
**Cold verification result: PASS**
|
||||||
|
|
||||||
|
All DoD items independently verified (cold shell, Adversary clone, no cached state):
|
||||||
|
|
||||||
|
**DoD#1 — tests/regression/ committed:**
|
||||||
|
- `cc-ci-run -m pytest tests/regression/ --collect-only -q` on cc-ci from PR branch: 7 tests collected ✓
|
||||||
|
- Files present on `regression-canaries` branch: `conftest.py`, `test_canaries.py`, `README.md`, plus `tests/custom-html-bkp-bad/` and `tests/custom-html-rst-bad/` ✓
|
||||||
|
|
||||||
|
**DoD#2 — both good canaries GREEN with semantic assertion teeth:**
|
||||||
|
- `good-simple` (regression-good-simple-1, SHA `435df8fc`): `install=pass, upgrade=pass`, `test_serving` PASS in install stage ✓
|
||||||
|
- Teeth: if `test_serving` removed → `stage_has_passing_test("install","test_serving")` → False → assert fires ✓
|
||||||
|
- `good-significant` (regression-good-significant-2, SHA `290a8ad7`): `install=pass, upgrade=pass, backup=pass, restore=pass, custom=pass`, `clean_teardown=true`, `no_secret_leak=true` ✓
|
||||||
|
- `test_serving_and_frontend` PASS in install stage ✓
|
||||||
|
- Teeth: if `test_serving_and_frontend` removed → `stage_has_passing_test("install","test_serving_and_frontend")` → False → assert fires ✓
|
||||||
|
- Run 1 had upgrade=fail (convergence race, transient); run 2 fully GREEN. Known plan risk; no action needed unless persistent.
|
||||||
|
|
||||||
|
**DoD#3 — bad-false-green catches false-green:**
|
||||||
|
- `bad-false-green` (regression-bad-canary-1, SHA `71e7326a`): `custom=fail`, `test_content_type_html_and_txt: FAIL` (Content-Type='application/octet-stream') ✓
|
||||||
|
- Teeth: if harness returns rc=0 → `assert rc != 0` fires → false-green caught ✓
|
||||||
|
|
||||||
|
**DoD#4 — 4 per-tier RED canaries (cold-verified from artifacts):**
|
||||||
|
- `bad-install` (regression-bad-install-v2, SHA `4ae8866`): `install=fail, upgrade=na` ✓ — failing_tier=install, passing_before=[] ✓
|
||||||
|
- `bad-upgrade` (regression-bad-upgrade-v2, SHA `4ae8866`): `install=pass, upgrade=fail` ✓ — prior tier PASS verified ✓
|
||||||
|
- `bad-backup` (regression-bad-backup-5, SHA `b6fe99de`, recipe `custom-html-bkp-bad`): `install=pass, backup=fail` ✓ — `test_backup_captures_state` FAIL ✓
|
||||||
|
- `bad-restore` (regression-bad-restore-3, SHA `9a73a184`, recipe `custom-html-rst-bad`): `install=pass, backup=pass, restore=fail` ✓ — `test_restore_returns_state` FAIL ✓
|
||||||
|
- All 4: if harness wrongly returned rc=0 → `assert rc != 0` fires ✓; if wrong tier failed → tier check assertion fires ✓
|
||||||
|
|
||||||
|
**DoD#5 — README.md:**
|
||||||
|
- `tests/regression/README.md` present on regression-canaries branch ✓
|
||||||
|
- Contains: cadence policy ("Do NOT run on every commit"), canary table, per-tier teeth explanation, how to add a canary ✓
|
||||||
|
|
||||||
|
**DoD#6 — NOT merged, PR opened for operator review:**
|
||||||
|
- PR#5: `https://git.autonomic.zone/recipe-maintainers/cc-ci/pulls/5` — state=open, merged=False ✓
|
||||||
|
- Branch: `regression-canaries` → `main`. 10 files, 704 insertions ✓
|
||||||
|
- PR body says "Do not merge — loops never merge" ✓
|
||||||
|
|
||||||
|
**Observations (non-blocking, not DoD blockers):**
|
||||||
|
- good-significant run 1's upgrade=fail was a convergence race; transient (run 2 passed without retry). No test weakening, no retry added — consistent with plan policy.
|
||||||
|
- Semantic stage_pass_checks explicitly guard only the install tier for good-significant. Upgrade/backup/restore teeth coverage comes from `_assert_green`'s "no tier failed" check. Limitation noted; acceptable per plan DoD requirements.
|
||||||
|
- The bad-backup comment in test_canaries.py (noted under A-reg-2) says "test_backup_artifact fails"; actual behavior is test_backup_artifact passes and test_backup_captures_state fails. Misleading comment, non-blocking.
|
||||||
|
|
||||||
|
**Verdict: D-final PASS.** All 7 canaries verified. All 6 DoD items met. Phase is complete pending operator review of PR#5. No vetoes.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### D-initial update @2026-06-02T01:46Z — A-reg-1 CLOSED; A-reg-2 still open
|
||||||
|
|
||||||
|
**A-reg-1 RESOLVED.** Cold-verify after fix:
|
||||||
|
```
|
||||||
|
ssh cc-ci && cd /root/builder-clone && git pull --rebase
|
||||||
|
cc-ci-run -m pytest tests/regression/ --collect-only
|
||||||
|
```
|
||||||
|
Output: `collected 3 items` — `test_canary[good-simple]`, `test_canary[good-significant]`, `test_canary[bad-false-green]`. No errors.
|
||||||
|
|
||||||
|
**Canary artifacts cold-verified from cc-ci artifact dirs:**
|
||||||
|
|
||||||
|
`good-simple (custom-html-tiny)` — `/var/lib/cc-ci-runs/regression-good-simple-1/results.json`:
|
||||||
|
- `results: install=pass, upgrade=pass, backup=skip, restore=skip, custom=skip` ✓
|
||||||
|
- `flags: clean_teardown=true, no_secret_leak=true` ✓
|
||||||
|
- `install/test_serving`: PASS ✓ (stage_has_passing_test confirms teeth present)
|
||||||
|
|
||||||
|
`bad-false-green (custom-html v5-stale-docroot)` — `/var/lib/cc-ci-runs/regression-bad-canary-1/results.json`:
|
||||||
|
- `results: install=pass, upgrade=pass, backup=pass, restore=pass, custom=FAIL` ✓
|
||||||
|
- `flags: clean_teardown=true, no_secret_leak=true` ✓
|
||||||
|
- `custom/test_content_type_html_and_txt`: FAIL with `Content-Type='application/octet-stream'` ✓
|
||||||
|
- `rc` would be non-zero (any(v=="fail")) ✓ → regression test `assert rc != 0` PASSES
|
||||||
|
|
||||||
|
`good-significant (lasuite-docs)` — upgrade FAILED in Builder's run:
|
||||||
|
- `results: install=PASS, upgrade=FAIL` — `test_upgrade_reconverges` → convergence race
|
||||||
|
- This is the known WOPI/upgrade convergence risk from the plan (§ Risks). Builder is re-running.
|
||||||
|
- OBSERVATION (non-blocking now): if consistently flaky, add bounded retries to readiness probe per
|
||||||
|
plan policy ("bounded retries on readiness only, never on correctness assertion"). Will watch.
|
||||||
|
|
||||||
|
**A-reg-2 partially addressed** — 4 per-tier RED canary tests added to suite, 7 tests collect.
|
||||||
|
But bad-backup and bad-restore FIXTURES are broken (see A-reg-3). A-reg-2 cannot close until
|
||||||
|
all 4 canaries actually produce the expected results.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### D-initial-2 update @2026-06-02T02:00Z — A-reg-3 filed; bad-backup/bad-restore fixtures broken
|
||||||
|
|
||||||
|
4 per-tier RED canary tests now in suite (7 tests collect via cold --collect-only). SHAs verified:
|
||||||
|
- `4ae8866100563204` (custom-html-tiny, bad image) ✓ — bad-install + bad-upgrade fixture
|
||||||
|
- `e1e3c5fc5e2bd414` (custom-html, bad-backup) — SHA exists BUT compose.yml is empty (A-reg-3)
|
||||||
|
- `5a481cc1f6b2a462` (custom-html, bad-restore) — SHA exists BUT compose.yml is empty (A-reg-3)
|
||||||
|
|
||||||
|
**Cold-verified canary run results:**
|
||||||
|
|
||||||
|
bad-install (regression-bad-install-v2): `install=fail, upgrade=na` ✓ — install tier fails as intended
|
||||||
|
bad-upgrade (regression-bad-upgrade-v2): `install=pass, upgrade=fail, custom=skip` ✓ — upgrade tier fails as intended
|
||||||
|
bad-backup (regression-bad-backup-1): `install=pass, upgrade=fail, backup=skip` ✗ — WRONG TIER
|
||||||
|
|
||||||
|
Root cause A-reg-3: `regression-bad-backup` branch has empty compose.yml (whole file deleted, not
|
||||||
|
just backup path changed). Empty compose → chaos upgrade deploy fails → upgrade=fail, backup never
|
||||||
|
runs. Same issue for `regression-bad-restore` (same empty compose.yml diff).
|
||||||
|
|
||||||
|
**`_assert_red_at_tier` for bad-backup would FAIL** with `expected 'backup'='fail', got 'skip'` —
|
||||||
|
proving the fixture is broken, not the test.
|
||||||
|
|
||||||
|
**What still needs fixing before final gate:**
|
||||||
|
1. ~~A-reg-3~~ CLOSED — fixtures fixed and cold-verified ✓
|
||||||
|
2. ~~A-reg-2~~ CLOSED — all 4 per-tier RED canaries present and verified ✓
|
||||||
|
3. **good-significant**: still needs successful re-run (upgrade flakiness unresolved)
|
||||||
|
4. **Open PR** (DoD#6): not yet opened
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Comprehensive canary verification @2026-06-02T02:20Z
|
||||||
|
|
||||||
|
6 of the 7 canaries cold-verified from cc-ci artifact dirs (fresh SSH shell, no cached state); `good-significant` is still pending a re-run:
|
||||||
|
|
||||||
|
**GREEN canaries:**
|
||||||
|
- `good-simple` (regression-good-simple-1, SHA `435df8fc`): `install=pass, upgrade=pass, backup/restore/custom=skip`, `clean_teardown=true`, `no_secret_leak=true`, `test_serving: pass` ✓
|
||||||
|
- `good-significant` (regression-good-significant-1, SHA `290a8ad7`): PENDING — upgrade FAIL (convergence race). Needs re-run to confirm transient.
|
||||||
|
|
||||||
|
**Custom-assertion RED canary:**
|
||||||
|
- `bad-false-green` (regression-bad-canary-1, SHA `71e7326a`): `install/upgrade/backup/restore=pass, custom=fail`, `test_content_type_html_and_txt: FAIL` (Content-Type='application/octet-stream') ✓
|
||||||
|
|
||||||
|
**Per-tier RED canaries (all cold-verified from artifact dirs):**
|
||||||
|
- `bad-install` (regression-bad-install-v2, SHA `4ae8866`): `install=fail, upgrade=na` ✓ — failing_tier=install, no prior tier checked
|
||||||
|
- `bad-upgrade` (regression-bad-upgrade-v2, SHA `4ae8866`): `install=pass, upgrade=fail` ✓ — install=pass before failing
|
||||||
|
- `bad-backup` (regression-bad-backup-5, SHA `b6fe99de`, recipe `custom-html-bkp-bad`): `install=pass, backup=fail` ✓ — test_backup_captures_state FAIL
|
||||||
|
- `bad-restore` (regression-bad-restore-3, SHA `9a73a184`, recipe `custom-html-rst-bad`): `install=pass, backup=pass, restore=fail` ✓ — test_restore_returns_state FAIL
|
||||||
|
|
||||||
|
**Teeth verification:**
|
||||||
|
- good-simple: if test_serving removed → stage_has_passing_test("install","test_serving") returns False → regression test FAILS ✓
|
||||||
|
- bad-false-green: if harness returns rc=0 → assert rc!=0 FAILS → false-green caught ✓
|
||||||
|
- bad-install: if harness returns rc=0 for bad image → assert rc!=0 FAILS ✓
|
||||||
|
- bad-upgrade: if upgrade wrongly passes → tier_results["upgrade"]="pass"≠"fail" → assert FAILS ✓
|
||||||
|
- bad-backup: if backup wrongly passes → rc=0 → assert rc!=0 FAILS ✓
|
||||||
|
- bad-restore: if restore wrongly passes → tier_results["restore"]!="fail" → assert FAILS ✓; if backup wrongly fails → tier_results["backup"]!="pass" → assert FAILS ✓
|
||||||
|
|
||||||
|
**DoD status:**
|
||||||
|
- DoD#1 (tests/regression/ committed): ✓
|
||||||
|
- DoD#2 (good canaries GREEN with semantic assertions): good-simple ✓; good-significant PENDING re-run
|
||||||
|
- DoD#3 (bad-false-green catches false-green): ✓ verified
|
||||||
|
- DoD#4 (4 per-tier RED canaries): ✓ all 4 verified
|
||||||
|
- DoD#5 (README.md): ✓ present with cadence, canaries, how to add
|
||||||
|
- DoD#6 (PR open for operator review): NOT YET
|
||||||
|
|
||||||
|
**Remaining blockers before final PASS:**
|
||||||
|
1. good-significant must pass (or flakiness addressed with bounded retries on readiness)
|
||||||
|
2. PR must be opened (DoD#6)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### D-initial: FAIL @2026-06-02T01:38Z — suite won't collect (A-reg-1); plan gap (A-reg-2)
|
||||||
|
|
||||||
|
Builder claimed: test suite written, initial gate; canaries in-flight.
|
||||||
|
|
||||||
|
**Cold verification result: FAIL — two blocking issues.**
|
||||||
|
|
||||||
|
**A-reg-1 (CRITICAL): Relative import fails, 0 tests collected.**
|
||||||
|
```
|
||||||
|
ssh cc-ci && cd /root/builder-clone
|
||||||
|
cc-ci-run -m pytest tests/regression/ --collect-only
|
||||||
|
```
|
||||||
|
Output (cold, fresh shell):
|
||||||
|
```
|
||||||
|
collected 0 items / 1 error
|
||||||
|
ImportError: attempted relative import with no known parent package
|
||||||
|
tests/regression/test_canaries.py:18: from .conftest import run_recipe_ci, ...
|
||||||
|
!!!!!!!!!!!!!!!!! Interrupted: 1 error during collection !!!!!!!!!!!!!!!!!!!!!
|
||||||
|
```
|
||||||
|
Root cause: `tests/regression/__init__.py` and `tests/__init__.py` missing. Fix: add them or
|
||||||
|
use absolute imports (as other test files in this repo do).
|
||||||
|
|
||||||
|
**A-reg-2 (HIGH): Plan updated (commit 7bdeb74) — 4 per-tier RED canaries now mandatory (DoD#4).**
|
||||||
|
Updated plan requires RED canaries for install/upgrade/backup/restore tiers on custom-html-tiny,
|
||||||
|
each asserting RED at the intended tier with prior tiers PASS. Current suite: 3 canaries only
|
||||||
|
(2 good + 1 bad-custom-assertion). All four are MISSING. Cannot claim DONE without them.
|
||||||
|
|
||||||
|
**Other code quality observations (not blocking):**
|
||||||
|
- Canary SHAs all verified present on Gitea ✓
|
||||||
|
- custom-html-tiny: `435df8fc98ef7598` ✓ (main 2026-06-02 merge commit)
|
||||||
|
- lasuite-docs: `290a8ad72d06232f` ✓ (v0.3.3+v5.1.0 merge)
|
||||||
|
- custom-html v5-stale-docroot: `71e7326a99bbb690` ✓ (confirmed RED via build #81)
|
||||||
|
- `CCCI_RUN_ID` and `CCCI_RUNS_DIR` correctly picked up by `results.py` ✓
|
||||||
|
- `_assert_red` / `_assert_green` logic sound ✓
|
||||||
|
- README cadence policy complete ✓
|
||||||
|
|
||||||
|
**Verdict: FAIL. Standing issues: A-reg-1 (critical), A-reg-2 (high). Builder must fix both
|
||||||
|
before re-claiming this gate.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adversary findings
|
||||||
|
|
||||||
|
*(See BACKLOG-regression.md § Adversary findings: A-reg-1, A-reg-2, A-reg-3)*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Break-it probes log
|
||||||
|
|
||||||
|
*(Break-it probes will be recorded here as they are run)*
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pre-orientation findings @01:17Z
|
||||||
|
|
||||||
|
**Known-bad fixture confirmed present and working:**
|
||||||
|
- Branch: `recipe-maintainers/custom-html:v5-stale-docroot` (SHA `71e7326a99bb`)
|
||||||
|
- Build #81 (run 3h ago): confirmed RED — `custom` stage FAIL; specifically:
|
||||||
|
- `test_content_type_html_and_txt`: FAIL — `ccci-e0d6e804.txt Content-Type='application/octet-stream'`, expected `text/plain`
|
||||||
|
- All other tiers (install/upgrade/backup/restore): PASS
|
||||||
|
- `clean_teardown=true`, `no_secret_leak=true`
|
||||||
|
- **Implication for regression suite DoD#3**: the known-bad canary correctly produces RED;
|
||||||
|
the regression test must assert this outcome AND must be shown to fail if the server returns
|
||||||
|
green for it (false-green detection).
|
||||||
|
|
||||||
|
**Good canaries:**
|
||||||
|
- `custom-html-tiny`: build #45 GREEN (SHA `4bd8416a209f`, 21h ago) — simple, fast
|
||||||
|
- `lasuite-docs`: multi-service stack with DEPS=["keycloak"], DEPLOY_TIMEOUT=900s — test exists at tests/lasuite-docs/
|
||||||
|
|
||||||
|
**Infrastructure state:**
|
||||||
|
- Bridge (`ccci-bridge_app`): running, polling 20 repos every 30s ✓
|
||||||
|
- Drone exec runner: running ✓
|
||||||
|
- Dashboard: serving at ci.commoninternet.net ✓
|
||||||
|
- Builder hasn't started regression phase: no STATUS-regression.md yet
|
||||||
|
|
||||||
|
**Notes:**
|
||||||
|
- Mirror phase (plan-mirror-enroll-all-recipes.md) completed DONE at 2026-06-02T01:16Z.
|
||||||
|
- This phase starts fresh: no STATUS-regression.md or tests/regression/ yet.
|
||||||
|
- Watching for Builder to create STATUS-regression.md and begin work.
|
||||||
138
machine-docs/STATUS-regression.md
Normal file
138
machine-docs/STATUS-regression.md
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
# STATUS — server regression canaries phase
|
||||||
|
|
||||||
|
**Phase:** server regression canaries (codified E2E self-tests)
|
||||||
|
**SSOT:** `/srv/cc-ci/cc-ci-plan/plan-server-regression-canaries.md`
|
||||||
|
**Builder loop started:** 2026-06-02
|
||||||
|
**Repo:** git.autonomic.zone/recipe-maintainers/cc-ci
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## DONE
|
||||||
|
|
||||||
|
**Adversary PASS: @2026-06-02T03:36Z — D-final PASS. All 7 canaries verified. All 6 DoD items met. No vetoes.**
|
||||||
|
|
||||||
|
All DoD items Adversary-verified (seven checks covering DoD#1–#6; DoD#2 has two canaries):
|
||||||
|
1. ✓ `tests/regression/` suite committed — 7 tests collected (DoD#1)
|
||||||
|
2. ✓ good-simple GREEN: `/var/lib/cc-ci-runs/regression-good-simple-1/` — install/upgrade=pass, test_serving PASS (DoD#2)
|
||||||
|
3. ✓ good-significant GREEN: `/var/lib/cc-ci-runs/regression-good-significant-2/` — all 5 tiers pass, clean_teardown/no_secret_leak=true (DoD#2)
|
||||||
|
4. ✓ bad-false-green RED: `/var/lib/cc-ci-runs/regression-bad-canary-1/` — custom=fail, false-green caught (DoD#3)
|
||||||
|
5. ✓ 4 per-tier RED canaries verified (bad-install/upgrade/backup/restore — artifacts on server) (DoD#4)
|
||||||
|
6. ✓ README.md: cadence, canaries, how to add (DoD#5)
|
||||||
|
7. ✓ PR#5 open for operator review: https://git.autonomic.zone/recipe-maintainers/cc-ci/pulls/5 (DoD#6)
|
||||||
|
|
||||||
|
**Phase complete. Loop stopped. PR#5 awaits operator review — do not merge.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What was built
|
||||||
|
|
||||||
|
```
|
||||||
|
tests/regression/
|
||||||
|
├── conftest.py — run_recipe_ci(), stage_has_{passing,failing}_test() helpers
|
||||||
|
├── test_canaries.py — 7 parametrized canaries (3 @canary + 4 @canary_fast)
|
||||||
|
└── README.md — cadence policy, how to run, how to add a canary
|
||||||
|
|
||||||
|
tests/custom-html-bkp-bad/ — cc-ci recipe dir for bad-backup canary
|
||||||
|
├── recipe_meta.py — BACKUP_CAPABLE=True
|
||||||
|
└── test_backup.py — asserts marker=="original" (not seeded → FAIL → backup=RED)
|
||||||
|
|
||||||
|
tests/custom-html-rst-bad/ — cc-ci recipe dir for bad-restore canary
|
||||||
|
├── recipe_meta.py — BACKUP_CAPABLE=True
|
||||||
|
├── ops.py — pre_restore writes "mutated" (no pre_backup)
|
||||||
|
└── test_restore.py — asserts marker=="original" (not in snapshot → FAIL → restore=RED)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Canaries (7 total)
|
||||||
|
|
||||||
|
| ID | Recipe | SHA | Expected | Verified |
|
||||||
|
|----|--------|-----|---------|---------|
|
||||||
|
| good-simple | custom-html-tiny | 435df8fc (main) | GREEN | ✓ rc=0, install=pass, test_serving present |
|
||||||
|
| good-significant | lasuite-docs | 290a8ad7 (main) | GREEN | ✓ rc=0, all tiers pass (run: regression-good-significant-2) |
|
||||||
|
| bad-false-green | custom-html | 71e7326a (v5-stale-docroot) | RED | ✓ rc=1, custom=fail, test_content_type fails |
|
||||||
|
| bad-install | custom-html-tiny | 4ae88661 (regression-bad-image) | RED (install) | ✓ rc=1, install=fail |
|
||||||
|
| bad-upgrade | custom-html-tiny | 4ae88661 (regression-bad-image) | RED (upgrade) | ✓ rc=1, install=pass, upgrade=fail |
|
||||||
|
| bad-backup | custom-html-bkp-bad | b6fe99de (main) | RED (backup) | ✓ rc=1, install=pass, backup=fail |
|
||||||
|
| bad-restore | custom-html-rst-bad | 9a73a184 (main) | RED (restore) | ✓ rc=1, install=pass, backup=pass, restore=fail |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to verify (Adversary commands)
|
||||||
|
|
||||||
|
From cc-ci server (builder-clone at `/root/builder-clone`):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Pull latest
|
||||||
|
cd /root/builder-clone && git pull --rebase
|
||||||
|
|
||||||
|
# Verify collection (expect 7 tests)
|
||||||
|
cc-ci-run -m pytest tests/regression/ --collect-only
|
||||||
|
|
||||||
|
# Fast RED canaries (~2-3 min each):
|
||||||
|
RECIPE=custom-html-tiny REF=4ae8866100563204d40435c5aba00374aa5a8ed3 SRC=recipe-maintainers/custom-html-tiny PR=0 STAGES=install CCCI_RUN_ID=adv-bad-install HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: install=fail, rc=1
|
||||||
|
|
||||||
|
RECIPE=custom-html-tiny REF=4ae8866100563204d40435c5aba00374aa5a8ed3 SRC=recipe-maintainers/custom-html-tiny PR=0 STAGES=install,upgrade,custom CCCI_RUN_ID=adv-bad-upgrade HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: install=pass, upgrade=fail, rc=1
|
||||||
|
|
||||||
|
RECIPE=custom-html-bkp-bad REF=b6fe99de41601f9e51bc7ea5b6072f0c3f56cdc3 SRC=recipe-maintainers/custom-html-bkp-bad PR=0 STAGES=install,upgrade,backup CCCI_RUN_ID=adv-bad-backup HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: install=pass, backup=fail (test_backup_captures_state: MISSING), rc=1
|
||||||
|
|
||||||
|
RECIPE=custom-html-rst-bad REF=9a73a184e739691bc6a621a5f1e6efc799743c5b SRC=recipe-maintainers/custom-html-rst-bad PR=0 STAGES=install,backup,restore CCCI_RUN_ID=adv-bad-restore HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: install=pass, backup=pass, restore=fail (test_restore_returns_state: mutated), rc=1
|
||||||
|
|
||||||
|
# Good-simple GREEN:
|
||||||
|
RECIPE=custom-html-tiny REF=435df8fc98ef7598084fcffcd6225470eca80053 SRC=recipe-maintainers/custom-html-tiny PR=0 CCCI_RUN_ID=adv-good-simple HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: install=pass, upgrade=pass, rc=0; stages.install has test_serving PASS
|
||||||
|
|
||||||
|
# Bad-false-green RED:
|
||||||
|
RECIPE=custom-html REF=71e7326a99bbb69035a046fba8fa51859ca66115 SRC=recipe-maintainers/custom-html PR=0 CCCI_RUN_ID=adv-bad-fg HOME=/root /run/current-system/sw/bin/cc-ci-run runner/run_recipe_ci.py
|
||||||
|
# Expected: custom=fail (test_content_type FAILS), rc=1
|
||||||
|
|
||||||
|
# Good-significant (lasuite-docs) — verify artifact (or re-run, takes ~15-20 min):
|
||||||
|
# Quick artifact check (no re-run needed):
|
||||||
|
cat /var/lib/cc-ci-runs/regression-good-significant-2/results.json
|
||||||
|
# Expected: install=pass, upgrade=pass, backup=pass, restore=pass, custom=pass (rc=0 is implied by level>=5)
|
||||||
|
# Check PR exists and is open:
|
||||||
|
# https://git.autonomic.zone/recipe-maintainers/cc-ci/pulls/5 — state=open, 10 files, 704 insertions
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Artifacts already on server
|
||||||
|
|
||||||
|
| Run ID | Recipe | Result |
|
||||||
|
|--------|--------|--------|
|
||||||
|
| regression-good-simple-1 | custom-html-tiny | GREEN ✓ |
|
||||||
|
| regression-good-significant-2 | lasuite-docs | GREEN ✓ (all tiers: install/upgrade/backup/restore/custom=pass) |
|
||||||
|
| regression-bad-canary-1 | custom-html v5-stale-docroot | RED ✓ |
|
||||||
|
| regression-bad-install-v2 | custom-html-tiny bad-image | RED (install=fail) ✓ |
|
||||||
|
| regression-bad-upgrade-v2 | custom-html-tiny bad-image | RED (upgrade=fail) ✓ |
|
||||||
|
| regression-bad-backup-5 | custom-html-bkp-bad | RED (backup=fail) ✓ |
|
||||||
|
| regression-bad-restore-3 | custom-html-rst-bad | RED (restore=fail) ✓ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## good-significant run 2 full results (cold-readable on server)
|
||||||
|
|
||||||
|
`cat /var/lib/cc-ci-runs/regression-good-significant-2/results.json` shows:
|
||||||
|
- `install=pass, upgrade=pass, backup=pass, restore=pass, custom=pass`
|
||||||
|
- `level=5 (full suite), level_cap_reason="L6 recipe-local N/A"`
|
||||||
|
- `clean_teardown=true, no_secret_leak=true`
|
||||||
|
- install: `test_serving` PASS, `test_serving_and_frontend` PASS
|
||||||
|
- upgrade: `test_upgrade_reconverges` PASS, `test_upgrade_preserves_data` PASS
|
||||||
|
- backup: `test_backup_artifact` PASS, `test_backup_captures_state` PASS
|
||||||
|
- restore: `test_restore_healthy` PASS, `test_restore_returns_state` PASS
|
||||||
|
- custom: auth/create-doc/health/oidc/OIDC-keycloak all PASS
|
||||||
|
|
||||||
|
This confirms run 1's upgrade failure was a transient convergence race (no retries added, no assertions weakened —
|
||||||
|
the fixture itself is sound; race resolved on second cold run).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## PR
|
||||||
|
|
||||||
|
**PR#5: https://git.autonomic.zone/recipe-maintainers/cc-ci/pulls/5**
|
||||||
|
Branch `regression-canaries` → `main`. 10 files, 704 insertions. Open for operator review.
|
||||||
|
"Do not merge" — operator review only per DoD#6.
|
||||||
@ -22,6 +22,7 @@
|
|||||||
../../modules/drone-runner.nix
|
../../modules/drone-runner.nix
|
||||||
../../modules/bridge.nix
|
../../modules/bridge.nix
|
||||||
../../modules/dashboard.nix
|
../../modules/dashboard.nix
|
||||||
|
../../modules/reports.nix
|
||||||
../../modules/backupbot.nix
|
../../modules/backupbot.nix
|
||||||
../../modules/harness.nix
|
../../modules/harness.nix
|
||||||
../../modules/warm-keycloak.nix
|
../../modules/warm-keycloak.nix
|
||||||
|
|||||||
73
nix/modules/reports.nix
Normal file
73
nix/modules/reports.nix
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Recipe Report static site (report.ci.commoninternet.net): a public nginx serving the weekly
|
||||||
|
# "Recipe Report" HTML pages written to /var/lib/cc-ci-reports by the /recipe-report skill. No app,
|
||||||
|
# no secrets — just static files behind traefik + the wildcard TLS (same pattern as dashboard.nix,
|
||||||
|
# but a plain nginx:alpine since there's nothing to render server-side). Content is updated by writing
|
||||||
|
# files into /var/lib/cc-ci-reports; nginx serves them live (no redeploy needed).
|
||||||
|
{ pkgs, ... }:
|
||||||
|
let
|
||||||
|
reportsDir = "/var/lib/cc-ci-reports";
|
||||||
|
|
||||||
|
stack = pkgs.writeText "cc-ci-reports-stack.yml" ''
|
||||||
|
version: "3.8"
|
||||||
|
services:
|
||||||
|
app:
|
||||||
|
image: nginx:alpine
|
||||||
|
volumes:
|
||||||
|
- type: bind
|
||||||
|
source: ${reportsDir}
|
||||||
|
target: /usr/share/nginx/html
|
||||||
|
read_only: true
|
||||||
|
networks:
|
||||||
|
- proxy
|
||||||
|
deploy:
|
||||||
|
replicas: 1
|
||||||
|
restart_policy:
|
||||||
|
condition: any
|
||||||
|
labels:
|
||||||
|
- "traefik.enable=true"
|
||||||
|
- "traefik.http.services.ccci-reports.loadbalancer.server.port=80"
|
||||||
|
- "traefik.http.routers.ccci-reports.rule=Host(`report.ci.commoninternet.net`)"
|
||||||
|
- "traefik.http.routers.ccci-reports.entrypoints=web-secure"
|
||||||
|
- "traefik.http.routers.ccci-reports.tls=true"
|
||||||
|
networks:
|
||||||
|
proxy:
|
||||||
|
external: true
|
||||||
|
'';
|
||||||
|
|
||||||
|
reconcile = pkgs.writeShellApplication {
|
||||||
|
name = "cc-ci-reconcile-reports";
|
||||||
|
runtimeInputs = with pkgs; [ docker coreutils ];
|
||||||
|
text = ''
|
||||||
|
mkdir -p ${reportsDir}
|
||||||
|
# Seed a placeholder index so the site serves something before the first report is generated.
|
||||||
|
if [ ! -f ${reportsDir}/index.html ]; then
|
||||||
|
cat > ${reportsDir}/index.html <<'HTML'
|
||||||
|
<!doctype html><html lang="en"><head><meta charset="utf-8">
|
||||||
|
<meta name="viewport" content="width=device-width,initial-scale=1">
|
||||||
|
<title>The Recipe Report</title>
|
||||||
|
<style>body{font:16px/1.5 system-ui,sans-serif;max-width:50rem;margin:3rem auto;padding:0 1rem;color:#222}</style>
|
||||||
|
</head><body><h1>🌻 The Recipe Report</h1>
|
||||||
|
<p>No reports yet — the first one is generated after the weekly recipe-upgrade run.</p>
|
||||||
|
</body></html>
|
||||||
|
HTML
|
||||||
|
fi
|
||||||
|
docker stack deploy --detach=true -c ${stack} ccci-reports
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
in
|
||||||
|
{
|
||||||
|
systemd.services.deploy-reports = {
|
||||||
|
description = "Reconcile the cc-ci Recipe Report static site (report.ci.commoninternet.net)";
|
||||||
|
# Ordering-only: chain after the dashboard (proxy→…→dashboard→reports) to avoid concurrent
|
||||||
|
# docker-init races on a fresh host.
|
||||||
|
after = [ "deploy-dashboard.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
|
||||||
|
requires = [ "swarm-init.service" "docker.service" ];
|
||||||
|
wants = [ "network-online.target" ];
|
||||||
|
wantedBy = [ "multi-user.target" ];
|
||||||
|
serviceConfig = {
|
||||||
|
Type = "oneshot";
|
||||||
|
RemainAfterExit = true;
|
||||||
|
ExecStart = "${reconcile}/bin/cc-ci-reconcile-reports";
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
||||||
@ -79,10 +79,44 @@ def render_badge_svg(label: str, message: str, color: str) -> str:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def level_badge_svg(level: int, cap_reason: str = "") -> str:
|
# Third-segment colours for the level badge: amber = an UNINTENTIONAL skip (a rung skipped but not
|
||||||
"""Per-recipe/-run LEVEL badge: 'cc-ci | level N'. Colour by level (R6)."""
|
# in the recipe's intentional list — likely missing coverage) capped the climb; muted = an
|
||||||
msg = f"level {int(level)}"
|
# INTENTIONAL skip (declared in recipe_meta.EXPECTED_NA — nothing to fix). Font-safe text labels
|
||||||
return render_badge_svg("cc-ci", msg, level_color(level))
|
# (no emoji) so the SVG renders anywhere.
|
||||||
|
GAP_COLOR = "#d29922"
|
||||||
|
EXPECT_COLOR = "#6e7681"
|
||||||
|
|
||||||
|
|
||||||
|
def level_badge_svg(level: int, cap_reason: str = "", cap_skip: str = "") -> str:
|
||||||
|
"""Per-recipe/-run LEVEL badge: 'cc-ci | level N' coloured by level (R6), with a THIRD segment
|
||||||
|
that differentiates *why* the climb stopped when a SKIP capped it (`cap_skip`):
|
||||||
|
- "unintentional" (a rung skipped but not in the recipe's intentional list): amber 'gap?'.
|
||||||
|
- "intentional" (a skip declared in recipe_meta.EXPECTED_NA): muted 'expected'.
|
||||||
|
- "" (clean cap / full climb / a real failure): no third segment (the level + card carry it).
|
||||||
|
The badge never inflates — it only annotates the cap the level already reflects."""
|
||||||
|
label, msg = "cc-ci", f"level {int(level)}"
|
||||||
|
lw, mw = _text_width(label), _text_width(msg)
|
||||||
|
third: tuple[str, str] | None = None
|
||||||
|
if cap_skip == "unintentional":
|
||||||
|
third = ("gap?", GAP_COLOR)
|
||||||
|
elif cap_skip == "intentional":
|
||||||
|
third = ("expected", EXPECT_COLOR)
|
||||||
|
if third is None:
|
||||||
|
return render_badge_svg(label, msg, level_color(level))
|
||||||
|
txt, tcolor = third
|
||||||
|
tw = _text_width(txt)
|
||||||
|
w = lw + mw + tw
|
||||||
|
return (
|
||||||
|
f'<svg xmlns="http://www.w3.org/2000/svg" width="{w}" height="20" role="img" '
|
||||||
|
f'aria-label="{html.escape(label)}: {html.escape(msg)} ({html.escape(txt)})">'
|
||||||
|
f'<rect width="{lw}" height="20" fill="#555"/>'
|
||||||
|
f'<rect x="{lw}" width="{mw}" height="20" fill="{level_color(level)}"/>'
|
||||||
|
f'<rect x="{lw + mw}" width="{tw}" height="20" fill="{tcolor}"/>'
|
||||||
|
f'<g fill="#fff" font-family="Verdana,Geneva,sans-serif" font-size="11">'
|
||||||
|
f'<text x="6" y="14">{html.escape(label)}</text>'
|
||||||
|
f'<text x="{lw + 6}" y="14">{html.escape(msg)}</text>'
|
||||||
|
f'<text x="{lw + mw + 6}" y="14">{html.escape(txt)}</text></g></svg>'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _stage_rows(stages: list[dict]) -> str:
|
def _stage_rows(stages: list[dict]) -> str:
|
||||||
@ -107,6 +141,41 @@ def _stage_rows(stages: list[dict]) -> str:
|
|||||||
return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'
|
return "\n".join(rows) or '<tr><td colspan="3">no stages</td></tr>'
|
||||||
|
|
||||||
|
|
||||||
|
# Friendly rung labels for the skip rows (the four essential rungs).
|
||||||
|
RUNG_LABEL = {
|
||||||
|
"install": "install",
|
||||||
|
"upgrade": "upgrade",
|
||||||
|
"backup_restore": "backup/restore",
|
||||||
|
"functional": "functional",
|
||||||
|
}
|
||||||
|
SKIP_GREEN = "#57ab5a" # muted green — an intentional skip reads like a pass (but labelled, never inflating)
|
||||||
|
|
||||||
|
|
||||||
|
def _skip_rows(skips: dict) -> str:
|
||||||
|
"""Render SKIPPED rungs as stage-like rows. An intentional (declared) skip looks like a pass row
|
||||||
|
but its status says 'INTENTIONAL SKIP' (muted green) with the declared reason on the line below;
|
||||||
|
an unintentional skip is amber 'UNINTENTIONAL SKIP' with a prompt to add a test or declare it."""
|
||||||
|
rows = []
|
||||||
|
for rung, reason in (skips.get("intentional") or {}).items():
|
||||||
|
rows.append(
|
||||||
|
f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{SKIP_GREEN}">⊘</span>'
|
||||||
|
f'<b>{html.escape(RUNG_LABEL.get(rung, rung))}</b></td>'
|
||||||
|
f'<td class="st" style="color:{SKIP_GREEN}">intentional skip</td></tr>'
|
||||||
|
)
|
||||||
|
rows.append(f'<tr class="skipreason"><td></td><td colspan="2">{html.escape(reason)}</td></tr>')
|
||||||
|
for rung in skips.get("unintentional") or []:
|
||||||
|
rows.append(
|
||||||
|
f'<tr class="stage"><td colspan="2"><span class="mark" style="color:{GAP_COLOR}">⊘</span>'
|
||||||
|
f'<b>{html.escape(RUNG_LABEL.get(rung, rung))}</b></td>'
|
||||||
|
f'<td class="st" style="color:{GAP_COLOR}">unintentional skip</td></tr>'
|
||||||
|
)
|
||||||
|
rows.append(
|
||||||
|
'<tr class="skipreason"><td></td><td colspan="2">not declared in EXPECTED_NA — add the '
|
||||||
|
"missing test/label, or declare the skip with a reason</td></tr>"
|
||||||
|
)
|
||||||
|
return "\n".join(rows)
|
||||||
|
|
||||||
|
|
||||||
def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str:
|
def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png") -> str:
|
||||||
"""Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to
|
"""Build the summary-card HTML from a results.json dict. `screenshot_rel` is the relative path to
|
||||||
the screenshot PNG (same dir as the card) — omitted from the card if None / absent.
|
the screenshot PNG (same dir as the card) — omitted from the card if None / absent.
|
||||||
@ -116,7 +185,9 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
|
|||||||
recipe = html.escape(str(data.get("recipe", "?")))
|
recipe = html.escape(str(data.get("recipe", "?")))
|
||||||
version = html.escape(str(data.get("version") or data.get("ref") or ""))
|
version = html.escape(str(data.get("version") or data.get("ref") or ""))
|
||||||
level = int(data.get("level", 0))
|
level = int(data.get("level", 0))
|
||||||
cap = html.escape(str(data.get("level_cap_reason") or ""))
|
cap_reason = str(data.get("level_cap_reason") or "")
|
||||||
|
cap = html.escape(cap_reason)
|
||||||
|
sk = data.get("skips", {}) or {}
|
||||||
color = level_color(level)
|
color = level_color(level)
|
||||||
flags = data.get("flags", {}) or {}
|
flags = data.get("flags", {}) or {}
|
||||||
flag_bits = []
|
flag_bits = []
|
||||||
@ -132,7 +203,7 @@ def render_card_html(data: dict, screenshot_rel: str | None = "screenshot.png")
|
|||||||
if show_shot
|
if show_shot
|
||||||
else '<div class="shot noshot">no screenshot</div>'
|
else '<div class="shot noshot">no screenshot</div>'
|
||||||
)
|
)
|
||||||
rows = _stage_rows(data.get("stages", []))
|
rows = _stage_rows(data.get("stages", [])) + "\n" + _skip_rows(sk)
|
||||||
return f"""<!doctype html><html><head><meta charset="utf-8"><style>
|
return f"""<!doctype html><html><head><meta charset="utf-8"><style>
|
||||||
*{{box-sizing:border-box}}
|
*{{box-sizing:border-box}}
|
||||||
body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;background:#0d1117;color:#c9d1d9}}
|
body{{margin:0;font-family:system-ui,-apple-system,Segoe UI,sans-serif;background:#0d1117;color:#c9d1d9}}
|
||||||
@ -157,6 +228,7 @@ tr.stage td{{padding-top:.5rem;border-bottom:1px solid #30363d}}
|
|||||||
.test .tmark{{width:1.4rem;text-align:center}}
|
.test .tmark{{width:1.4rem;text-align:center}}
|
||||||
.test .tname{{color:#c9d1d9;font-family:ui-monospace,monospace;font-size:.8rem}}
|
.test .tname{{color:#c9d1d9;font-family:ui-monospace,monospace;font-size:.8rem}}
|
||||||
.test .tms{{text-align:right;color:#8b949e;font-size:.74rem;width:5rem}}
|
.test .tms{{text-align:right;color:#8b949e;font-size:.74rem;width:5rem}}
|
||||||
|
tr.skipreason td{{color:#8b949e;font-size:.78rem;font-style:italic;padding-top:0;padding-bottom:.45rem;border-bottom:1px solid #21262d}}
|
||||||
.shot{{width:360px;flex:none;border:1px solid #30363d;border-radius:8px;overflow:hidden;background:#0d1117}}
|
.shot{{width:360px;flex:none;border:1px solid #30363d;border-radius:8px;overflow:hidden;background:#0d1117}}
|
||||||
.shot img{{width:100%;display:block}}
|
.shot img{{width:100%;display:block}}
|
||||||
.shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}}
|
.shot.noshot{{display:flex;align-items:center;justify-content:center;height:225px;color:#8b949e;font-size:.85rem}}
|
||||||
@ -167,7 +239,7 @@ tr.stage td{{padding-top:.5rem;border-bottom:1px solid #30363d}}
|
|||||||
<div class="hd">{FLOWER_SVG}
|
<div class="hd">{FLOWER_SVG}
|
||||||
<div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div>
|
<div class="title"><h1>{recipe}</h1><span class="ver">{version}</span></div>
|
||||||
<div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div>
|
<div class="lvl"><span class="num">{level}</span><span class="lbl">level</span></div></div>
|
||||||
<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (6)"}</div>
|
<div class="cap">{("<b>capped:</b> " + cap) if cap else "<b>full clean climb</b> — top level (4)"}</div>
|
||||||
<div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div>
|
<div class="body"><div class="tbl"><table>{rows}</table></div>{shot_html}</div>
|
||||||
<div class="flags">{"".join(flag_bits)}</div>
|
<div class="flags">{"".join(flag_bits)}</div>
|
||||||
</div></body></html>"""
|
</div></body></html>"""
|
||||||
|
|||||||
@ -5,37 +5,39 @@ YunoHost semantics: **a gap caps the level** — you only earn level L if every
|
|||||||
PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
|
PASS. The first rung that is not a clean PASS (a real FAIL *or* genuinely N/A for this recipe) stops
|
||||||
the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
|
the climb; `cap_reason` records why. This is deliberately conservative: presentation must NEVER make
|
||||||
a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
|
a run look greener than its tests (plan §6 cardinal guardrail), so an N/A rung caps just like a fail
|
||||||
(the L5 example in §4.1 — "recipes with no integration surface cap at L4 by definition" — is exactly
|
— with a recorded reason so the level is *fair*, not inflated.
|
||||||
this: N/A caps, with a recorded reason so the level is *fair*, not inflated).
|
|
||||||
|
|
||||||
The ladder (§4.1):
|
The ladder is the FOUR essential rungs every recipe is held to:
|
||||||
L0 — install failed / app never became healthy.
|
L0 — install failed / app never became healthy.
|
||||||
L1 — Installs: deploys + passes health/readiness.
|
L1 — Installs: deploys + passes health/readiness.
|
||||||
L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
|
L2 — Upgrades: previous published version → PR version, stays healthy, data intact.
|
||||||
L3 — Backup/restore: seeded data survives backup → wipe → restore.
|
L3 — Backup/restore: seeded data survives backup → wipe → restore.
|
||||||
L4 — Functional: recipe-specific functional tests pass.
|
L4 — Functional: recipe-specific functional tests pass.
|
||||||
L5 — Integration: SSO/OIDC + cross-app integration tests pass.
|
|
||||||
L6 — Recipe-local: the recipe repo's own tests/ (D4) pass and are merged.
|
Integration (SSO/OIDC + cross-app) and recipe-local (the recipe repo's own tests/) are **OPTIONAL**
|
||||||
|
capabilities — they are NOT part of the level ladder and never cap it. They still run when present
|
||||||
|
(and SSO is still enforced for the run VERDICT via the deps/SSO checks in run_recipe_ci.py), but a
|
||||||
|
recipe without an SSO surface or without repo-local tests is simply not penalised on the level.
|
||||||
|
|
||||||
This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
|
This module is PURE (no I/O) so it is cheaply unit-testable and the Adversary can re-run the unit
|
||||||
test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
|
test cold (`cc-ci-run -m pytest tests/unit/test_level.py -q`). The orchestrator
|
||||||
(`run_recipe_ci.py`) is responsible for translating its raw per-tier results + deps/SSO signals into
|
(`run_recipe_ci.py`) is responsible for translating its raw per-tier results into the rung-status
|
||||||
the rung-status dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
|
dict this function consumes; that mapping is documented in DECISIONS.md (Phase 3).
|
||||||
|
|
||||||
Rung status vocabulary (each rung ∈ these three):
|
Rung status vocabulary (each rung ∈ these three):
|
||||||
"pass" — the rung was exercised and passed.
|
"pass" — the rung was exercised and passed.
|
||||||
"fail" — the rung was exercised and failed.
|
"fail" — the rung was exercised and failed.
|
||||||
"na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
|
"na" — the rung does not apply to this recipe (e.g. only one published version → no upgrade;
|
||||||
not backup-capable; no SSO/integration surface; no recipe-local tests). N/A is NOT a
|
not backup-capable). N/A is NOT a failure, but it DOES cap the climb (with a distinct
|
||||||
failure, but it DOES cap the climb (with a distinct cap_reason) so the level never
|
cap_reason) so the level never overstates what was actually verified.
|
||||||
overstates what was actually verified.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
|
# The climbable rungs in ascending order. install (L1) is the foundation; L0 means install itself
|
||||||
# did not pass. Each later rung requires every earlier rung to be a clean PASS.
|
# did not pass. Each later rung requires every earlier rung to be a clean PASS. These four are the
|
||||||
RUNGS = ("install", "upgrade", "backup_restore", "functional", "integration", "recipe_local")
|
# ESSENTIAL rungs — integration/recipe-local are optional and deliberately NOT in this tuple.
|
||||||
|
RUNGS = ("install", "upgrade", "backup_restore", "functional")
|
||||||
|
|
||||||
# Human-readable label per rung level, for cap_reason + the summary card.
|
# Human-readable label per rung level, for cap_reason + the summary card.
|
||||||
RUNG_LABEL = {
|
RUNG_LABEL = {
|
||||||
@ -43,22 +45,20 @@ RUNG_LABEL = {
|
|||||||
2: "upgrade (prev published → PR)",
|
2: "upgrade (prev published → PR)",
|
||||||
3: "backup/restore (data integrity)",
|
3: "backup/restore (data integrity)",
|
||||||
4: "functional (recipe-specific tests)",
|
4: "functional (recipe-specific tests)",
|
||||||
5: "integration (SSO/OIDC + cross-app)",
|
|
||||||
6: "recipe-local (recipe repo tests/)",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VALID = {"pass", "fail", "na"}
|
VALID = {"pass", "fail", "na"}
|
||||||
|
|
||||||
|
|
||||||
def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
|
def compute_level(rungs: dict[str, str]) -> tuple[int, str]:
|
||||||
"""Map a rung-status dict → (level 0..6, cap_reason).
|
"""Map a rung-status dict → (level 0..4, cap_reason).
|
||||||
|
|
||||||
`rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
|
`rungs` must contain a status in {"pass","fail","na"} for every name in RUNGS. The level is the
|
||||||
highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
|
highest L such that rungs[1..L] are all "pass"; the first non-"pass" rung caps the climb. L0 is
|
||||||
returned when the install rung itself is not "pass" (install failed / never healthy).
|
returned when the install rung itself is not "pass" (install failed / never healthy).
|
||||||
|
|
||||||
cap_reason explains where the climb stopped:
|
cap_reason explains where the climb stopped:
|
||||||
- "" (empty) when the recipe earned the top rung (L6, full clean climb).
|
- "" (empty) when the recipe earned the top rung (L4, full clean climb).
|
||||||
- "L<k> <label> FAILED" when a rung was exercised and failed.
|
- "L<k> <label> FAILED" when a rung was exercised and failed.
|
||||||
- "L<k> <label> N/A" when a rung does not apply to this recipe.
|
- "L<k> <label> N/A" when a rung does not apply to this recipe.
|
||||||
Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
|
Returns the reason for the FIRST rung that stopped the climb (the binding constraint).
|
||||||
|
|||||||
@ -2,7 +2,14 @@
|
|||||||
|
|
||||||
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
|
Turns a run's per-tier pytest outcomes into a single `results.json` artifact carrying, per the plan:
|
||||||
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
|
{ recipe, version, pr, ref, run_id, finished, stages:[{name,status,tests:[{name,status,ms}]}],
|
||||||
level, level_cap_reason, rungs, flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
|
level, level_cap_reason, level_cap_rung, rungs,
|
||||||
|
skips:{intentional:{rung:reason}, unintentional:[rung]},
|
||||||
|
flags:{clean_teardown,no_secret_leak}, screenshot, summary_card }
|
||||||
|
|
||||||
|
`skips` splits the N/A (skipped) rungs by a simple rule: a skip is INTENTIONAL iff the recipe lists
|
||||||
|
it (with a reason) in `recipe_meta.EXPECTED_NA = {rung: reason}`; any rung skipped but not listed is
|
||||||
|
UNINTENTIONAL (a coverage gap to fill or declare). Skips still cap the level either way — the harness
|
||||||
|
never claims a rung it did not verify; this only labels *why* a skip happened.
|
||||||
|
|
||||||
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
|
The per-test breakdown comes from JUnit XML emitted by each tier's pytest invocation (`--junitxml`),
|
||||||
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
|
parsed here with the stdlib (no new dep). The integer **level** is computed by harness.level from a
|
||||||
@ -127,41 +134,24 @@ def collect_stages(records: list[dict]) -> list[dict]:
|
|||||||
return stages
|
return stages
|
||||||
|
|
||||||
|
|
||||||
def _has_repo_local(records: list[dict]) -> bool:
|
|
||||||
return any(r.get("source") == "repo-local" for r in records)
|
|
||||||
|
|
||||||
|
|
||||||
def _repo_local_passed(records: list[dict]) -> bool:
|
|
||||||
repo = [r for r in records if r.get("source") == "repo-local"]
|
|
||||||
return bool(repo) and all(r.get("rc", 1) == 0 for r in repo)
|
|
||||||
|
|
||||||
|
|
||||||
def derive_rungs(
|
def derive_rungs(
|
||||||
results: dict[str, str],
|
results: dict[str, str],
|
||||||
*,
|
*,
|
||||||
backup_capable: bool,
|
backup_capable: bool,
|
||||||
declared: list[str] | None,
|
|
||||||
deps_ready: bool,
|
|
||||||
sso_unverified: bool,
|
|
||||||
has_custom: bool,
|
has_custom: bool,
|
||||||
has_repo_local: bool,
|
|
||||||
repo_local_passed: bool,
|
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
"""Translate the orchestrator's tier results + deps/SSO signals into the rung-status dict
|
"""Translate the orchestrator's tier results into the rung-status dict harness.level consumes —
|
||||||
harness.level consumes. Documented in DECISIONS.md (Phase 3). Conservative by design — never
|
the FOUR essential rungs only. Conservative by design — never reports a rung 'pass' it can't
|
||||||
reports a rung 'pass' it can't substantiate (cardinal guardrail: presentation never inflates).
|
substantiate (cardinal guardrail: presentation never inflates).
|
||||||
|
|
||||||
L1 install : install tier pass.
|
L1 install : install tier pass.
|
||||||
L2 upgrade : upgrade tier (skip → N/A: only one published version).
|
L2 upgrade : upgrade tier (skip → N/A: only one published version).
|
||||||
L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable).
|
L3 backup/res : backup AND restore tiers pass (N/A if not backup-capable).
|
||||||
L4 functional : the recipe-specific functional (non-deps) tests pass — the custom tier, minus
|
L4 functional : recipe-specific functional tests pass — the custom tier. N/A if none ran.
|
||||||
its SSO/integration tests. N/A if the recipe has no custom tests at all.
|
|
||||||
L5 integration: SSO/OIDC + cross-app. Applies ONLY if the recipe declares deps (else N/A — the
|
Integration (SSO/OIDC) and recipe-local are OPTIONAL and intentionally NOT rungs here — they
|
||||||
"no integration surface caps at L4" rule, §4.1). pass iff deps wired
|
never cap the level (SSO is still enforced for the run VERDICT in run_recipe_ci.py).
|
||||||
(deps_ready) and not sso_unverified and the custom tier didn't fail.
|
|
||||||
L6 recipe-loc : the recipe repo's own tests/ (repo-local source) ran and passed (N/A if none).
|
|
||||||
"""
|
"""
|
||||||
declared = declared or []
|
|
||||||
rungs: dict[str, str] = {}
|
rungs: dict[str, str] = {}
|
||||||
rungs["install"] = level_mod.tier_to_rung(results.get("install"))
|
rungs["install"] = level_mod.tier_to_rung(results.get("install"))
|
||||||
rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade"))
|
rungs["upgrade"] = level_mod.tier_to_rung(results.get("upgrade"))
|
||||||
@ -170,36 +160,34 @@ def derive_rungs(
|
|||||||
)
|
)
|
||||||
|
|
||||||
custom = results.get("custom")
|
custom = results.get("custom")
|
||||||
# Functional rung (L4): the non-deps custom tests.
|
|
||||||
if not has_custom or custom == "skip" or custom is None:
|
if not has_custom or custom == "skip" or custom is None:
|
||||||
rungs["functional"] = "na"
|
rungs["functional"] = "na"
|
||||||
elif custom == "fail":
|
elif custom == "fail":
|
||||||
# A custom test failed. With declared deps we cannot cheaply tell functional-vs-SSO apart, so
|
|
||||||
# conservatively fail the functional rung (caps at L3) — never inflate.
|
|
||||||
rungs["functional"] = "fail"
|
rungs["functional"] = "fail"
|
||||||
else: # custom == "pass"
|
else: # custom == "pass"
|
||||||
rungs["functional"] = "pass"
|
rungs["functional"] = "pass"
|
||||||
|
|
||||||
# Integration rung (L5): only recipes with an SSO/integration surface (declared deps) can climb.
|
|
||||||
if not declared:
|
|
||||||
rungs["integration"] = "na"
|
|
||||||
elif sso_unverified or not deps_ready or custom == "fail":
|
|
||||||
# SSO not wired/verified, or a custom test failed → integration not verified.
|
|
||||||
rungs["integration"] = "fail"
|
|
||||||
elif custom == "pass":
|
|
||||||
rungs["integration"] = "pass"
|
|
||||||
else:
|
|
||||||
# declared deps but no custom tests ran — can't claim integration verified
|
|
||||||
rungs["integration"] = "na"
|
|
||||||
|
|
||||||
# Recipe-local rung (L6).
|
|
||||||
if not has_repo_local:
|
|
||||||
rungs["recipe_local"] = "na"
|
|
||||||
else:
|
|
||||||
rungs["recipe_local"] = "pass" if repo_local_passed else "fail"
|
|
||||||
return rungs
|
return rungs
|
||||||
|
|
||||||
|
|
||||||
|
def skips(rungs: dict[str, str], expected_na: dict | None) -> dict:
|
||||||
|
"""Split the SKIPPED (N/A) rungs into intentional vs unintentional (operator model).
|
||||||
|
|
||||||
|
A recipe lists the rungs it intentionally skips, each with a reason, in
|
||||||
|
`recipe_meta.EXPECTED_NA = {rung: reason}`. The rule is dead simple: a skipped rung is
|
||||||
|
**intentional** iff it is in that list; any rung that is skipped and NOT in the list is
|
||||||
|
**unintentional** (a coverage gap someone should either fill or declare). N/A still caps the
|
||||||
|
level either way — the harness never claims a rung it did not verify — this only labels *why* a
|
||||||
|
skip happened. Returns:
|
||||||
|
{ "intentional": {rung: reason, ...}, # skipped AND declared in EXPECTED_NA
|
||||||
|
"unintentional": [rung, ...] } # skipped but NOT declared
|
||||||
|
"""
|
||||||
|
expected = {str(k): str(v) for k, v in (expected_na or {}).items()}
|
||||||
|
na = [r for r, st in rungs.items() if st == "na"]
|
||||||
|
intentional = {r: expected[r] for r in na if r in expected}
|
||||||
|
unintentional = sorted(r for r in na if r not in expected)
|
||||||
|
return {"intentional": intentional, "unintentional": unintentional}
|
||||||
|
|
||||||
|
|
||||||
def build_results(
|
def build_results(
|
||||||
*,
|
*,
|
||||||
recipe: str,
|
recipe: str,
|
||||||
@ -209,30 +197,24 @@ def build_results(
|
|||||||
records: list[dict],
|
records: list[dict],
|
||||||
results: dict[str, str],
|
results: dict[str, str],
|
||||||
backup_capable: bool,
|
backup_capable: bool,
|
||||||
declared: list[str] | None,
|
|
||||||
deps_ready: bool,
|
|
||||||
sso_unverified: bool,
|
|
||||||
clean_teardown: bool,
|
clean_teardown: bool,
|
||||||
no_secret_leak: bool,
|
no_secret_leak: bool,
|
||||||
finished_ts: float | None,
|
finished_ts: float | None,
|
||||||
screenshot: str | None = None,
|
screenshot: str | None = None,
|
||||||
summary_card: str | None = None,
|
summary_card: str | None = None,
|
||||||
|
expected_na: dict | None = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
|
"""Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator
|
||||||
stamps it) so this stays pure and deterministic for unit tests."""
|
stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's
|
||||||
|
declared intentional-skip map (recipe_meta.EXPECTED_NA) used to distinguish a deliberate skip from
|
||||||
|
accidentally-missing coverage."""
|
||||||
stages = collect_stages(records)
|
stages = collect_stages(records)
|
||||||
has_custom = any(r["tier"] == "custom" for r in records)
|
has_custom = any(r["tier"] == "custom" for r in records)
|
||||||
rungs = derive_rungs(
|
rungs = derive_rungs(results, backup_capable=backup_capable, has_custom=has_custom)
|
||||||
results,
|
|
||||||
backup_capable=backup_capable,
|
|
||||||
declared=declared,
|
|
||||||
deps_ready=deps_ready,
|
|
||||||
sso_unverified=sso_unverified,
|
|
||||||
has_custom=has_custom,
|
|
||||||
has_repo_local=_has_repo_local(records),
|
|
||||||
repo_local_passed=_repo_local_passed(records),
|
|
||||||
)
|
|
||||||
lvl, cap_reason = level_mod.compute_level(rungs)
|
lvl, cap_reason = level_mod.compute_level(rungs)
|
||||||
|
# The rung that capped the climb (lowest non-pass), or None on a full climb — lets a consumer
|
||||||
|
# (card/badge) tell whether the cap was an intentional skip, an unintentional one, or a failure.
|
||||||
|
capped = level_mod.RUNGS[lvl] if cap_reason else None
|
||||||
return {
|
return {
|
||||||
"schema": 1,
|
"schema": 1,
|
||||||
"run_id": run_id(),
|
"run_id": run_id(),
|
||||||
@ -243,7 +225,9 @@ def build_results(
|
|||||||
"finished": finished_ts,
|
"finished": finished_ts,
|
||||||
"level": lvl,
|
"level": lvl,
|
||||||
"level_cap_reason": cap_reason,
|
"level_cap_reason": cap_reason,
|
||||||
|
"level_cap_rung": capped,
|
||||||
"rungs": rungs,
|
"rungs": rungs,
|
||||||
|
"skips": skips(rungs, expected_na),
|
||||||
"stages": stages,
|
"stages": stages,
|
||||||
"results": results,
|
"results": results,
|
||||||
"flags": {
|
"flags": {
|
||||||
|
|||||||
@ -200,6 +200,7 @@ def _load_meta(recipe: str) -> dict:
|
|||||||
for k in list(meta) + [
|
for k in list(meta) + [
|
||||||
"BACKUP_CAPABLE",
|
"BACKUP_CAPABLE",
|
||||||
"SKIP_GENERIC",
|
"SKIP_GENERIC",
|
||||||
|
"EXPECTED_NA",
|
||||||
"OIDC_AT_INSTALL",
|
"OIDC_AT_INSTALL",
|
||||||
"READY_PROBE",
|
"READY_PROBE",
|
||||||
"UPGRADE_BASE_VERSION",
|
"UPGRADE_BASE_VERSION",
|
||||||
@ -1224,7 +1225,6 @@ def main() -> int:
|
|||||||
# a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ----
|
# a failure here NEVER changes `overall` (R7 — cosmetics never block the pipeline). ----
|
||||||
data: dict | None = None
|
data: dict | None = None
|
||||||
try:
|
try:
|
||||||
sso_unverified = sso_dep_unverified(declared, deps_ready, requires_deps_skipped)
|
|
||||||
clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error
|
clean_teardown = (deploy_count == expected_deploy_count) and not dep_teardown_error
|
||||||
data = results_mod.build_results(
|
data = results_mod.build_results(
|
||||||
recipe=recipe,
|
recipe=recipe,
|
||||||
@ -1234,13 +1234,11 @@ def main() -> int:
|
|||||||
records=records,
|
records=records,
|
||||||
results=results,
|
results=results,
|
||||||
backup_capable=backup_cap,
|
backup_capable=backup_cap,
|
||||||
declared=declared,
|
|
||||||
deps_ready=deps_ready,
|
|
||||||
sso_unverified=sso_unverified,
|
|
||||||
clean_teardown=clean_teardown,
|
clean_teardown=clean_teardown,
|
||||||
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
|
no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact
|
||||||
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
|
screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded
|
||||||
finished_ts=time.time(),
|
finished_ts=time.time(),
|
||||||
|
expected_na=meta.get("EXPECTED_NA"), # declared intentional-skip map (recipe_meta)
|
||||||
)
|
)
|
||||||
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
|
# Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7).
|
||||||
blob = json.dumps(data)
|
blob = json.dumps(data)
|
||||||
@ -1257,6 +1255,15 @@ def main() -> int:
|
|||||||
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
|
f"{' — ' + data['level_cap_reason'] if data['level_cap_reason'] else ''})",
|
||||||
flush=True,
|
flush=True,
|
||||||
)
|
)
|
||||||
|
# Surface UNINTENTIONAL skips in the CI log (non-blocking, R7): a rung that was skipped (N/A)
|
||||||
|
# but is not in the recipe's intentional list — either add the missing coverage or declare it.
|
||||||
|
for rung in data.get("skips", {}).get("unintentional", []):
|
||||||
|
print(
|
||||||
|
f"⚠ coverage: rung '{rung}' was skipped (N/A) but is not declared intentional — add "
|
||||||
|
f"the missing test/label, or list it in tests/{recipe}/recipe_meta.py "
|
||||||
|
f"EXPECTED_NA = {{'{rung}': '<why>'}}.",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
|
except Exception as e: # noqa: BLE001 — results assembly is cosmetic; never fail a run on it (R7)
|
||||||
print(
|
print(
|
||||||
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
|
f"!! results.json assembly failed (non-fatal, verdict unaffected): {_scrub(str(e))}",
|
||||||
@ -1275,8 +1282,19 @@ def main() -> int:
|
|||||||
with open(html_path, "w", encoding="utf-8") as f:
|
with open(html_path, "w", encoding="utf-8") as f:
|
||||||
f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
|
f.write(card_mod.render_card_html(data, screenshot_rel=data.get("screenshot")))
|
||||||
png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
|
png = card_mod.render_card_png(html_path, os.path.join(run_artifact_dir, "summary.png"))
|
||||||
|
capped = data.get("level_cap_rung")
|
||||||
|
sk = data.get("skips", {})
|
||||||
|
cap_skip = (
|
||||||
|
"intentional" if capped in (sk.get("intentional") or {})
|
||||||
|
else "unintentional" if capped in (sk.get("unintentional") or [])
|
||||||
|
else ""
|
||||||
|
)
|
||||||
with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
|
with open(os.path.join(run_artifact_dir, "badge.svg"), "w", encoding="utf-8") as f:
|
||||||
f.write(card_mod.level_badge_svg(data["level"], data.get("level_cap_reason", "")))
|
f.write(
|
||||||
|
card_mod.level_badge_svg(
|
||||||
|
data["level"], data.get("level_cap_reason", ""), cap_skip
|
||||||
|
)
|
||||||
|
)
|
||||||
print(
|
print(
|
||||||
f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
|
f"summary card {'rendered ' + png if png else '(PNG render unavailable)'} + "
|
||||||
f"badge.svg written into {run_artifact_dir}",
|
f"badge.svg written into {run_artifact_dir}",
|
||||||
|
|||||||
19
tests/custom-html-bkp-bad/ops.py
Normal file
19
tests/custom-html-bkp-bad/ops.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
"""custom-html-bkp-bad — lifecycle ops for bad-backup/bad-restore RED canaries.
|
||||||
|
|
||||||
|
Intentionally has NO pre_backup hook: the marker is never seeded before backup,
|
||||||
|
so the backup snapshot has no ci-marker.txt. pre_restore writes "mutated" so that if
|
||||||
|
restore DOES bring back the snapshot, the marker is gone/still-mutated → test fails.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from harness import lifecycle
|
||||||
|
|
||||||
|
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def pre_restore(domain: str, meta: dict) -> None:
    """Overwrite the marker file with 'mutated' inside the app before the restore runs.

    The backup snapshot never contained the marker (this recipe has no pre_backup hook to seed
    it), so after the restore the marker is either MISSING or still 'mutated' — never
    'original'. test_restore_returns_state therefore FAILS, which is the restore=RED outcome this
    canary exists to produce.

    `meta` is not used by this hook.
    """
    mutate_cmd = ["sh", "-c", f"echo mutated > {MARKER_PATH}"]
    lifecycle.exec_in_app(domain, mutate_cmd)
|
||||||
5
tests/custom-html-bkp-bad/recipe_meta.py
Normal file
5
tests/custom-html-bkp-bad/recipe_meta.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# custom-html-bkp-bad — regression fixture for bad-backup canary.
|
||||||
|
# This recipe is custom-html WITHOUT backupbot labels. Setting BACKUP_CAPABLE=True here forces the
|
||||||
|
# harness to run the backup tier; the recipe itself has no backupbot service, so
|
||||||
|
# `abra app backup create` produces no snapshot → test_backup_artifact fails → backup tier RED.
|
||||||
|
BACKUP_CAPABLE = True
|
||||||
28
tests/custom-html-bkp-bad/test_backup.py
Normal file
28
tests/custom-html-bkp-bad/test_backup.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
"""custom-html-bkp-bad — BACKUP assertion (bad-backup RED canary).
|
||||||
|
|
||||||
|
This recipe has no ops.py::pre_backup, so ci-marker.txt is NEVER seeded before the backup.
|
||||||
|
Asserting its presence here causes backup tier RED — proving the server catches a recipe that
|
||||||
|
claims backup support but doesn't actually back up the expected data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||||
|
from harness import lifecycle # noqa: E402
|
||||||
|
|
||||||
|
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def test_backup_captures_state(live_app):
    """Assert the pre-backup marker exists in the app and reads 'original'.

    custom-html-bkp-bad has no ops.py::pre_backup to seed the marker, so the file does not exist
    at backup time. The shell command then falls back to printing 'MISSING', the assertion
    fails, and the backup tier goes RED. This models a recipe that declares backup capability but
    omits the data-seeding hook.
    """
    read_marker = ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"]
    marker = lifecycle.exec_in_app(live_app, read_marker).strip()
    assert marker == "original", (
        f"backup did not capture the expected marker at {MARKER_PATH}: got {marker!r}. "
        "Expected 'original' (seeded by pre_backup). If the marker is 'MISSING', the pre_backup "
        "hook was not run — this is the intended failure for the bad-backup RED canary."
    )
|
||||||
25
tests/custom-html-bkp-bad/test_restore.py
Normal file
25
tests/custom-html-bkp-bad/test_restore.py
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
"""custom-html-bkp-bad — RESTORE assertion (bad-restore RED canary).
|
||||||
|
|
||||||
|
pre_restore seeds 'mutated' to ci-marker.txt. The backup snapshot has no ci-marker.txt
|
||||||
|
(never seeded by pre_backup). After restore, the marker is either MISSING or 'mutated' —
|
||||||
|
never 'original' — so this assertion FAILS → restore tier RED.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||||
|
from harness import lifecycle # noqa: E402
|
||||||
|
|
||||||
|
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def test_restore_returns_state(live_app):
    """Assert the marker reads 'original' after the restore (expected to FAIL for this canary).

    Reads the marker inside the app; the shell command prints 'MISSING' when the file is absent.
    The snapshot was taken without a marker, so the restore cannot bring 'original' back.
    """
    read_marker = ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"]
    marker = lifecycle.exec_in_app(live_app, read_marker).strip()
    assert marker == "original", (
        f"restore did not return the pre-mutation (backed-up) state: got {marker!r}. "
        "Expected 'original'. The backup had no marker (not seeded by pre_backup), so "
        "restore cannot recover it — this is the intended failure for the bad-restore RED canary."
    )
|
||||||
15
tests/custom-html-rst-bad/ops.py
Normal file
15
tests/custom-html-rst-bad/ops.py
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
"""custom-html-rst-bad — lifecycle ops for bad-restore RED canary.
|
||||||
|
|
||||||
|
NO pre_backup hook: marker never seeded before backup → snapshot has no ci-marker.txt.
|
||||||
|
pre_restore writes "mutated". After restore, marker stays "mutated" (not in snapshot) → FAIL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from harness import lifecycle
|
||||||
|
|
||||||
|
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def pre_restore(domain: str, meta: dict) -> None:
    """Overwrite the marker file with 'mutated' inside the app before the restore runs.

    The snapshot holds no marker (there is no pre_backup hook), so the marker stays 'mutated'
    after the restore and the restore assertion fails. `meta` is not used by this hook.
    """
    mutate_cmd = ["sh", "-c", f"echo mutated > {MARKER_PATH}"]
    lifecycle.exec_in_app(domain, mutate_cmd)
|
||||||
3
tests/custom-html-rst-bad/recipe_meta.py
Normal file
3
tests/custom-html-rst-bad/recipe_meta.py
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# custom-html-rst-bad — regression fixture for bad-restore canary.
|
||||||
|
# BACKUP_CAPABLE=True forces the backup tier to run even though the recipe has no backupbot label.
|
||||||
|
BACKUP_CAPABLE = True
|
||||||
23
tests/custom-html-rst-bad/test_restore.py
Normal file
23
tests/custom-html-rst-bad/test_restore.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
"""custom-html-rst-bad — RESTORE assertion (bad-restore RED canary).
|
||||||
|
|
||||||
|
No pre_backup → backup snapshot has no ci-marker.txt. pre_restore writes "mutated".
|
||||||
|
After restore: marker is "mutated" (restore can't recover "original" — wasn't backed up) → FAIL.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner"))
|
||||||
|
from harness import lifecycle # noqa: E402
|
||||||
|
|
||||||
|
MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt"
|
||||||
|
|
||||||
|
|
||||||
|
def test_restore_returns_state(live_app):
    """Assert the marker reads 'original' after the restore (expected to FAIL for this canary).

    Reads the marker inside the app; the shell command prints 'MISSING' when the file is absent.
    The backup was taken without a marker, so 'original' can never come back.
    """
    read_marker = ["sh", "-c", f"cat {MARKER_PATH} 2>/dev/null || echo MISSING"]
    marker = lifecycle.exec_in_app(live_app, read_marker).strip()
    assert marker == "original", (
        f"restore did not return the pre-mutation (backed-up) state: got {marker!r}. "
        "Expected 'original'. The backup had no marker, so restore cannot recover it."
    )
|
||||||
87
tests/custom-html-tiny/functional/test_serves_content.py
Normal file
87
tests/custom-html-tiny/functional/test_serves_content.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
"""custom-html-tiny — recipe-specific functional test (static-web-server).
|
||||||
|
|
||||||
|
Proves the deployed static-web-server is *actually serving files from its `content` volume* with real
|
||||||
|
file-server semantics, not merely returning 200 from a Traefik fallback or a generic stub:
|
||||||
|
|
||||||
|
1. exact-byte round-trip — write a uniquely-named file with random content into the served volume,
|
||||||
|
fetch it over HTTPS, and assert the bytes come back verbatim. Non-vacuous: the content is random
|
||||||
|
per run, so only a server that reads this file off the volume can pass.
|
||||||
|
2. real 404 — a random non-existent path returns 404, proving directory/file semantics (a
|
||||||
|
200-everything stub or mis-routed host would not 404).
|
||||||
|
|
||||||
|
The recipe's image (joseluisq/static-web-server) is shell-less (scratch-based) and its content volume
|
||||||
|
is seeded via the install_steps.sh host-mountpoint mechanism — so this test writes its probe file the
|
||||||
|
same way (resolve the swarm volume's mountpoint with `docker volume inspect`, write directly) rather
|
||||||
|
than `docker exec`-ing in a container that has no shell.
|
||||||
|
|
||||||
|
Runs in the custom tier against the shared post-install deployment (the `live_app` fixture is its
|
||||||
|
per-run domain). Mirrors install_steps.sh: the app's content volume is named `<stack>_content`, where
|
||||||
|
`stack` is the domain with dots replaced by underscores; HTTP_SUBDIR is empty, so the volume root is
|
||||||
|
served at `/`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import os
|
||||||
|
import ssl
|
||||||
|
import subprocess
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
def _served_dir(domain: str) -> str:
    """Return the host mountpoint of the app's served `content` volume.

    The volume is named `<stack>_content`, where `<stack>` is `domain` with every dot replaced by
    an underscore (the same naming install_steps.sh uses). The mountpoint is resolved with
    `docker volume inspect`.

    Raises:
        subprocess.CalledProcessError: if `docker volume inspect` exits non-zero.
        AssertionError: if docker reports an empty mountpoint.
    """
    stack = domain.replace(".", "_")
    volume = f"{stack}_content"
    inspect_cmd = ["docker", "volume", "inspect", volume, "--format", "{{.Mountpoint}}"]
    proc = subprocess.run(inspect_cmd, capture_output=True, text=True, check=True)
    mountpoint = proc.stdout.strip()
    assert mountpoint, f"could not resolve mountpoint for volume {volume!r}"
    return mountpoint
|
||||||
|
|
||||||
|
|
||||||
|
def _get(url: str) -> tuple[int, bytes]:
    """GET `url` and return `(status, body)`.

    An HTTP error response (4xx/5xx) is returned as its code and body rather than raised, because
    callers assert on the status code. Other failures (e.g. connection errors) still propagate.

    TLS verification is deliberately switched off: the served wildcard cert is validated
    separately by the infra check, and only the app's response matters here.
    """
    insecure_ctx = ssl.create_default_context()
    # check_hostname must be cleared before verify_mode can be set to CERT_NONE.
    insecure_ctx.check_hostname = False
    insecure_ctx.verify_mode = ssl.CERT_NONE
    try:
        with urllib.request.urlopen(url, timeout=20, context=insecure_ctx) as response:
            status = response.status
            payload = response.read()
            return status, payload
    except urllib.error.HTTPError as http_err:
        return http_err.code, http_err.read()
|
||||||
|
|
||||||
|
|
||||||
|
def test_static_file_roundtrip_and_404(live_app):
    """Write a random file into the served volume → fetch it → bytes match; and a missing path 404s.

    The probe's name and content are random per run, so only a server that really reads this file
    off the content volume can return the expected bytes. The probe file is always removed
    afterwards, including when writing it fails part-way.
    """
    served = _served_dir(live_app)
    token = uuid.uuid4().hex
    name = f"ccci-probe-{token}.txt"
    body = f"cc-ci-functional-{token}\n".encode()
    path = os.path.join(served, name)
    try:
        # The write lives inside the try so that a failed or partial write (e.g. disk full) cannot
        # leave a stray probe file behind in the served volume.
        with open(path, "wb") as fh:
            fh.write(body)

        status, got = _get(f"https://{live_app}/{name}")
        assert status == 200, f"served probe file returned {status} (expected 200)"
        assert got == body, (
            f"content round-trip mismatch: served {got!r}, wrote {body!r} "
            "(static-web-server not serving the content volume?)"
        )

        # A random non-existent path must 404 — proves real static-file semantics, distinguishing a
        # working server from a 200-everything stub or a mis-routed Traefik fallback.
        miss_status, _ = _get(f"https://{live_app}/ccci-missing-{uuid.uuid4().hex}.txt")
        assert miss_status == 404, (
            f"missing path returned {miss_status} (expected 404 — generic 200-returner / mis-route?)"
        )
    finally:
        # Best-effort cleanup: the file may not exist if open() itself failed.
        with contextlib.suppress(OSError):
            os.remove(path)
|
||||||
@ -3,3 +3,14 @@
|
|||||||
# (DG5) is detected quickly instead of waiting the default 300s HTTP timeout.
|
# (DG5) is detected quickly instead of waiting the default 300s HTTP timeout.
|
||||||
DEPLOY_TIMEOUT = 120
|
DEPLOY_TIMEOUT = 120
|
||||||
HTTP_TIMEOUT = 90
|
HTTP_TIMEOUT = 90
|
||||||
|
|
||||||
|
# Rungs this recipe INTENTIONALLY skips, each with a reason. Any essential rung skipped (N/A) and NOT
|
||||||
|
# listed here is reported as an *unintentional* skip (a coverage gap to fill or declare). A skip still
|
||||||
|
# caps the level either way — the harness never claims a rung it did not verify; this only records
|
||||||
|
# that the skip is deliberate. (The level ladder is the four essential rungs install/upgrade/
|
||||||
|
# backup_restore/functional; integration + recipe-local are optional and not leveled.)
|
||||||
|
# custom-html-tiny is a stateless static-web-server, so it has no backup surface:
|
||||||
|
EXPECTED_NA = {
|
||||||
|
"backup_restore": "stateless static file server: serves an ephemeral content volume seeded at "
|
||||||
|
"deploy, with no persistent/user data to back up or restore (no backupbot.backup label)",
|
||||||
|
}
|
||||||
|
|||||||
136
tests/regression/README.md
Normal file
136
tests/regression/README.md
Normal file
@ -0,0 +1,136 @@
|
|||||||
|
# Regression canaries — E2E self-tests for the cc-ci server
|
||||||
|
|
||||||
|
A standing pytest suite that drives the **real** cc-ci lifecycle harness against pinned canary
|
||||||
|
recipes and verifies both halves of the server's job:
|
||||||
|
|
||||||
|
1. **Good canaries** — healthy apps are reported GREEN (install passes; upgrade and backup/restore pass, or are skipped where the recipe has no such surface).
|
||||||
|
2. **Bad canary** — broken apps are caught RED; a false-green makes the regression test itself fail.
|
||||||
|
|
||||||
|
These tests run the full cold lifecycle on the live cc-ci server. They are **slow** (minutes per
|
||||||
|
canary) and **opt-in** — kept out of the per-commit fast path by the `canary` marker.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to run
|
||||||
|
|
||||||
|
Run on the cc-ci server (abra + Docker + Swarm required):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh cc-ci
|
||||||
|
cd /root/cc-ci # or wherever the repo is checked out
|
||||||
|
cc-ci-run python -m pytest tests/regression/ -m canary -v
|
||||||
|
```
|
||||||
|
|
||||||
|
Or a single canary:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cc-ci-run python -m pytest tests/regression/ -m canary -k good-simple -v
|
||||||
|
```
|
||||||
|
|
||||||
|
From the orchestrator:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh cc-ci "cd /root/cc-ci && cc-ci-run python -m pytest tests/regression/ -m canary -v"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Canaries
|
||||||
|
|
||||||
|
| ID | Recipe | Purpose | Expected verdict |
|
||||||
|
|----|--------|---------|-----------------|
|
||||||
|
| `good-simple` | `custom-html-tiny` | Minimal static server — fast signal | GREEN |
|
||||||
|
| `good-significant` | `lasuite-docs` | Multi-service (backend + Postgres + Collabora + OIDC) | GREEN |
|
||||||
|
| `bad-false-green` | `custom-html` @ `v5-stale-docroot` | App is UP but serves wrong Content-Type — catches false-green | RED |
|
||||||
|
|
||||||
|
### Why the bad canary exists
|
||||||
|
|
||||||
|
The scariest regression is a **false-green**: the server reports PASS while the app is broken.
|
||||||
|
We already saw a fabricated full-PASS during the build. The `bad-false-green` canary pins a known-
|
||||||
|
broken fixture (`v5-stale-docroot`: nginx serves `.txt` as `application/octet-stream`). The
|
||||||
|
harness's `test_content_type_html_and_txt` catches this and returns RED (build #75 was RED for
|
||||||
|
exactly this fixture).
|
||||||
|
|
||||||
|
The regression test asserts `rc != 0`. If the harness ever wrongly returns green for this fixture,
|
||||||
|
that assert fires — false-green is caught before any merge.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What each canary verifies
|
||||||
|
|
||||||
|
### Per-tier semantic assertions (the "teeth")
|
||||||
|
|
||||||
|
The tests assert MORE than the harness exit code: they check that **specific named assertions**
|
||||||
|
ran and got the expected result. This guards against a different failure mode — a tier that
|
||||||
|
nominally "passes" because the assertion was silently removed or made vacuous.
|
||||||
|
|
||||||
|
| Stage | Test name | What it proves |
|
||||||
|
|-------|-----------|---------------|
|
||||||
|
| install | `test_serving` | Generic HTTP readiness check actually ran |
|
||||||
|
| install | `test_serving_and_frontend` | Lasuite-docs frontend (SPA shell) actually loaded |
|
||||||
|
| custom | `test_content_type` | Content-type assertion actually ran (bad canary only) |
|
||||||
|
|
||||||
|
If a tier assertion is removed: the named test disappears from `results.json` → the semantic
|
||||||
|
check fires → the regression suite catches the removal.
|
||||||
|
|
||||||
|
### Additional structural assertions (good canaries)
|
||||||
|
|
||||||
|
- `install` tier: "pass" (not fail, not skip)
|
||||||
|
- No tier is "fail" (skips acceptable for recipes without backup/custom tests)
|
||||||
|
- `flags.clean_teardown = True` (no leftover containers/volumes/secrets)
|
||||||
|
- `flags.no_secret_leak = True` (no secret value in the results artifact)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Cadence policy
|
||||||
|
|
||||||
|
**Do NOT run on every commit or PR.** These are slow and resource-heavy. Run them:
|
||||||
|
|
||||||
|
- Before a **release** of the cc-ci server (after a batch of server changes).
|
||||||
|
- As a **polishing pass** or pre-merge check for significant server refactors.
|
||||||
|
- On-demand when you suspect a regression: `cc-ci-run python -m pytest tests/regression/ -m canary -v`.
|
||||||
|
|
||||||
|
They are NOT wired to the per-commit Drone pipeline. If adding a `!testme`-style trigger for the
|
||||||
|
cc-ci repo, gate it behind a deliberate label (e.g. `run-canaries`) — not an automatic run on
|
||||||
|
every push.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## How to add a canary
|
||||||
|
|
||||||
|
1. Identify a recipe that is already deployable and has pinned version tags.
|
||||||
|
2. Decide the expected verdict (GREEN or RED) and which tier assertions have teeth.
|
||||||
|
3. Add an entry to `CANARIES` in `test_canaries.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
{
|
||||||
|
"id": "good-myrecipe",
|
||||||
|
"recipe": "my-recipe",
|
||||||
|
"src": "recipe-maintainers/my-recipe",
|
||||||
|
"ref": "<pinned-sha>", # pin to a specific commit for stability
|
||||||
|
"expected_green": True,
|
||||||
|
"stage_pass_checks": [
|
||||||
|
("install", "test_serving"), # verify this named test ran and passed
|
||||||
|
],
|
||||||
|
"stage_fail_checks": [],
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Run the canary once to confirm it passes:
|
||||||
|
`cc-ci-run python -m pytest tests/regression/ -m canary -k good-myrecipe -v`
|
||||||
|
|
||||||
|
5. Update the pin comment with the date and the recipe version it was pinned at.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pin maintenance
|
||||||
|
|
||||||
|
Canary refs are pinned to specific SHAs for stability. When a recipe publishes a new release:
|
||||||
|
|
||||||
|
1. Update the `"ref"` SHA in the canary definition (use the new main-branch HEAD).
|
||||||
|
2. Update the pin comment with the new date/version.
|
||||||
|
3. Re-run the canary to confirm GREEN before committing the pin update.
|
||||||
|
|
||||||
|
The bad canary (`v5-stale-docroot`) is a stable fixture branch — update only if the branch is
|
||||||
|
deleted. If deleted, recreate the pattern: an app that is up + passes lifecycle tiers but fails
|
||||||
|
one functional assertion.
|
||||||
106
tests/regression/conftest.py
Normal file
106
tests/regression/conftest.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
"""Shared fixtures and helpers for E2E canary regression tests.
|
||||||
|
|
||||||
|
The regression tests call the real cc-ci harness (run_recipe_ci.py) as a subprocess and assert on
|
||||||
|
its outputs (exit code, results.json). They run ON the cc-ci server, not the orchestrator — abra,
|
||||||
|
Docker, and Swarm must be present.
|
||||||
|
|
||||||
|
Invoke: cc-ci-run python -m pytest tests/regression/ -m canary -v
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_configure(config):
    """Register the custom markers used by the regression suite.

    Adds one ``markers`` ini line per marker (``canary`` and ``canary_fast``) so
    both are declared to pytest before tests are selected with ``-m``.
    """
    marker_lines = (
        "canary: slow E2E canary test — drives the full cold CI lifecycle; run on-demand only.",
        "canary_fast: fast per-tier RED canary (still tagged canary); subset for quick pre-merge checks.",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
|
||||||
|
|
||||||
|
|
||||||
|
def run_recipe_ci(
    recipe: str,
    src: str,
    ref: str,
    pr: str = "0",
    stages: str = "install,upgrade,backup,restore,custom",
    runs_dir: str | None = None,
    run_id_prefix: str = "regression",
    timeout: int = 3600,
) -> tuple[int, dict | None, str]:
    """Run the cc-ci harness script for one canary and collect its outcome.

    The harness (``runner/run_recipe_ci.py`` under ``ROOT``) is launched with the
    current interpreter and is configured purely through environment variables.

    Args:
        recipe: Exported to the harness as ``RECIPE``.
        src: Exported as ``SRC``.
        ref: Exported as ``REF``; its first 12 characters become part of the run id.
        pr: Exported as ``PR``.
        stages: Comma-separated stage list, exported as ``STAGES``.
        runs_dir: Parent directory of the per-run artifact dir, exported as
            ``CCCI_RUNS_DIR``; ``/var/lib/cc-ci-runs`` when omitted.
        run_id_prefix: Leading component of the generated run id.
        timeout: Seconds passed to ``subprocess.run`` as its timeout.

    Returns:
        ``(rc, results, artifact_dir)``. ``results`` is the parsed ``results.json``
        found in ``artifact_dir``, or ``None`` when that file does not exist.

    Raises:
        subprocess.TimeoutExpired: If the harness runs longer than ``timeout``.
    """
    if runs_dir is None:
        runs_dir = "/var/lib/cc-ci-runs"
    # The wall-clock second is part of the id so repeated runs of one ref get distinct dirs.
    run_id = f"{run_id_prefix}-{recipe}-{ref[:12]}-{int(time.time())}"

    # Child environment = parent environment + harness inputs. PLAYWRIGHT settings from the
    # outer cc-ci-run wrapper are inherited via os.environ when running under it.
    harness_env = {
        **os.environ,
        "RECIPE": recipe,
        "REF": ref,
        "SRC": src,
        "PR": pr,
        "STAGES": stages,
        "CCCI_RUN_ID": run_id,
        "CCCI_RUNS_DIR": runs_dir,
        "HOME": "/root",
    }

    harness_script = os.path.join(ROOT, "runner", "run_recipe_ci.py")
    # Output is not captured: stdout/stderr stream live so a human can follow progress.
    completed = subprocess.run(
        [sys.executable, harness_script],
        env=harness_env,
        timeout=timeout,
    )

    artifact_dir = os.path.join(runs_dir, run_id)
    results_path = os.path.join(artifact_dir, "results.json")
    if not os.path.exists(results_path):
        return completed.returncode, None, artifact_dir
    with open(results_path) as handle:
        return completed.returncode, json.load(handle), artifact_dir
|
||||||
|
|
||||||
|
|
||||||
|
def find_stage_tests(results: dict, stage_name: str) -> list[dict]:
    """Look up the ``tests`` list of the first stage named ``stage_name``.

    Returns ``[]`` when no entry of ``results["stages"]`` has that name, or when
    the matching entry has no ``tests`` key.
    """
    candidates = (
        entry.get("tests", [])
        for entry in results.get("stages", [])
        if entry.get("name") == stage_name
    )
    return next(candidates, [])
|
||||||
|
|
||||||
|
|
||||||
|
def stage_has_passing_test(results: dict, stage_name: str, test_name_substr: str) -> bool:
    """Report whether ``stage_name`` has a ``"pass"`` test whose name contains ``test_name_substr``."""
    return any(
        test_name_substr in entry.get("name", "") and entry.get("status") == "pass"
        for entry in find_stage_tests(results, stage_name)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def stage_has_failing_test(results: dict, stage_name: str, test_name_substr: str) -> bool:
    """Report whether ``stage_name`` has a failed test whose name contains ``test_name_substr``.

    Both ``"fail"`` and ``"error"`` statuses count as failing.
    """
    return any(
        test_name_substr in entry.get("name", "") and entry.get("status") in ("fail", "error")
        for entry in find_stage_tests(results, stage_name)
    )
|
||||||
344
tests/regression/test_canaries.py
Normal file
344
tests/regression/test_canaries.py
Normal file
@ -0,0 +1,344 @@
|
|||||||
|
"""E2E canary regression tests — the server's standing self-test suite.
|
||||||
|
|
||||||
|
Seven canaries prove both halves of the server's job:
|
||||||
|
1. GREEN canaries — good apps are reported healthy (install+upgrade+backup/restore pass).
|
||||||
|
2. RED canaries — broken apps are caught at the intended tier; a false-green makes THIS test fail.
|
||||||
|
|
||||||
|
Fast subset (@pytest.mark.canary_fast): the four per-tier RED canaries, built on custom-html-tiny
and custom-html variants — fast because these recipes deploy in seconds. Run with `-m canary_fast`
as a pre-merge quick check.
|
||||||
|
Full suite (-m canary): includes good-significant (lasuite-docs, 10-20 min).
|
||||||
|
|
||||||
|
Run: cc-ci-run python -m pytest tests/regression/ -m canary -v
|
||||||
|
Pin policy: canary refs are pinned to specific SHAs. Update only after confirming the new ref gives
|
||||||
|
the expected verdict.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
|
import conftest as _reg # noqa: E402
|
||||||
|
|
||||||
|
run_recipe_ci = _reg.run_recipe_ci
|
||||||
|
stage_has_passing_test = _reg.stage_has_passing_test
|
||||||
|
stage_has_failing_test = _reg.stage_has_failing_test
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Canary definitions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Good canary 1: minimal static-file server — fast signal, few deps.
_SIMPLE = {
    "id": "good-simple",  # also used as the pytest parametrize id
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    # Pin: main @ 2026-06-02 — update if the recipe publishes a new release and pin goes stale.
    "ref": "435df8fc98ef7598084fcffcd6225470eca80053",
    "expected_green": True,  # test_canary routes this run to _assert_green
    # Named tests that MUST appear with "pass" in the result — these are the semantic teeth.
    # If the generic install assertion is removed/vacated, test_serving disappears → this fails.
    # Each entry is (stage name, substring of the test name).
    "stage_pass_checks": [
        ("install", "test_serving"),
    ],
    "stage_fail_checks": [],
}
|
||||||
|
|
||||||
|
# Good canary 2: multi-service stack — backend + Postgres + Collabora WOPI + OIDC.
# Exercises real breadth. Slowest canary (~10-20 min full lifecycle).
_SIGNIFICANT = {
    "id": "good-significant",  # also used as the pytest parametrize id
    "recipe": "lasuite-docs",
    "src": "recipe-maintainers/lasuite-docs",
    # Pin: main @ 2026-06-02
    "ref": "290a8ad72d06232f0b3f302d976af14bef0f3c53",
    "expected_green": True,  # test_canary routes this run to _assert_green
    # Each entry is (stage name, substring of a test name that must have status "pass").
    "stage_pass_checks": [
        ("install", "test_serving_and_frontend"),
    ],
    "stage_fail_checks": [],
}
|
||||||
|
|
||||||
|
# Bad canary: app is UP + passes all lifecycle tiers but the custom functional assertion detects a
# semantic defect (wrong Content-Type for .txt files). The harness MUST report RED.
# If the harness wrongly returns green for this fixture, assert rc != 0 fails → false-green caught.
_BAD = {
    "id": "bad-false-green",  # also used as the pytest parametrize id
    "recipe": "custom-html",
    "src": "recipe-maintainers/custom-html",
    # Pin: v5-stale-docroot @ 71e7326 — serves .txt as application/octet-stream; build #75 was RED.
    # Recreate pattern if branch disappears: app up + passes lifecycle, fails one content assertion.
    "ref": "71e7326a99bbb69035a046fba8fa51859ca66115",
    "expected_green": False,  # test_canary routes this run to _assert_red
    # The specific test that must have FAILED, proving the content-type assertion has teeth.
    # If the assertion is vacated and the test disappears, stage_has_failing_test() returns False
    # → the assert in _assert_red fails → we detect that the guard was removed.
    "stage_pass_checks": [],
    # Each entry is (stage name, substring of a test name that must have status "fail"/"error").
    "stage_fail_checks": [
        ("custom", "test_content_type"),
    ],
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Per-tier RED canaries (fast subset: @pytest.mark.canary_fast)
|
||||||
|
# Prove the server catches failure at EVERY lifecycle tier — false-green at any tier is caught.
|
||||||
|
# Each uses custom-html-tiny (deploys in seconds) or custom-html (fast nginx, has backup support).
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Shared bad-image branch: deploy fails at prepull because the image doesn't exist on Docker Hub.
# Used for install-RED (STAGES=install → chaos of HEAD with bad image → install=fail)
# and upgrade-RED (STAGES=install,upgrade → prev-version install passes, upgrade chaos fails).
_BAD_IMAGE_REF = "4ae8866100563204d40435c5aba00374aa5a8ed3"  # regression-bad-image @ 2026-06-02

_BAD_INSTALL = {
    "id": "bad-install",  # also used as the pytest parametrize id
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    # Not read by test_canary_fast, which always applies _assert_red_at_tier.
    "expected_green": False,
    # STAGES=install only → no upgrade tier → prev=None → chaos deploy of HEAD (bad image) → fails.
    "stages": "install",
    # Assertions: install must be the failing tier.
    "failing_tier": "install",
    "passing_tiers_before": [],  # nothing runs before install
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
||||||
|
|
||||||
|
_BAD_UPGRADE = {
    "id": "bad-upgrade",  # also used as the pytest parametrize id
    "recipe": "custom-html-tiny",
    "src": "recipe-maintainers/custom-html-tiny",
    "ref": _BAD_IMAGE_REF,
    # Not read by test_canary_fast, which always applies _assert_red_at_tier.
    "expected_green": False,
    # STAGES includes upgrade → prev-version deploy (good image) → install=PASS;
    # upgrade chaos (bad image) → FAIL.
    "stages": "install,upgrade,custom",
    "failing_tier": "upgrade",  # this tier must report "fail"
    "passing_tiers_before": ["install"],  # these tiers must report "pass"
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
||||||
|
|
||||||
|
_BAD_BACKUP = {
    "id": "bad-backup",  # also used as the pytest parametrize id
    "recipe": "custom-html-bkp-bad",
    "src": "recipe-maintainers/custom-html-bkp-bad",
    # Pin: custom-html-bkp-bad main @ 2026-06-02 — custom-html WITHOUT backupbot labels.
    # cc-ci recipe_meta sets BACKUP_CAPABLE=True → harness runs backup tier.
    # No backupbot.backup=true label → backup-bot-two finds no containers → no snapshot.
    # parse_snapshot_id returns None → test_backup_artifact fails → backup tier RED.
    "ref": "b6fe99de41601f9e51bc7ea5b6072f0c3f56cdc3",
    # Not read by test_canary_fast, which always applies _assert_red_at_tier.
    "expected_green": False,
    "stages": "install,upgrade,backup",
    "failing_tier": "backup",  # this tier must report "fail"
    # Only install is asserted as "pass"; the upgrade tier's status is not checked.
    "passing_tiers_before": ["install"],
    "stage_pass_checks": [],
    "stage_fail_checks": [],
}
|
||||||
|
|
||||||
|
_BAD_RESTORE = {
    "id": "bad-restore",  # also used as the pytest parametrize id
    "recipe": "custom-html-rst-bad",
    "src": "recipe-maintainers/custom-html-rst-bad",
    # Pin: custom-html-rst-bad main @ 2026-06-02 (9a73a184).
    # No pre_backup hook → backup snapshot has no ci-marker.txt.
    # pre_restore writes "mutated". After restore: marker stays "mutated" → FAIL → restore=RED.
    # install+backup PASS (no test_backup.py in cc-ci dir); upgrade=skip (no version tags).
    # NOTE(review): "upgrade" is not listed in "stages" below, so the upgrade=skip remark may be
    # stale — confirm against the harness.
    "ref": "9a73a184e739691bc6a621a5f1e6efc799743c5b",
    # Not read by test_canary_fast, which always applies _assert_red_at_tier.
    "expected_green": False,
    "stages": "install,backup,restore,custom",
    "failing_tier": "restore",  # this tier must report "fail"
    "passing_tiers_before": ["install", "backup"],  # these tiers must report "pass"
    "stage_pass_checks": [],
    # (stage name, substring of a test name that must have status "fail"/"error").
    "stage_fail_checks": [
        ("restore", "test_restore_returns_state"),
    ],
}
|
||||||
|
|
||||||
|
# Parametrization source for test_canary: two GREEN canaries plus the false-green RED canary.
CANARIES = [_SIMPLE, _SIGNIFICANT, _BAD]
# Parametrization source for test_canary_fast: one RED canary per lifecycle tier.
CANARIES_FAST = [_BAD_INSTALL, _BAD_UPGRADE, _BAD_BACKUP, _BAD_RESTORE]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.canary
@pytest.mark.parametrize("canary", CANARIES, ids=[c["id"] for c in CANARIES])
def test_canary(canary, tmp_path):
    """Run one canary through the harness and check the verdict it was pinned to.

    GREEN canaries must come back healthy *and* show that their named per-tier
    tests really ran (see ``_assert_green``). The RED canary must come back
    non-zero; a harness that reports it green trips ``assert rc != 0`` in
    ``_assert_red``, which is how a false-green is caught.

    Artifacts are written under pytest's ``tmp_path``.
    """
    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
        stages=canary.get("stages", "install,upgrade,backup,restore,custom"),
    )

    # Carried into every assertion message so the artifact location shows up on failure.
    where = f"artifact_dir={artifact_dir}"

    verify = _assert_green if canary["expected_green"] else _assert_red
    verify(rc, results, canary, where)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.canary
@pytest.mark.canary_fast
@pytest.mark.parametrize("canary", CANARIES_FAST, ids=[c["id"] for c in CANARIES_FAST])
def test_canary_fast(canary, tmp_path):
    """Run one per-tier RED canary and check that it fails at its intended tier.

    Every fixture in ``CANARIES_FAST`` is broken at exactly one lifecycle tier.
    ``_assert_red_at_tier`` then checks that:

    - the overall verdict is RED (``rc != 0``);
    - the intended failing tier has status ``"fail"``;
    - the tiers listed as coming before it have status ``"pass"``, so the failure
      is tier-specific rather than "fails somewhere".

    The fixtures use fast-deploying recipes and are meant as a pre-merge quick
    check alongside the full slow suite.
    """
    rc, results, artifact_dir = run_recipe_ci(
        recipe=canary["recipe"],
        src=canary["src"],
        ref=canary["ref"],
        runs_dir=str(tmp_path),
        stages=canary.get("stages", "install,upgrade,backup,restore,custom"),
    )

    # Carried into every assertion message so the artifact location shows up on failure.
    _assert_red_at_tier(rc, results, canary, f"artifact_dir={artifact_dir}")
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_green(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
||||||
|
"""Assert a good-canary run is GREEN with real semantic assertions."""
|
||||||
|
|
||||||
|
# 1. Harness exit code must be 0 (GREEN).
|
||||||
|
assert rc == 0, f"[{canary['id']}] harness returned non-zero rc={rc} — expected GREEN. {note}"
|
||||||
|
|
||||||
|
assert (
|
||||||
|
results is not None
|
||||||
|
), f"[{canary['id']}] results.json not written — harness may have crashed. {note}"
|
||||||
|
|
||||||
|
# 2. Install tier must have passed.
|
||||||
|
assert results.get("results", {}).get("install") == "pass", (
|
||||||
|
f"[{canary['id']}] install tier did not pass: " f"results={results.get('results')}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 3. No tier may have FAILED (skips are acceptable for recipes without backup or custom tests).
|
||||||
|
failed_tiers = [t for t, s in results.get("results", {}).items() if s == "fail"]
|
||||||
|
assert not failed_tiers, f"[{canary['id']}] tiers failed: {failed_tiers}. {note}"
|
||||||
|
|
||||||
|
# 4. Teardown must be clean (no leftover containers/volumes/secrets).
|
||||||
|
assert (
|
||||||
|
results.get("flags", {}).get("clean_teardown") is True
|
||||||
|
), f"[{canary['id']}] clean_teardown=False — residual state left on server. {note}"
|
||||||
|
|
||||||
|
# 5. No secret values leaked into the results artifact.
|
||||||
|
assert (
|
||||||
|
results.get("flags", {}).get("no_secret_leak") is True
|
||||||
|
), f"[{canary['id']}] no_secret_leak=False — a secret value appeared in results.json. {note}"
|
||||||
|
|
||||||
|
# 6. Semantic stage assertions — TEETH CHECK.
|
||||||
|
# These verify that specific named tests actually ran and passed in the expected stage.
|
||||||
|
# If a tier assertion is removed or made vacuous, the named test disappears from results.json
|
||||||
|
# and this assert fires — proving the regression suite guards against silent test removal.
|
||||||
|
for stage_name, test_name_substr in canary.get("stage_pass_checks", []):
|
||||||
|
assert stage_has_passing_test(results, stage_name, test_name_substr), (
|
||||||
|
f"[{canary['id']}] expected a passing test containing {test_name_substr!r} in "
|
||||||
|
f"stage={stage_name!r}, but none found. "
|
||||||
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_red(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
||||||
|
"""Assert a bad-canary run is RED (false-green guard).
|
||||||
|
|
||||||
|
The PRIMARY assertion is rc != 0. If the harness wrongly returns 0 (green) for this fixture,
|
||||||
|
this assert fails → the regression suite catches the false-green. This is the core guard.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# PRIMARY: harness must return non-zero (RED).
|
||||||
|
# If the harness returns 0 for a broken app, the regression suite fails here — false-green caught.
|
||||||
|
assert rc != 0, (
|
||||||
|
f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture — "
|
||||||
|
f"FALSE-GREEN detected. The harness failed to catch the broken app. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# SECONDARY: verify the specific failing test is present in results.json.
|
||||||
|
# If the content-type assertion is removed/vacuated, stage_has_failing_test() returns False here
|
||||||
|
# → this assert fires → we detect that the guard itself was removed (a meta-failure).
|
||||||
|
if results is not None:
|
||||||
|
for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
|
||||||
|
assert stage_has_failing_test(results, stage_name, test_name_substr), (
|
||||||
|
f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
|
||||||
|
f"stage={stage_name!r}, but none found. "
|
||||||
|
f"The guard may have been removed or vacuated. "
|
||||||
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _assert_red_at_tier(rc: int, results: dict | None, canary: dict, note: str) -> None:
|
||||||
|
"""Assert a per-tier RED canary: overall RED, failing_tier=fail, passing_tiers_before=pass.
|
||||||
|
|
||||||
|
Proves the server catches failure AT THE INTENDED TIER (not just "fails somewhere"), and that
|
||||||
|
the tiers before it still PASSED (no collateral damage from the fixture).
|
||||||
|
If the harness returns 0 for any of these fixtures, false-green is detected at the primary assert.
|
||||||
|
"""
|
||||||
|
failing_tier = canary.get("failing_tier")
|
||||||
|
passing_before = canary.get("passing_tiers_before", [])
|
||||||
|
|
||||||
|
# PRIMARY: harness must return non-zero.
|
||||||
|
assert rc != 0, (
|
||||||
|
f"[{canary['id']}] harness returned rc=0 (GREEN) for a KNOWN-BAD fixture at tier "
|
||||||
|
f"{failing_tier!r} — FALSE-GREEN. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if results is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
tier_results = results.get("results", {})
|
||||||
|
|
||||||
|
# The intended failing tier must be "fail".
|
||||||
|
if failing_tier:
|
||||||
|
actual = tier_results.get(failing_tier)
|
||||||
|
assert actual == "fail", (
|
||||||
|
f"[{canary['id']}] expected tier {failing_tier!r}='fail', got {actual!r}. "
|
||||||
|
f"All tier results: {tier_results}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Tiers before the failing tier must have passed (no collateral damage from the fixture).
|
||||||
|
for tier in passing_before:
|
||||||
|
actual = tier_results.get(tier)
|
||||||
|
assert actual == "pass", (
|
||||||
|
f"[{canary['id']}] expected prior tier {tier!r}='pass' before failing at "
|
||||||
|
f"{failing_tier!r}, got {actual!r}. All results: {tier_results}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Optional: specific failing test name (for the restore-RED canary).
|
||||||
|
for stage_name, test_name_substr in canary.get("stage_fail_checks", []):
|
||||||
|
assert stage_has_failing_test(results, stage_name, test_name_substr), (
|
||||||
|
f"[{canary['id']}] expected a failing test containing {test_name_substr!r} in "
|
||||||
|
f"stage={stage_name!r}. "
|
||||||
|
f"Stage tests: {[t['name'] for t in _stage_tests(results, stage_name)]}. {note}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _stage_tests(results: dict, stage_name: str) -> list[dict]:
|
||||||
|
for stage in results.get("stages", []):
|
||||||
|
if stage.get("name") == stage_name:
|
||||||
|
return stage.get("tests", [])
|
||||||
|
return []
|
||||||
@ -14,7 +14,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")
|
|||||||
from harness import card as C # noqa: E402
|
from harness import card as C # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
def _data(level=4, cap="L5 integration (SSO/OIDC + cross-app) N/A"):
|
def _data(level=3, cap="L4 functional (recipe-specific tests) N/A"):
|
||||||
return {
|
return {
|
||||||
"recipe": "uptime-kuma",
|
"recipe": "uptime-kuma",
|
||||||
"version": "1.23.0",
|
"version": "1.23.0",
|
||||||
@ -51,6 +51,35 @@ def test_badge_svg_wellformed():
|
|||||||
assert svg.startswith("<svg") and svg.endswith("</svg>")
|
assert svg.startswith("<svg") and svg.endswith("</svg>")
|
||||||
assert "level 4" in svg
|
assert "level 4" in svg
|
||||||
assert C.level_color(4) in svg
|
assert C.level_color(4) in svg
|
||||||
|
# plain cap (no intent) → two-box badge, no third segment
|
||||||
|
assert "expected" not in svg and "gap?" not in svg
|
||||||
|
|
||||||
|
|
||||||
|
def test_badge_svg_differentiates_intentional_vs_unintentional_skip():
    """The badge's third segment must tell a declared skip apart from an undeclared one."""
    cap_reason = "L3 backup/restore N/A"

    # Intentional (declared) skip capped the climb → "expected" segment in EXPECT_COLOR.
    expected_badge = C.level_badge_svg(2, cap_reason, "intentional")
    assert "level 2" in expected_badge
    assert "expected" in expected_badge
    assert C.EXPECT_COLOR in expected_badge
    assert "gap?" not in expected_badge

    # Unintentional (undeclared) skip → "gap?" segment in GAP_COLOR.
    gap_badge = C.level_badge_svg(2, cap_reason, "unintentional")
    assert "level 2" in gap_badge
    assert "gap?" in gap_badge
    assert C.GAP_COLOR in gap_badge
    assert "expected" not in gap_badge
|
||||||
|
|
||||||
|
|
||||||
|
def test_skip_rows_intentional_and_unintentional():
    """``_skip_rows`` must render one labelled row per intentional and unintentional skip."""
    skips = {
        "intentional": {"backup_restore": "no persistent data"},
        "unintentional": ["functional"],
    }
    rendered = C._skip_rows(skips)

    # Intentional skip: labelled row using SKIP_GREEN, plus the declared reason.
    assert "intentional skip" in rendered
    assert C.SKIP_GREEN in rendered
    assert "backup/restore" in rendered
    assert "no persistent data" in rendered

    # Unintentional skip: row using GAP_COLOR, plus the EXPECTED_NA prompt.
    assert "unintentional skip" in rendered
    assert C.GAP_COLOR in rendered
    assert "functional" in rendered
    assert "EXPECTED_NA" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_skip_rows_empty_when_no_skips():
    """With no skips of either kind, ``_skip_rows`` must render nothing."""
    no_skips = {"intentional": {}, "unintentional": []}
    rendered = C._skip_rows(no_skips)
    assert rendered == ""
|
||||||
|
|
||||||
|
|
||||||
def test_card_html_reports_level_verbatim():
|
def test_card_html_reports_level_verbatim():
|
||||||
|
|||||||
@ -24,7 +24,7 @@ import dashboard # noqa: E402
|
|||||||
def _row(**kw):
|
def _row(**kw):
|
||||||
base = {
|
base = {
|
||||||
"recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
|
"recipe": "custom-html", "status": "success", "number": 4, "ref": "db9a9502",
|
||||||
"version": "db9a95024e9d", "level": 4, "level_cap_reason": "L5 integration N/A",
|
"version": "db9a95024e9d", "level": 4, "level_cap_reason": "",
|
||||||
"has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
|
"has_screenshot": True, "flags": {"clean_teardown": True, "no_secret_leak": True},
|
||||||
"finished": 0, "url": "https://drone.x/cc-ci/4",
|
"finished": 0, "url": "https://drone.x/cc-ci/4",
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,33 +19,23 @@ def _rungs(
|
|||||||
upgrade="pass",
|
upgrade="pass",
|
||||||
backup_restore="pass",
|
backup_restore="pass",
|
||||||
functional="pass",
|
functional="pass",
|
||||||
integration="pass",
|
|
||||||
recipe_local="pass",
|
|
||||||
):
|
):
|
||||||
return {
|
return {
|
||||||
"install": install,
|
"install": install,
|
||||||
"upgrade": upgrade,
|
"upgrade": upgrade,
|
||||||
"backup_restore": backup_restore,
|
"backup_restore": backup_restore,
|
||||||
"functional": functional,
|
"functional": functional,
|
||||||
"integration": integration,
|
|
||||||
"recipe_local": recipe_local,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ---- the U0 gate: L4-pass and L2-cap ----
|
# ---- the ladder: four essential rungs, top is L4 (functional) ----
|
||||||
|
|
||||||
|
|
||||||
def test_full_clean_climb_to_L6():
|
def test_full_clean_climb_to_L4():
|
||||||
|
# All four essential rungs pass → L4 (the top; integration/recipe-local are optional, not leveled).
|
||||||
lvl, reason = L.compute_level(_rungs())
|
lvl, reason = L.compute_level(_rungs())
|
||||||
assert lvl == 6
|
|
||||||
assert reason == ""
|
|
||||||
|
|
||||||
|
|
||||||
def test_climbs_through_L4_then_no_integration_surface_caps_at_L4():
|
|
||||||
# GATE: a recipe whose functional tests pass but has no SSO/integration surface caps at L4.
|
|
||||||
lvl, reason = L.compute_level(_rungs(integration="na", recipe_local="na"))
|
|
||||||
assert lvl == 4
|
assert lvl == 4
|
||||||
assert "L5" in reason and "N/A" in reason
|
assert reason == ""
|
||||||
|
|
||||||
|
|
||||||
def test_fails_at_L2_capped_at_L1():
|
def test_fails_at_L2_capped_at_L1():
|
||||||
@ -69,34 +59,27 @@ def test_install_fail_is_L0():
|
|||||||
|
|
||||||
def test_higher_pass_does_not_rescue_lower_na():
|
def test_higher_pass_does_not_rescue_lower_na():
|
||||||
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
|
# backup/restore N/A (stateless app) caps at L2 even though functional would pass.
|
||||||
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass", integration="na"))
|
lvl, reason = L.compute_level(_rungs(backup_restore="na", functional="pass"))
|
||||||
assert lvl == 2
|
assert lvl == 2
|
||||||
assert "L3" in reason and "N/A" in reason
|
assert "L3" in reason and "N/A" in reason
|
||||||
|
|
||||||
|
|
||||||
def test_upgrade_na_caps_at_L1():
|
def test_upgrade_na_caps_at_L1():
|
||||||
# only one published version → no upgrade possible → N/A caps at L1.
|
# only one published version → no upgrade possible → N/A caps at L1 (upgrade is essential).
|
||||||
lvl, reason = L.compute_level(_rungs(upgrade="na"))
|
lvl, reason = L.compute_level(_rungs(upgrade="na"))
|
||||||
assert lvl == 1
|
assert lvl == 1
|
||||||
assert "L2" in reason and "N/A" in reason
|
assert "L2" in reason and "N/A" in reason
|
||||||
|
|
||||||
|
|
||||||
def test_integration_fail_caps_at_L4():
|
def test_functional_na_caps_at_L3():
|
||||||
# SSO declared but unverified (failed) → integration rung fails → cap at L4.
|
# no recipe-specific functional tests → functional N/A caps at L3.
|
||||||
lvl, reason = L.compute_level(_rungs(integration="fail", recipe_local="na"))
|
lvl, reason = L.compute_level(_rungs(functional="na"))
|
||||||
assert lvl == 4
|
assert lvl == 3
|
||||||
assert "L5" in reason and "FAILED" in reason
|
assert "L4" in reason and "N/A" in reason
|
||||||
|
|
||||||
|
|
||||||
def test_recipe_local_na_caps_at_L5():
|
|
||||||
# SSO passes but no recipe-local tests → cap at L5 (L6 N/A).
|
|
||||||
lvl, reason = L.compute_level(_rungs(recipe_local="na"))
|
|
||||||
assert lvl == 5
|
|
||||||
assert "L6" in reason and "N/A" in reason
|
|
||||||
|
|
||||||
|
|
||||||
def test_functional_fail_caps_at_L3():
|
def test_functional_fail_caps_at_L3():
|
||||||
lvl, reason = L.compute_level(_rungs(functional="fail", integration="na"))
|
lvl, reason = L.compute_level(_rungs(functional="fail"))
|
||||||
assert lvl == 3
|
assert lvl == 3
|
||||||
assert "L4" in reason and "FAILED" in reason
|
assert "L4" in reason and "FAILED" in reason
|
||||||
|
|
||||||
|
|||||||
@ -105,83 +105,31 @@ def _results(**kw):
|
|||||||
return base
|
return base
|
||||||
|
|
||||||
|
|
||||||
def test_derive_rungs_full_stateful_sso():
|
def test_derive_rungs_full_climb_four_essential():
|
||||||
rungs = R.derive_rungs(
|
rungs = R.derive_rungs(_results(), backup_capable=True, has_custom=True)
|
||||||
_results(),
|
# only the four essential rungs — integration/recipe-local are optional, not produced here.
|
||||||
backup_capable=True,
|
|
||||||
declared=["keycloak"],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
has_custom=True,
|
|
||||||
has_repo_local=False,
|
|
||||||
repo_local_passed=False,
|
|
||||||
)
|
|
||||||
assert rungs == {
|
assert rungs == {
|
||||||
"install": "pass",
|
"install": "pass",
|
||||||
"upgrade": "pass",
|
"upgrade": "pass",
|
||||||
"backup_restore": "pass",
|
"backup_restore": "pass",
|
||||||
"functional": "pass",
|
"functional": "pass",
|
||||||
"integration": "pass",
|
|
||||||
"recipe_local": "na",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def test_derive_rungs_no_sso_surface_is_integration_na():
|
def test_derive_rungs_stateless_backup_and_functional_na():
|
||||||
rungs = R.derive_rungs(
|
|
||||||
_results(),
|
|
||||||
backup_capable=True,
|
|
||||||
declared=[],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
has_custom=True,
|
|
||||||
has_repo_local=False,
|
|
||||||
repo_local_passed=False,
|
|
||||||
)
|
|
||||||
assert rungs["integration"] == "na"
|
|
||||||
assert rungs["functional"] == "pass"
|
|
||||||
|
|
||||||
|
|
||||||
def test_derive_rungs_stateless_backup_na():
|
|
||||||
rungs = R.derive_rungs(
|
rungs = R.derive_rungs(
|
||||||
_results(backup="skip", restore="skip", custom="skip"),
|
_results(backup="skip", restore="skip", custom="skip"),
|
||||||
backup_capable=False,
|
backup_capable=False,
|
||||||
declared=[],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
has_custom=False,
|
has_custom=False,
|
||||||
has_repo_local=False,
|
|
||||||
repo_local_passed=False,
|
|
||||||
)
|
)
|
||||||
assert rungs["backup_restore"] == "na"
|
assert rungs["backup_restore"] == "na"
|
||||||
assert rungs["functional"] == "na"
|
assert rungs["functional"] == "na"
|
||||||
|
assert "integration" not in rungs and "recipe_local" not in rungs
|
||||||
|
|
||||||
|
|
||||||
def test_derive_rungs_sso_unverified_is_integration_fail():
|
def test_derive_rungs_functional_fail():
|
||||||
rungs = R.derive_rungs(
|
rungs = R.derive_rungs(_results(custom="fail"), backup_capable=True, has_custom=True)
|
||||||
_results(),
|
assert rungs["functional"] == "fail"
|
||||||
backup_capable=True,
|
|
||||||
declared=["keycloak"],
|
|
||||||
deps_ready=False,
|
|
||||||
sso_unverified=True,
|
|
||||||
has_custom=True,
|
|
||||||
has_repo_local=False,
|
|
||||||
repo_local_passed=False,
|
|
||||||
)
|
|
||||||
assert rungs["integration"] == "fail"
|
|
||||||
|
|
||||||
|
|
||||||
def test_derive_rungs_repo_local_pass():
|
|
||||||
rungs = R.derive_rungs(
|
|
||||||
_results(),
|
|
||||||
backup_capable=True,
|
|
||||||
declared=[],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
has_custom=True,
|
|
||||||
has_repo_local=True,
|
|
||||||
repo_local_passed=True,
|
|
||||||
)
|
|
||||||
assert rungs["recipe_local"] == "pass"
|
|
||||||
|
|
||||||
|
|
||||||
# ---- build_results: end-to-end incl level + flags ----
|
# ---- build_results: end-to-end incl level + flags ----
|
||||||
@ -212,16 +160,13 @@ def test_build_results_level_and_flags(tmp_path):
|
|||||||
records=recs,
|
records=recs,
|
||||||
results=_results(),
|
results=_results(),
|
||||||
backup_capable=True,
|
backup_capable=True,
|
||||||
declared=[],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
clean_teardown=True,
|
clean_teardown=True,
|
||||||
no_secret_leak=True,
|
no_secret_leak=True,
|
||||||
finished_ts=1234.0,
|
finished_ts=1234.0,
|
||||||
)
|
)
|
||||||
# stateful, functional pass, no SSO surface, no repo-local → caps at L4
|
# all four essential rungs pass → full climb to L4 (the top), no cap
|
||||||
assert data["level"] == 4
|
assert data["level"] == 4
|
||||||
assert "L5" in data["level_cap_reason"]
|
assert data["level_cap_reason"] == ""
|
||||||
assert data["recipe"] == "hedgedoc"
|
assert data["recipe"] == "hedgedoc"
|
||||||
assert data["ref"] == "deadbeefcafe"
|
assert data["ref"] == "deadbeefcafe"
|
||||||
assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
|
assert data["flags"] == {"clean_teardown": True, "no_secret_leak": True}
|
||||||
@ -246,9 +191,6 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
|
|||||||
records=recs,
|
records=recs,
|
||||||
results=_results(upgrade="fail"),
|
results=_results(upgrade="fail"),
|
||||||
backup_capable=True,
|
backup_capable=True,
|
||||||
declared=[],
|
|
||||||
deps_ready=True,
|
|
||||||
sso_unverified=False,
|
|
||||||
clean_teardown=True,
|
clean_teardown=True,
|
||||||
no_secret_leak=True,
|
no_secret_leak=True,
|
||||||
finished_ts=0.0,
|
finished_ts=0.0,
|
||||||
@ -257,6 +199,85 @@ def test_build_results_capped_at_L1_on_upgrade_fail(tmp_path):
|
|||||||
assert "L2" in data["level_cap_reason"]
|
assert "L2" in data["level_cap_reason"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---- skips: intentional (declared) vs unintentional (everything else skipped) ----
|
||||||
|
|
||||||
|
|
||||||
|
def _rungs(**kw):
|
||||||
|
base = {
|
||||||
|
"install": "pass",
|
||||||
|
"upgrade": "pass",
|
||||||
|
"backup_restore": "pass",
|
||||||
|
"functional": "pass",
|
||||||
|
}
|
||||||
|
base.update(kw)
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_intentional_vs_unintentional():
|
||||||
|
rungs = _rungs(backup_restore="na", functional="na")
|
||||||
|
sk = R.skips(rungs, {"backup_restore": "stateless static server"})
|
||||||
|
# backup_restore is declared (intentional, with reason); functional skipped but not declared.
|
||||||
|
assert sk["intentional"] == {"backup_restore": "stateless static server"}
|
||||||
|
assert sk["unintentional"] == ["functional"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_none_declared_all_unintentional():
|
||||||
|
rungs = _rungs(backup_restore="na")
|
||||||
|
sk = R.skips(rungs, None)
|
||||||
|
assert sk["intentional"] == {}
|
||||||
|
assert sk["unintentional"] == ["backup_restore"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_skips_declaration_only_counts_when_actually_skipped():
|
||||||
|
# backup_restore actually ran (pass) → not a skip, so a declaration for it is simply inert.
|
||||||
|
rungs = _rungs(backup_restore="pass")
|
||||||
|
sk = R.skips(rungs, {"backup_restore": "reason"})
|
||||||
|
assert "backup_restore" not in sk["intentional"]
|
||||||
|
assert "backup_restore" not in sk["unintentional"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_results_threads_expected_na(tmp_path):
|
||||||
|
# Mirrors custom-html-tiny post-change: install + a passing functional (custom) test, but no
|
||||||
|
# backup surface (backup_restore declared intentionally skipped).
|
||||||
|
recs = [
|
||||||
|
{
|
||||||
|
"tier": "install",
|
||||||
|
"source": "generic",
|
||||||
|
"file": "g/test_install.py",
|
||||||
|
"rc": 0,
|
||||||
|
"junit": _write(tmp_path, "i.xml", JUNIT_PASS),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tier": "custom",
|
||||||
|
"source": "cc-ci",
|
||||||
|
"file": "c/test_serves_content.py",
|
||||||
|
"rc": 0,
|
||||||
|
"junit": _write(tmp_path, "c.xml", JUNIT_PASS),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
data = R.build_results(
|
||||||
|
recipe="custom-html-tiny",
|
||||||
|
version="1.1.0",
|
||||||
|
pr="0",
|
||||||
|
ref=None,
|
||||||
|
records=recs,
|
||||||
|
results=_results(backup="skip", restore="skip"), # custom=pass (default) → functional pass
|
||||||
|
backup_capable=False, # no backupbot label → backup_restore skipped (N/A)
|
||||||
|
clean_teardown=True,
|
||||||
|
no_secret_leak=True,
|
||||||
|
finished_ts=0.0,
|
||||||
|
expected_na={"backup_restore": "stateless static file server"},
|
||||||
|
)
|
||||||
|
# backup_restore skip still caps at L2 (never inflates) — even though functional passes above it,
|
||||||
|
# the skip caps the climb — but it's the declared (intentional) rung that capped.
|
||||||
|
assert data["level"] == 2
|
||||||
|
assert "L3" in data["level_cap_reason"]
|
||||||
|
assert data["level_cap_rung"] == "backup_restore"
|
||||||
|
assert data["rungs"]["functional"] == "pass"
|
||||||
|
assert data["skips"]["intentional"]["backup_restore"] == "stateless static file server"
|
||||||
|
assert data["skips"]["unintentional"] == [] # backup_restore declared; functional passed → clean
|
||||||
|
|
||||||
|
|
||||||
def test_write_results_roundtrip(tmp_path):
|
def test_write_results_roundtrip(tmp_path):
|
||||||
data = {"run_id": "42", "level": 3, "stages": []}
|
data = {"run_id": "42", "level": 3, "stages": []}
|
||||||
path = R.write_results(data, runs_dir_override=str(tmp_path))
|
path = R.write_results(data, runs_dir_override=str(tmp_path))
|
||||||
|
|||||||
Reference in New Issue
Block a user