diff --git a/docs/enroll-recipe.md b/docs/enroll-recipe.md index f9b4526..f953e12 100644 --- a/docs/enroll-recipe.md +++ b/docs/enroll-recipe.md @@ -14,8 +14,9 @@ those are discovered and run against the live app (D4 — see below). ``` tests// ├── recipe_meta.py # optional per-recipe harness config (see below) -├── install_steps.sh # optional custom install-steps hook (pre-deploy setup) -├── ops.py # optional pre-op seed hooks (pre_install/pre_upgrade/pre_backup/pre_restore) +├── install_steps.sh # optional custom install-steps hook (pre-deploy setup + deps env wiring) +├── compose.ccci.yml # optional CI-only compose overlay (harness-copied, auto-chaos base deploy) +├── ops.py # optional pre_(ctx) seed hooks (install/upgrade/backup/restore) ├── test_install.py # optional install overlay (runs ADDITIVELY alongside generic) ├── test_upgrade.py # optional upgrade overlay (runs ADDITIVELY alongside generic) ├── test_backup.py # optional backup overlay (runs ADDITIVELY alongside generic) @@ -39,11 +40,14 @@ To add recipe-specific coverage, drop a `tests//test_.py` **overlay* **ALONGSIDE** the generic for that op (HC3 additive, Phase 1e); the generic floor is never silently dropped. Overlays are **assertion-only** against the shared live deployment (the `live_app` fixture; they never perform the op or deploy/teardown — the orchestrator owns those). If the overlay needs to -SEED pre-op state (data-continuity markers, the backup→restore divergence), put `pre_(domain, -meta)` callables in `tests//ops.py` — the orchestrator runs them BEFORE the op. Copy an +SEED pre-op state (data-continuity markers, the backup→restore divergence), put `pre_(ctx)` +callables in `tests//ops.py` — the orchestrator runs them BEFORE the op (`ctx` is the +uniform `HookCtx` every hook receives — `docs/recipe-customization.md` §4.1). Copy an existing recipe (`tests/custom-html/` simple/volume marker; `tests/keycloak/` admin-API; `tests/ matrix-synapse/` `db`-service psql marker). 
**Do not edit the shared `tests/conftest.py` / -`runner/harness/` to add a recipe** — set per-recipe knobs in `recipe_meta.py`: +`runner/harness/` to add a recipe** — set per-recipe knobs in `recipe_meta.py` (the COMPLETE key +reference is the generated table in `docs/recipe-customization.md` §4; unknown ALL-CAPS keys are +hard errors, recipe-private constants are underscore-prefixed `_FOO`): ```python HEALTH_PATH = "/realms/master" # path that returns a healthy status (default "/") @@ -51,9 +55,7 @@ HEALTH_OK = (200,) # acceptable status codes (default 200/301/302) DEPLOY_TIMEOUT = 600 # seconds for services to converge (default 600) HTTP_TIMEOUT = 600 # seconds for the app to answer (default 300) BACKUP_CAPABLE = True # override backup-capability auto-detect (default: scan compose) -EXTRA_ENV = {"KEY": "value"} # or EXTRA_ENV(domain) -> dict; extra .env keys set at deploy -SKIP_GENERIC = ["upgrade"] # per-recipe opt-out from the generic floor for the listed ops - # ("all"/"*" = every op); rarely needed — generic is the floor +EXTRA_ENV = {"KEY": "value"} # or EXTRA_ENV(ctx) -> dict; extra .env keys set at deploy ``` Useful `harness.lifecycle` helpers for overlays: `http_get`, `http_fetch`, `http_body`, @@ -76,9 +78,10 @@ Beyond the lifecycle overlays, each recipe carries (plan §4.1): - **`playwright/`** — browser flows where the recipe's core UX is a UI (P6). The orchestrator's **custom** tier discovers `test_*.py` in `tests//{functional,playwright}/` -(recursive, via `runner/harness/discovery.custom_tests`) and runs each as its own pytest against -the same `live_app` shared deployment. Lifecycle-named files (`test_install.py`/etc.) are -**excluded** from the custom tier — they live at the top level and run as lifecycle overlays. +ONLY (the placement rule, via `runner/harness/discovery.custom_tests` — a top-level `test_*.py` +is a lifecycle overlay and nothing else) and runs each as its own pytest against the same +`live_app` shared deployment. 
Lifecycle-named files (`test_install.py`/etc.) are **excluded** +from the custom tier even inside those subdirs (safety net against double-running). ### 2.2 Recipe-test dependencies — DEPS = [...] (Phase 2 Q2.3) @@ -89,23 +92,28 @@ them in `recipe_meta.py`: DEPS = ["keycloak"] # one entry per dep recipe name (cc-ci tests// must exist + work) ``` -The orchestrator (plan §4.2): -1. Reads `DEPS` BEFORE deploying the recipe under test. -2. Deploys each dep at a per-run domain `-<6hex>.ci.commoninternet.net` (the 6hex is - hashed from `parent_recipe + pr + ref + dep_recipe` so two recipes' deps of the same kind do - not collide on a single node). -3. Waits each dep healthy using its own `recipe_meta.py` (HEALTH_PATH/HEALTH_OK/timeouts). -4. Persists `[{"recipe": "", "domain": ""}, ...]` to `$CCCI_DEPS_FILE`. -5. Deploys + tests the recipe under test as usual. -6. Tears down the dep LAST in `finally` (reverse declaration order, with `verify=True` — leaked +The orchestrator (plan §4.2; install-time provisioning is the ONLY mode): +1. Reads `DEPS` and provisions every dep **BEFORE the single deploy** of the recipe under test — + each dep at a per-run domain `-<6hex>.ci.commoninternet.net` (the 6hex is hashed from + `parent_recipe + pr + ref + dep_recipe` so two recipes' deps of the same kind do not collide on + a single node), waited healthy using the dep's own `recipe_meta.py`. +2. Persists the full per-dep identity + SSO creds dict to `$CCCI_DEPS_FILE` (jq-readable JSON, + `{"": {"domain": ..., "realm": ..., "client_secret": ..., ...}}`). +3. Deploys the recipe under test — its `install_steps.sh` reads `$CCCI_DEPS_FILE` and wires + OIDC env into that ONE deploy (no post-deploy redeploy). A dep-provisioning failure does NOT + block the run: the recipe deploys alone, generic tiers run, and `requires_deps` tests skip + with a counted reason (F2-11). +4. 
Tears down the dep LAST in `finally` (reverse declaration order, with `verify=True` — leaked deps fail the run loudly per §9 teardown sacred / F2-5 fix). -Tests access dep domains via the **`deps_apps` pytest fixture** (`tests/conftest.py`): +Tests access deps via the **`deps` pytest fixture** (`tests/conftest.py`) — entries expose +`.domain` plus the full creds dict (attribute or dict-style): ```python -def test_my_recipe_uses_keycloak(live_app, deps_apps): - assert "keycloak" in deps_apps, f"keycloak dep not deployed; {deps_apps}" - kc_domain = deps_apps["keycloak"] +@pytest.mark.requires_deps +def test_my_recipe_uses_keycloak(live_app, deps): + assert "keycloak" in deps, f"keycloak dep not deployed; {deps}" + kc_domain = deps["keycloak"].domain … ``` @@ -120,7 +128,7 @@ For OIDC-dependent recipes, the shared `runner/harness/sso.py` provides: from harness import sso creds = sso.setup_keycloak_realm( - kc_domain, # = deps_apps["keycloak"] + kc_domain, # = deps["keycloak"].domain realm="my-realm", client_id="my-client", redirect_uris=[f"https://{live_app}/*"], @@ -144,10 +152,10 @@ ARE provider-pluggable. Not every recipe is a single HTTP app. `recipe_meta.py` + a few harness mechanisms cover the harder shapes (proven on mumble, mailu, and the SSO-dependent suite): -- **`EXTRA_ENV`** — a dict **or** a `callable(domain) -> dict`. The callable form derives values from - the per-run domain (e.g. `MAIL_DOMAIN`/`HOSTNAMES` for mailu, `SANDBOX_DOMAIN` for cryptpad). Applied - at every deploy (`abra.env_set`), so a recipe enrolls with NO shared-harness change. -- **`READY_PROBE(domain) -> [...]`** — readiness signals beyond replica-convergence + the app's +- **`EXTRA_ENV`** — a dict **or** a `callable(ctx) -> dict`. The callable form derives values from + the per-run domain (`ctx.domain` — e.g. `MAIL_DOMAIN`/`HOSTNAMES` for mailu, `SANDBOX_DOMAIN` for + cryptpad). Applied at every deploy (`abra.env_set`), so a recipe enrolls with NO shared-harness change. 
+- **`READY_PROBE(ctx) -> [...]`** — readiness signals beyond replica-convergence + the app's `HEALTH_PATH`. Two probe shapes: - HTTP: `{"host": "...", "path": "/...", "ok": (200,)}` (e.g. lasuite-drive collabora WOPI discovery). - **TCP**: `{"tcp_host": "127.0.0.1", "tcp_port": 64738, "stable": 3}` — polls a socket connect N @@ -155,16 +163,16 @@ shapes (proven on mumble, mailu, and the SSO-dependent suite): service (mumble: the mumble-web sidecar serves HTTP 200 while the voice server on 64738 is still rebinding after an upgrade redeploy — the TCP probe gates the backup tier until the voice server is actually up). Runs after install AND after the upgrade chaos redeploy. -- **`CHAOS_BASE_DEPLOY = True`** — make the pinned base deploy use `--chaos` (skips abra's clean-tree + - lint gates, still deploys the explicitly-checked-out pinned version, NOT latest). Needed when an - `install_steps.sh` adds an UNTRACKED file to the recipe checkout (e.g. mumble copies a - `compose.host-ports.yml` into versions that predate it) — abra's pinned-deploy clean-tree check would - otherwise FATA. `abra.recipe_checkout` force-checks-out (`-f`) so the upgrade tier's re-checkout to - PR-head overwrites such overlays cleanly. +- **`compose.ccci.yml`** (first-class at `tests/<recipe>/compose.ccci.yml`) — a CI-only compose overlay the harness itself copies into the recipe checkout before the base deploy, automatically using `--chaos` for that deploy (the untracked file would otherwise trip abra's pinned-deploy clean-tree check). Reference it from `EXTRA_ENV`'s `COMPOSE_FILE`. Minimal, justified fallback only (e.g. ghost's 15m `start_period` grace). `abra.recipe_checkout` force-checks-out (`-f`) so the upgrade tier's re-checkout to PR-head overwrites such overlays cleanly. 
- **`install_steps.sh`** (auto-discovered at `tests//install_steps.sh`) — runs after `abra app new` + EXTRA_ENV + secret-generate, BEFORE the single deploy, with `CCCI_APP_DOMAIN` / - `CCCI_APP_ENV` / `CCCI_RECIPE` (and `CCCI_DEPS_FILE` when DEPS are provisioned at install). Use it to - drop a cc-ci-owned compose overlay into the checkout, wire dep-derived env/secrets, etc. + `CCCI_APP_ENV` / `CCCI_RECIPE` (and `CCCI_DEPS_FILE` when the recipe declares DEPS — deps are + always provisioned before the deploy). Use it to wire dep-derived env/secrets, seed config, etc. **Non-HTTP protocol tests (mumble).** Reach a TCP service published `mode: host` (via a host-ports overlay) at `127.0.0.1:` — cc-ci runs tests on-host (cc-ci-run). mumble ships a stdlib protocol @@ -227,9 +235,10 @@ RECIPE= PR= REF= SRC=recipe-maintainers/ \ ``` tests/lasuite-docs/ -├── recipe_meta.py # HEALTH_PATH="/", DEPLOY_TIMEOUT=900, EXTRA_ENV(domain) for cold-pull, +├── recipe_meta.py # HEALTH_PATH="/", DEPLOY_TIMEOUT=900, EXTRA_ENV(ctx) for cold-pull, │ # DEPS=["keycloak"] ← Phase 2 dep declaration -├── ops.py # pre_ seed hooks (volume marker for backup/restore data-integrity) +├── install_steps.sh # wires OIDC env from $CCCI_DEPS_FILE into the single deploy +├── ops.py # pre_(ctx) seed hooks (volume marker for backup/restore data-integrity) ├── test_install.py # lifecycle install overlay (Playwright frontend SPA load) ├── test_upgrade.py # lifecycle upgrade overlay (marker survives chaos redeploy) ├── test_backup.py # lifecycle backup overlay (marker captured) @@ -239,12 +248,14 @@ tests/lasuite-docs/ ├── test_health_check.py # parity port (SOURCE comment cites recipe-info file) ├── test_auth_required.py # specific: /api/v1.0/users/me/ → 401 without auth └── test_oidc_with_keycloak.py # specific: full OIDC flow against the dep keycloak (uses - # harness.sso primitives + deps_apps["keycloak"]) + # harness.sso primitives + the `deps` fixture) ``` `!testme` on a lasuite-docs PR drives the 
orchestrator to: -1. Deploy the per-run keycloak dep (`keyc-<6hex>.ci.commoninternet.net`) and wait healthy. -2. Deploy lasuite-docs (`lasu-<6hex>.ci.commoninternet.net`). +1. Provision the per-run keycloak dep (`keyc-<6hex>.ci.commoninternet.net`), wait healthy, write + creds to `$CCCI_DEPS_FILE` — BEFORE the recipe deploy. +2. Deploy lasuite-docs (`lasu-<6hex>.ci.commoninternet.net`); `install_steps.sh` wires the OIDC + env into that one deploy. 3. Run install / upgrade / backup / restore + the 3 functional tests against the shared deployment (custom tier). 4. Teardown lasuite-docs, then the keycloak dep (LAST), both with verify=True. @@ -254,12 +265,13 @@ tests/lasuite-docs/ ### Other shapes (concrete references) - **TCP / voice recipe — `tests/mumble/`**: `recipe_meta.py` (EXTRA_ENV sets - `COMPOSE_FILE=compose.yml:compose.mumbleweb.yml:compose.host-ports.yml`, `WELCOME_TEXT`/`USERS` - markers, `CHAOS_BASE_DEPLOY=True`, `READY_PROBE` TCP 64738), `install_steps.sh` (provides the - host-ports overlay to older versions), `functional/_mumble_proto.py` + the protocol/config-round-trip + `COMPOSE_FILE=compose.yml:compose.mumbleweb.yml` for the base; `UPGRADE_EXTRA_ENV` adds the + native `compose.host-ports.yml` at PR-head so 64738 is host-published on latest; private + `_WELCOME_TEXT_MARKER`/`_MAX_USERS` constants; `READY_PROBE(ctx)` TCP 64738 — phase-aware via + the live COMPOSE_FILE), `functional/_mumble_proto.py` + the protocol/config-round-trip tests, `ops.py`/`test_backup.py`/`test_restore.py` (sqlite P4). See §2.4. - **Multi-service, dep-less, in-container functional — `tests/mailu/`**: `recipe_meta.py` - (`EXTRA_ENV(domain)` with `TLS_FLAVOR=notls` + `MAIL_DOMAIN`/`HOSTNAMES`/`TRAEFIK_STACK_NAME`), + (`EXTRA_ENV(ctx)` with `TLS_FLAVOR=notls` + `MAIL_DOMAIN`/`HOSTNAMES`/`TRAEFIK_STACK_NAME`), `functional/_mailu.py` (flask-CLI helpers), `test_mailbox.py` (create→config-export read-back), `test_mail_flow.py` (in-container sendmail→doveadm delivery). 
No backupbot → P4 N/A (PARITY.md + DEFERRED.md). See §2.4. diff --git a/docs/recipe-customization.md b/docs/recipe-customization.md index f2ac579..ef2fea9 100644 --- a/docs/recipe-customization.md +++ b/docs/recipe-customization.md @@ -1,8 +1,9 @@ -# Recipe customization — review spec +# Recipe customization — reference -Status: REVIEW SPEC — describes the customization surface as it exists today (main), written so -the structure can be reviewed and potentially restructured. §8 lists known limitations and -restructuring candidates; everything before it is purely descriptive. +Status: REFERENCE — describes the customization system as restructured on branch +`restructure/recipe-custom` (the "rcust" restructure). The pre-restructure system and its defects +are documented in this file's history (commit `76a4b6b`, the review spec whose §8 R1–R9 drove the +restructure); §8 below records how each was resolved. Companion docs: `docs/testing.md` (test architecture / tier semantics), `docs/enroll-recipe.md` (step-by-step enrollment). This doc is the **complete reference** for the two questions those docs @@ -15,17 +16,18 @@ answer only partially: ## 1. 
The three customization surfaces -A recipe customizes its CI through **three distinct mechanisms** (worth noticing for the -restructure review — they are three different config languages): +A recipe customizes its CI through **three distinct mechanisms**: | Surface | Form | Examples | |---|---|---| | **Declarative settings** | Python assignments in `tests//recipe_meta.py` | `DEPLOY_TIMEOUT = 1500`, `UPGRADE_BASE_VERSION = "2.3.1+..."` | -| **Code hooks** | Callables in `recipe_meta.py`, `ops.py` functions, shell hooks | `def READY_PROBE(domain): ...`, `pre_upgrade()`, `install_steps.sh` | +| **Code hooks** | Callables in `recipe_meta.py`, `ops.py` functions, one shell hook | `def READY_PROBE(ctx): ...`, `pre_upgrade(ctx)`, `install_steps.sh` | | **File presence** | A file existing at a discovered path changes behavior | `test_upgrade.py` overlay, `functional/test_*.py`, `compose.ccci.yml` | -There is additionally a fourth, operator-facing surface: **environment variables** -(`CCCI_SKIP_GENERIC*`) that override declarative settings at run time (§4.4). +There is additionally a fourth, **operator-facing, local-dev-only** surface: environment variables +(`CCCI_SKIP_GENERIC*`) that suppress the generic floor at run time (§7). Whatever a run resolves +from all four surfaces is printed at run start as the **customization manifest** and embedded in +`results.json` under `"customization"` (§7) — one block answers "what does this recipe customize?". ## 2. 
Zero-config baseline @@ -55,105 +57,98 @@ Two locations, with precedence and a security gate between them: ``` tests/<recipe>/ # cc-ci side (repo-local mirrors the same shape) -├── recipe_meta.py # ALL declarative settings + meta callables (§4) +├── recipe_meta.py # THE config file: registry-validated keys + ctx-hooks (§4) ├── test_<op>.py # lifecycle overlay assertions, op ∈ install|upgrade|backup|restore (§5.1) -├── ops.py # pre_<op>(domain, meta) seed hooks (§5.2) -├── test_*.py # custom-tier tests (top-level, cross-cutting)(§5.3) +├── ops.py # pre_<op>(ctx) seed hooks (§5.2) ├── functional/test_*.py # custom tier: parity ports + recipe-specific (§5.3) ├── playwright/test_*.py # custom tier: UI flows (§5.3) -├── install_steps.sh # pre-deploy shell hook (§5.4) -├── setup_custom_tests.sh # deps/OIDC credential wiring hook (§5.5) -├── compose.ccci.yml # CI-only compose overlay (via install_steps) (§5.6) +├── install_steps.sh # pre-deploy shell hook (the ONLY shell hook) (§5.4) +├── compose.ccci.yml # CI-only compose overlay (first-class) (§5.5) └── PARITY.md # enrollment contract doc (human-read only) ``` +**Placement rule (custom tests):** ALL custom-tier tests live under `functional/` or +`playwright/`. A top-level `test_*.py` is a lifecycle overlay (`test_<op>.py`) and nothing else — +top-level non-lifecycle files are NOT discovered (`discovery.custom_tests`; the lifecycle-name +exclusion stays as a safety net so a misfiled `test_<op>.py` can never double-run). + Precedence (machine-docs/DECISIONS.md, implemented in `discovery.py`): - lifecycle overlay `test_<op>.py`: repo-local **wins** over cc-ci (same-name collision); the generic floor still runs additively alongside. -- custom tier `test_*.py`: **ALL** run, from both locations (no collision concept). +- custom tier (`functional/` + `playwright/`): **ALL** run, from both locations (no collision + concept). - `install_steps.sh`: repo-local > cc-ci, or none. - `ops.py` pre-op hook: cc-ci wins; repo-local consulted only if approved. 
-- `recipe_meta.py`: cc-ci only — repo-local recipes cannot set CI settings (by design; the - settings surface stays maintainer-controlled). +- `recipe_meta.py` and `compose.ccci.yml`: cc-ci only — repo-local recipes cannot set CI settings + or compose overlays (by design; those surfaces stay maintainer-controlled). ## 4. `recipe_meta.py` — complete settings reference -The single settings file. Plain Python, `exec()`d by the harness (trusted, in-repo). A key is "set" -by a top-level assignment or `def`. Unknown names are ignored silently (a recipe may keep private -constants here, e.g. mumble's `WELCOME_TEXT_MARKER` — but see §8 R6: typos in real key names are -also silently ignored). +The single settings file. Plain Python, `exec()`d by the harness in exactly ONE place: the +registry-backed loader `runner/harness/meta.py::load(recipe) -> RecipeMeta`. Every consumer — the +orchestrator (which loads once and passes the object down), the pytest `meta` fixture, lifecycle, +deps, canonical, screenshot — reads from that one loaded object. -**Loader column legend** — this is the structural finding for the review (§8 R1). There is no -single loader; six independent code paths each `exec()` the file and pick out their own keys: +**Validation (hard errors at load, before any deploy):** -| # | Loader | Keys it sees | -|---|---|---| -| L1 | `runner/run_recipe_ci.py:_load_meta` (orchestrator) | 4 base + explicit 8-key allowlist | -| L2 | `tests/conftest.py:_recipe_meta` (pytest `meta` fixture) | 4 base keys ONLY | -| L3 | `runner/harness/lifecycle.py:_recipe_extra_env` | `EXTRA_ENV` only | -| L4 | `runner/harness/lifecycle.py:_recipe_meta_flag` | boolean flags by name (`CHAOS_BASE_DEPLOY`) | -| L5 | `runner/harness/deps.py:declared_deps` | `DEPS` only | -| L6 | `runner/harness/canonical.py:is_canonical_enrolled` | `WARM_CANONICAL` only | +- A key is "set" by a top-level ALL-CAPS assignment or `def`. 
Unknown ALL-CAPS top-level names + raise `MetaError` listing the unknown name and the nearest registered key (typo gate — + misspelling `READY_PROBE` can no longer silently disable the probe). +- Type mismatches raise `MetaError`; callables are accepted only for hook-typed keys. +- **Underscore-prefixed names (`_FOO`) are recipe-private and exempt** — that's where private + constants live (e.g. mumble's `_WELCOME_TEXT_MARKER`). Lowercase names (helpers/imports) are + ignored. +- Hook callables must have the registered signature (below); a legacy-signature hook raises a + `MetaError` naming the migration, never a silent `TypeError` mid-run. -### 4.1 HTTP / health / timing (base 4 — seen by L1 AND L2) +A unit test (`tests/unit/test_meta.py`) loads every `tests/*/recipe_meta.py` through the registry, +so a typo'd key fails at PR time, not at run time. -| Key | Type / default | Meaning | Used by | + + +_This table is GENERATED from the `runner/harness/meta.py` KEYS registry by `scripts/gen-meta-docs.py` — do not edit by hand (a unit test pins the sync)._ + +| Key | Type | Default | Meaning | |---|---|---|---| -| `HEALTH_PATH` | str, `"/"` | Path probed for serving/health checks | deploy wait (`lifecycle.py`), generic `assert_serving` | -| `HEALTH_OK` | tuple, `(200, 301, 302)` | Acceptable HTTP status codes for health | same | -| `DEPLOY_TIMEOUT` | int s, `600` | Max wait for swarm convergence per deploy | `lifecycle.py`, generic ops | -| `HTTP_TIMEOUT` | int s, `300` | Max wait for HTTP health after converged | same | +| `HEALTH_PATH` | `str` | `'/'` | Path probed for serving/health checks (deploy wait + generic `assert_serving`). | +| `HEALTH_OK` | `tuple[int]` | `(200, 301, 302)` | Acceptable HTTP status codes for health. | +| `DEPLOY_TIMEOUT` | `int` | `600` | Max seconds to wait for swarm convergence per deploy. | +| `HTTP_TIMEOUT` | `int` | `300` | Max seconds to wait for HTTP health after convergence. 
| +| `BACKUP_CAPABLE` | `bool` | `None` | Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces N/A; `True` forces the tier on; unset = auto-detect. | +| `EXPECTED_NA` | `dict` | `None` | Declare an N/A rung intentional: `{rung: reason}`. The cap stands either way; only the report wording changes. | +| `READY_PROBE` | `hook` | `None` | Callable `(ctx) -> [probe, ...]` returning extra readiness probes, run after install AND after upgrade: HTTP `{host, path, ok}` or TCP `{tcp_host, tcp_port, stable}`. | +| `UPGRADE_BASE_VERSION` | `str` | `None` | Exact published tag overriding the upgrade tier's base (default: `recipe_versions[-2]`). | +| `BACKUP_VERIFY` | `hook` | `None` | Callable `(ctx) -> bool` post-backup data-capture check; `False` re-runs the backup (truncated-dump race guard), retried up to 3 attempts. | +| `UPGRADE_EXTRA_ENV` | `dict_or_hook` | `None` | Extra `.env` keys applied after the PR-head checkout, before the chaos redeploy (env that exists only at head). Dict, or callable `(ctx) -> dict`. | +| `EXTRA_ENV` | `dict_or_hook` | `{}` | Extra `.env` keys applied at EVERY deploy (base install AND upgrade old-app). Dict, or callable `(ctx) -> dict` deriving values from the per-run domain (`ctx.domain`). | +| `DEPS` | `list[str]` | `[]` | Dep recipes deployed/provisioned alongside (e.g. `["keycloak"]`); creds land in `$CCCI_DEPS_FILE`. | +| `WARM_CANONICAL` | `bool` | `False` | Enroll the recipe in the warm/canonical app system (docs/warm.md): green cold runs on LATEST advance the canonical snapshot. | +| `SCREENSHOT` | `hook` | `None` | Callable `(page, ctx)` driving Playwright to a safe, credential-free post-login view for the results-card screenshot (default: landing page). | -Example: immich sets `DEPLOY_TIMEOUT = 1500`, `HTTP_TIMEOUT = 600` (ML containers are slow). 
+ -### 4.2 Upgrade tier (loader L1) +### 4.1 The uniform hook convention — `HookCtx` -| Key | Type / default | Meaning | -|---|---|---| -| `UPGRADE_BASE_VERSION` | str (exact published tag), default `None` | **The "base pin"** — overrides the harness default base for the upgrade tier. Default base = `recipe_versions[-2]` (the previous published version); pin when that is not the PR's true predecessor (e.g. the PR is the first release on a new major, or the previous tag is known-broken). Must be an exact published tag — typos fail the base deploy. Consumed at `run_recipe_ci.py` (`prev = meta.get("UPGRADE_BASE_VERSION") or lifecycle.previous_version(recipe)`). Users: discourse, plausible. | -| `UPGRADE_EXTRA_ENV` | dict **or** callable `(domain) -> dict`, default `None` | Extra `.env` keys applied **after** the PR-head checkout, **before** the chaos redeploy (F2-14c) — for env vars that exist only at head (a new required setting introduced by the PR). Consumed in `generic.py:256`. User: mumble. | +Every recipe callable takes a single `ctx` argument (`harness/meta.py::HookCtx`, frozen): -### 4.3 Every-deploy shaping (loaders L3/L4 — NOT in the L1 allowlist) +| Field | Meaning | +|---|---| +| `ctx.domain` | the app's per-run domain | +| `ctx.base_url` | `https://<domain>` | +| `ctx.meta` | the recipe's full `RecipeMeta` | +| `ctx.deps` | provisioned dep creds (`{dep_recipe: entry}`) or `None` | +| `ctx.op` | current lifecycle op (`install`/`upgrade`/`backup`/`restore`) or `None` | -| Key | Type / default | Meaning | -|---|---|---| -| `EXTRA_ENV` | dict **or** callable `(domain) -> dict`, default `{}` | Extra `.env` keys applied at **every** deploy (base install AND upgrade old-app). Callable form derives values from the per-run domain (e.g. cryptpad's `SANDBOX_DOMAIN`). Loaded by `lifecycle.py:_recipe_extra_env` (its own `exec()`). Users: cryptpad, discourse, ghost, matrix-synapse, mattermost-lts, mumble, plausible. 
| -| `CHAOS_BASE_DEPLOY` | bool, default `False` | Base deploy uses `--chaos` so it survives untracked files in the recipe checkout (required when `install_steps.sh` copies in a `compose.ccci.yml` overlay — §5.6; implicit coupling, see §8 R7). Loaded by `lifecycle.py:_recipe_meta_flag`. Users: discourse, ghost. | +Signatures: `EXTRA_ENV(ctx)`, `UPGRADE_EXTRA_ENV(ctx)`, `READY_PROBE(ctx)`, `BACKUP_VERIFY(ctx)`, +`SCREENSHOT(page, ctx)`, ops.py `pre_(ctx)`. Dict-valued `EXTRA_ENV`/`UPGRADE_EXTRA_ENV` +(non-callable) are still fine — only the callable form takes ctx. The loader enforces the +parameter names at load time (a pre-restructure `(domain)`/`(domain, meta)` hook gets a pointed +`MetaError`, not a mid-run crash). -### 4.4 Skips and intentional N/A (loader L1) - -| Key | Type / default | Meaning | -|---|---|---| -| `SKIP_GENERIC` | list of op names or `"all"`/`"*"`, default `[]` | Suppress the generic floor for the listed ops (overlay becomes override instead of additive). Two env equivalents at run time: `CCCI_SKIP_GENERIC=1` (all ops), `CCCI_SKIP_GENERIC_=1` (one op). Currently set by **no enrolled recipe** (env form is the one used, ad hoc). | -| `EXPECTED_NA` | dict `{rung: reason}`, default `None` | Declares an N/A rung **intentional** (e.g. `{"backup": "stateless, nothing to back up"}`). Undeclared N/A is reported as an *unintentional coverage gap*. Both cap the achievable level — declaring does not un-cap, it only changes the report wording (`results.py`). User: custom-html-tiny. | -| `BACKUP_CAPABLE` | bool, default auto-detect | Overrides the backup-tier capability detection (scan of recipe compose files for `backupbot.backup` labels, `generic.py:34`). `False` forces N/A; `True` forces the tier on. Users: custom-html-bkp-bad/rst-bad (harness self-test recipes). 
| - -### 4.5 Readiness & data-verification hooks (loader L1, callable values) - -| Key | Type / default | Meaning | -|---|---|---| -| `READY_PROBE` | callable `(domain) -> [probe, ...]`, default `None` | Extra readiness probes run after install AND after upgrade, before that tier's assertions. Probe dicts: HTTP `{host, path, ok}` or TCP `{tcp_host, tcp_port, stable}` (`stable`: must stay connectable across 3 checks — for UDP-adjacent voice ports etc.). Consumed at `lifecycle.py:516`. Users: lasuite-drive, mumble (TCP voice port). | -| `BACKUP_VERIFY` | callable `(domain) -> bool`, default `None` | Post-backup data-capture check, retried — guards the truncated-dump race (backup snapshot taken before the seeded marker row hit disk). Return `False` → retry the backup, then fail. Users: discourse, ghost. | - -### 4.6 Dependencies / SSO (loaders L5 + L1) - -| Key | Type / default | Meaning | -|---|---|---| -| `DEPS` | list of recipe names, default `[]` | Dep recipes deployed alongside (e.g. `["keycloak"]`). Dep domain is `-<6hex>`, hashed from (parent, pr, ref, dep) — collision-free per run. Creds land in `$CCCI_DEPS_FILE` (JSON); tests use the `deps_apps` fixture; teardown deps LAST. Deploy-count guard becomes `1 + len(DEPS)`. Loaded by `deps.py:declared_deps`. Users: lasuite-docs/-drive/-meet. | -| `OIDC_AT_INSTALL` | bool, default `False` | Provision deps **before** the single base deploy so `install_steps.sh` can wire OIDC env into that one deploy (reads `$CCCI_DEPS_FILE`). Default (legacy) is post-deploy provisioning + a `setup_custom_tests.sh` redeploy. Consumed at `run_recipe_ci.py:514`. Users: lasuite-drive, lasuite-meet. | - -### 4.7 Warm-canonical enrollment (loader L6) - -| Key | Type / default | Meaning | -|---|---|---| -| `WARM_CANONICAL` | bool, default `False` | Enrolls the recipe in the warm/canonical app system (`docs/warm.md`): green COLD runs on LATEST advance the canonical snapshot; the nightly sweep iterates enrolled recipes. 
Loaded by `canonical.py:is_canonical_enrolled`. User: custom-html. | - -### 4.8 Cosmetic (BROKEN — see §8 R2) - -| Key | Type / default | Meaning | -|---|---|---| -| `SCREENSHOT` | callable `(page, domain, meta) -> None` | Drives Playwright to a safe post-login view for the results-card screenshot (default: landing page). **Currently unreachable from the CI path**: `screenshot.py:41` reads it from the meta dict the orchestrator passes (`run_recipe_ci.py:1056`), but the L1 allowlist never loads `SCREENSHOT`, so the hook is always `None`. No recipe sets it (consistent with it never having worked). | +Worked hook examples: cryptpad (`EXTRA_ENV(ctx)` derives `SANDBOX_DOMAIN` from `ctx.domain`), +mumble (`READY_PROBE(ctx)` TCP voice-port probe, `UPGRADE_EXTRA_ENV(ctx)` adds a head-only compose +overlay), ghost/discourse (`BACKUP_VERIFY(ctx)` dump-capture check). ## 5. Writing custom tests & hooks @@ -166,104 +161,122 @@ test runs additively against the same state. Conventions (see `tests/immich/test_backup.py` etc.): - use the `live_app` fixture (asserts `CCCI_APP_DOMAIN` is set, yields the domain) -- use the `meta` fixture for HEALTH_*/timeouts (note: only the 4 base keys — §8 R3) -- read op context from `$CCCI_OP_STATE_FILE` (JSON written by the orchestrator after the op: - versions, artifact paths) +- use the `meta` fixture — the recipe's FULL validated `RecipeMeta` (attribute access) +- use the `op_state` fixture for op context (versions, `snapshot_id`, artifact paths — the + orchestrator's run-scoped op record; skips with a clear reason outside an orchestrator run) - execute in-container checks via `harness.lifecycle.exec_in_app(domain, service, cmd)` ### 5.2 Pre-op seed hooks — `ops.py` -`def pre_(domain, meta)` callables, imported and called by the orchestrator **before** -performing the op. This is where data gets seeded so the post-op overlay can assert on it: +`def pre_(ctx)` callables, imported and called by the orchestrator **before** performing the +op. 
This is where data gets seeded so the post-op overlay can assert on it: ```python # tests/immich/ops.py (pattern) -def pre_upgrade(domain, meta): _psql(domain, "INSERT ... 'upgrade-survives'") -def pre_backup(domain, meta): _psql(domain, "INSERT ... 'original'") -def pre_restore(domain, meta): _psql(domain, "DROP TABLE ci_marker") # damage, restore must undo +def pre_upgrade(ctx): _psql(ctx.domain, "INSERT ... 'upgrade-survives'") +def pre_backup(ctx): _psql(ctx.domain, "INSERT ... 'original'") +def pre_restore(ctx): _psql(ctx.domain, "DROP TABLE ci_marker") # damage, restore must undo ``` Seed → op → assert is the whole pattern: `pre_backup` writes a marker, the orchestrator backs up, `pre_restore` destroys it, the orchestrator restores, `test_restore.py` asserts the marker is back. -### 5.3 Custom tier — `functional/`, `playwright/`, top-level `test_*.py` +### 5.3 Custom tier — `functional/` and `playwright/` ONLY -All non-lifecycle `test_*.py` (discovery: `discovery.py:custom_tests`, recursive over the -top-level dir + `functional/` + `playwright/`; files named `test_<op>.py` excluded). Run in the -CUSTOM tier, after restore, against the post-upgrade (PR-head) app. ALL discovered files run — -cc-ci's and (if HC2-approved) repo-local's, additively. +All custom-tier tests live under `tests/<recipe>/functional/` or `tests/<recipe>/playwright/` +(discovery: `discovery.custom_tests`; the placement rule, §3). Run in the CUSTOM tier, after +restore, against the post-upgrade (PR-head) app. ALL discovered files run — cc-ci's and (if +HC2-approved) repo-local's, additively. Enrollment contract (`docs/enroll-recipe.md`): ≥2 NEW functional tests beyond ports of existing upstream checks; ported tests carry `SOURCE:` comments. Playwright tests get the shared browser/harness helpers (`harness.browser`); SSO recipes get `harness.sso` -(`setup_keycloak_realm` — idempotent, `oidc_password_grant` — provider-pluggable). 
+(`setup_keycloak_realm` — idempotent, `oidc_password_grant` — provider-pluggable). The documented +import toolbox for custom tests is `from harness import lifecycle, sso, browser`. -Tests gate on deps via `CCCI_DEPS_READY` (skip-with-reason when `0`; the skip is counted and -fails the run if deps were declared but unprovisionable — `run_recipe_ci.py:816`). +Tests needing deps use the `deps` fixture (entries expose `.domain` plus the full creds dict) and +carry `@pytest.mark.requires_deps` — when dep provisioning failed they skip with reason +`deps-not-ready` and the skip count is reported and FAILS a declared-deps run (F2-11; a green exit +must not mask an unrun SSO test). Fixtures replace direct `os.environ` reads — after the +restructure no recipe test parses env by hand. ### 5.4 Pre-deploy shell hook — `install_steps.sh` -Runs after `abra app new` + `EXTRA_ENV` application + secret generation, **before** the base -deploy. For setup that must precede the first deploy: writing extra config files into the recipe -checkout, copying in a `compose.ccci.yml` overlay (§5.6), editing `.env` beyond simple key=val. +The ONLY shell hook. Runs after `abra app new` + `EXTRA_ENV` application + secret generation, +**before** the single base deploy. For setup that must precede the first deploy: writing extra +config files into the recipe checkout, editing `.env` beyond simple key=val, and — for recipes +with `DEPS` — wiring dep-derived OIDC env into the deploy (deps are always provisioned BEFORE the +deploy; install-time wiring is the only mode, so there is exactly one deploy and no post-deploy +redeploy hook). Env contract: `CCCI_APP_DOMAIN`, `CCCI_RECIPE`, `CCCI_APP_ENV` (path to the app's `.env`), and — -when `OIDC_AT_INSTALL` deps exist — `CCCI_DEPS_FILE`. Must locate the recipe checkout -ABRA_DIR-aware: `RECIPE_DIR="${ABRA_DIR:-${HOME}/.abra}/recipes/${CCCI_RECIPE}"` (per-run -`ABRA_DIR` since the concurrency restructure — a hardcoded `~/.abra` writes to the wrong tree). 
+when `DEPS` is declared — `CCCI_DEPS_FILE` (jq-readable JSON of dep creds/URLs; see +lasuite-drive/-meet/-docs for the pattern). Must locate the recipe checkout ABRA_DIR-aware: +`RECIPE_DIR="${ABRA_DIR:-${HOME}/.abra}/recipes/${CCCI_RECIPE}"` (per-run `ABRA_DIR` since the +concurrency restructure — a hardcoded `~/.abra` writes to the wrong tree). Graceful-generic rule: a recipe needing a hook but not shipping one simply fails the generic install — a correct reported outcome, not a harness error. -### 5.5 Deps credential wiring — `setup_custom_tests.sh` +### 5.5 CI-only compose overlay — `compose.ccci.yml` -For legacy (post-deploy) deps provisioning: runs after deps are up, reads `$CCCI_DEPS_FILE` -(jq-readable JSON of dep creds/URLs), wires OIDC config via `abra app config set` + secrets, and -redeploys. With `OIDC_AT_INSTALL = True` this hook is unnecessary (wiring happens in -`install_steps.sh` before the only deploy) — preferred for new enrollments (one deploy, no -deploy-count exception). +**First-class:** if `tests/<recipe>/compose.ccci.yml` exists, the harness itself copies it into +the recipe checkout (ABRA_DIR-aware) before the base deploy and automatically uses `--chaos` for +that deploy (the untracked file would otherwise trip abra's clean-tree gate). No +`install_steps.sh` copy boilerplate, no flag to remember (the old `CHAOS_BASE_DEPLOY` ⇄ overlay +coupling is gone). The overlay is cc-ci-owned only. -### 5.6 CI-only compose overlay — `compose.ccci.yml` +Policy unchanged: overlays are a minimal, justified fallback (ghost's is a 15m `start_period` +grace — a literal, because abra validates `start_period` before env substitution). Reference the +overlay from `EXTRA_ENV`'s `COMPOSE_FILE` as usual. Users: ghost, discourse. -Not auto-discovered: `install_steps.sh` copies it into the recipe checkout, and the recipe must -set `CHAOS_BASE_DEPLOY = True` so the base deploy (`--chaos`) tolerates the untracked file. 
-Policy: minimal, justified fallback only (ghost's is a 15m `start_period` grace — a literal, -because abra validates `start_period` before env substitution). The overlay is cc-ci-owned even -though it rides in the recipe checkout. +### 5.6 Environment & fixture contract (what custom code can read) -### 5.7 Environment contract summary (what custom code can read) +Pytest fixtures (`tests/conftest.py` — the single fixture file): + +| Fixture | Yields | +|---|---| +| `recipe` | the recipe name (`$RECIPE`) | +| `meta` | the FULL validated `RecipeMeta` (single loader) | +| `live_app` | the shared deployment's domain (asserts it exists) | +| `op_state` | the orchestrator's op-context dict (skips cleanly outside a run) | +| `deps` | `{dep_recipe: entry}` — entries expose `.domain` + full SSO creds | + +Environment (hooks/shell, and approved repo-local code): | Var | Set for | Meaning | |---|---|---| | `CCCI_APP_DOMAIN` | all tests + hooks | the app's per-run domain | | `CCCI_BASE_URL` | approved repo-local code | `https://` | | `CCCI_RECIPE`, `CCCI_APP_ENV` | `install_steps.sh` | recipe name, app `.env` path | -| `CCCI_OP_STATE_FILE` | overlay tests | JSON op context (versions, artifacts) | -| `CCCI_DEPS_FILE` | deps hooks + tests | JSON dep creds dict | -| `CCCI_DEPS_READY` / `CCCI_DEPS_NOT_READY_REASON` | custom tier | gate SSO tests, skip-with-reason | +| `CCCI_OP_STATE_FILE` | overlay tests (via `op_state`) | JSON op context (versions, artifacts) | +| `CCCI_DEPS_FILE` | `install_steps.sh` + harness | JSON dep creds dict | +| `CCCI_DEPS_READY` / `CCCI_DEPS_NOT_READY_REASON` | custom tier (via `requires_deps`) | gate SSO tests, skip-with-reason | ## 6. Run-model context (what the settings plug into) One deploy chain per run (full detail: `docs/testing.md` §2): ``` +[DEPS? provision deps FIRST → $CCCI_DEPS_FILE] deploy BASE (UPGRADE_BASE_VERSION or recipe_versions[-2]; EXTRA_ENV; install_steps.sh; - CHAOS_BASE_DEPLOY?; OIDC_AT_INSTALL deps first?) 
+ compose.ccci.yml auto-copied + auto-chaos) → INSTALL tier (READY_PROBE; generic + overlay asserts) - → pre_upgrade → chaos-deploy PR HEAD (UPGRADE_EXTRA_ENV) + → pre_upgrade(ctx) → chaos-deploy PR HEAD (UPGRADE_EXTRA_ENV) → UPGRADE tier (READY_PROBE; version-label == head_ref) - → pre_backup → backup (BACKUP_CAPABLE; BACKUP_VERIFY) + → pre_backup(ctx) → backup (BACKUP_CAPABLE; BACKUP_VERIFY) → BACKUP tier - → pre_restore → restore + → pre_restore(ctx) → restore → RESTORE tier - → CUSTOM tier (functional/ + playwright/; deps via CCCI_DEPS_*) + → CUSTOM tier (functional/ + playwright/; deps via the `deps` fixture) + → SCREENSHOT (best-effort, never affects the verdict) → teardown (deps LAST) ``` Deploy-count guard (DG4.1): exactly `1 + len(DEPS)` deploys per run (chaos redeploys don't count); the per-run counter file is keyed by run since the concurrency restructure. -## 7. Local iteration +## 7. Local iteration, the manifest, and the dev-only escape hatch ``` RECIPE= PR= REF= SRC=recipe-maintainers/ \ @@ -273,81 +286,75 @@ RECIPE= PR= REF= SRC=recipe-maintainers/ \ (`docs/enroll-recipe.md` §5 for the full loop, including dep teardown caveats.) -## 8. Known limitations & restructuring candidates +**Customization manifest.** Every run prints, right after meta load + discovery, one block: -The review section. Ordered by how much they'd shape a restructure. +``` +===== customization manifest: ===== +meta (non-default): DEPLOY_TIMEOUT=1500 DEPS=['keycloak'] EXTRA_ENV='' +hooks: ops.py[pre_backup,pre_upgrade](cc-ci) install_steps.sh(cc-ci) compose.ccci.yml(cc-ci) +overlays: test_backup.py(cc-ci) test_restore.py(repo-local) +custom tests: functional/=5 playwright/=2 (cc-ci) +env overrides: (none) +``` -**R1 — Six divergent meta loaders (the core drift hazard).** §4's L1–L6: every loader re-`exec()`s -`recipe_meta.py` and cherry-picks its own keys. 
Adding a key means knowing *which* loader to touch -(or that you must extend the L1 allowlist — `SCREENSHOT` proves people don't, R2). Two conventions -coexist: L1's explicit allowlist vs L3–L6's ad-hoc `ns.get(...)` which silently bypasses it. -*Candidate:* one `harness.meta.load(recipe) -> RecipeMeta` with a declarative key registry -(name, type, default, validator, consumer) as the single source of truth; L1–L6 become lookups -into the one loaded object; the registry also generates §4 of this doc (kills doc drift, R5). +The same dict is embedded in `results.json` under `"customization"`. It is pure presentation — +built from the SAME discovery/meta calls the run uses (so it cannot disagree with what executes, +and it honors the HC2 gate) — and never influences a verdict. -**R2 — `SCREENSHOT` is a dead knob.** Fully implemented consumer (`screenshot.py`), documented -hook contract, never reachable: the orchestrator's allowlist omits it, so the dict passed at -`run_recipe_ci.py:1056` can never contain it. Direct evidence of R1. *Candidate:* fix trivially by -adding to the allowlist — or delete the hook path if post-login screenshots aren't wanted; decide -during the restructure. +**Dev-only generic skip.** `CCCI_SKIP_GENERIC=1` (all ops) / `CCCI_SKIP_GENERIC_=1` (one op) +suppress the generic floor — a LOCAL-DEV-ONLY escape hatch for iterating on one tier. There is no +declarative equivalent (the old `SKIP_GENERIC` meta key is deleted). If the env form is active in +a CI (drone) run, the run prints a loud `!!` warning and the manifest records it. -**R3 — The pytest `meta` fixture sees 4 keys.** `tests/conftest.py:_recipe_meta` loads only -HEALTH_*/timeouts. An overlay test wanting e.g. `EXPECTED_NA` or a recipe constant must re-exec -the file itself. Probably intended minimalism, but it's a third key-set to keep in sync. -*Folds into R1.* +## 8. 
Restructure outcomes (the review spec's R1–R9) -**R4 — Settings split across three config languages** (§1): recipe_meta keys, file-presence -(`install_steps.sh` existing changes deploy behavior), and run-time env (`CCCI_SKIP_GENERIC*`). -A reviewer asking "what does this recipe customize?" must check all three. *Candidate:* keep the -three surfaces (they serve different actors) but make the run header log a single resolved -"customization manifest" per run: every non-default key + every discovered hook file + every -CCCI_* override, in one block. +How each defect identified in the review spec (commit `76a4b6b` §8) was resolved: -**R5 — Reference-doc drift already happened.** `docs/testing.md` documents 6 meta keys, -`docs/enroll-recipe.md` shows others by example; neither is complete (18 keys exist). This doc is -now complete but handwritten — it will drift too. *Candidate:* generate the key table from the R1 -registry (test asserts doc ⊆ registry). +- **R1 — six divergent meta loaders → RESOLVED.** One registry-backed loader + (`harness/meta.py::load`), the only `exec()` of `recipe_meta.py`. The orchestrator loads once + and passes the `RecipeMeta` down; conftest/lifecycle/deps/canonical all read the one object. +- **R2 — dead `SCREENSHOT` knob → RESOLVED (kept + fixed).** The registry replaced the allowlist + that orphaned it; the orchestrator path now delivers the hook to `screenshot.py` + (proven end-to-end by `tests/unit/test_screenshot.py::test_screenshot_reachable_through_real_load_path`). +- **R3 — 4-key pytest `meta` fixture → RESOLVED.** The fixture returns the full validated + `RecipeMeta`. +- **R4 — three config languages → MITIGATED by the manifest** (§7): the surfaces stay (they serve + different actors), but every run resolves them into one visible block + results key. 
+- **R5 — reference-doc drift → RESOLVED.** §4's key table is generated from the registry + (`scripts/gen-meta-docs.py`); a unit test fails CI on drift; `testing.md`/`enroll-recipe.md` + point here instead of keeping partial lists. +- **R6 — silent typos → RESOLVED.** Unknown ALL-CAPS keys and type mismatches are hard + `MetaError`s; private constants are underscore-prefixed (exempt). +- **R7 — `compose.ccci.yml` ⇄ `CHAOS_BASE_DEPLOY` coupling → RESOLVED.** The overlay is + first-class: harness-copied, auto-chaos. The flag is deleted. +- **R8 — zero-user `SKIP_GENERIC` meta key → RESOLVED (deleted).** Env form remains, documented + dev-only, loudly flagged in CI runs (§7). +- **R9 — `recipe_meta.py` is code, not config → REJECTED by decision.** No data/hooks file split: + registry validation gets the value (typed, validated keys) at lower cost; one file per recipe + remains the single config place. The expressiveness need is real (cryptpad derives env from the + per-run domain). -**R6 — No schema validation / silent typos.** Unknown top-level names in `recipe_meta.py` are -ignored, which is load-bearing (recipes keep private constants there: mumble's -`WELCOME_TEXT_MARKER`, `MAX_USERS`). Consequence: misspelling `READY_PROBE` as `READINESS_PROBE` -silently disables the probe — the run goes green with less coverage, the worst failure mode for a -CI harness. *Candidate:* with the R1 registry, warn (not fail) on ALL-CAPS top-level names that -are not registered and not referenced by the recipe's own tests; or namespace private constants -(`_WELCOME_TEXT_MARKER`). - -**R7 — `compose.ccci.yml` ⇄ `CHAOS_BASE_DEPLOY` implicit coupling.** The overlay only works if -the recipe *also* sets the flag; forgetting it fails the base deploy with an abra -untracked-files error far from the cause. 
*Candidate:* if `install_steps.sh` exists alongside a -`compose.ccci.yml`, the harness could auto-enable chaos for the base deploy (or at least assert -the flag and fail with a pointed message). - -**R8 — `SKIP_GENERIC` (meta form) has zero users.** Only the env-var form is used, ad hoc. Either -the meta key earns its place (first real user) or it's surface to delete in the restructure. - -**R9 — `recipe_meta.py` is code, not config.** Five keys take callables (`EXTRA_ENV`, -`UPGRADE_EXTRA_ENV`, `READY_PROBE`, `BACKUP_VERIFY`, `SCREENSHOT`), so the file must stay an -`exec()`d Python module — it can't be validated as data, serialized into results, or diffed -declaratively. This is a real expressiveness need (cryptpad derives `SANDBOX_DOMAIN` from the -per-run domain), not an accident. *Candidate if restructuring:* split data keys (TOML-able, -schema-validated) from a `hooks.py` (callables only) — but weigh against the cost of two files -per recipe; the R1 registry gets most of the value without the split. +Also settled in the restructure: install-time deps provisioning is the ONLY mode (the legacy +post-deploy `setup_custom_tests.sh` machinery and its extra redeploy are deleted); the custom-test +placement rule (§3); the uniform ctx hook convention (§4.1); the consolidated fixture surface +(§5.6 — `deps` replaces `deps_apps`+`deps_creds`; dead `deployed`/`deployed_app`/`app_domain` +fixtures deleted). ## 9. 
File / symbol index | Concern | Where | |---|---| -| Orchestrator meta loader (L1, allowlist) | `runner/run_recipe_ci.py:250` `_load_meta` | -| Pytest meta fixture (L2) | `tests/conftest.py` `_recipe_meta` | -| `EXTRA_ENV` loader (L3) | `runner/harness/lifecycle.py:114` `_recipe_extra_env` | -| Boolean-flag loader (L4) | `runner/harness/lifecycle.py:132` `_recipe_meta_flag` | -| `DEPS` loader (L5) | `runner/harness/deps.py:37` `declared_deps` | -| `WARM_CANONICAL` loader (L6) | `runner/harness/canonical.py:36` `is_canonical_enrolled` | -| Overlay/custom/hook discovery + HC2 gate | `runner/harness/discovery.py` | +| THE meta loader + key registry + `HookCtx` + `MetaError` | `runner/harness/meta.py` (`load`, `KEYS`, `check_hook_signature`) | +| Generated key table | `scripts/gen-meta-docs.py` → §4 above (sync pinned by `tests/unit/test_meta.py`) | +| Customization manifest | `runner/harness/manifest.py` (`build`, `render`), printed by `runner/run_recipe_ci.py` | +| Overlay/custom/hook discovery + HC2 gate + placement rule | `runner/harness/discovery.py` | | HC2 allowlist | `tests/repo-local-approved.txt` | | Generic assertions + `BACKUP_CAPABLE` detect | `runner/harness/generic.py` | -| `READY_PROBE` / `CHAOS_BASE_DEPLOY` consumption | `runner/harness/lifecycle.py:516` / `:283` | +| `compose.ccci.yml` auto-copy + auto-chaos | `runner/harness/lifecycle.py` (`provide_ccci_overlay`, `deploy_app`) | +| `READY_PROBE` consumption | `runner/harness/lifecycle.py` (`wait_ready_probes`) | | `EXPECTED_NA` reporting | `runner/harness/results.py` | -| Dead `SCREENSHOT` consumer | `runner/harness/screenshot.py:36`, called `run_recipe_ci.py:1056` | -| Skip-generic logic (meta + env) | `runner/run_recipe_ci.py:285` | -| Worked examples | `tests/ghost/` (overlay+chaos), `tests/mumble/` (TCP probe, UPGRADE_EXTRA_ENV), `tests/lasuite-drive/` (DEPS+OIDC_AT_INSTALL), `tests/immich/` (ops.py seed pattern) | +| `SCREENSHOT` consumer | `runner/harness/screenshot.py` | +| Fixtures 
(`recipe`/`meta`/`live_app`/`op_state`/`deps`) + F2-11 skip-report | `tests/conftest.py` | +| Skip-generic env logic (dev-only) | `runner/run_recipe_ci.py` (`_skip_generic`) | +| Unit tests pinning all of the above | `tests/unit/test_meta.py`, `test_manifest.py`, `test_discovery*.py` | +| Worked examples | `tests/ghost/` (overlay+compose.ccci.yml), `tests/mumble/` (TCP probe, UPGRADE_EXTRA_ENV, private `_` constants), `tests/lasuite-drive/` (DEPS + install-time OIDC wiring), `tests/immich/` (ops.py seed pattern) | diff --git a/docs/testing.md b/docs/testing.md index ed56a28..7a0092c 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -16,12 +16,13 @@ year from now, this is the one rule that should still hold. ship as the floor for every recipe. No SSO provider, no external deps, no per-recipe state scaffolding — just "does this recipe deploy and lifecycle work?" - **Generic must not depend on custom.** A custom test or a custom-tests setup (e.g. SSO/OIDC dep - provisioning) **can never be a precondition for the generic tier to pass.** Concretely: the - orchestrator runs all generic tiers (install → upgrade → backup → restore) against the recipe - **alone, with no deps deployed**, then runs the `setup_custom_tests` step (deps + post-deps - wiring) only after — and a failure there is **isolated** to the custom tier (tests tagged - `@pytest.mark.requires_deps` skip with reason `"deps-not-ready"`; generic tier reports - normally). See `cc-ci-plan/plan-sso-dep-testing.md` for the SSO-dep specifics. 
+ provisioning) **can never be a precondition for the generic tier to pass.** Concretely: deps are + provisioned BEFORE the single deploy (so `install_steps.sh` can wire OIDC env into that one + deploy), but a dep-provisioning failure is **isolated** to the custom tier — the recipe still + deploys alone, every generic tier (install → upgrade → backup → restore) runs normally, and + tests tagged `@pytest.mark.requires_deps` skip with reason `"deps-not-ready"` (a counted, + reported skip — F2-11). A deps failure can never fail or block a generic tier. See + `cc-ci-plan/plan-sso-dep-testing.md` for the SSO-dep specifics. - **Custom tests are the thoroughness layer — and they cost more to maintain.** They're more thorough (authenticated APIs, multi-app flows, version-specific browser selectors, helper scripts, state-management) and *therefore* take more maintenance: an SSO provider's admin API @@ -113,9 +114,11 @@ repo-local /tests/test_.py (upstream-authoritative; gated Only ONE overlay source wins for a given op (repo-local > cc-ci); the generic floor runs **in addition** unless explicitly opted out. -**Custom (non-lifecycle) `test_*.py`** — any other `test_*.py` (e.g. `test_sso.py`) is **opt-in and -additive**: it has no generic equivalent and runs only when present, discovered from both locations -(repo-local gated by the HC2 allowlist). +**Custom (non-lifecycle) tests** — e.g. `functional/test_sso.py` — are **opt-in and additive**: +they have no generic equivalent and run only when present, discovered from both locations +(repo-local gated by the HC2 allowlist). Placement rule: custom tests live ONLY under +`functional/` or `playwright/`; a top-level `test_*.py` is a lifecycle overlay and nothing else +(top-level non-lifecycle files are not discovered). ### Pre-op seed hooks (per-recipe `ops.py`) @@ -127,35 +130,38 @@ etc.). 
Since the orchestrator owns the op, overlays place their seed in an optio # tests/<recipe>/ops.py from harness import lifecycle -def pre_upgrade(domain, meta): +def pre_upgrade(ctx): # seed a marker before the harness performs the upgrade - lifecycle.exec_in_app(domain, ["sh", "-c", "echo upgrade-survives > /path/marker"]) + lifecycle.exec_in_app(ctx.domain, ["sh", "-c", "echo upgrade-survives > /path/marker"]) -def pre_backup(domain, meta): +def pre_backup(ctx): # establish a known "original" state before the backup op captures it - lifecycle.exec_in_app(domain, ["sh", "-c", "echo original > /path/marker"]) + lifecycle.exec_in_app(ctx.domain, ["sh", "-c", "echo original > /path/marker"]) -def pre_restore(domain, meta): +def pre_restore(ctx): # diverge from the backed-up state so a successful restore is observable - lifecycle.exec_in_app(domain, ["sh", "-c", "echo mutated > /path/marker"]) + lifecycle.exec_in_app(ctx.domain, ["sh", "-c", "echo mutated > /path/marker"]) ``` The orchestrator imports `ops.py` in-process (with the recipe dir on `sys.path`, so it can import -sibling helpers like `kc_admin.py`) and calls `pre_<op>(domain, meta)` immediately before performing -the op. Then `test_<op>.py` asserts the post-op state. See `tests/custom-html/` (volume marker), +sibling helpers like `kc_admin.py`) and calls `pre_<op>(ctx)` immediately before performing the +op — `ctx` is the uniform `HookCtx` every recipe hook receives (`.domain`, `.base_url`, `.meta`, +`.deps`, `.op` — `docs/recipe-customization.md` §4.1). Then `test_<op>.py` asserts the post-op +state. See `tests/custom-html/` (volume marker), `tests/keycloak/` (admin-API/realm), `tests/matrix-synapse/`, `tests/lasuite-docs/` (psql in the `db` service) for worked examples. ### Opting out of the generic floor (LOCAL-DEV-ONLY) -The generic runs additively by default. To skip it (e.g. 
when an overlay's recipe-specific check -fully replaces the generic's mechanism check) set, in increasing specificity: +The generic runs additively by default and there is **no declarative opt-out** — no recipe can +ship without the floor. For local iteration only (e.g. re-running one tier while developing an +overlay), two env escape hatches exist: - **env `CCCI_SKIP_GENERIC=1`** — skip generic for ALL ops (run-wide). - **env `CCCI_SKIP_GENERIC_<OP>=1`** — e.g. `CCCI_SKIP_GENERIC_UPGRADE=1` — skip generic for that one op. -- **declarative in `recipe_meta.py`** — `SKIP_GENERIC = ["upgrade"]` (per-op) or `SKIP_GENERIC = ["all"]`. -Opting out is per-recipe and visible in git — not a hidden global. Truthy = `1`/`true`/`yes`/`on`. +Truthy = `1`/`true`/`yes`/`on`. If either is active in a CI (drone) run, the run prints a loud +`!!` warning and the customization manifest records it (`docs/recipe-customization.md` §7). ## Repo-local trust gate (HC2) — default-deny @@ -215,12 +221,14 @@ installs and stays 1. `tests/custom-html/test_upgrade.py`). Assert the POST-op state — reading app state through `lifecycle.exec_in_app` (volume/DB) for data checks, not HTTP. Generic + your overlay both run. 3. If the overlay needs to seed PRE-op state (data-continuity markers, the backup→restore - divergence), drop `tests/<recipe>/ops.py` with `pre_upgrade/pre_backup/pre_restore(domain, meta)`. + divergence), drop `tests/<recipe>/ops.py` with `pre_upgrade/pre_backup/pre_restore(ctx)`. 4. If the recipe needs install-time setup, add `tests/<recipe>/install_steps.sh`. -5. Set per-recipe knobs (health path, timeouts, opt-out) in `recipe_meta.py`. +5. Set per-recipe knobs (health path, timeouts) in `recipe_meta.py`. 6. **Never weaken or skip an assertion to make a run pass** — a red tier is information. 
-Per-recipe config (`tests//recipe_meta.py`, all optional): +Per-recipe config (`tests//recipe_meta.py`, all optional — the COMPLETE key reference is +the generated table in `docs/recipe-customization.md` §4; unknown keys are hard errors, private +constants are underscore-prefixed): ```python HEALTH_PATH = "/realms/master" # path that returns a healthy status (default "/") @@ -228,8 +236,7 @@ HEALTH_OK = (200,) # acceptable status codes (default 200/301/302) DEPLOY_TIMEOUT = 600 # seconds for services to converge (default 600) HTTP_TIMEOUT = 600 # seconds for the app to answer (default 300) BACKUP_CAPABLE = True # override backup-capability auto-detection (default: scan compose) -EXTRA_ENV = {"KEY": "value"} # or EXTRA_ENV(domain) -> dict; extra .env keys set at deploy -SKIP_GENERIC = ["upgrade"] # per-recipe declarative opt-out from generic ops ("all" = every op) +EXTRA_ENV = {"KEY": "value"} # or EXTRA_ENV(ctx) -> dict; extra .env keys set at deploy ``` The harness self-tests for discovery / precedence / the HC2 allowlist live in `tests/unit/` (run: diff --git a/runner/harness/canonical.py b/runner/harness/canonical.py index a80d299..e176369 100644 --- a/runner/harness/canonical.py +++ b/runner/harness/canonical.py @@ -30,17 +30,13 @@ import subprocess import time from . import abra, warm, warmsnap +from . import meta as meta_mod def is_enrolled(recipe: str) -> bool: - """True if `tests//recipe_meta.py` sets `WARM_CANONICAL = True`. Missing meta → False.""" - path = os.path.join(os.path.dirname(__file__), "..", "..", "tests", recipe, "recipe_meta.py") - if not os.path.exists(path): - return False - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - return bool(ns.get("WARM_CANONICAL")) + """True if `tests//recipe_meta.py` sets `WARM_CANONICAL = True`. Missing meta → False. 
+ Reads through the single meta loader (rcust P1 — no per-module exec).""" + return bool(meta_mod.load(recipe).WARM_CANONICAL) def canonical_domain(recipe: str) -> str: @@ -51,7 +47,7 @@ def canonical_domain(recipe: str) -> str: def enrolled_recipes() -> list[str]: """All recipes enrolled as data-warm canonicals (recipe_meta.WARM_CANONICAL=True), sorted. Used by the WC6 nightly sweep to know which canonicals to refresh via a green cold run on latest.""" - tests_dir = os.path.join(os.path.dirname(__file__), "..", "..", "tests") + tests_dir = meta_mod.TESTS_DIR out = [] try: for name in sorted(os.listdir(tests_dir)): diff --git a/runner/harness/deps.py b/runner/harness/deps.py index bd679a1..f0fc97e 100644 --- a/runner/harness/deps.py +++ b/runner/harness/deps.py @@ -20,7 +20,7 @@ Per Phase-2 DECISIONS: Run state: - `$CCCI_DEPS_FILE` — JSON file written by the orchestrator after each dep deploys; each entry is `{"recipe": "", "domain": "", "version": null}`. Tests access via the - `deps_apps` pytest fixture defined in `tests/conftest.py`. + `deps` pytest fixture defined in `tests/conftest.py`. """ from __future__ import annotations @@ -31,19 +31,7 @@ import os from collections.abc import Iterable from . import lifecycle, naming - - -def declared_deps(recipe: str) -> list[str]: - """Read `DEPS` from `tests//recipe_meta.py` — a list of recipe names this recipe needs - deployed alongside it. Returns [] if none.""" - path = os.path.join(os.path.dirname(__file__), "..", "..", "tests", recipe, "recipe_meta.py") - if not os.path.exists(path): - return [] - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - deps = ns.get("DEPS") or [] - return [str(d) for d in deps if d] +from . import meta as meta_mod def dep_domain(parent_recipe: str, pr: str, ref: str | None, dep_recipe: str) -> str: @@ -62,11 +50,11 @@ def write_run_state(deps_state) -> None: """Write the deps state file ($CCCI_DEPS_FILE). 
Two shapes supported (canonical=keyed dict): 1. **Legacy list-of-entries:** `[{"recipe": "", "domain": ""}, ...]` (Q2.3 original). - Still accepted by `load_run_state` for backwards compat — `deps_apps` fixture flattens. + Still accepted by `load_run_state` for backwards compat — the `deps` fixture flattens. 2. **NEW per-spec dict (operator-2026-05-28 SSO-dep plan §3.2):** `{"": {"recipe": "", "domain": "", "realm": "...", "client_id": "...", "client_secret": "...", "admin_user": "...", "admin_password": "..."}}`. - The `setup_custom_tests.sh` per-recipe hook reads this via `jq` to wire OIDC env. + The per-recipe `install_steps.sh` hook reads this via `jq` to wire OIDC env. No-op if `$CCCI_DEPS_FILE` isn't set.""" path = os.environ.get("CCCI_DEPS_FILE") @@ -81,11 +69,12 @@ def deploy_deps( pr: str, ref: str | None, deps: Iterable[str], - meta_for: dict[str, dict] | None = None, + meta_for: dict | None = None, ) -> list[dict]: """Deploy each declared dep, sequentially, at its per-run domain. Returns the list of state - dicts (one per dep). `meta_for` maps dep_recipe -> meta (HEALTH_PATH/HEALTH_OK/timeouts) so the - readiness wait uses per-dep config; missing dep meta falls back to (/, 200/301/302, 600s).""" + dicts (one per dep). `meta_for` maps dep_recipe -> RecipeMeta (HEALTH_PATH/HEALTH_OK/timeouts) + so the readiness wait uses per-dep config; a missing dep meta is loaded via meta.load() + (defaults: /, 200/301/302, 600s).""" meta_for = meta_for or {} state: list[dict] = [] for dep in deps: @@ -94,20 +83,21 @@ def deploy_deps( # NB: each dep_app gets a fresh deploy_count entry only on `_record_deploy` which fires # inside `lifecycle.deploy_app`. For Phase 2 the deploy-count guard (DG4.1) counts the # parent + its deps as distinct install events — by design, since each is a separate app. 
- dm = meta_for.get(dep, {}) + dm = meta_for.get(dep) or meta_mod.load(dep) lifecycle.deploy_app( dep, domain, secrets=True, - deploy_timeout=int(dm.get("DEPLOY_TIMEOUT", 900)), + deploy_timeout=int(dm.DEPLOY_TIMEOUT), + meta=dm, ) try: lifecycle.wait_healthy( domain, - ok_codes=tuple(dm.get("HEALTH_OK", (200, 301, 302))), - path=dm.get("HEALTH_PATH", "/"), - deploy_timeout=int(dm.get("DEPLOY_TIMEOUT", 600)), - http_timeout=int(dm.get("HTTP_TIMEOUT", 600)), + ok_codes=tuple(dm.HEALTH_OK), + path=dm.HEALTH_PATH, + deploy_timeout=int(dm.DEPLOY_TIMEOUT), + http_timeout=int(dm.HTTP_TIMEOUT), ) except Exception: # If a dep fails to converge, abort the whole resolve — let the caller teardown @@ -163,7 +153,7 @@ def load_run_state(): def deps_as_dict(state) -> dict[str, dict]: - """Coerce either shape (legacy list or new dict) into a recipe→entry dict for the deps_apps + """Coerce either shape (legacy list or new dict) into a recipe→entry dict for the `deps` fixture + dependent-tests consumption.""" if isinstance(state, dict): return state diff --git a/runner/harness/discovery.py b/runner/harness/discovery.py index c4698bd..87de859 100644 --- a/runner/harness/discovery.py +++ b/runner/harness/discovery.py @@ -11,7 +11,8 @@ hook; the orchestrator decides additive-vs-skip. Sources, in precedence order > cc-ci tests//test_.py (the generic tests/_generic/test_.py is the always-present floor, run separately by default) - custom (non-lifecycle) test_*.py — ALL run, additively, from BOTH locations (opt-in). + custom test_*.py (functional/ + playwright/ ONLY, rcust P4 placement rule) — ALL run, + additively, from BOTH locations (opt-in). install-steps hook — install_steps.sh: repo-local > cc-ci, or none. 
@@ -100,29 +101,22 @@ def resolve_op(recipe: str, op: str, repo_local_dir: str | None) -> tuple[str, s def custom_tests(recipe: str, repo_local_dir: str | None) -> list[tuple[str, str]]: - """All non-lifecycle test_*.py from cc-ci's tests// and (if approved) the recipe's - repo-local tests/. Discovered locations (Phase 2 §4.1): - - the top-level dir tests//test_*.py (legacy + cross-cutting) - - functional/ tests//functional/test_*.py (parity ports + recipe-specific) - - playwright/ tests//playwright/test_*.py (UI flows P6) - Files named `test_.py` (lifecycle ops) are excluded from this list — the orchestrator runs - those in their lifecycle tier, not the custom one. Repo-local is consulted only for - allowlist-approved recipes (HC2).""" + """All custom-tier test_*.py from cc-ci's tests// and (if approved) the recipe's + repo-local tests/. PLACEMENT RULE (rcust P4): custom tests live ONLY under + - functional/ tests//functional/test_*.py (parity ports + recipe-specific) + - playwright/ tests//playwright/test_*.py (UI flows) + A top-level test_*.py is a LIFECYCLE OVERLAY (test_.py) and nothing else — top-level + non-lifecycle files are NOT discovered (zero users at the time of the change; the lifecycle- + name exclusion below stays as a safety net so a misfiled test_.py can never double-run). 
+ Repo-local is consulted only for allowlist-approved recipes (HC2).""" lifecycle_names = {f"test_{op}.py" for op in LIFECYCLE_OPS} subdirs = ("functional", "playwright") found: list[tuple[str, str]] = [] for source, d in (("cc-ci", cc_ci_dir(recipe)), ("repo-local", _gated(recipe, repo_local_dir))): if not d or not os.path.isdir(d): continue - # top-level (legacy / cross-cutting tests not under functional/playwright) - for p in sorted(glob.glob(os.path.join(d, "test_*.py"))): - if os.path.basename(p) not in lifecycle_names: - found.append((source, p)) - # functional/ and playwright/ subdirs (Phase 2 §4.1) for sub in subdirs: for p in sorted(glob.glob(os.path.join(d, sub, "test_*.py"))): - # Phase-2 layout: lifecycle ops never live under functional/playwright, but be - # explicit so a misfiled file doesn't silently get double-run. if os.path.basename(p) not in lifecycle_names: found.append((source, p)) return found @@ -144,7 +138,7 @@ def install_steps(recipe: str, repo_local_dir: str | None) -> tuple[str, str] | def pre_op_hook(recipe: str, op: str, repo_local_dir: str | None) -> tuple[str, str] | None: """The pre-op seed hook for `op`: the path to a recipe `ops.py` module that defines a - `pre_(domain, meta)` callable, or None. cc-ci's tests//ops.py wins; the repo-local + `pre_(ctx)` callable, or None. cc-ci's tests//ops.py wins; the repo-local ops.py is consulted only for allowlist-approved recipes (HC2). The orchestrator imports the module and calls pre_ BEFORE performing the op (HC3 op/assertion split — overlays seed pre-op state here, then assert post-op in test_.py).""" diff --git a/runner/harness/generic.py b/runner/harness/generic.py index d468dbf..b377c92 100644 --- a/runner/harness/generic.py +++ b/runner/harness/generic.py @@ -19,6 +19,7 @@ import ssl import time from . import abra, lifecycle +from . import meta as meta_mod # A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label. 
_BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE) @@ -28,13 +29,14 @@ def _recipe_dir(recipe: str) -> str: return abra.recipe_dir(recipe) # the per-run tree inside a CI run ($ABRA_DIR) -def backup_capable(recipe: str, meta: dict | None = None) -> bool: +def backup_capable(recipe: str, meta=None) -> bool: """Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3). - `recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's - compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention).""" - if meta and "BACKUP_CAPABLE" in meta: - return bool(meta["BACKUP_CAPABLE"]) + `recipe_meta.BACKUP_CAPABLE` (bool) overrides when explicitly set (RecipeMeta default is None = + unset); otherwise auto-detect by scanning the recipe's compose*.yml for a truthy + `backupbot.backup` label (the Co-op Cloud backup convention).""" + if meta is not None and meta.BACKUP_CAPABLE is not None: + return bool(meta.BACKUP_CAPABLE) for path in glob.glob(os.path.join(_recipe_dir(recipe), "compose*.yml")): try: with open(path) as fh: @@ -75,7 +77,7 @@ def served_cert(domain: str, port: int = 443) -> tuple[bool, str]: return (True, f"CN={cn} SAN={sans}") -def assert_serving(domain: str, meta: dict) -> None: +def assert_serving(domain: str, meta) -> None: """The single generic "is the app really serving?" assertion (DG1). 
The app-vs-Traefik-fallback proof is steps 1+2 (both load-bearing, verified by the Adversary): @@ -90,14 +92,14 @@ def assert_serving(domain: str, meta: dict) -> None: Steps 1–2 are BOUNDED POLLS (no bare sleep), so a state-mutating op (upgrade/restore) that leaves the app briefly reconverging settles, while a persistent failure still fails within the timeout.""" - deadline = time.time() + meta["DEPLOY_TIMEOUT"] + deadline = time.time() + meta.DEPLOY_TIMEOUT while time.time() < deadline and not lifecycle.services_converged(domain): time.sleep(5) assert lifecycle.services_converged(domain), f"{domain}: services did not converge" - path = meta["HEALTH_PATH"] - ok = tuple(meta["HEALTH_OK"]) - deadline = time.time() + meta["HTTP_TIMEOUT"] + path = meta.HEALTH_PATH + ok = tuple(meta.HEALTH_OK) + deadline = time.time() + meta.HTTP_TIMEOUT served = False status, body = 0, "" while time.time() < deadline: @@ -141,7 +143,7 @@ def op_state() -> dict: return {} -def assert_upgraded(domain: str, meta: dict) -> None: +def assert_upgraded(domain: str, meta) -> None: """Generic UPGRADE assertion (post-op): the orchestrator already performed the upgrade once via `abra app deploy --chaos` of the PR-head checkout. Assert it reconverged + still serves AND that the deployment is genuinely the PR-head code under test (HC1) — non-vacuously (guarding F1d-2). @@ -212,7 +214,7 @@ def assert_backup_artifact(domain: str) -> str: return snap_id -def assert_restore_healthy(domain: str, meta: dict) -> None: +def assert_restore_healthy(domain: str, meta) -> None: """Generic RESTORE assertion (post-op): the orchestrator already restored. 
Assert the app is healthy + serving again (assert_serving polls, so the post-restore reconverge settles).""" assert_serving(domain, meta) @@ -226,7 +228,7 @@ def perform_upgrade( recipe: str, head_ref: str | None, deploy_timeout: int = 900, - meta: dict | None = None, + meta=None, ) -> dict[str, str | None]: """Perform the UPGRADE op once, in place, to the PR-HEAD code under test (HC1): re-checkout the PR head (the prev-tag base deploy reset the recipe working tree), then `abra app deploy --chaos` @@ -244,7 +246,8 @@ def perform_upgrade( STRICTER convergence+health wait here: services N/N (wait_healthy) + app HEALTH_PATH healthy + any recipe READY_PROBE (collabora WOPI discovery 200). This bounds readiness by OUR generous deadline, not abra's impatient one — and is stronger evidence than abra's monitor.""" - meta = meta or {} + if meta is None: + meta = meta_mod.load(recipe) before = lifecycle.deployed_identity(domain) if head_ref: lifecycle.recipe_checkout_ref(recipe, head_ref) @@ -253,9 +256,7 @@ def perform_upgrade( # (target) version, so the base deploys minimally WITHOUT it and the upgrade adds it to COMPOSE_FILE # here, after the PR-head checkout (which ships the overlay) and before the chaos redeploy that # picks up the new .env. Dict or callable(domain)->dict. No-op for recipes without it. - upgrade_env = meta.get("UPGRADE_EXTRA_ENV") or {} - if callable(upgrade_env): - upgrade_env = upgrade_env(domain) or {} + upgrade_env = meta_mod.upgrade_extra_env(meta, meta_mod.hook_ctx(domain, meta, op="upgrade")) for k, v in upgrade_env.items(): print(f" upgrade-env: {k}={v}", flush=True) abra.env_set(domain, k, v) @@ -266,14 +267,12 @@ def perform_upgrade( # Own the convergence verification (abra's monitor was skipped via -c). 
lifecycle.wait_healthy( domain, - ok_codes=tuple(meta.get("HEALTH_OK", (200, 301, 302))), - path=meta.get("HEALTH_PATH", "/"), - deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", deploy_timeout)), - http_timeout=int(meta.get("HTTP_TIMEOUT", 300)), - ) - lifecycle.wait_ready_probes( - meta, domain, timeout=int(meta.get("DEPLOY_TIMEOUT", deploy_timeout)) + ok_codes=tuple(meta.HEALTH_OK), + path=meta.HEALTH_PATH, + deploy_timeout=int(meta.DEPLOY_TIMEOUT), + http_timeout=int(meta.HTTP_TIMEOUT), ) + lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="upgrade") after = lifecycle.deployed_identity(domain) # Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the # PR-head we checked out — proving the upgrade deployed the code under test, not a published tag. diff --git a/runner/harness/lifecycle.py b/runner/harness/lifecycle.py index 10a1d4b..fddc286 100644 --- a/runner/harness/lifecycle.py +++ b/runner/harness/lifecycle.py @@ -12,6 +12,7 @@ import glob import json import os import re +import shutil import socket import ssl import subprocess @@ -19,6 +20,7 @@ import time import urllib.request from . import abra, lifetime +from . import meta as meta_mod GATEWAY_IP = "143.244.213.108" # *.ci.commoninternet.net -> gateway (TLS passthrough to cc-ci) # A run app domain is "-<6hex>.ci.commoninternet.net" (see DECISIONS.md). Used by the @@ -111,37 +113,6 @@ def _residual(domain: str) -> dict: } -def _recipe_extra_env(recipe: str, domain: str) -> dict[str, str]: - """Per-recipe extra .env keys, applied at every deploy (install + upgrade's old_app) so a recipe - with multi-domain / config needs is enrolled with NO shared-harness change (D5/M6.5). A recipe - declares `EXTRA_ENV` in tests//recipe_meta.py as either a dict or a callable - `EXTRA_ENV(domain) -> dict` (callable form lets it derive values from the per-run domain, e.g. - cryptpad's SANDBOX_DOMAIN). 
Returns {} if none.""" - path = os.path.join(os.path.dirname(__file__), "..", "..", "tests", recipe, "recipe_meta.py") - if not os.path.exists(path): - return {} - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - ee = ns.get("EXTRA_ENV") - if callable(ee): - ee = ee(domain) - return {str(k): str(v) for k, v in (ee or {}).items()} - - -def _recipe_meta_flag(recipe: str, key: str) -> bool: - """Read a boolean flag from tests//recipe_meta.py (e.g. CHAOS_BASE_DEPLOY). Returns - False if the recipe ships no meta or the flag is absent/falsey. Trusted in-repo exec, same as - _recipe_extra_env.""" - path = os.path.join(os.path.dirname(__file__), "..", "..", "tests", recipe, "recipe_meta.py") - if not os.path.exists(path): - return False - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - return bool(ns.get(key)) - - def _record_deploy() -> None: """Increment the per-run deploy counter (DG4.1: one deploy per run). No-op unless the orchestrator set CCCI_DEPLOY_COUNT_FILE — so it never affects standalone/manual use.""" @@ -155,6 +126,34 @@ def _record_deploy() -> None: f.write(str(n + 1)) +def ccci_overlay_path(recipe: str) -> str: + """The cc-ci-owned compose overlay for a recipe (rcust P2a: first-class, auto-discovered).""" + return os.path.join(meta_mod.TESTS_DIR, recipe, "compose.ccci.yml") + + +def has_ccci_overlay(recipe: str) -> bool: + return os.path.isfile(ccci_overlay_path(recipe)) + + +def provide_ccci_overlay(recipe: str) -> None: + """Copy tests//compose.ccci.yml into THIS run's recipe checkout (ABRA_DIR-aware), so + the recipe's COMPOSE_FILE reference resolves (rcust P2a — the harness owns the copy; recipes + no longer ship install_steps.sh boilerplate for it). 
No-op for recipes without an overlay.""" + src = ccci_overlay_path(recipe) + if not os.path.isfile(src): + return + dest_dir = abra.recipe_dir(recipe) + if not os.path.isdir(dest_dir): + print(f" ccci-overlay: recipe dir {dest_dir} missing — cannot provide overlay", flush=True) + raise RuntimeError(f"recipe checkout missing for {recipe}: {dest_dir}") + shutil.copy(src, os.path.join(dest_dir, "compose.ccci.yml")) + print( + f" ccci-overlay: provided compose.ccci.yml to the {recipe} checkout " + "(first-class overlay; base deploy auto-chaos)", + flush=True, + ) + + def _run_install_steps(hook: tuple[str, str], recipe: str, domain: str) -> None: """Run a recipe's custom install-steps hook (install_steps.sh) during the install tier — after `abra app new` + env defaults + secret generate, before deploy (Phase 1d DG5). The hook gets the @@ -238,15 +237,23 @@ def deploy_app( secrets: bool = True, install_steps_hook: tuple[str, str] | None = None, deploy_timeout: int = 900, + meta=None, ) -> None: """Create + configure + deploy an app. Forces LETS_ENCRYPT_ENV='' so traefik serves the wildcard cert via the file provider and NEVER attempts ACME (adversary finding A1). Applies any - per-recipe EXTRA_ENV (recipe_meta.py) and the custom install-steps hook (Phase 1d) before deploy. + per-recipe EXTRA_ENV (recipe_meta.py), the custom install-steps hook (Phase 1d), and the + first-class `tests//compose.ccci.yml` overlay (rcust P2a) before deploy. + + `meta` is the recipe's loaded RecipeMeta (EXTRA_ENV); the orchestrator loads once and passes + it down. Callers without one in hand (fixtures, warm reconcile) may omit it — it is then + loaded here via the single meta.load() path. `deploy_timeout` is the subprocess timeout for `abra app deploy`. Caller (orchestrator) passes `recipe_meta.DEPLOY_TIMEOUT` so heavy recipes (ghost, matrix-synapse, lasuite-meet) can extend past the 900s default. 
abra's INTERNAL TIMEOUT (recipe's TIMEOUT env, default 300s) is set via EXTRA_ENV; this is the Python subprocess wrapper's timeout so abra doesn't get SIGKILLed mid-deploy.""" + if meta is None: + meta = meta_mod.load(recipe) _record_deploy() # Lock BEFORE the app exists: a concurrent run's janitor must never see this app without a # held app lock (it would probe it as an orphan and reap an in-flight deploy). Also the @@ -274,16 +281,18 @@ def deploy_app( flush=True, ) chaos = True - # A recipe may force a chaos base deploy via recipe_meta CHAOS_BASE_DEPLOY=True when an - # install_steps hook adds an untracked compose overlay to the recipe checkout (e.g. discourse's - # compose.ccci.yml, provided by install_steps for the pinned base). The untracked file makes - # abra's pinned-deploy clean-tree check FATA ('has locally unstaged changes'); chaos skips lint + - # the clean-tree gate and deploys the EXPLICITLY-checked-out pinned version (we already ran - # recipe_checkout(version) above) — NOT latest. Same mechanism as the lightweight-tag branch. - elif _recipe_meta_flag(recipe, "CHAOS_BASE_DEPLOY"): + # A first-class cc-ci compose overlay (tests//compose.ccci.yml, copied into the + # checkout below — rcust P2a) is an UNTRACKED file in the recipe checkout, which makes + # abra's pinned-deploy clean-tree check FATA ('has locally unstaged changes'). Auto-chaos: + # chaos skips lint + the clean-tree gate and deploys the EXPLICITLY-checked-out pinned + # version (we already ran recipe_checkout(version) above) — NOT latest. Same mechanism as + # the lightweight-tag branch. (Replaces the deleted CHAOS_BASE_DEPLOY meta flag — the + # overlay's presence IS the signal, killing the R7 implicit coupling.) 
+ elif has_ccci_overlay(recipe): print( - f" deploy_app({recipe}@{version}): CHAOS_BASE_DEPLOY set → chaos base deploy of the " - "checked-out pinned version (skips clean-tree/lint; deploys version, not LATEST)", + f" deploy_app({recipe}@{version}): compose.ccci.yml overlay present → chaos base " + "deploy of the checked-out pinned version (skips clean-tree/lint; deploys version, " + "not LATEST)", flush=True, ) chaos = True @@ -293,12 +302,18 @@ def deploy_app( # it ourselves is recipe-agnostic and canonical (the run domain IS the app's domain). abra.env_set(domain, "DOMAIN", domain) abra.env_set(domain, "LETS_ENCRYPT_ENV", "") - for k, v in _recipe_extra_env(recipe, domain).items(): + for k, v in meta_mod.extra_env(meta, meta_mod.hook_ctx(domain, meta)).items(): abra.env_set(domain, k, v) if secrets: abra.secret_generate(domain) if install_steps_hook: _run_install_steps(install_steps_hook, recipe, domain) + # First-class cc-ci compose overlay (rcust P2a): if the recipe ships + # tests//compose.ccci.yml, copy it into THIS run's recipe checkout (ABRA_DIR-aware) + # so the COMPOSE_FILE reference in the recipe's EXTRA_ENV resolves. Untracked, so it persists + # across the later PR-head checkout (idempotent when the head ships the same fix). Replaces + # the per-recipe install_steps.sh copy boilerplate + CHAOS_BASE_DEPLOY flag (auto-chaos above). + provide_ccci_overlay(recipe) # HQ1: warm the local image store before the (real, unchanged) abra deploy. prepull_images(recipe, domain) abra.deploy(domain, chaos=chaos, timeout=deploy_timeout) @@ -510,7 +525,7 @@ def chaos_redeploy( abra.deploy(domain, chaos=True, timeout=deploy_timeout, no_converge_checks=no_converge_checks) -def wait_ready_probes(meta: dict, domain: str, timeout: int = 600) -> None: +def wait_ready_probes(meta, domain: str, timeout: int = 600, op: str | None = None) -> None: """Poll a recipe's optional READY_PROBE endpoints until each returns an accepted status, or raise. 
A recipe_meta may define `READY_PROBE(domain) -> [{"host":..., "path":..., "ok":(200,)}, ...]` @@ -527,10 +542,10 @@ def wait_ready_probes(meta: dict, domain: str, timeout: int = 600) -> None: must be released by the old task + rebound by the new) the voice server can be down while HTTP-200 still passes — and backup-bot then execs into a not-running app container (409). Requiring the voice port to be stably listening before proceeding closes that window.""" - probe_fn = meta.get("READY_PROBE") + probe_fn = meta.READY_PROBE if not callable(probe_fn): return - probes = probe_fn(domain) or [] + probes = probe_fn(meta_mod.hook_ctx(domain, meta, op=op)) or [] for probe in probes: if "tcp_port" in probe: host = probe.get("tcp_host", "127.0.0.1") diff --git a/runner/harness/manifest.py b/runner/harness/manifest.py new file mode 100644 index 0000000..7a2a00c --- /dev/null +++ b/runner/harness/manifest.py @@ -0,0 +1,153 @@ +"""Customization manifest (rcust P5; spec §8 R4 mitigation). + +One block at run start answering "what does this recipe customize?" across ALL the surfaces +(recipe_meta keys, hook files, file-presence, run-time env overrides) — printed to the run log and +embedded verbatim in results.json under "customization". PURE PRESENTATION: building or printing +the manifest must never influence any verdict (R7-class invariant). +""" + +from __future__ import annotations + +import os +import re + +from . import discovery, lifecycle +from . import meta as meta_mod + +_PRE_OP_RE = re.compile(r"^def (pre_[a-z]+)\(", re.MULTILINE) + +# Meta values are repo-public by construction (recipe_meta.py is committed; real secrets are +# class-B generated, never meta), but the manifest lands on the dashboard — mask values whose +# key NAME is secret-shaped so a field literally called SECRET_KEY_BASE never shows a value +# (defense in depth + keeps dashboard secret-scans quiet). `KEY` matches only as a word segment +# (API_KEY yes, KEYCLOAK_URL no). 
+_SENSITIVE_NAME_RE = re.compile(r"SECRET|PASSWORD|TOKEN|CREDENTIAL|(^|_)KEY(_|$)", re.IGNORECASE) + + +def _jsonable(v, name=""): + """Manifest values must be JSON-serializable + deterministic: hooks render as '', + tuples become lists, secret-named entries (by key name, incl. nested dict keys) as + ''.""" + if callable(v): + return "" + if name and _SENSITIVE_NAME_RE.search(name): + return "" + if isinstance(v, tuple): + return list(v) + if isinstance(v, dict): + return {k: _jsonable(x, name=str(k)) for k, x in v.items()} + return v + + +def _pre_ops(path: str) -> list[str]: + """The pre_ hook names an ops.py defines (cheap source scan, same approach as + discovery._module_defines — no import).""" + try: + with open(path) as fh: + return sorted(set(_PRE_OP_RE.findall(fh.read()))) + except OSError: + return [] + + +def _custom_counts(recipe: str, repo_local: str | None) -> dict[str, dict[str, int]]: + out: dict[str, dict[str, int]] = {} + for source, path in discovery.custom_tests(recipe, repo_local): + sub = os.path.basename(os.path.dirname(path)) # functional | playwright + out.setdefault(source, {}).setdefault(sub, 0) + out[source][sub] += 1 + return out + + +def build(recipe: str, meta, repo_local: str | None) -> dict: + """Collect the run's resolved customization into one deterministic, JSON-serializable dict. 
+ + Keys: meta_non_default (explicitly-customized recipe_meta keys), hooks (ops.py pre-ops + + install_steps.sh + compose.ccci.yml with their source), overlays (lifecycle overlay files by + op + source), custom_tests (counts per source/subdir), env_overrides (active + CCCI_SKIP_GENERIC* — the dev-only escape hatch, flagged when riding a CI run).""" + hooks: dict = {} + pre_ops: dict[str, list[str]] = {} + for source, d in ( + ("cc-ci", discovery.cc_ci_dir(recipe)), + ("repo-local", discovery._gated(recipe, repo_local)), # noqa: SLF001 — same HC2 gate + ): + if not d: + continue + p = os.path.join(d, "ops.py") + if os.path.isfile(p): + ops = _pre_ops(p) + if ops: + pre_ops[source] = ops + if pre_ops: + hooks["ops.py"] = pre_ops + ist = discovery.install_steps(recipe, repo_local) + if ist: + hooks["install_steps.sh"] = ist[0] + if lifecycle.has_ccci_overlay(recipe): + hooks["compose.ccci.yml"] = "cc-ci" + + overlays = {} + for op in discovery.LIFECYCLE_OPS: + ov = discovery.resolve_overlay_op(recipe, op, repo_local) + if ov: + overlays[op] = ov[0] + + env_overrides = sorted( + k + for k in os.environ + if k.startswith("CCCI_SKIP_GENERIC") + and str(os.environ.get(k) or "").strip().lower() in ("1", "true", "yes", "on") + ) + + return { + "meta_non_default": { + k: _jsonable(v, name=k) for k, v in sorted(meta_mod.non_default(meta).items()) + }, + "hooks": hooks, + "overlays": overlays, + "custom_tests": _custom_counts(recipe, repo_local), + "env_overrides": env_overrides, + } + + +def render(recipe: str, manifest: dict) -> str: + """The human block printed at run start (same content as the results.json key).""" + lines = [f"===== customization manifest: {recipe} ====="] + nd = manifest["meta_non_default"] + lines.append( + "meta (non-default): " + + (" ".join(f"{k}={v!r}" for k, v in nd.items()) if nd else "(none — zero-config floor)") + ) + hk = manifest["hooks"] + parts = [] + for source, ops in hk.get("ops.py", {}).items(): + 
parts.append(f"ops.py[{','.join(ops)}]({source})") + if "install_steps.sh" in hk: + parts.append(f"install_steps.sh({hk['install_steps.sh']})") + if "compose.ccci.yml" in hk: + parts.append(f"compose.ccci.yml({hk['compose.ccci.yml']})") + lines.append("hooks: " + (" ".join(parts) if parts else "(none)")) + ov = manifest["overlays"] + lines.append( + "overlays: " + + (" ".join(f"test_{op}.py({src})" for op, src in ov.items()) if ov else "(none)") + ) + ct = manifest["custom_tests"] + lines.append( + "custom tests: " + + ( + " ".join( + " ".join(f"{sub}/={n}" for sub, n in sorted(counts.items())) + f" ({source})" + for source, counts in sorted(ct.items()) + ) + if ct + else "(none)" + ) + ) + eo = manifest["env_overrides"] + if eo: + suffix = " !! dev-only override active in CI" if os.environ.get("DRONE") else "" + lines.append("env overrides: " + " ".join(f"{k}=1" for k in eo) + suffix) + else: + lines.append("env overrides: (none)") + return "\n".join(lines) diff --git a/runner/harness/meta.py b/runner/harness/meta.py new file mode 100644 index 0000000..e1d786a --- /dev/null +++ b/runner/harness/meta.py @@ -0,0 +1,320 @@ +"""Single recipe-meta loader + declarative key registry (recipe-custom restructure P1; spec +docs/recipe-customization.md §8 R1). + +THE one place `tests//recipe_meta.py` is `exec()`d. Every consumer (orchestrator, pytest +`meta` fixture, deploy env shaping, deps, warm-canonical enrollment, screenshot) reads the ONE +loaded `RecipeMeta` object instead of re-exec'ing the file and cherry-picking keys — that drift +(six divergent loaders, spec §4 L1–L6) is what made `SCREENSHOT` an unreachable knob (R2) and let +key typos silently disable coverage (R6). + +Validation (locked decision, recipe-custom-restructure-full-plan.md): +- unknown ALL-CAPS top-level name → MetaError (hard error, fails fast at load; the all-recipes + unit test catches it at PR time). 
Underscore-prefixed names (`_FOO`) are recipe-private and + exempt; lowercase names (helper functions/imports) are ignored. +- type mismatch → MetaError. Callables are accepted ONLY for hook-typed keys. + +The KEYS registry is the single source of truth for the key set: it drives validation, the +RecipeMeta dataclass fields, and the generated reference table in docs/recipe-customization.md §4 +(scripts/gen-meta-docs.py; a unit test asserts the committed table matches). +""" + +from __future__ import annotations + +import copy +import dataclasses +import difflib +import inspect +import json +import os +from collections.abc import Callable + +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +TESTS_DIR = os.path.join(ROOT, "tests") + + +class MetaError(Exception): + """A recipe_meta.py failed registry validation (unknown key / type mismatch / callable on a + data key). Hard error by design: a typo'd key must fail the run at load, not silently reduce + coverage (spec §8 R6 — the worst failure mode for a CI harness).""" + + +@dataclasses.dataclass(frozen=True) +class Key: + """One registered recipe_meta key: name, type tag, default, one-line doc (rendered into the + generated reference table), optional extra validator, and a deprecation marker (deprecated + keys still load+validate but are scheduled for deletion).""" + + name: str + type: str # "int"|"str"|"tuple[int]"|"bool"|"dict_or_hook"|"hook"|"list[str]"|"dict" + default: object + doc: str + validate: Callable[[object], None] | None = None + deprecated: bool = False + # Expected positional-parameter names for a callable value (rcust P3 uniform ctx convention). + # Enforced at load so a legacy-signature hook (e.g. `def READY_PROBE(domain)`) fails with a + # CLEAR MetaError naming the migration — never a silent TypeError mid-run. + hook_params: tuple[str, ...] | None = None + + +KEYS: tuple[Key, ...] 
= ( + Key( + "HEALTH_PATH", + "str", + "/", + "Path probed for serving/health checks (deploy wait + generic `assert_serving`).", + ), + Key("HEALTH_OK", "tuple[int]", (200, 301, 302), "Acceptable HTTP status codes for health."), + Key("DEPLOY_TIMEOUT", "int", 600, "Max seconds to wait for swarm convergence per deploy."), + Key("HTTP_TIMEOUT", "int", 300, "Max seconds to wait for HTTP health after convergence."), + Key( + "BACKUP_CAPABLE", + "bool", + None, + "Override the backup-tier capability auto-detect (compose `backupbot.backup` labels). `False` forces N/A; `True` forces the tier on; unset = auto-detect.", + ), + Key( + "EXPECTED_NA", + "dict", + None, + "Declare an N/A rung intentional: `{rung: reason}`. The cap stands either way; only the report wording changes.", + ), + Key( + "READY_PROBE", + "hook", + None, + "Callable `(ctx) -> [probe, ...]` returning extra readiness probes, run after install AND after upgrade: HTTP `{host, path, ok}` or TCP `{tcp_host, tcp_port, stable}`.", + hook_params=("ctx",), + ), + Key( + "UPGRADE_BASE_VERSION", + "str", + None, + "Exact published tag overriding the upgrade tier's base (default: `recipe_versions[-2]`).", + ), + Key( + "BACKUP_VERIFY", + "hook", + None, + "Callable `(ctx) -> bool` post-backup data-capture check; `False` re-runs the backup (truncated-dump race guard), retried up to 3 attempts.", + hook_params=("ctx",), + ), + Key( + "UPGRADE_EXTRA_ENV", + "dict_or_hook", + None, + "Extra `.env` keys applied after the PR-head checkout, before the chaos redeploy (env that exists only at head). Dict, or callable `(ctx) -> dict`.", + hook_params=("ctx",), + ), + Key( + "EXTRA_ENV", + "dict_or_hook", + {}, + "Extra `.env` keys applied at EVERY deploy (base install AND upgrade old-app). Dict, or callable `(ctx) -> dict` deriving values from the per-run domain (`ctx.domain`).", + hook_params=("ctx",), + ), + Key( + "DEPS", + "list[str]", + [], + 'Dep recipes deployed/provisioned alongside (e.g. 
`["keycloak"]`); creds land in `$CCCI_DEPS_FILE`.', + ), + Key( + "WARM_CANONICAL", + "bool", + False, + "Enroll the recipe in the warm/canonical app system (docs/warm.md): green cold runs on LATEST advance the canonical snapshot.", + ), + Key( + "SCREENSHOT", + "hook", + None, + "Callable `(page, ctx)` driving Playwright to a safe, credential-free post-login view for the results-card screenshot (default: landing page).", + hook_params=("page", "ctx"), + ), + # (CHAOS_BASE_DEPLOY, OIDC_AT_INSTALL and SKIP_GENERIC were deleted in restructure P2: + # compose.ccci.yml is first-class + auto-chaos; install-time deps wiring is the only mode; + # the generic floor is suppressible only via the dev-only CCCI_SKIP_GENERIC* env form.) +) + +_REGISTRY: dict[str, Key] = {k.name: k for k in KEYS} + +# The one validated, attribute-access view of a recipe's customization. Generated from KEYS so the +# field set can never drift from the registry (frozen: consumers share one immutable object). +RecipeMeta = dataclasses.make_dataclass( + "RecipeMeta", + [(k.name, object, dataclasses.field(default=None)) for k in KEYS], + frozen=True, +) +RecipeMeta.__doc__ = ( + "Validated per-recipe customization (one field per registered key; attribute access). " + "Built ONLY by meta.load()." +) + + +def meta_path(recipe: str, tests_dir: str | None = None) -> str: + """Canonical path of a recipe's meta file (pure).""" + return os.path.join(tests_dir or TESTS_DIR, recipe, "recipe_meta.py") + + +def check_hook_signature(fn, expected: tuple[str, ...], where: str) -> None: + """Enforce the uniform ctx hook convention (rcust P3): a hook callable's positional parameters + must be exactly `expected` (e.g. ("ctx",) or ("page", "ctx")). 
A legacy-signature hook (the + pre-restructure `(domain)` / `(domain, meta)` / `(page, domain, meta)` forms) raises a CLEAR + MetaError naming the migration — never a silent TypeError mid-run.""" + try: + params = [ + p.name + for p in inspect.signature(fn).parameters.values() + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + except (TypeError, ValueError): # builtins/odd callables — let the call site surface it + return + if tuple(params) != expected: + raise MetaError( + f"{where}: hook signature is ({', '.join(params)}) — the recipe-customization " + f"restructure (P3) changed ALL recipe hook signatures to ({', '.join(expected)}); " + f"read fields off the HookCtx (ctx.domain, ctx.base_url, ctx.meta, ctx.deps, ctx.op). " + f"See docs/recipe-customization.md §5." + ) + + +def _coerce(key: Key, value: object, path: str) -> object: + """Validate `value` against `key`'s declared type; normalize containers (tuple[int]/list[str]). + Raises MetaError on mismatch — including a callable supplied for a data-typed key.""" + t = key.type + if callable(value) and t not in ("hook", "dict_or_hook"): + raise MetaError( + f"{path}: {key.name} is a data key (type {t}) — callables are accepted only for " + f"hook-typed keys" + ) + if t == "int": + if isinstance(value, int) and not isinstance(value, bool): + return value + elif t == "str": + if isinstance(value, str): + return value + elif t == "bool": + if isinstance(value, bool): + return value + elif t == "tuple[int]": + if isinstance(value, tuple | list) and all( + isinstance(x, int) and not isinstance(x, bool) for x in value + ): + return tuple(value) + elif t == "list[str]": + if isinstance(value, tuple | list) and all(isinstance(x, str) for x in value): + return list(value) + elif t == "dict": + if isinstance(value, dict): + return value + elif ( + t == "hook" + and callable(value) + or t == "dict_or_hook" + and (isinstance(value, dict) or callable(value)) + ): + return value + raise MetaError(f"{path}: 
{key.name} must be {t}, got {type(value).__name__} ({value!r})") + + +def load(recipe: str, tests_dir: str | None = None): + """Load + validate a recipe's customization -> RecipeMeta. THE only exec() of recipe_meta.py. + + Missing file -> all registry defaults (the zero-config baseline, spec §2). Unknown + non-underscore ALL-CAPS top-level name or type mismatch -> MetaError (hard error). + `tests_dir` overrides the recipe-meta root (unit tests / fixtures).""" + path = meta_path(recipe, tests_dir) + values = {k.name: copy.copy(k.default) for k in KEYS} + if os.path.exists(path): + ns: dict = {} + with open(path) as fh: + exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) + for name in sorted(ns): + if name.startswith("_") or not name.isupper(): + continue # _FOO = recipe-private (exempt); lowercase = helpers/imports (ignored) + key = _REGISTRY.get(name) + if key is None: + near = difflib.get_close_matches(name, _REGISTRY, n=1) + hint = f" — did you mean {near[0]!r}?" if near else "" + raise MetaError( + f"{path}: unknown recipe_meta key {name!r}{hint}. Registered keys: " + f"{', '.join(sorted(_REGISTRY))}. Recipe-private constants must be " + f"underscore-prefixed (e.g. _{name})." + ) + values[name] = _coerce(key, ns[name], path) + if key.hook_params and callable(values[name]): + check_hook_signature(values[name], key.hook_params, f"{path}: {name}") + if key.validate: + key.validate(values[name]) + return RecipeMeta(**values) + + +def as_dict(meta) -> dict: + """RecipeMeta -> {key: value} (every registered key, defaults included).""" + return dataclasses.asdict(meta) + + +def non_default(meta) -> dict: + """The keys a recipe explicitly customized: {key: value} where value differs from the registry + default. Hooks compare by identity-vs-None (a set hook is always non-default). 
Feeds the run's + customization manifest (P5).""" + out = {} + for k in KEYS: + v = getattr(meta, k.name) + if v != k.default: + out[k.name] = v + return out + + +@dataclasses.dataclass(frozen=True) +class HookCtx: + """The single argument every recipe hook receives (rcust P3 uniform ctx convention): + `EXTRA_ENV(ctx)`, `UPGRADE_EXTRA_ENV(ctx)`, `READY_PROBE(ctx)`, `BACKUP_VERIFY(ctx)`, + `SCREENSHOT(page, ctx)`, ops.py `pre_(ctx)`.""" + + domain: str # the app's per-run domain + base_url: str # https:// + meta: object # the recipe's full RecipeMeta + deps: dict | None # provisioned dep creds ({dep_recipe: entry}) or None if absent/empty + op: str | None # current lifecycle op (install|upgrade|backup|restore) or None + + +def _run_deps() -> dict | None: + """The current run's provisioned dep creds from $CCCI_DEPS_FILE (either shape), or None. + Read directly (not via harness.deps) to keep meta.py import-cycle-free.""" + path = os.environ.get("CCCI_DEPS_FILE") + if not path or not os.path.exists(path): + return None + try: + with open(path) as f: + data = json.load(f) + except (OSError, ValueError): + return None + if isinstance(data, dict): + return data or None + if isinstance(data, list): + out = {e["recipe"]: e for e in data if isinstance(e, dict) and e.get("recipe")} + return out or None + return None + + +def hook_ctx(domain: str, meta, *, op: str | None = None) -> HookCtx: + """Build the HookCtx for a hook call site. 
Dep creds are picked up from the run's + $CCCI_DEPS_FILE when present (None otherwise).""" + return HookCtx(domain=domain, base_url=f"https://{domain}", meta=meta, deps=_run_deps(), op=op) + + +def _env_map(value, ctx: HookCtx) -> dict[str, str]: + if callable(value): + value = value(ctx) + return {str(k): str(v) for k, v in (value or {}).items()} + + +def extra_env(meta, ctx: HookCtx) -> dict[str, str]: + """Resolve EXTRA_ENV (dict or callable(ctx)->dict) to the concrete per-run env map.""" + return _env_map(meta.EXTRA_ENV, ctx) + + +def upgrade_extra_env(meta, ctx: HookCtx) -> dict[str, str]: + """Resolve UPGRADE_EXTRA_ENV (dict or callable(ctx)->dict) to the concrete env map.""" + return _env_map(meta.UPGRADE_EXTRA_ENV, ctx) diff --git a/runner/harness/results.py b/runner/harness/results.py index 910e827..72a40cf 100644 --- a/runner/harness/results.py +++ b/runner/harness/results.py @@ -203,6 +203,7 @@ def build_results( screenshot: str | None = None, summary_card: str | None = None, expected_na: dict | None = None, + customization: dict | None = None, ) -> dict: """Assemble the full results.json dict (no I/O). `finished_ts` is passed in (the orchestrator stamps it) so this stays pure and deterministic for unit tests. `expected_na` is the recipe's @@ -236,6 +237,9 @@ def build_results( }, "screenshot": screenshot, "summary_card": summary_card, + # rcust P5: the run's resolved customization manifest (pure presentation — consumers must + # never derive a verdict from it). + "customization": customization, } diff --git a/runner/harness/screenshot.py b/runner/harness/screenshot.py index de69766..66c178a 100644 --- a/runner/harness/screenshot.py +++ b/runner/harness/screenshot.py @@ -8,7 +8,7 @@ Secret-safety (R7, the cardinal screenshot guardrail): the screenshot step must that displays generated credentials (an install wizard showing the initial admin password, a secrets page, etc.). 
The DEFAULT capture is the app's **landing page** (a login form shows fields, not the password) — safe for every recipe. A recipe that needs a post-login view opts in via a recipe-meta -`SCREENSHOT` hook: a callable `screenshot(page, domain, meta) -> None` that drives Playwright to a +`SCREENSHOT` hook: a callable `SCREENSHOT(page, ctx) -> None` that drives Playwright to a safe, credential-free view and is responsible for not landing on a secrets page. The harness never auto-fills a wizard. @@ -21,6 +21,7 @@ from __future__ import annotations import os from . import browser as harness_browser +from . import meta as meta_mod # Default viewport for the captured screenshot — a desktop-ish frame that crops well into the card. VIEWPORT = {"width": 1280, "height": 800} @@ -33,12 +34,19 @@ def screenshot_path(run_artifact_dir: str) -> str: return os.path.join(run_artifact_dir, "screenshot.png") -def _load_screenshot_hook(recipe_meta: dict | None): +def _load_screenshot_hook(recipe_meta): """Return the recipe's optional SCREENSHOT hook (a callable) if it declared one, else None. - The hook drives Playwright to a safe post-login view; default is the landing page.""" - if not recipe_meta: + The hook drives Playwright to a safe post-login view; default is the landing page. + + `recipe_meta` is the loaded RecipeMeta (rcust P1 — the single loader actually delivers + SCREENSHOT now; under the old L1 allowlist the key never arrived, spec §8 R2). A plain dict + is still accepted for direct/manual callers.""" + if recipe_meta is None: return None - hook = recipe_meta.get("SCREENSHOT") + if isinstance(recipe_meta, dict): + hook = recipe_meta.get("SCREENSHOT") + else: + hook = getattr(recipe_meta, "SCREENSHOT", None) return hook if callable(hook) else None @@ -67,8 +75,9 @@ def capture(domain: str, out_path: str, *, recipe_meta: dict | None = None) -> s if hook is not None: # Recipe-specific safe view (post-login etc.). 
The hook owns navigation + # the no-secret-page guarantee; it should call page.screenshot itself, but if - # it doesn't, we still snap the resulting page below. - hook(page, domain, recipe_meta) + # it doesn't, we still snap the resulting page below. SCREENSHOT(page, ctx) — + # the uniform ctx convention (rcust P3). + hook(page, meta_mod.hook_ctx(domain, recipe_meta)) if not os.path.exists(out_path): page.screenshot(path=out_path, full_page=False) else: diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index 77891cf..980b4f8 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -58,6 +58,12 @@ from harness import ( # noqa: E402 from harness import ( # noqa: E402 deps as deps_mod, ) +from harness import ( # noqa: E402 + manifest as manifest_mod, +) +from harness import ( # noqa: E402 + meta as meta_mod, +) from harness import ( # noqa: E402 results as results_mod, ) @@ -70,7 +76,7 @@ ALL_STAGES = ("install", "upgrade", "backup", "restore", "custom") def sso_dep_unverified(declared, deps_ready: bool, requires_deps_skipped: int) -> bool: """F2-11 gate predicate (pure, unit-tested). True when a recipe declares DEPS but its - setup_custom_tests failed (deps not ready) AND that caused ≥1 `requires_deps` (SSO/OIDC) test + dep provisioning failed (deps not ready) AND that caused ≥1 `requires_deps` (SSO/OIDC) test to SKIP. In that case the recipe's characteristic SSO claim was NOT verified, so the run must NOT report GREEN — even though a skip-only pytest file exits 0 and leaves every tier 'pass'. 
Generic-tier failure-isolation is preserved (those results stand); only the green SIGNAL is @@ -247,52 +253,29 @@ def snapshot_recipe_tests(recipe: str) -> str | None: return dst -def _load_meta(recipe: str) -> dict: - """Mirror tests/conftest._recipe_meta so the orchestrator's deploy/wait uses the same per-recipe - config the tiers see (timeouts, health path/codes).""" - meta = { - "HEALTH_PATH": "/", - "HEALTH_OK": (200, 301, 302), - "DEPLOY_TIMEOUT": 600, - "HTTP_TIMEOUT": 300, - } - path = os.path.join(ROOT, "tests", recipe, "recipe_meta.py") - if os.path.exists(path): - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - for k in list(meta) + [ - "BACKUP_CAPABLE", - "SKIP_GENERIC", - "EXPECTED_NA", - "OIDC_AT_INSTALL", - "READY_PROBE", - "UPGRADE_BASE_VERSION", - "BACKUP_VERIFY", - "UPGRADE_EXTRA_ENV", - ]: - if k in ns: - meta[k] = ns[k] - return meta - - def _tier_env(domain: str) -> dict: return dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_BASE_URL=f"https://{domain}") -def _skip_generic(op: str, meta: dict) -> bool: +def skip_generic_env_overrides() -> list[str]: + """Active CCCI_SKIP_GENERIC* env overrides (rcust P2c: the meta key is deleted; the env form + is a documented LOCAL-DEV-ONLY escape hatch). Surfaced loudly when set in a CI (drone) run — + it reduces generic-floor coverage and must never silently ride a CI verdict.""" + return sorted( + k for k in os.environ if k.startswith("CCCI_SKIP_GENERIC") and _truthy(os.environ.get(k)) + ) + + +def _skip_generic(op: str) -> bool: """Whether the generic assertion for `op` is opted out (Phase 1e HC3). Default: run (additive). - Opt-out, any of: env CCCI_SKIP_GENERIC (all ops), env CCCI_SKIP_GENERIC_, or the recipe's - declarative recipe_meta.SKIP_GENERIC list (op name, or "all"/"*").""" + Opt-out via env only (dev-only escape hatch, P2c): CCCI_SKIP_GENERIC (all ops) or + CCCI_SKIP_GENERIC_. 
The recipe_meta SKIP_GENERIC key is deleted (zero users).""" if _truthy(os.environ.get("CCCI_SKIP_GENERIC")): return True - if _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")): - return True - sg = [str(s).lower() for s in (meta.get("SKIP_GENERIC") or [])] - return "all" in sg or "*" in sg or op in sg + return _truthy(os.environ.get(f"CCCI_SKIP_GENERIC_{op.upper()}")) -def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta: dict) -> None: +def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, meta) -> None: """Run the optional pre-op seed hook (recipe ops.py `pre_`) BEFORE the harness performs the op (HC3 op/assertion split): overlays seed data-continuity markers / the backup→restore mutation here, then assert post-op in test_.py. cc-ci's ops.py is trusted; a repo-local ops.py is @@ -309,7 +292,11 @@ def _run_pre_hook(recipe: str, op: str, repo_local: str | None, domain: str, met mod = importlib.util.module_from_spec(spec) spec.loader.exec_module(mod) print(f" pre-op seed ({source}): {os.path.relpath(path, ROOT)}::pre_{op}", flush=True) - getattr(mod, f"pre_{op}")(domain, meta) + fn = getattr(mod, f"pre_{op}") + # Uniform ctx convention (rcust P3): pre_(ctx). A legacy (domain, meta) hook fails + # HERE with a clear migration message, not a TypeError mid-call. + meta_mod.check_hook_signature(fn, ("ctx",), f"{os.path.relpath(path, ROOT)}::pre_{op}") + fn(meta_mod.hook_ctx(domain, meta, op=op)) finally: if d in sys.path: sys.path.remove(d) @@ -322,7 +309,7 @@ def _perform_op( head_ref: str | None, op_state: dict, deploy_timeout: int = 900, - meta: dict | None = None, + meta=None, ) -> None: """Perform the single mutating op ONCE (the harness owns the op, HC3). install has no op. Records what the assertions need (pre-upgrade identity, backup snapshot_id) into op_state. 
None of these @@ -345,9 +332,10 @@ def _perform_op( # verify fails we re-run the WHOLE backup (fresh restic snapshot) with a re-stabilised DB, up to # 3 attempts. Recipes without BACKUP_VERIFY are unaffected (single backup, as before). snap = generic.perform_backup(domain) - verify = meta.get("BACKUP_VERIFY") if meta else None + verify = meta.BACKUP_VERIFY if meta else None + verify_ctx = meta_mod.hook_ctx(domain, meta, op="backup") if meta else None attempt = 1 - while callable(verify) and not verify(domain) and attempt < 3: + while callable(verify) and not verify(verify_ctx) and attempt < 3: attempt += 1 print( f" backup-verify FAILED (attempt {attempt - 1}/3) — backup did not capture the " @@ -355,7 +343,7 @@ def _perform_op( flush=True, ) snap = generic.perform_backup(domain) - if callable(verify) and not verify(domain): + if callable(verify) and not verify(verify_ctx): print( f" !! backup-verify still FAILED after {attempt} attempts — backup is incomplete", flush=True, @@ -371,7 +359,7 @@ def run_lifecycle_tier( op: str, repo_local: str | None, domain: str, - meta: dict, + meta, head_ref: str | None, op_state: dict, records: list[dict] | None = None, @@ -386,7 +374,7 @@ def run_lifecycle_tier( a {tier,source,file,rc,junit} record appended, so the run can assemble per-stage/per-test results.json + the level afterwards. 
Purely additive — does not change the verdict.""" overlay = discovery.resolve_overlay_op(recipe, op, repo_local) - skip_gen = _skip_generic(op, meta) + skip_gen = _skip_generic(op) files: list[tuple[str, str]] = [] if not skip_gen: files.append(discovery.generic_op(op)) @@ -411,7 +399,7 @@ def run_lifecycle_tier( recipe, head_ref, op_state, - deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", 900)), + deploy_timeout=int(meta.DEPLOY_TIMEOUT), meta=meta, ) with open(os.environ["CCCI_OP_STATE_FILE"], "w") as f: @@ -449,7 +437,7 @@ def run_lifecycle_tier( def _enrich_deps_with_sso(parent_recipe: str, parent_domain: str, deps_list) -> dict[str, dict]: """For each dep, set up a fresh realm/client + test user via the harness's provider-specific setup function, then return a recipe→entry dict carrying domain + admin + realm/client/user - info — the shape the `setup_custom_tests.sh` hook (and dependent tests) read. + info — the shape the `install_steps.sh` hook (and dependent tests) read. Provider routing: today only `keycloak` is supported. authentik will need a parallel `setup_authentik_realm` when an authentik-dep recipe enrolls (DEFERRED.md #9). @@ -463,7 +451,7 @@ def _enrich_deps_with_sso(parent_recipe: str, parent_domain: str, deps_list) -> if not dep_recipe or not dep_domain: continue if dep_recipe != "keycloak": - # Provider not yet supported — record bare entry; setup_custom_tests.sh / tests will + # Provider not yet supported — record bare entry; install_steps.sh / tests will # raise if they need realm/client info they don't see. out[dep_recipe] = entry continue @@ -507,12 +495,10 @@ def _provision_deps( Splits deps into live-warm (shared provider at a stable domain + a per-run realm) vs cold (co-deployed per run), provisions each dep's SSO realm/client/user, and persists the enriched - dict the `setup_custom_tests.sh`/`install_steps.sh` hooks + dependent tests read. Raises on any - failure (the caller marks deps-not-ready). 
Used by BOTH wiring paths: - - post-deploy (legacy): provision AFTER generic tiers, then `setup_custom_tests.sh` does an - in-place OIDC redeploy. - - install-time (`OIDC_AT_INSTALL`, Q3.2a): provision BEFORE the single deploy so the - install-tier `install_steps.sh` hook wires OIDC env into that one deploy — no reconverge. + dict the `install_steps.sh` hooks + dependent tests read. Raises on any failure (the caller + marks deps-not-ready). Install-time wiring is the ONLY mode (rcust P2b): provision BEFORE the + single deploy so the install-tier `install_steps.sh` hook wires OIDC env into that one deploy — + no reconverge, no post-deploy `setup_custom_tests.sh` machinery. """ warm_deps, cold_deps = [], [] for d in declared: @@ -523,7 +509,7 @@ def _provision_deps( if wd: print(f" dep: {d} warm provider {wd} not up — cold fallback", flush=True) cold_deps.append(d) - dep_metas = {d: _load_meta(d) for d in cold_deps} + dep_metas = {d: meta_mod.load(d) for d in cold_deps} deps_list = ( deps_mod.deploy_deps(recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas) if cold_deps @@ -541,32 +527,6 @@ def _provision_deps( return deps_state -def _run_setup_custom_tests_hook(recipe: str, domain: str, deps_file: str) -> None: - """Run `tests//setup_custom_tests.sh` if present (operator-2026-05-28 SSO-dep plan - §3.2). The hook reads `$CCCI_DEPS_FILE`, sets OIDC env via `abra app config set` + secret - insert, and triggers an in-place `abra app deploy --force --chaos`. Failure here propagates - to mark deps-not-ready (caught in main()).""" - path = os.path.join(ROOT, "tests", recipe, "setup_custom_tests.sh") - if not os.path.isfile(path): - # No hook = recipe doesn't need post-deps wiring; deps are deployed + creds available - # via deps_apps fixture as-is. 
- print( - f" setup_custom_tests: no hook at {os.path.relpath(path, ROOT)} (deps creds ready in $CCCI_DEPS_FILE)", - flush=True, - ) - return - print(f" setup_custom_tests hook: {os.path.relpath(path, ROOT)}", flush=True) - rc = subprocess.run( - ["bash", path], - check=False, - env=dict(os.environ, CCCI_APP_DOMAIN=domain, CCCI_RECIPE=recipe, CCCI_DEPS_FILE=deps_file), - ) - if rc.returncode != 0: - raise RuntimeError( - f"setup_custom_tests.sh exited {rc.returncode} (deps env not wired into parent)" - ) - - def run_custom( recipe: str, repo_local: str | None, @@ -609,7 +569,7 @@ def _wait_undeployed(domain: str, timeout: int = 120) -> None: def run_quick( - recipe: str, ref: str | None, head_ref: str | None, repo_local: str | None, meta: dict + recipe: str, ref: str | None, head_ref: str | None, repo_local: str | None, meta ) -> int: """WC4 `--quick` opt-in fast lane (plan §2). Reattach the data-warm canonical (known-good volume) → upgrade IN PLACE to the PR head (chaos) → assert generic UPGRADE (reconverge+moved+serving) + @@ -645,7 +605,7 @@ def run_quick( op_state: dict = {} results: dict[str, str] = {} - declared = deps_mod.declared_deps(recipe) + declared = list(meta.DEPS) deps_state: dict = {} deps_ready = True deps_not_ready_reason = "" @@ -657,28 +617,32 @@ def run_quick( try: # 1) reattach the canonical (warm boot at the known-good version + retained volume) try: - canonical.deploy_canonical(recipe, timeout=int(meta.get("DEPLOY_TIMEOUT", 900))) + canonical.deploy_canonical(recipe, timeout=int(meta.DEPLOY_TIMEOUT)) lifecycle.wait_healthy( domain, - ok_codes=tuple(meta["HEALTH_OK"]), - path=meta["HEALTH_PATH"], - deploy_timeout=meta["DEPLOY_TIMEOUT"], - http_timeout=meta["HTTP_TIMEOUT"], + ok_codes=tuple(meta.HEALTH_OK), + path=meta.HEALTH_PATH, + deploy_timeout=meta.DEPLOY_TIMEOUT, + http_timeout=meta.HTTP_TIMEOUT, ) warm_ok = True except Exception as e: # noqa: BLE001 print(f"!! 
canonical reattach/readiness failed: {_scrub(str(e))}", flush=True) if warm_ok: - # 2) deps (warm keycloak + per-run realm) — mirrors main()'s warm/cold split + # 2) deps (warm keycloak + per-run realm) — mirrors main()'s warm/cold split. NB + # (rcust P2b): deps are provisioned (realm/creds in $CCCI_DEPS_FILE) but quick mode + # cannot do install-time OIDC env wiring — the canonical app pre-exists its per-run + # realm. No quick-enrolled recipe declares DEPS today; if one ever does, its + # requires_deps tests will exercise creds-only flows or skip (F2-11 keeps the signal). if declared: - print(f"\n===== setup_custom_tests (quick): deps {declared} =====", flush=True) + print(f"\n===== deps (quick): {declared} =====", flush=True) try: warm_deps, cold_deps = [], [] for d in declared: wd = warm.warm_domain(d) (warm_deps if (wd and warm.is_warm_up(d, wd)) else cold_deps).append(d) - dep_metas = {d: _load_meta(d) for d in cold_deps} + dep_metas = {d: meta_mod.load(d) for d in cold_deps} deps_list = ( deps_mod.deploy_deps( recipe, os.environ.get("PR", "0"), ref, cold_deps, meta_for=dep_metas @@ -693,12 +657,11 @@ def run_quick( print(f" dep: using live-warm {d} @ {wd} (per-run realm)", flush=True) deps_state = _enrich_deps_with_sso(recipe, domain, deps_list) deps_mod.write_run_state(deps_state) - _run_setup_custom_tests_hook(recipe, domain, depsfile) except Exception as e: # noqa: BLE001 deps_ready = False deps_not_ready_reason = _scrub(str(e))[:300] print( - f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}", + f"!! dep provisioning failed (deps-not-ready): {deps_not_ready_reason}", flush=True, ) @@ -813,7 +776,7 @@ def run_quick( overall = 1 if sso_unverified: print( - f"!! DEPS={declared} but setup_custom_tests failed and {requires_deps_skipped} " + f"!! 
DEPS={declared} but dep provisioning failed and {requires_deps_skipped} " "requires_deps SKIPPED — SSO NOT verified (F2-11)", file=sys.stderr, ) @@ -848,7 +811,7 @@ def promote_canonical(recipe: str, head_ref: str | None) -> None: if not latest: print(f"WC5 promote: no version tags for {recipe} — skip", flush=True) return - meta = _load_meta(recipe) + meta = meta_mod.load(recipe) # The cold run's deploy-count was already asserted + the countfile removed; don't perturb it. os.environ.pop("CCCI_DEPLOY_COUNT_FILE", None) print( @@ -860,14 +823,15 @@ def promote_canonical(recipe: str, head_ref: str | None) -> None: domain, version=latest, secrets=True, - deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", 900)), + deploy_timeout=int(meta.DEPLOY_TIMEOUT), + meta=meta, ) lifecycle.wait_healthy( domain, - ok_codes=tuple(meta["HEALTH_OK"]), - path=meta["HEALTH_PATH"], - deploy_timeout=meta["DEPLOY_TIMEOUT"], - http_timeout=meta["HTTP_TIMEOUT"], + ok_codes=tuple(meta.HEALTH_OK), + path=meta.HEALTH_PATH, + deploy_timeout=meta.DEPLOY_TIMEOUT, + http_timeout=meta.HTTP_TIMEOUT, ) abra.undeploy(domain) _wait_undeployed(domain) @@ -896,6 +860,17 @@ def main() -> int: print( f"== cc-ci run: recipe={recipe} ref={ref} pr={os.environ.get('PR', '0')} stages={sorted(stages)}" ) + # P2c: the CCCI_SKIP_GENERIC* env escape hatch is LOCAL-DEV-ONLY. If it rides a CI (drone) + # run, shout — generic-floor coverage is reduced and the verdict must not look routine. + for ov in skip_generic_env_overrides(): + if os.environ.get("DRONE"): + print( + f"!! {ov}=1 — dev-only generic-floor override ACTIVE IN A CI RUN; generic " + "assertions are suppressed for the affected op(s). 
This must never gate a merge.", + flush=True, + ) + else: + print(f"== {ov}=1 (dev-only generic-floor override active)", flush=True) # Concurrent-run safety is structural: this run's recipe trees live in its own ABRA_DIR # (exported here, before ANY abra call), so no recipe-tree lock exists; same-DOMAIN runs # serialise on the app-domain flock taken in deploy_app (see docs/concurrency.md). @@ -906,7 +881,13 @@ def main() -> int: # HEAD (the catalogue current) for a non-PR `!testme`. Captured before any version-tag checkout. head_ref = ref or lifecycle.recipe_head_commit(recipe) repo_local = snapshot_recipe_tests(recipe) - meta = _load_meta(recipe) + meta = meta_mod.load(recipe) + + # Customization manifest (rcust P5, R4): ONE block answering "what does this recipe + # customize?" across all surfaces — printed here and embedded verbatim in results.json under + # "customization". Pure presentation; never influences a verdict. + customization = manifest_mod.build(recipe, meta, repo_local) + print("\n" + manifest_mod.render(recipe, customization) + "\n", flush=True) # WC4/WC7: opt-in `--quick` fast lane. Requires an existing data-warm canonical; if none, fall # back cleanly to the full COLD run below so the PR is still tested (DECISIONS Phase-2w). @@ -929,9 +910,7 @@ def main() -> int: # override must be an exact published version tag (deployed as a pinned base). (Adversary §7.1.) 
want_upgrade = "upgrade" in stages prev = ( - (meta.get("UPGRADE_BASE_VERSION") or lifecycle.previous_version(recipe)) - if want_upgrade - else None + (meta.UPGRADE_BASE_VERSION or lifecycle.previous_version(recipe)) if want_upgrade else None ) base = prev or target backup_cap = generic.backup_capable(recipe, meta) @@ -960,10 +939,8 @@ def main() -> int: os.environ["CCCI_OP_STATE_FILE"] = statefile op_state: dict = {} - # Run-scoped dep state (Phase 2 Q2.3, refined per operator-2026-05-28 SSO-dep plan §1): - # deps now deploy AFTER generic tiers (between RESTORE and CUSTOM) so a failed dep deploy - # cannot break the generic-tier signal. The `setup_custom_tests` step deploys each dep + runs - # `tests//setup_custom_tests.sh` to wire OIDC env via in-place redeploy. + # Run-scoped dep state (Phase 2 Q2.3; install-time-only since rcust P2b): deps are provisioned + # BEFORE the single deploy so install_steps.sh wires OIDC env into that one deploy. # `$CCCI_DEPS_FILE` is written with the full creds dict the hook script needs (jq-readable). depsfile = _run_state_path("deps") + ".json" with open(depsfile, "w") as f: @@ -974,15 +951,9 @@ def main() -> int: with contextlib.suppress(OSError): os.remove(skipfile) os.environ["CCCI_DEPS_SKIP_REPORT"] = skipfile - declared = deps_mod.declared_deps(recipe) - # Q3.2a: a recipe that tolerates OIDC env at first boot AND whose deps are live-warm wires OIDC - # at INSTALL time (provision the realm BEFORE the single deploy; install_steps.sh writes the env - # into it) instead of the post-deploy in-place `--chaos` redeploy — which is flaky on the heavy - # 12-service lasuite-drive stack (collabora WOPI race; see JOURNAL Step 0). Opt-in per recipe. 
- oidc_at_install = bool(meta.get("OIDC_AT_INSTALL")) and bool(declared) + declared = list(meta.DEPS) if declared: - when = "BEFORE deploy (install-time OIDC)" if oidc_at_install else "AFTER generic tiers" - print(f"\n===== DEPS declared (provision {when}): {declared} =====", flush=True) + print(f"\n===== DEPS declared (provision BEFORE deploy): {declared} =====", flush=True) deps_state: dict[str, dict] = {} # new shape: recipe→entry dict (sso-dep plan §1) deps_ready = True deps_not_ready_reason: str = "" @@ -996,7 +967,7 @@ def main() -> int: # install_steps.sh can read $CCCI_DEPS_FILE and wire the OIDC env into that one deploy. On # failure we mark deps-not-ready but STILL deploy the recipe alone (install_steps.sh no-ops # on an empty deps file) so the generic tiers run; the OIDC custom test then skips → F2-11. ---- - if oidc_at_install: + if declared: print( f"\n===== install-time OIDC: provisioning deps {declared} BEFORE deploy =====", flush=True, @@ -1023,18 +994,21 @@ def main() -> int: version=base, secrets=True, install_steps_hook=hook, - deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", 900)), + deploy_timeout=int(meta.DEPLOY_TIMEOUT), + meta=meta, ) lifecycle.wait_healthy( domain, - ok_codes=tuple(meta["HEALTH_OK"]), - path=meta["HEALTH_PATH"], - deploy_timeout=meta["DEPLOY_TIMEOUT"], - http_timeout=meta["HTTP_TIMEOUT"], + ok_codes=tuple(meta.HEALTH_OK), + path=meta.HEALTH_PATH, + deploy_timeout=meta.DEPLOY_TIMEOUT, + http_timeout=meta.HTTP_TIMEOUT, ) # Recipe READY_PROBE (e.g. lasuite-drive collabora WOPI discovery) — readiness beyond # replica convergence + app HEALTH_PATH; no-op for recipes without one. - lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.get("DEPLOY_TIMEOUT", 900))) + lifecycle.wait_ready_probes( + meta, domain, timeout=int(meta.DEPLOY_TIMEOUT), op="install" + ) deploy_ok = True except Exception as e: # noqa: BLE001 — a failed deploy is a reported INSTALL failure print(f"!! 
deploy/readiness failed: {e}", flush=True) @@ -1131,41 +1105,11 @@ def main() -> int: if backup_cap else "skip" ) - # ---- setup_custom_tests step (NEW, operator-2026-05-28 SSO-dep plan §3.2) ---- - # Deploy each declared dep + wire OIDC env into the parent app via the per-recipe - # setup_custom_tests.sh hook + in-place redeploy. Failure here marks deps-not-ready - # but does NOT abort the run — @pytest.mark.requires_deps tests skip with reason; - # non-deps custom tests still run normally. - if declared and not oidc_at_install: - # LEGACY post-deploy path: provision deps AFTER generic tiers, then wire OIDC env - # into the parent via the setup_custom_tests.sh hook + an in-place `--chaos` redeploy. - print("\n===== setup_custom_tests: deps + OIDC wiring =====", flush=True) - try: - deps_state = _provision_deps(recipe, domain, ref, declared) - # Run the per-recipe post-deps hook (jq-driven OIDC wiring + in-place redeploy) - _run_setup_custom_tests_hook(recipe, domain, depsfile) - except Exception as e: # noqa: BLE001 — setup failure is ISOLATED to dep-marked tests - deps_ready = False - deps_not_ready_reason = _scrub(str(e))[:300] - print( - f"!! setup_custom_tests failed (deps-not-ready): {deps_not_ready_reason}", - flush=True, - ) - elif declared and oidc_at_install and deps_ready: - # INSTALL-TIME path (Q3.2a): deps were provisioned BEFORE the single deploy and the - # install-tier install_steps.sh hook already wired OIDC env into that one deploy — - # so NO re-provision, NO reconverge here. Run only the post-deploy setup hook - # (e.g. lasuite-drive's minio-createbuckets one-shot), which needs the live stack. - print("\n===== post-deploy setup (OIDC already wired at install) =====", flush=True) - try: - _run_setup_custom_tests_hook(recipe, domain, depsfile) - except Exception as e: # noqa: BLE001 — isolated to dep-marked / state-dependent tests - deps_ready = False - deps_not_ready_reason = _scrub(str(e))[:300] - print( - f"!! 
post-deploy setup failed: {deps_not_ready_reason}", - flush=True, - ) + # (rcust P2b: install-time deps wiring is the ONLY mode — deps were provisioned BEFORE + # the single deploy and install_steps.sh wired the OIDC env into it. The legacy + # post-deploy provisioning + setup_custom_tests.sh redeploy machinery is deleted; a + # recipe's post-deploy seeding belongs in ops.py pre_install, e.g. lasuite-drive's + # MinIO bucket one-shot.) # ---- CUSTOM tier ---- if "custom" in stages: @@ -1240,8 +1184,7 @@ def main() -> int: # ---- per-op summary (DG6 feed) ---- # SSO-dep plan §1: DG4.1 generalised — one `abra app new` per app in the run (recipe + each - # COLD dep). In-place reconfigure-and-redeploy (the setup_custom_tests step's - # `abra app deploy --force --chaos`) is NOT a fresh `app_new` and does NOT increment the count. + # COLD dep). Chaos redeploys are NOT a fresh `app_new` and do NOT increment the count. # WC1: a live-warm dep (keycloak) is NOT deployed by the run — it only gets a per-run realm — so # warm deps contribute 0. So expected = 1 + (number of COLD deps that actually got deployed). _dep_entries = deps_state.values() if isinstance(deps_state, dict) else (deps_state or []) @@ -1282,12 +1225,12 @@ def main() -> int: overall = 1 if any(v == "fail" for v in results.values()): overall = 1 - # F2-11: a deps-declaring recipe whose setup_custom_tests failed has NOT verified its SSO/OIDC + # F2-11: a deps-declaring recipe whose dep provisioning failed has NOT verified its SSO/OIDC # claim — its requires_deps tests SKIPPED (a skip-only file exits 0, so without this the run # would report GREEN). Fail the run for that recipe; generic-tier results above are untouched. if sso_dep_unverified(declared, deps_ready, requires_deps_skipped): print( - f"!! recipe declares DEPS={declared} but setup_custom_tests failed and " + f"!! 
recipe declares DEPS={declared} but dep provisioning failed and " f"{requires_deps_skipped} requires_deps (SSO) test(s) were SKIPPED — SSO claim NOT " f"verified; failing run (F2-11). deps-not-ready: {deps_not_ready_reason}", file=sys.stderr, @@ -1314,7 +1257,8 @@ def main() -> int: no_secret_leak=True, # narrowed below by an actual scan of the serialised artifact screenshot=screenshot_rel, # Phase 3 U1 (R4): relative PNG name iff capture succeeded finished_ts=time.time(), - expected_na=meta.get("EXPECTED_NA"), # declared intentional-skip map (recipe_meta) + expected_na=meta.EXPECTED_NA, # declared intentional-skip map (recipe_meta) + customization=customization, # rcust P5: the run-start manifest, verbatim ) # Real (if narrow) leak check: no known infra-secret value may appear in the artifact (R7). blob = json.dumps(data) diff --git a/scripts/gen-meta-docs.py b/scripts/gen-meta-docs.py new file mode 100644 index 0000000..f9135f9 --- /dev/null +++ b/scripts/gen-meta-docs.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +"""Render the harness.meta KEYS registry to the markdown key-reference table in +docs/recipe-customization.md §4 (rcust P1.5; kills the R5 doc-drift class). + +Usage: + python3 scripts/gen-meta-docs.py # rewrite the table in-place between the markers + python3 scripts/gen-meta-docs.py --print # print the rendered table to stdout (used by the + # doc-sync unit test, tests/unit/test_meta.py) + +The table lives between `` / `` markers; a unit +test asserts the committed table equals this rendering, so editing it by hand fails CI. 
+""" + +from __future__ import annotations + +import os +import sys + +ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, os.path.join(ROOT, "runner")) +from harness.meta import KEYS # noqa: E402 + +DOC = os.path.join(ROOT, "docs", "recipe-customization.md") +START = "" +END = "" + + +def _default_repr(v) -> str: + if v is None: + return "`None`" + return f"`{v!r}`" + + +def render() -> str: + lines = [ + START, + "", + "_This table is GENERATED from the `runner/harness/meta.py` KEYS registry by" + " `scripts/gen-meta-docs.py` — do not edit by hand (a unit test pins the sync)._", + "", + "| Key | Type | Default | Meaning |", + "|---|---|---|---|", + ] + for k in KEYS: + doc = k.doc.replace("|", "\\|") + name = f"`{k.name}`" + (" **(deprecated)**" if k.deprecated else "") + lines.append(f"| {name} | `{k.type}` | {_default_repr(k.default)} | {doc} |") + lines += ["", END] + return "\n".join(lines) + + +def main() -> int: + table = render() + if "--print" in sys.argv: + print(table) + return 0 + with open(DOC) as f: + text = f.read() + if START not in text or END not in text: + print(f"{DOC}: missing {START}/{END} markers", file=sys.stderr) + return 1 + head, _, rest = text.partition(START) + _, _, tail = rest.partition(END) + with open(DOC, "w") as f: + f.write(head + table + tail) + print(f"{DOC}: key table rewritten from the registry ({len(KEYS)} keys)") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/bluesky-pds/ops.py b/tests/bluesky-pds/ops.py index e2bafd9..6e12f40 100644 --- a/tests/bluesky-pds/ops.py +++ b/tests/bluesky-pds/ops.py @@ -9,14 +9,14 @@ sys.path.insert(0, os.path.dirname(__file__)) import _p4 # noqa: E402 -def pre_upgrade(domain, meta): - _p4.create_account(domain) +def pre_upgrade(ctx): + _p4.create_account(ctx.domain) -def pre_backup(domain, meta): - _p4.create_account(domain) +def pre_backup(ctx): + _p4.create_account(ctx.domain) -def pre_restore(domain, meta): - 
_p4.delete_account(domain) - assert not _p4.account_exists(domain), "marker account delete did not take (pre_restore)" +def pre_restore(ctx): + _p4.delete_account(ctx.domain) + assert not _p4.account_exists(ctx.domain), "marker account delete did not take (pre_restore)" diff --git a/tests/conftest.py b/tests/conftest.py index e9be5cb..9dfbee9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,32 +14,7 @@ import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "runner")) from harness import deps as deps_mod # noqa: E402 -from harness import lifecycle, naming - - -def _short(s: str, n: int = 8) -> str: - return "".join(c for c in s if c.isalnum())[:n] or "local" - - -def _recipe_meta(recipe: str) -> dict: - """Optional per-recipe config so enrolling a recipe needs NO shared-harness change (D5). - A recipe may ship tests//recipe_meta.py with any of: HEALTH_PATH (str), - HEALTH_OK (tuple of status codes), DEPLOY_TIMEOUT (int), HTTP_TIMEOUT (int).""" - path = os.path.join(os.path.dirname(__file__), recipe, "recipe_meta.py") - meta = { - "HEALTH_PATH": "/", - "HEALTH_OK": (200, 301, 302), - "DEPLOY_TIMEOUT": 600, - "HTTP_TIMEOUT": 300, - } - if os.path.exists(path): - ns: dict = {} - with open(path) as fh: - exec(compile(fh.read(), path, "exec"), ns) # noqa: S102 (trusted, in-repo) - for k in meta: - if k in ns: - meta[k] = ns[k] - return meta +from harness import meta as meta_mod # noqa: E402 @pytest.fixture(scope="session") @@ -48,18 +23,10 @@ def recipe() -> str: @pytest.fixture(scope="session") -def app_domain(recipe) -> str: - # Docker swarm config/secret names = __ must be <= 64 chars, and - # stackname is the sanitized domain. ".ci.commoninternet.net" alone is 22 chars, so the - # subdomain label must stay short. Use -<6hex(recipe|pr|ref)> — unique per run, - # collision-safe across recipes (full recipe in the hash), readable context lives in the - # Drone build params + PR comment. 
(Deviation from plan §4.0 long name; see DECISIONS.md.) - return naming.app_domain(recipe, os.environ.get("PR", "0"), os.environ.get("REF")) - - -@pytest.fixture(scope="session") -def meta(recipe) -> dict: - return _recipe_meta(recipe) +def meta(recipe): + """The recipe's FULL validated customization (RecipeMeta, attribute access) via the single + loader (rcust P1 — previously this fixture saw only the 4 base keys, spec §8 R3).""" + return meta_mod.load(recipe) @pytest.fixture(scope="session") @@ -73,32 +40,55 @@ def live_app() -> str: return domain -@pytest.fixture(scope="session") -def deps_apps() -> dict[str, str]: - """Phase 2 Q2.3 dependency-resolver contract (refined operator-2026-05-28 SSO-dep plan §1): - when a recipe declares `DEPS = [...]` in its `recipe_meta.py`, the orchestrator deploys each - dep AFTER the generic tiers (between RESTORE and CUSTOM) and persists their per-run identity - + SSO creds to `$CCCI_DEPS_FILE`. Tests access the dep's per-run domain via this fixture. - For full SSO creds (realm/client/secret/admin) use the `deps_creds` fixture instead. +@pytest.fixture +def op_state() -> dict: + """The orchestrator's run-scoped op context (rcust P4): versions, artifact paths — written to + `$CCCI_OP_STATE_FILE` after each lifecycle op (e.g. `{"upgrade": {"before": {...}, + "head_ref": ...}, "backup": {"snapshot_id": ...}}`). Overlay tests read op facts from here + instead of hand-parsing env/JSON. Skips with a clear reason outside an orchestrator run.""" + import json - Returns `{dep_recipe: domain}` (str→str). 
Empty when no deps declared OR deps-not-ready.""" + path = os.environ.get("CCCI_OP_STATE_FILE") + if not path: + pytest.skip( + "CCCI_OP_STATE_FILE not set — op_state is only available under the orchestrator" + ) + if not os.path.exists(path): + pytest.skip(f"op-state file missing ({path}) — orchestrator has not performed an op yet") + try: + with open(path) as f: + return json.load(f) + except ValueError: + pytest.skip(f"op-state file unreadable/not JSON ({path})") + + +class _DepEntry(dict): + """One provisioned dep (full creds dict) with attribute sugar: `entry.domain`, `entry.realm`, + `entry.client_secret`, ... — dict-style access works too (rcust P2d).""" + + def __getattr__(self, name): + try: + return self[name] + except KeyError as e: + raise AttributeError(name) from e + + +@pytest.fixture(scope="session") +def deps() -> dict[str, _DepEntry]: + """The recipe's provisioned deps (rcust P2d — consolidates the old `deps_apps`+`deps_creds` + pair). When a recipe declares `DEPS = [...]` in its `recipe_meta.py`, the orchestrator + provisions each dep BEFORE the single deploy and persists per-run identity + SSO creds to + `$CCCI_DEPS_FILE`. `deps["keycloak"]` carries domain/realm/client_id/client_secret/user/ + password/email/admin_user/admin_password/discovery_url/token_url/... (`.domain` etc. work as + attributes). Empty when no deps declared OR deps-not-ready — pair with + `@pytest.mark.requires_deps` so the F2-11 skip-report keeps the green signal honest.""" state = deps_mod.deps_as_dict(deps_mod.load_run_state()) - return {r: e["domain"] for r, e in state.items() if e.get("domain")} - - -@pytest.fixture(scope="session") -def deps_creds() -> dict[str, dict]: - """Full SSO-creds dict for each declared dep (operator-2026-05-28 SSO-dep plan §1). - `deps_creds["keycloak"]` returns the entry written by setup_custom_tests with keys - domain/realm/client_id/client_secret/user/password/email/admin_user/admin_password/ - discovery_url/token_url/.... 
Use this in `@pytest.mark.requires_deps` tests that need to - authenticate via OIDC.""" - return deps_mod.deps_as_dict(deps_mod.load_run_state()) + return {r: _DepEntry(e) for r, e in state.items()} def pytest_collection_modifyitems(config, items): """SSO-dep plan §4: tests marked `@pytest.mark.requires_deps` are skipped with reason - `deps-not-ready: ` when the orchestrator's setup_custom_tests step failed + `deps-not-ready: ` when the orchestrator's dep provisioning failed (orchestrator sets CCCI_DEPS_READY=0 in env). Non-deps custom tests are unaffected. This is failure-isolation per plan §1 — generic tiers cannot break the SSO-marked tests' @@ -131,40 +121,5 @@ def pytest_configure(config): """Register the `requires_deps` marker so pytest doesn't warn about it.""" config.addinivalue_line( "markers", - "requires_deps: test requires DEPS-declared services + setup_custom_tests success.", + "requires_deps: test requires DEPS-declared services + dep provisioning success.", ) - - -def _wait_healthy(domain, meta): - lifecycle.wait_healthy( - domain, - ok_codes=tuple(meta["HEALTH_OK"]), - path=meta["HEALTH_PATH"], - deploy_timeout=meta["DEPLOY_TIMEOUT"], - http_timeout=meta["HTTP_TIMEOUT"], - ) - - -@pytest.fixture -def deployed(recipe, app_domain, meta, request): - """Function-scoped: deploy the current/$REF version healthy, guaranteed teardown after. 
- Used by stages that start from current (install/backup).""" - version = os.environ.get("VERSION") or None - lifecycle.janitor() - request.addfinalizer(lambda: lifecycle.teardown_app(app_domain)) - lifecycle.deploy_app(recipe, app_domain, version=version) - _wait_healthy(app_domain, meta) - return app_domain - - -@pytest.fixture(scope="session") -def deployed_app(recipe, app_domain, meta): - """Install stage: deploy the recipe and wait until healthy; tear down at session end.""" - version = os.environ.get("VERSION") or None - lifecycle.janitor() # sweep orphans from crashed runs first - try: - lifecycle.deploy_app(recipe, app_domain, version=version, secrets=True) - _wait_healthy(app_domain, meta) - yield app_domain - finally: - lifecycle.teardown_app(app_domain) diff --git a/tests/cryptpad/ops.py b/tests/cryptpad/ops.py index a0adac8..8d8c9c9 100644 --- a/tests/cryptpad/ops.py +++ b/tests/cryptpad/ops.py @@ -15,13 +15,13 @@ def _write(domain, val): lifecycle.exec_in_app(domain, ["sh", "-c", f"echo {val} > {MARKER}"]) -def pre_upgrade(domain, meta): - _write(domain, "upgrade-survives") +def pre_upgrade(ctx): + _write(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _write(domain, "original") +def pre_backup(ctx): + _write(ctx.domain, "original") -def pre_restore(domain, meta): - _write(domain, "mutated") # diverge so a successful restore is observable +def pre_restore(ctx): + _write(ctx.domain, "mutated") # diverge so a successful restore is observable diff --git a/tests/cryptpad/recipe_meta.py b/tests/cryptpad/recipe_meta.py index bc1099d..e345b78 100644 --- a/tests/cryptpad/recipe_meta.py +++ b/tests/cryptpad/recipe_meta.py @@ -7,9 +7,9 @@ DEPLOY_TIMEOUT = 600 HTTP_TIMEOUT = 600 -def EXTRA_ENV(domain): +def EXTRA_ENV(ctx): """cryptpad needs a SANDBOX_DOMAIN distinct from the main DOMAIN (it serves user content from a separate origin; the web router routes both). 
Derive a sibling subdomain under the same wildcard (covered by the wildcard cert, so no cert work).""" - label, _, rest = domain.partition(".") + label, _, rest = ctx.domain.partition(".") return {"SANDBOX_DOMAIN": f"{label}-sb.{rest}"} diff --git a/tests/custom-html-bkp-bad/ops.py b/tests/custom-html-bkp-bad/ops.py index f6db098..dfa9567 100644 --- a/tests/custom-html-bkp-bad/ops.py +++ b/tests/custom-html-bkp-bad/ops.py @@ -12,8 +12,8 @@ from harness import lifecycle MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" -def pre_restore(domain: str, meta: dict) -> None: +def pre_restore(ctx) -> None: """Write 'mutated' to the marker before restore runs. If restore brings back the snapshot (which has no marker — never seeded by pre_backup), the marker ends up MISSING or 'mutated' after restore → test_restore_returns_state FAILS → restore=RED.""" - lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) + lifecycle.exec_in_app(ctx.domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) diff --git a/tests/custom-html-rst-bad/ops.py b/tests/custom-html-rst-bad/ops.py index 3f3b920..e8272aa 100644 --- a/tests/custom-html-rst-bad/ops.py +++ b/tests/custom-html-rst-bad/ops.py @@ -11,5 +11,5 @@ from harness import lifecycle MARKER_PATH = "/usr/share/nginx/html/ci-marker.txt" -def pre_restore(domain: str, meta: dict) -> None: - lifecycle.exec_in_app(domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) +def pre_restore(ctx) -> None: + lifecycle.exec_in_app(ctx.domain, ["sh", "-c", f"echo mutated > {MARKER_PATH}"]) diff --git a/tests/custom-html/ops.py b/tests/custom-html/ops.py index 9c7b349..b3df744 100644 --- a/tests/custom-html/ops.py +++ b/tests/custom-html/ops.py @@ -1,4 +1,4 @@ -"""custom-html — pre-op seed hooks (Phase 1e HC3). The orchestrator runs `pre_(domain, meta)` +"""custom-html — pre-op seed hooks (Phase 1e HC3). The orchestrator runs `pre_(ctx)` BEFORE it performs the op; the matching test_.py asserts the post-op state (assertion-only). 
nginx serves the volume at /usr/share/nginx/html, so the marker file survives an upgrade / a @@ -17,16 +17,16 @@ def _write(domain: str, val: str) -> None: lifecycle.exec_in_app(domain, ["sh", "-c", f"echo {val} > {MARKER_PATH}"]) -def pre_upgrade(domain, meta): +def pre_upgrade(ctx): # seed a marker before the upgrade so the overlay can prove the data survives it - _write(domain, "upgrade-survives") + _write(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): +def pre_backup(ctx): # establish a known original state before the backup op captures it - _write(domain, "original") + _write(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # diverge from the backed-up state so a successful restore (back to "original") is observable - _write(domain, "mutated") + _write(ctx.domain, "mutated") diff --git a/tests/discourse/install_steps.sh b/tests/discourse/install_steps.sh deleted file mode 100755 index 930e663..0000000 --- a/tests/discourse/install_steps.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# discourse — INSTALL-TIME hook (Phase 2 Q4.6). Runs during the install tier AFTER `abra app new` + -# EXTRA_ENV + `abra app secret generate` and BEFORE the single `abra app deploy` -# (lifecycle.py::_run_install_steps), with CCCI_RECIPE / CCCI_APP_DOMAIN in env. -# -# Purpose: provide the cc-ci re-pin+grace overlay (compose.ccci.yml) to the recipe checkout so the -# UPGRADE-tier BASE deploy (published 0.7.0+3.3.1, whose compose pins the Docker-Hub-removed -# `bitnami/discourse:3.3.1` and ships a too-tight 5m start_period) is deployable and can survive the -# 15-25min Rails cold boot — so upgrade-to-latest can run. See compose.ccci.yml's header for the full -# rationale. The overlay is referenced by recipe_meta COMPOSE_FILE; it is a cc-ci file (not part of the -# recipe), so copying it here makes it resolvable. 
It persists across the later `git checkout ` -# (untracked) so the head deploy also merges it (idempotent — the PR head already re-pins + ships 20m). -# CHAOS_BASE_DEPLOY=True is set so abra's pinned-deploy clean-tree check doesn't FATA on the overlay. -set -euo pipefail - -: "${CCCI_RECIPE:?missing CCCI_RECIPE}" -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Resolve the recipe tree the way abra does: $ABRA_DIR (the per-run tree inside a CI run) else -# the canonical ~/.abra — the overlay must land in the tree this run actually deploys from. -RECIPE_DIR="${ABRA_DIR:-${HOME}/.abra}/recipes/${CCCI_RECIPE}" - -if [ ! -d "$RECIPE_DIR" ]; then - echo " discourse install_steps: recipe dir $RECIPE_DIR missing — cannot provide compose.ccci.yml" >&2 - exit 1 -fi - -cp "$SCRIPT_DIR/compose.ccci.yml" "$RECIPE_DIR/compose.ccci.yml" -echo " discourse install_steps: provided compose.ccci.yml (bitnamilegacy re-pin + 20m start_period grace) to recipe checkout (${CCCI_RECIPE})" diff --git a/tests/discourse/ops.py b/tests/discourse/ops.py index 5f619a7..35087fa 100644 --- a/tests/discourse/ops.py +++ b/tests/discourse/ops.py @@ -30,18 +30,18 @@ def _seed(domain, value): assert got == value, f"seed did not commit (read back {got!r}, expected {value!r})" -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # diverge from the backup so a successful restore is observable - _psql(domain, "DROP TABLE IF EXISTS ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE IF EXISTS ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/discourse/recipe_meta.py b/tests/discourse/recipe_meta.py 
index 9092875..32b7c9f 100644 --- a/tests/discourse/recipe_meta.py +++ b/tests/discourse/recipe_meta.py @@ -29,11 +29,11 @@ HTTP_TIMEOUT = 1200 # (1) it pins the Docker-Hub-removed `bitnami/discourse:3.3.1` (404) → overlay re-pins app+sidekiq to # `bitnamilegacy/discourse:3.3.1` (namespace-only, identical image), the same re-pin the PR makes; # (2) its 5m start_period is too tight for the 15-25min Rails boot → overlay widens it to 20m (grace). -# install_steps.sh provides the overlay; CHAOS_BASE_DEPLOY skips the clean-tree gate on the untracked -# overlay; it persists across the head checkout (idempotent — the PR head already re-pins + ships 20m). +# The harness auto-provides the overlay to the checkout and auto-chaoses the base deploy +# (first-class compose.ccci.yml, rcust P2a); it persists across the head checkout (idempotent — the +# PR head already re-pins + ships 20m). # Upgrade crossover: 0.7.0 (re-pinned base) → PR head; full assertions run on the HEAD. The 0.7.0 # *custom* tests are not separately run (custom tier runs once, on the head — policy §1 allows skip+record). -CHAOS_BASE_DEPLOY = True UPGRADE_BASE_VERSION = "0.7.0+3.3.1" EXTRA_ENV = { "TIMEOUT": "3600", # abra's internal convergence wait; matches DEPLOY_TIMEOUT (slow Rails boot headroom) @@ -41,7 +41,7 @@ EXTRA_ENV = { } -def BACKUP_VERIFY(domain): +def BACKUP_VERIFY(ctx): """Post-backup integrity check (Q4.6, same race ghost F2-14b hit). The recipe's backupbot db pre-hook (`/pg_backup.sh backup`) dumps the discourse postgres DB to `/var/lib/postgresql/data/ backup.sql` (gzip), then restic captures that path. 
On the loaded single CI node the db container @@ -60,7 +60,7 @@ def BACKUP_VERIFY(domain): try: out = lifecycle.exec_in_app( - domain, + ctx.domain, [ "sh", "-c", diff --git a/tests/ghost/install_steps.sh b/tests/ghost/install_steps.sh deleted file mode 100755 index 2c2dc50..0000000 --- a/tests/ghost/install_steps.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash -# ghost — INSTALL-TIME hook (Phase 2 F2-14b). Runs during the install tier AFTER `abra app new` + -# EXTRA_ENV + `abra app secret generate` and BEFORE the single `abra app deploy` -# (lifecycle.py::_run_install_steps), with CCCI_RECIPE / CCCI_APP_DOMAIN in env. -# -# Purpose: provide the cc-ci start_period-grace overlay (compose.ccci.yml) to the recipe checkout so -# the UPGRADE-tier BASE deploy (a previous published version whose app healthcheck still ships the -# too-tight 1m start_period) can survive ghost's ~6-9min fresh-DB migration and converge. See -# compose.ccci.yml's header for the full rationale. The overlay is referenced by recipe_meta -# COMPOSE_FILE; copying it here (it is a cc-ci file, not part of the recipe) makes it resolvable. -# It persists across the later `git checkout ` (untracked) so the head deploy also merges it -# (idempotent — the PR head already ships 15m). CHAOS_BASE_DEPLOY=True is set so abra's pinned-deploy -# clean-tree check doesn't FATA on the untracked overlay. -set -euo pipefail - -: "${CCCI_RECIPE:?missing CCCI_RECIPE}" -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Resolve the recipe tree the way abra does: $ABRA_DIR (the per-run tree inside a CI run) else -# the canonical ~/.abra — the overlay must land in the tree this run actually deploys from. -RECIPE_DIR="${ABRA_DIR:-${HOME}/.abra}/recipes/${CCCI_RECIPE}" - -if [ ! 
-d "$RECIPE_DIR" ]; then - echo " ghost install_steps: recipe dir $RECIPE_DIR missing — cannot provide compose.ccci.yml" >&2 - exit 1 -fi - -cp "$SCRIPT_DIR/compose.ccci.yml" "$RECIPE_DIR/compose.ccci.yml" -echo " ghost install_steps: provided compose.ccci.yml (app start_period grace) to recipe checkout (${CCCI_RECIPE})" diff --git a/tests/ghost/ops.py b/tests/ghost/ops.py index 24448ba..74ec035 100644 --- a/tests/ghost/ops.py +++ b/tests/ghost/ops.py @@ -36,19 +36,19 @@ def _seed(domain, value): assert got == value, f"seed did not commit (read back {got!r}, expected {value!r})" -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # diverge from the backup so a successful restore is observable: drop the marker table. - _mysql(domain, "DROP TABLE IF EXISTS ci_marker;") + _mysql(ctx.domain, "DROP TABLE IF EXISTS ci_marker;") got = _mysql( - domain, + ctx.domain, "SELECT COUNT(*) FROM information_schema.tables " "WHERE table_schema='ghost' AND table_name='ci_marker';", ) diff --git a/tests/ghost/recipe_meta.py b/tests/ghost/recipe_meta.py index 710f50f..44b2580 100644 --- a/tests/ghost/recipe_meta.py +++ b/tests/ghost/recipe_meta.py @@ -31,23 +31,22 @@ HTTP_TIMEOUT = 900 # (plan-ccci-compose-overlay-policy.md §1), so the harness base-deploys the previous PUBLISHED version # (1.1.1+6-alpine) — which predates the PR and still ships the too-tight 1m start_period → it would # deadlock on the same migration kill. compose.ccci.yml re-applies the 15m grace to the BASE so the -# from-version is deployable; install_steps.sh provides it to the checkout; CHAOS_BASE_DEPLOY skips the -# clean-tree gate on that untracked overlay. It persists across the head checkout (idempotent — the PR -# head already ships 15m). 
This is the policy-blessed "minimal overlay on the from-version so +# from-version is deployable; the harness auto-provides it to the checkout and auto-chaoses the base +# deploy (first-class compose.ccci.yml, rcust P2a). It persists across the head checkout (idempotent — +# the PR head already ships 15m). This is the policy-blessed "minimal overlay on the from-version so # upgrade-to-latest can run" — grace-only, masks no defect, weakens no test. # TIMEOUT/DEPLOY_TIMEOUT 2400s: the BASE cold boot's wall-time is mysql fresh-dir init (~6min, during # which the app crash-loops harmlessly on `ECONNREFUSED 3306` until mysql accepts connections — no # migration progress lost, it hasn't started) PLUS the ~9-15min schema migration (round-trip-bound, # slower under host load). 1200s was too tight (full4 killed at the near-final `email_recipients` # tables while still 0/1); 2400s gives headroom while still bounding a genuine hang (matches discourse). -CHAOS_BASE_DEPLOY = True EXTRA_ENV = { "TIMEOUT": "2400", "COMPOSE_FILE": "compose.yml:compose.ccci.yml", } -def BACKUP_VERIFY(domain): +def BACKUP_VERIFY(ctx): """Post-backup integrity check (F2-14b). The recipe's backupbot db pre-hook dumps the ghost MySQL DB to `/var/lib/mysql/backup.sql.gz` (then restic captures that path). 
On the loaded single CI node the db container intermittently CYCLES mid-dump (observed: full5/6/7 RED, full8 green — pure race; @@ -62,7 +61,7 @@ def BACKUP_VERIFY(domain): try: out = lifecycle.exec_in_app( - domain, + ctx.domain, [ "sh", "-c", diff --git a/tests/immich/ops.py b/tests/immich/ops.py index daa4d7d..0a82465 100644 --- a/tests/immich/ops.py +++ b/tests/immich/ops.py @@ -25,17 +25,17 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( +def pre_restore(ctx): + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/keycloak/ops.py b/tests/keycloak/ops.py index b9c925a..118fe38 100644 --- a/tests/keycloak/ops.py +++ b/tests/keycloak/ops.py @@ -14,20 +14,20 @@ def _token(domain): return kc_admin.admin_token(domain, kc_admin.admin_password(domain)) -def pre_upgrade(domain, meta): +def pre_upgrade(ctx): # create the marker realm (DB data) before the upgrade so the overlay can prove it survives - assert kc_admin.create_marker_realm(domain, _token(domain)) in (201, 409) + assert kc_admin.create_marker_realm(ctx.domain, _token(ctx.domain)) in (201, 409) -def pre_backup(domain, meta): +def pre_backup(ctx): # establish the marker realm before the backup op captures mariadb - assert kc_admin.create_marker_realm(domain, _token(domain)) in (201, 409) + assert kc_admin.create_marker_realm(ctx.domain, _token(ctx.domain)) in (201, 409) -def pre_restore(domain, meta): +def pre_restore(ctx): # backup-bot-two cycles the keycloak container 
during backup → wait for serving, re-auth, then # delete the realm (diverge from the backup) so a successful restore is observable - generic.assert_serving(domain, meta) - tok = _token(domain) - assert kc_admin.delete_marker_realm(domain, tok) in (204, 200) - assert not kc_admin.marker_realm_exists(domain, tok), "delete did not take" + generic.assert_serving(ctx.domain, ctx.meta) + tok = _token(ctx.domain) + assert kc_admin.delete_marker_realm(ctx.domain, tok) in (204, 200) + assert not kc_admin.marker_realm_exists(ctx.domain, tok), "delete did not take" diff --git a/tests/lasuite-docs/functional/test_create_doc.py b/tests/lasuite-docs/functional/test_create_doc.py index 161462d..e73d70f 100644 --- a/tests/lasuite-docs/functional/test_create_doc.py +++ b/tests/lasuite-docs/functional/test_create_doc.py @@ -5,7 +5,7 @@ persistence". This is the canonical create-an-object + read-it-back for lasuite- Flow (uses an OIDC token from the dep keycloak): 1. Obtain a JWT via OIDC password grant against the dep keycloak (the test user is provisioned - by the orchestrator's setup_custom_tests step). + by the orchestrator's dep-provisioning step). 2. POST `/api/v1.0/documents/` with `Authorization: Bearer ` to create a new doc with a unique title; capture the returned `id`. 3. GET `/api/v1.0/documents//` with the same Bearer token; assert the returned title and @@ -15,7 +15,7 @@ Non-vacuous: a misconfigured OIDC, broken backend, or missing endpoint fails at broken. The marker-in-the-title + id round-trip proves the doc actually persisted in lasuite- docs's database after going through the recipe's nginx → backend → postgres path. -Marked @pytest.mark.requires_deps — skips with `deps-not-ready` if setup_custom_tests failed. +Marked @pytest.mark.requires_deps — skips with `deps-not-ready` if dep provisioning failed. 
""" from __future__ import annotations @@ -32,9 +32,9 @@ from harness import sso @pytest.mark.requires_deps -def test_create_doc_and_read_back(live_app, deps_creds): +def test_create_doc_and_read_back(live_app, deps): """Create a doc via the authenticated API; fetch it back; assert round-trip.""" - kc = deps_creds["keycloak"] + kc = deps["keycloak"] # Obtain a JWT via OIDC password grant access_token = sso.oidc_password_grant( diff --git a/tests/lasuite-docs/functional/test_oidc_login.py b/tests/lasuite-docs/functional/test_oidc_login.py index 2a05c68..fa527cb 100644 --- a/tests/lasuite-docs/functional/test_oidc_login.py +++ b/tests/lasuite-docs/functional/test_oidc_login.py @@ -5,13 +5,13 @@ SOURCE: references/recipe-maintainer/recipe-info/lasuite-docs/tests/oidc_login.p End-to-end flow: 1. GET `/api/v1.0/users/me/` without auth → asserts the response REDIRECTS to the dep keycloak's realm auth endpoint (the recipe is correctly configured to challenge - unauthenticated callers — wired via setup_custom_tests.sh). + unauthenticated callers — wired via install_steps.sh). 2. Obtain an OIDC token from the dep keycloak via password grant (the test user provisioned by the orchestrator's realm setup). 3. Call `/api/v1.0/users/me/` with `Authorization: Bearer ` → asserts 200 and the returned user's email matches the provisioned test user. -Marked @pytest.mark.requires_deps — skips with `deps-not-ready` if setup_custom_tests failed. +Marked @pytest.mark.requires_deps — skips with `deps-not-ready` if dep provisioning failed. 
""" from __future__ import annotations @@ -51,9 +51,9 @@ def _get_no_redirect(url: str) -> tuple[int, str]: @pytest.mark.requires_deps -def test_oidc_login_via_keycloak(live_app, deps_creds): +def test_oidc_login_via_keycloak(live_app, deps): """Anonymous → redirect to keycloak; password-grant token → 200 from /api/v1.0/users/me/.""" - kc = deps_creds["keycloak"] + kc = deps["keycloak"] # Step 1: unauthenticated GET → 302 to keycloak realm's auth endpoint status, redirect = _get_no_redirect(f"https://{live_app}/api/v1.0/users/me/") diff --git a/tests/lasuite-docs/functional/test_oidc_with_keycloak.py b/tests/lasuite-docs/functional/test_oidc_with_keycloak.py index 4c3d19e..bd865de 100644 --- a/tests/lasuite-docs/functional/test_oidc_with_keycloak.py +++ b/tests/lasuite-docs/functional/test_oidc_with_keycloak.py @@ -3,10 +3,10 @@ Refactored to the refined SSO-dep model: - The orchestrator deploys a per-run keycloak dep AFTER generic tiers and provisions a fresh realm/client/user via `harness.sso.setup_keycloak_realm`. The creds are written to - `$CCCI_DEPS_FILE` (read here via the `deps_creds` fixture). + `$CCCI_DEPS_FILE` (read here via the `deps` fixture). - This test no longer calls `setup_keycloak_realm` itself — that's the orchestrator's job in - the setup_custom_tests step. We just consume the credentials and exercise the OIDC flow. -- Marked `@pytest.mark.requires_deps` so if setup_custom_tests failed, this test SKIPs with a + the dep-provisioning step. We just consume the credentials and exercise the OIDC flow. +- Marked `@pytest.mark.requires_deps` so if dep provisioning failed, this test SKIPs with a clear `deps-not-ready` reason rather than red-flagging a non-recipe failure. 
""" @@ -31,13 +31,13 @@ def _b64url_decode(seg: str) -> bytes: @pytest.mark.requires_deps -def test_oidc_password_grant_against_dep_keycloak(live_app, deps_creds): +def test_oidc_password_grant_against_dep_keycloak(live_app, deps): """The dep keycloak issues a JWT for the pre-provisioned test user via OIDC password grant.""" - assert "keycloak" in deps_creds, ( - f"keycloak creds not in deps_creds; got {list(deps_creds.keys())}. " - "setup_custom_tests should have populated this." + assert "keycloak" in deps, ( + f"keycloak creds not in deps; got {list(deps.keys())}. " + "dep provisioning should have populated this." ) - kc = deps_creds["keycloak"] + kc = deps["keycloak"] # Sanity-check the creds shape — orchestrator-written assert kc["domain"] diff --git a/tests/lasuite-docs/install_steps.sh b/tests/lasuite-docs/install_steps.sh new file mode 100755 index 0000000..d845418 --- /dev/null +++ b/tests/lasuite-docs/install_steps.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# lasuite-docs — INSTALL-TIME OIDC wiring hook (rcust P2b; migrated from the deleted +# setup_custom_tests.sh post-deploy path — sibling of lasuite-drive/-meet's hooks). +# +# Runs during the install tier AFTER `abra app new` + EXTRA_ENV + `abra app secret generate`, and +# BEFORE the single `abra app deploy` (lifecycle.py::_run_install_steps). Writing OIDC env + the +# real client secret HERE means the recipe deploys ONCE with OIDC already wired — no post-deploy +# reconverge. The orchestrator provisions the per-run realm/client on the (live-warm) keycloak +# BEFORE this hook and writes $CCCI_DEPS_FILE (the recipe→creds dict). docs' OIDC settings are +# config-only (validated by `manage.py check`, not fetched at boot), so the stack boots healthy +# with the env set. Env names per lasuite-docs's .env.sample (same values the old post-deploy +# hook wrote — byte-identical wiring, only the timing moved). 
+# +# Env supplied by the harness: +# CCCI_APP_DOMAIN — the per-run lasuite-docs app domain +# CCCI_APP_ENV — path to the app's .env (the one `abra app deploy` reads) +# CCCI_DEPS_FILE — JSON {keycloak: {domain, realm, client_id, client_secret, ...}} (may be empty) +set -euo pipefail + +: "${CCCI_APP_DOMAIN:?missing}" +ENV_PATH="${CCCI_APP_ENV:?missing}" + +# No deps file / no keycloak entry → install-time provisioning failed or was skipped. NO-OP so the +# recipe still boots; the @requires_deps OIDC custom test then SKIPs and F2-11 flips the run RED. +if [ -z "${CCCI_DEPS_FILE:-}" ] || [ ! -s "${CCCI_DEPS_FILE}" ]; then + echo " install_steps: no deps file — skipping OIDC wiring (recipe boots without OIDC)" + exit 0 +fi +KC_DOMAIN=$(jq -r '.keycloak.domain // empty' "$CCCI_DEPS_FILE") +KC_REALM=$(jq -r '.keycloak.realm // empty' "$CCCI_DEPS_FILE") +KC_CLIENT=$(jq -r '.keycloak.client_id // empty' "$CCCI_DEPS_FILE") +KC_SECRET=$(jq -r '.keycloak.client_secret // empty' "$CCCI_DEPS_FILE") +if [ -z "$KC_DOMAIN" ] || [ -z "$KC_SECRET" ]; then + echo " install_steps: deps file has no keycloak domain/secret — skipping OIDC wiring" + exit 0 +fi + +echo " lasuite-docs install_steps: wiring OIDC at install against keycloak ${KC_DOMAIN}" + +# 1) Insert the OIDC client secret at a bumped version (abra already generated oidc_rpcs:v1; swarm +# forbids overwriting a secret at the same version). The app is not deployed yet — a swarm secret +# can be created independently — so the single deploy that follows this hook picks up v2.
+CUR_VER=$(grep -E '^\s*SECRET_OIDC_RPCS_VERSION=' "$ENV_PATH" | tail -1 | cut -d= -f2 | tr -d '"\r' || echo "v1") +NEW_NUM=$((${CUR_VER#v} + 1)) +NEW_VER="v${NEW_NUM}" +INSERT_LOG=$(abra app secret insert "$CCCI_APP_DOMAIN" oidc_rpcs "$NEW_VER" "$KC_SECRET" --no-input -C -o 2>&1) || + INSERT_LOG=$(script -qec "abra app secret insert $CCCI_APP_DOMAIN oidc_rpcs $NEW_VER $KC_SECRET --no-input -C -o" /dev/null 2>&1) || + { + echo " install_steps: abra app secret insert oidc_rpcs@$NEW_VER failed: $INSERT_LOG" + exit 1 + } +sed -i "s|^\s*SECRET_OIDC_RPCS_VERSION=.*|SECRET_OIDC_RPCS_VERSION=$NEW_VER|" "$ENV_PATH" +echo " install_steps: oidc_rpcs secret inserted at $NEW_VER (was $CUR_VER)" + +# 2) Write OIDC env vars to the app's .env (names per lasuite-docs's .env.sample). Ensure a +# trailing newline first so appends never concatenate onto the last line. +write_env() { + local key="$1" val="$2" + sed -i "/^\s*#\?\s*${key}=/d" "$ENV_PATH" + [ -z "$(tail -c1 "$ENV_PATH" 2>/dev/null)" ] || printf '\n' >>"$ENV_PATH" + printf '%s=%s\n' "$key" "$val" >>"$ENV_PATH" +} +write_env OIDC_REALM "$KC_REALM" +write_env OIDC_OP_DISCOVERY_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/.well-known/openid-configuration" +write_env OIDC_OP_AUTHORIZATION_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/auth" +write_env OIDC_OP_TOKEN_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/token" +write_env OIDC_OP_USER_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/userinfo" +write_env OIDC_OP_LOGOUT_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/logout" +write_env OIDC_OP_JWKS_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/certs" +write_env OIDC_RP_CLIENT_ID "$KC_CLIENT" +write_env OIDC_RP_SIGN_ALGO "RS256" +write_env OIDC_RP_SCOPES "openid email profile" + +echo " lasuite-docs install_steps: OIDC env wired into .env (deploy will pick it up, no reconverge)" diff 
--git a/tests/lasuite-docs/ops.py b/tests/lasuite-docs/ops.py index 3348166..8094cc4 100644 --- a/tests/lasuite-docs/ops.py +++ b/tests/lasuite-docs/ops.py @@ -24,18 +24,18 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/lasuite-docs/recipe_meta.py b/tests/lasuite-docs/recipe_meta.py index 08ef29b..6cdccb3 100644 --- a/tests/lasuite-docs/recipe_meta.py +++ b/tests/lasuite-docs/recipe_meta.py @@ -15,7 +15,7 @@ HTTP_TIMEOUT = 600 DEPS = ["keycloak"] -def EXTRA_ENV(domain): +def EXTRA_ENV(ctx): # abra's internal per-deploy convergence timeout (the recipe's TIMEOUT env, default 300s) is too # short for this 9-service stack on a COLD image cache (~9 large images: impress frontend/backend, # minio, postgres18, redis, docspec, y-provider). Cold pulls exceed 300s -> "deploy timed out 🟠". diff --git a/tests/lasuite-docs/setup_custom_tests.sh b/tests/lasuite-docs/setup_custom_tests.sh deleted file mode 100755 index 8f61331..0000000 --- a/tests/lasuite-docs/setup_custom_tests.sh +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env bash -# lasuite-docs — post-deps setup hook (operator-2026-05-28 SSO-dep plan §3.2). -# -# Runs AFTER the generic tiers (install/upgrade/backup/restore) and AFTER each declared dep is -# deployed + provisioned with realm/client via the harness. 
The orchestrator has written -# $CCCI_DEPS_FILE with the keycloak dep's domain + realm + client_secret + admin creds. -# -# This hook: -# 1. Reads the dep's connection info from $CCCI_DEPS_FILE. -# 2. Inserts the OIDC client secret as an abra app secret (recipe-conventional name oidc_rpcs). -# 3. Writes the OIDC env vars to the running app's .env via `abra app config set`. -# 4. Triggers an in-place `abra app deploy --force --chaos` so the new env takes effect. -# THIS IS NOT a fresh `abra app new` — the deploy-count guard (DG4.1, generalised) still -# sees one app_new per app. -# -# Env supplied by the orchestrator: -# CCCI_APP_DOMAIN — the running per-run lasuite-docs app domain -# CCCI_RECIPE — "lasuite-docs" -# CCCI_DEPS_FILE — JSON file (dict shape: {dep_recipe: {domain, realm, client_id, ...}, ...}) -set -euo pipefail - -: "${CCCI_APP_DOMAIN:?missing}" -: "${CCCI_DEPS_FILE:?missing}" -test -s "$CCCI_DEPS_FILE" || { - echo " setup_custom_tests: deps file empty" - exit 1 -} - -# Read keycloak dep info via jq -KC_DOMAIN=$(jq -r '.keycloak.domain' "$CCCI_DEPS_FILE") -KC_REALM=$(jq -r '.keycloak.realm' "$CCCI_DEPS_FILE") -KC_CLIENT=$(jq -r '.keycloak.client_id' "$CCCI_DEPS_FILE") -KC_SECRET=$(jq -r '.keycloak.client_secret' "$CCCI_DEPS_FILE") -if [ -z "$KC_DOMAIN" ] || [ "$KC_DOMAIN" = "null" ]; then - echo " setup_custom_tests: no keycloak.domain in deps" - exit 1 -fi -if [ -z "$KC_SECRET" ] || [ "$KC_SECRET" = "null" ]; then - echo " setup_custom_tests: no keycloak.client_secret" - exit 1 -fi - -echo " lasuite-docs setup_custom_tests: wiring OIDC against keycloak dep ${KC_DOMAIN}" - -# 1) Insert the OIDC client secret AT A BUMPED VERSION (the recipe-maintainer pattern). -# `abra app new -S` already generated `oidc_rpcs:v1` (random) — Docker Swarm forbids overwriting -# a secret at the same version, so we bump the version (v2), insert our value there, then -# update SECRET_OIDC_RPCS_VERSION in the .env to point at the new one. 
-ENV_PATH="$HOME/.abra/servers/default/${CCCI_APP_DOMAIN}.env" -CUR_VER=$(grep -E '^\s*SECRET_OIDC_RPCS_VERSION=' "$ENV_PATH" | tail -1 | cut -d= -f2 | tr -d '"\r' || echo "v1") -NEW_NUM=$((${CUR_VER#v} + 1)) -NEW_VER="v${NEW_NUM}" - -INSERT_LOG=$(abra app secret insert "$CCCI_APP_DOMAIN" oidc_rpcs "$NEW_VER" "$KC_SECRET" --no-input -C -o 2>&1) || - INSERT_LOG=$(script -qec "abra app secret insert $CCCI_APP_DOMAIN oidc_rpcs $NEW_VER $KC_SECRET --no-input -C -o" /dev/null 2>&1) || - { - echo " setup_custom_tests: abra app secret insert oidc_rpcs@$NEW_VER failed: $INSERT_LOG" - exit 1 - } -# Repoint the env var to the new version -sed -i "s|^\s*SECRET_OIDC_RPCS_VERSION=.*|SECRET_OIDC_RPCS_VERSION=$NEW_VER|" "$ENV_PATH" -echo " setup_custom_tests: oidc_rpcs secret inserted at $NEW_VER (was $CUR_VER)" - -# 2) Write OIDC env vars to the app's .env (names per lasuite-docs's .env.sample). -# Ensure the file ends with a newline FIRST so our appends don't concatenate onto the last line -# (we saw `TIMEOUT=900OIDC_REALM=...` malformed by a missing-trailing-newline file). 
-[ -z "$(tail -c1 "$ENV_PATH" 2>/dev/null)" ] || printf '\n' >>"$ENV_PATH" -write_env() { - local key="$1" val="$2" - # remove any existing key (commented or live) then append the live key=val - sed -i "/^\s*#\?\s*${key}=/d" "$ENV_PATH" - # Re-ensure trailing newline after each delete (sed may leave the file without one) - [ -z "$(tail -c1 "$ENV_PATH" 2>/dev/null)" ] || printf '\n' >>"$ENV_PATH" - printf '%s=%s\n' "$key" "$val" >>"$ENV_PATH" -} -write_env OIDC_REALM "$KC_REALM" -write_env OIDC_OP_DISCOVERY_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/.well-known/openid-configuration" -write_env OIDC_OP_AUTHORIZATION_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/auth" -write_env OIDC_OP_TOKEN_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/token" -write_env OIDC_OP_USER_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/userinfo" -write_env OIDC_OP_LOGOUT_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/logout" -write_env OIDC_OP_JWKS_ENDPOINT "https://${KC_DOMAIN}/realms/${KC_REALM}/protocol/openid-connect/certs" -write_env OIDC_RP_CLIENT_ID "$KC_CLIENT" -write_env OIDC_RP_SIGN_ALGO "RS256" -write_env OIDC_RP_SCOPES "openid email profile" - -# 3) Trigger an in-place redeploy so the env update takes effect. --force re-deploys even when -# the recipe hasn't changed; --chaos avoids the chaos prompt; --no-input non-interactive. 
-abra app deploy "$CCCI_APP_DOMAIN" --force --chaos --no-input 2>&1 | tail -10 - -echo " lasuite-docs setup_custom_tests: OIDC wired + redeployed" diff --git a/tests/lasuite-drive/functional/test_oidc_with_keycloak.py b/tests/lasuite-drive/functional/test_oidc_with_keycloak.py index e2aaef7..7a8f8f7 100644 --- a/tests/lasuite-drive/functional/test_oidc_with_keycloak.py +++ b/tests/lasuite-drive/functional/test_oidc_with_keycloak.py @@ -3,12 +3,12 @@ Drive (La Suite Drive) is OIDC-required: login is gated by an external OpenID Connect provider. Mirrors the proven lasuite-docs SSO model: - The orchestrator deploys a per-run keycloak dep AFTER the generic tiers and provisions a fresh - realm/client/user via `harness.sso.setup_keycloak_realm`; `setup_custom_tests.sh` then wires the + realm/client/user via `harness.sso.setup_keycloak_realm`; `install_steps.sh` then wires the OIDC env + client secret into the running drive app and redeploys. Creds land in `$CCCI_DEPS_FILE` - (read here via the `deps_creds` fixture). + (read here via the `deps` fixture). - This test consumes those creds and exercises the real OIDC flow against the dep keycloak: discovery endpoint advertises the realm, and a password grant yields a valid JWT with the expected claims. -- Marked `@pytest.mark.requires_deps` so if setup_custom_tests failed the test SKIPs with a clear +- Marked `@pytest.mark.requires_deps` so if dep provisioning failed the test SKIPs with a clear `deps-not-ready` reason — and (per F2-11) the orchestrator then fails the run rather than going green on a skipped SSO test. 
@@ -36,13 +36,13 @@ def _b64url_decode(seg: str) -> bytes: @pytest.mark.requires_deps -def test_oidc_password_grant_against_dep_keycloak(live_app, deps_creds): +def test_oidc_password_grant_against_dep_keycloak(live_app, deps): """The dep keycloak issues a JWT for the pre-provisioned test user via OIDC password grant.""" - assert "keycloak" in deps_creds, ( - f"keycloak creds not in deps_creds; got {list(deps_creds.keys())}. " - "setup_custom_tests should have populated this." + assert "keycloak" in deps, ( + f"keycloak creds not in deps; got {list(deps.keys())}. " + "dep provisioning should have populated this." ) - kc = deps_creds["keycloak"] + kc = deps["keycloak"] # Creds shape. WC1: realm is per-run namespaced "-<6hex>"; client_id stays the parent. assert kc["domain"] diff --git a/tests/lasuite-drive/install_steps.sh b/tests/lasuite-drive/install_steps.sh index c864ff3..01999e3 100755 --- a/tests/lasuite-drive/install_steps.sh +++ b/tests/lasuite-drive/install_steps.sh @@ -6,7 +6,7 @@ # BEFORE the single `abra app deploy` (runner/harness/lifecycle.py::_run_install_steps). By writing # the OIDC env + the real client secret into the app's `.env` HERE, the recipe deploys ONCE with # OIDC already wired — eliminating the flaky post-deploy in-place `--force --chaos` 12-service -# reconverge that the old setup_custom_tests.sh did (collabora WOPI-discovery race; see JOURNAL +# reconverge (collabora WOPI-discovery race; see JOURNAL # Step 0). The orchestrator provisions the per-run realm/client on the live-warm keycloak BEFORE # this hook and writes $CCCI_DEPS_FILE (the recipe→creds dict). # diff --git a/tests/lasuite-drive/ops.py b/tests/lasuite-drive/ops.py index ca97673..856e9e0 100644 --- a/tests/lasuite-drive/ops.py +++ b/tests/lasuite-drive/ops.py @@ -5,6 +5,7 @@ in the `db` service.
The backup path exercises the recipe's pg_backup.sh DB-dump backupbot-labelled).""" import os +import subprocess import sys import time @@ -12,6 +13,47 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner") from harness import lifecycle # noqa: E402 +def pre_install(ctx): + """Post-deploy seed for the custom tier (the former setup_custom_tests.sh, moved here in rcust + P2b — install_steps.sh runs PRE-deploy and cannot touch the live stack). The deploy alone does + NOT create the MinIO bucket: `minio-createbuckets` is a `replicas:0` one-shot (restart_policy: + none) that must be triggered. The MinIO storage test asserts the bucket exists, so trigger it + here and poll. `--detach` is REQUIRED: the job creates the bucket then EXITS 0, so it never + holds a steady 1/1 replica — a blocking scale would wait forever.""" + stack = ctx.domain.replace(".", "_") + print(" pre_install: creating MinIO bucket via the minio-createbuckets one-shot", flush=True) + subprocess.run( + ["docker", "service", "scale", "--detach", f"{stack}_minio-createbuckets=1"], + capture_output=True, + check=False, + ) + check = ( + 'mc alias set _c http://localhost:9000 "$(cat /run/secrets/minio_ru)" ' + '"$(cat /run/secrets/minio_rp)" >/dev/null 2>&1 && ' + "mc ls _c/drive-media-storage >/dev/null 2>&1" + ) + for i in range(30): + cid = subprocess.run( + ["docker", "ps", "-q", "-f", f"name={stack}_minio.1"], + capture_output=True, + text=True, + check=False, + ).stdout.split() + if cid and ( + subprocess.run( + ["docker", "exec", cid[0], "sh", "-c", check], capture_output=True, check=False + ).returncode + == 0 + ): + print( + f" pre_install: bucket drive-media-storage present after {i + 1} poll(s)", + flush=True, + ) + return + time.sleep(3) + raise AssertionError("minio-createbuckets one-shot did not create drive-media-storage in 90s") + + def _wait_collabora_ready(domain, timeout=420): """Gate the upgrade op on collabora being FULLY ready (WOPI discovery endpoint → 
200), not just container 1/1 'running'. coolwsd takes ~2min to boot (pre-reads 1300+ l10n files + RSA keygen); @@ -49,21 +91,21 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): +def pre_upgrade(ctx): # Gate the chaos redeploy on a fully-ready collabora (else it kills a still-booting coolwsd and # abra aborts the upgrade deploy — Q3.2a run 1). Then seed the data-integrity marker. - _wait_collabora_ready(domain) - _seed(domain, "upgrade-survives") + _wait_collabora_ready(ctx.domain) + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/lasuite-drive/recipe_meta.py b/tests/lasuite-drive/recipe_meta.py index 615a51c..d444953 100644 --- a/tests/lasuite-drive/recipe_meta.py +++ b/tests/lasuite-drive/recipe_meta.py @@ -18,34 +18,31 @@ DEPLOY_TIMEOUT = 1800 HTTP_TIMEOUT = 900 # Base deploy/lifecycle proven cold-green @2026-05-28 (install: pass; 12 services incl. -# onlyoffice+collabora) once the Docker Hub rate limit was fixed. The keycloak SSO dep is now -# enabled: declaring DEPS triggers the orchestrator's setup_custom_tests step (deploy keycloak + -# provision realm/client/user + run tests/lasuite-drive/setup_custom_tests.sh to wire OIDC env + -# in-place redeploy). functional/test_oidc_with_keycloak.py then exercises the SSO flow. +# onlyoffice+collabora) once the Docker Hub rate limit was fixed. 
Declaring DEPS makes the +# orchestrator provision keycloak (realm/client/user) BEFORE the single deploy; +# functional/test_oidc_with_keycloak.py then exercises the SSO flow. DEPS = ["keycloak"] -# Q3.2a (plan-lasuite-drive-oidc-robustness.md Part A): wire OIDC at INSTALL time, not via a -# post-deploy in-place `--chaos` redeploy. The orchestrator provisions the per-run realm on the -# live-warm keycloak BEFORE the single `abra app deploy`, and tests/lasuite-drive/install_steps.sh -# writes the OIDC env + client secret into the .env that one deploy reads. This eliminates the flaky -# 12-service reconverge (collabora WOPI-discovery race; JOURNAL Step 0). Drive boots fine with OIDC -# env set because keycloak is live-warm (discovery reachable at boot). setup_custom_tests.sh now -# only triggers the post-deploy MinIO bucket one-shot. -OIDC_AT_INSTALL = True +# OIDC is wired at INSTALL time (the only deps mode since rcust P2b; Q3.2a pioneered it here): +# the orchestrator provisions the per-run realm on the live-warm keycloak BEFORE the single +# `abra app deploy`, and tests/lasuite-drive/install_steps.sh writes the OIDC env + client secret +# into the .env that one deploy reads. No post-deploy reconverge (the flaky 12-service collabora +# WOPI race is structurally gone). The post-deploy MinIO bucket one-shot lives in ops.py +# pre_install (the former setup_custom_tests.sh, deleted in P2b). -def READY_PROBE(domain): +def READY_PROBE(ctx): """Readiness signals beyond replica-convergence + the app HEALTH_PATH (Q3.2/F2-12). collabora's coolwsd reports its container 1/1 'running' while still doing jail/config init, and its WOPI discovery endpoint 404s until ready — so the harness waits for `/hosting/discovery` → 200 on the collabora sibling host after the install deploy AND after the upgrade chaos redeploy. 
This is what makes the heavy prev→PR-head crossover reliably green (the new collabora 25.04.9.x finishes init within swarm's healthcheck retries; abra's own converge monitor was too impatient — F2-12).""" - label, _, rest = domain.partition(".") - return [{"host": f"collabora-{domain}", "path": "/hosting/discovery", "ok": (200,)}] + label, _, rest = ctx.domain.partition(".") + return [{"host": f"collabora-{ctx.domain}", "path": "/hosting/discovery", "ok": (200,)}] -def EXTRA_ENV(domain): +def EXTRA_ENV(ctx): # Two of lasuite-drive's services route on DOMAIN-DERIVED **nested** subdomains — # `MINIO_DOMAIN="minio.${DOMAIN}"` and `COLLABORA_DOMAIN="collabora.${DOMAIN}"`. The cc-ci # wildcard TLS cert is `*.ci.commoninternet.net` (single label only), so a 2-label name like @@ -55,8 +52,8 @@ def EXTRA_ENV(domain): # no cert/gateway change. See DECISIONS.md "Phase 2 — nested DOMAIN-derived subdomains". # `AWS_S3_DOMAIN_REPLACE` derives from MINIO_DOMAIN in-compose, so setting MINIO_DOMAIN is enough. return { - "MINIO_DOMAIN": f"minio-{domain}", - "COLLABORA_DOMAIN": f"collabora-{domain}", + "MINIO_DOMAIN": f"minio-{ctx.domain}", + "COLLABORA_DOMAIN": f"collabora-{ctx.domain}", # abra's internal per-deploy convergence timeout (recipe TIMEOUT env, default 300s) is too # short for this 12-service stack on a cold image cache (impress frontend/backend, minio, # postgres, redis, collabora ~1GB, onlyoffice ~2GB). Bump so abra waits long enough for diff --git a/tests/lasuite-drive/setup_custom_tests.sh b/tests/lasuite-drive/setup_custom_tests.sh deleted file mode 100755 index 65d84f2..0000000 --- a/tests/lasuite-drive/setup_custom_tests.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -# lasuite-drive — POST-DEPLOY setup hook (Phase 2 Q3.2a). 
-# -# As of Q3.2a (plan-lasuite-drive-oidc-robustness.md Part A) OIDC is wired at INSTALL time by -# tests/lasuite-drive/install_steps.sh (before the single `abra app deploy`), so this hook NO LONGER -# does any OIDC env wiring or in-place redeploy — that eliminated the flaky 12-service reconverge -# (collabora WOPI race; see JOURNAL Step 0). What remains here is the ONE post-deploy step that -# genuinely needs the live stack: triggering the MinIO bucket-creation one-shot. The orchestrator -# runs this only on the install-time path AFTER the deploy is healthy (deps already provisioned). -# -# Env supplied by the orchestrator: -# CCCI_APP_DOMAIN — the running per-run lasuite-drive app domain -# CCCI_DEPS_FILE — JSON deps creds dict (unused here now; OIDC handled at install) -set -euo pipefail - -: "${CCCI_APP_DOMAIN:?missing}" - -# The deploy alone does NOT create the MinIO bucket — `minio-createbuckets` is a `replicas:0` -# one-shot (restart_policy: none) that must be triggered. The MinIO storage test asserts the bucket -# exists, so create it here. `--detach` is REQUIRED: the job creates the bucket then EXITS 0, so it -# never holds a steady 1/1 replica; a blocking `docker service scale ...=1` would wait forever and -# hang the run. With `--detach` the scale just submits the one-run and returns; the poll loop below -# confirms the bucket was actually created. -STACK=$(printf '%s' "$CCCI_APP_DOMAIN" | tr '.' '_') -echo " setup: creating MinIO bucket via the minio-createbuckets one-shot (scale 0->1)" -docker service scale --detach "${STACK}_minio-createbuckets=1" >/dev/null 2>&1 || true -# Wait up to 90s for the one-shot to create the bucket (mc mb drive/drive-media-storage; exit 0). -# Poll by checking the bucket directly from the running minio replica container. 
-for i in $(seq 1 30); do - MC_CID=$(docker ps -q -f "name=${STACK}_minio.1" | head -1) - if [ -n "$MC_CID" ] && docker exec "$MC_CID" sh -c \ - 'mc alias set _c http://localhost:9000 "$(cat /run/secrets/minio_ru)" "$(cat /run/secrets/minio_rp)" >/dev/null 2>&1 && mc ls _c/drive-media-storage >/dev/null 2>&1'; then - echo " setup: bucket drive-media-storage present after ${i} poll(s)" - break - fi - sleep 3 -done - -echo " lasuite-drive setup_custom_tests: post-deploy MinIO bucket step complete (OIDC wired at install)" diff --git a/tests/lasuite-meet/functional/test_meeting_flow.py b/tests/lasuite-meet/functional/test_meeting_flow.py index 333f6a4..4e72c4b 100644 --- a/tests/lasuite-meet/functional/test_meeting_flow.py +++ b/tests/lasuite-meet/functional/test_meeting_flow.py @@ -36,8 +36,8 @@ def _b64url(seg: str) -> bytes: return base64.urlsafe_b64decode(seg + "=" * ((4 - len(seg) % 4) % 4)) -def _creds(deps_creds: dict) -> dict: - kc = deps_creds["keycloak"] +def _creds(deps: dict) -> dict: + kc = deps["keycloak"] return { "provider": "keycloak", "provider_domain": kc["domain"], @@ -55,10 +55,10 @@ def _creds(deps_creds: dict) -> dict: @pytest.mark.requires_deps -def test_create_room_get_livekit_token_and_read_back(live_app, deps_creds): - assert "keycloak" in deps_creds, f"keycloak creds missing; got {list(deps_creds.keys())}" +def test_create_room_get_livekit_token_and_read_back(live_app, deps): + assert "keycloak" in deps, f"keycloak creds missing; got {list(deps.keys())}" base = f"https://{live_app}" - token = sso.oidc_password_grant(_creds(deps_creds)) + token = sso.oidc_password_grant(_creds(deps)) assert isinstance(token, str) and token.count(".") == 2, "OIDC access token is not a JWT" auth = {"Authorization": f"Bearer {token}"} diff --git a/tests/lasuite-meet/functional/test_oidc_with_keycloak.py b/tests/lasuite-meet/functional/test_oidc_with_keycloak.py index 3335d2c..bac033d 100644 --- a/tests/lasuite-meet/functional/test_oidc_with_keycloak.py +++ 
b/tests/lasuite-meet/functional/test_oidc_with_keycloak.py @@ -3,12 +3,12 @@ Meet (La Suite Meet) is OIDC-required: login is gated by an external OpenID Connect provider. Mirrors the proven lasuite-docs SSO model: - The orchestrator deploys a per-run keycloak dep AFTER the generic tiers and provisions a fresh - realm/client/user via `harness.sso.setup_keycloak_realm`; `setup_custom_tests.sh` then wires the + realm/client/user via `harness.sso.setup_keycloak_realm`; `install_steps.sh` then wires the OIDC env + client secret into the running drive app and redeploys. Creds land in `$CCCI_DEPS_FILE` - (read here via the `deps_creds` fixture). + (read here via the `deps` fixture). - This test consumes those creds and exercises the real OIDC flow against the dep keycloak: discovery endpoint advertises the realm, and a password grant yields a valid JWT with the expected claims. -- Marked `@pytest.mark.requires_deps` so if setup_custom_tests failed the test SKIPs with a clear +- Marked `@pytest.mark.requires_deps` so if dep provisioning failed the test SKIPs with a clear `deps-not-ready` reason — and (per F2-11) the orchestrator then fails the run rather than going green on a skipped SSO test. @@ -36,13 +36,13 @@ def _b64url_decode(seg: str) -> bytes: @pytest.mark.requires_deps -def test_oidc_password_grant_against_dep_keycloak(live_app, deps_creds): +def test_oidc_password_grant_against_dep_keycloak(live_app, deps): """The dep keycloak issues a JWT for the pre-provisioned test user via OIDC password grant.""" - assert "keycloak" in deps_creds, ( - f"keycloak creds not in deps_creds; got {list(deps_creds.keys())}. " - "setup_custom_tests should have populated this." + assert "keycloak" in deps, ( + f"keycloak creds not in deps; got {list(deps.keys())}. " + "dep provisioning should have populated this." ) - kc = deps_creds["keycloak"] + kc = deps["keycloak"] # Creds shape. WC1: realm is per-run namespaced "-<6hex>"; client_id stays the parent. 
assert kc["domain"] diff --git a/tests/lasuite-meet/install_steps.sh b/tests/lasuite-meet/install_steps.sh index 8d310eb..3ea39e1 100755 --- a/tests/lasuite-meet/install_steps.sh +++ b/tests/lasuite-meet/install_steps.sh @@ -4,7 +4,8 @@ # Runs during the install tier AFTER `abra app new` + EXTRA_ENV + `abra app secret generate`, and # BEFORE the single `abra app deploy` (lifecycle.py::_run_install_steps). Writing OIDC env + the real # client secret HERE means the recipe deploys ONCE with OIDC already wired — no post-deploy reconverge -# (OIDC_AT_INSTALL). The orchestrator provisions the per-run realm/client on the live-warm keycloak +# (install-time deps wiring — the only mode since rcust P2b). The orchestrator provisions the +# per-run realm/client on the live-warm keycloak # BEFORE this hook and writes $CCCI_DEPS_FILE (the recipe→creds dict). # # Meet's OIDC is REQUIRED (recipe README). Same La Suite/impress env contract as drive, with meet's diff --git a/tests/lasuite-meet/ops.py b/tests/lasuite-meet/ops.py index d5e5627..5e410ae 100644 --- a/tests/lasuite-meet/ops.py +++ b/tests/lasuite-meet/ops.py @@ -27,18 +27,18 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/lasuite-meet/recipe_meta.py b/tests/lasuite-meet/recipe_meta.py index b921e6b..32998f3 100644 --- 
a/tests/lasuite-meet/recipe_meta.py +++ b/tests/lasuite-meet/recipe_meta.py @@ -13,16 +13,15 @@ HEALTH_OK = (200, 301, 302) DEPLOY_TIMEOUT = 1200 HTTP_TIMEOUT = 600 -# SSO-dependent (recipe.toml requires=["keycloak"], [sso] provider=keycloak). Wire OIDC at INSTALL -# time against the live-warm keycloak — same machinery as lasuite-drive (Q3.2a): the orchestrator -# provisions the per-run realm BEFORE the single `abra app deploy`, and tests/lasuite-meet/ -# install_steps.sh writes the OIDC env + client secret into that one deploy (no post-deploy -# reconverge). Meet boots fine with OIDC env set because keycloak is live-warm. +# SSO-dependent (recipe.toml requires=["keycloak"], [sso] provider=keycloak). OIDC is wired at +# INSTALL time (the only deps mode since rcust P2b) against the live-warm keycloak: the +# orchestrator provisions the per-run realm BEFORE the single `abra app deploy`, and +# tests/lasuite-meet/install_steps.sh writes the OIDC env + client secret into that one deploy +# (no post-deploy reconverge). Meet boots fine with OIDC env set because keycloak is live-warm. DEPS = ["keycloak"] -OIDC_AT_INSTALL = True -def EXTRA_ENV(domain): +def EXTRA_ENV(ctx): # lasuite-meet routes LiveKit's WebSocket signaling on a DOMAIN-derived **nested** subdomain # `LIVEKIT_DOMAIN="livekit.${DOMAIN}"`. The cc-ci wildcard TLS cert is `*.ci.commoninternet.net` # (single label only), so a 2-label name like `livekit.lasuite-meet-pr0-abc.ci.commoninternet.net` @@ -31,7 +30,7 @@ def EXTRA_ENV(domain): # no cert/gateway change. Same fix as lasuite-drive's minio/collabora siblings (DECISIONS.md # "Phase 2 — nested DOMAIN-derived subdomains"). return { - "LIVEKIT_DOMAIN": f"livekit-{domain}", + "LIVEKIT_DOMAIN": f"livekit-{ctx.domain}", # abra's internal per-deploy convergence TIMEOUT (default 300s) is too short for this stack on # a cold image cache; bump it (kept under DEPLOY_TIMEOUT so Python never kills abra mid-wait). 
"TIMEOUT": "1000", diff --git a/tests/mailu/recipe_meta.py b/tests/mailu/recipe_meta.py index 21cc553..1683a90 100644 --- a/tests/mailu/recipe_meta.py +++ b/tests/mailu/recipe_meta.py @@ -21,10 +21,10 @@ DEPLOY_TIMEOUT = 900 HTTP_TIMEOUT = 600 -def EXTRA_ENV(domain): +def EXTRA_ENV(ctx): return { - "MAIL_DOMAIN": domain, - "HOSTNAMES": domain, + "MAIL_DOMAIN": ctx.domain, + "HOSTNAMES": ctx.domain, "TRAEFIK_STACK_NAME": "traefik_ci_commoninternet_net", "TLS_FLAVOR": "notls", "SITENAME": "ccci-mail", diff --git a/tests/matrix-synapse/ops.py b/tests/matrix-synapse/ops.py index a58dad7..85fccf6 100644 --- a/tests/matrix-synapse/ops.py +++ b/tests/matrix-synapse/ops.py @@ -24,18 +24,18 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/mattermost-lts/ops.py b/tests/mattermost-lts/ops.py index 605222d..3dcdd21 100644 --- a/tests/mattermost-lts/ops.py +++ b/tests/mattermost-lts/ops.py @@ -29,18 +29,18 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def 
pre_restore(ctx): # drop the marker table (diverge from the backup) so a successful restore is observable - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( + _psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/mumble/functional/test_server_config_limits.py b/tests/mumble/functional/test_server_config_limits.py index c84f2aa..5f326d3 100644 --- a/tests/mumble/functional/test_server_config_limits.py +++ b/tests/mumble/functional/test_server_config_limits.py @@ -26,9 +26,9 @@ def test_configured_max_users_surfaces_in_serverconfig(live_app): assert r["server_sync"], f"ServerSync handshake did not complete — {r.get('error')}" cfg = r["server_config"] assert cfg, f"server did not send a ServerConfig message — {r!r}" - assert cfg.get("max_users") == recipe_meta.MAX_USERS, ( + assert cfg.get("max_users") == recipe_meta._MAX_USERS, ( f"ServerConfig.max_users={cfg.get('max_users')!r} does not match the configured " - f"USERS={recipe_meta.MAX_USERS} — deploy-time server-limit config did not propagate" + f"USERS={recipe_meta._MAX_USERS} — deploy-time server-limit config did not propagate" ) # allow_html defaults true in the recipe; assert it is present/boolean to prove the field set # is the real ServerConfig (not an empty/garbled decode). 
diff --git a/tests/mumble/functional/test_welcome_text_roundtrip.py b/tests/mumble/functional/test_welcome_text_roundtrip.py index 62c60ab..9ed4440 100644 --- a/tests/mumble/functional/test_welcome_text_roundtrip.py +++ b/tests/mumble/functional/test_welcome_text_roundtrip.py @@ -20,7 +20,7 @@ import recipe_meta # noqa: E402 def test_configured_welcome_text_surfaces_in_serversync(live_app): - marker = recipe_meta.WELCOME_TEXT_MARKER + marker = recipe_meta._WELCOME_TEXT_MARKER r = _mumble_proto.retry_handshake(attempts=12, interval=5.0) assert r["server_sync"], f"ServerSync handshake did not complete — {r.get('error')}" diff --git a/tests/mumble/ops.py b/tests/mumble/ops.py index f5b7b68..11b6fc3 100644 --- a/tests/mumble/ops.py +++ b/tests/mumble/ops.py @@ -38,16 +38,18 @@ def _seed(domain, value): assert got == value, f"seed did not commit (read back {got!r}, expected {value!r})" -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): +def pre_restore(ctx): # diverge from the backup so a successful restore is observable: drop the marker table. 
- _sqlite(domain, "DROP TABLE IF EXISTS ci_marker;") - got = _sqlite(domain, "SELECT name FROM sqlite_master WHERE type='table' AND name='ci_marker';") + _sqlite(ctx.domain, "DROP TABLE IF EXISTS ci_marker;") + got = _sqlite( + ctx.domain, "SELECT name FROM sqlite_master WHERE type='table' AND name='ci_marker';" + ) assert got == "", f"drop did not take (sqlite_master still lists ci_marker: {got!r})" diff --git a/tests/mumble/recipe_meta.py b/tests/mumble/recipe_meta.py index 228b5b9..d453e4c 100644 --- a/tests/mumble/recipe_meta.py +++ b/tests/mumble/recipe_meta.py @@ -31,18 +31,19 @@ HEALTH_OK = (200,) DEPLOY_TIMEOUT = 900 # two images to pull (mumble-server + mumble-web) on a cold node HTTP_TIMEOUT = 300 -# A unique, stable welcome-text marker the round-trip test asserts surfaces over the protocol. -WELCOME_TEXT_MARKER = "cc-ci-mumble-welcome-7f3a9c" +# A unique, stable welcome-text marker the round-trip test asserts surfaces over the protocol +# (underscore prefix = recipe-private constant, exempt from registry validation — rcust P1). +_WELCOME_TEXT_MARKER = "cc-ci-mumble-welcome-7f3a9c" # A distinctive max-users value (not the recipe default 100) the server_config test asserts. -MAX_USERS = 42 +_MAX_USERS = 42 # BASE deploy (0.2.0): mumble-web only — NO host-ports (0.2.0 predates it). The voice-config env is # set here and persists across the upgrade so it takes effect on the latest (where the custom config # round-trip tests assert it). EXTRA_ENV = { "COMPOSE_FILE": "compose.yml:compose.mumbleweb.yml", - "WELCOME_TEXT": WELCOME_TEXT_MARKER, - "USERS": str(MAX_USERS), + "WELCOME_TEXT": _WELCOME_TEXT_MARKER, + "USERS": str(_MAX_USERS), } # UPGRADE-target deploy (latest 1.0.0+): add the NATIVE compose.host-ports.yml so 64738 is @@ -52,7 +53,7 @@ UPGRADE_EXTRA_ENV = { } -def READY_PROBE(domain): +def READY_PROBE(ctx): # The voice server on 64738 is testable on-host ONLY when compose.host-ports.yml is active — i.e. 
# the post-upgrade LATEST, not the minimal 0.2.0 base. Read the live COMPOSE_FILE to decide, so the # SAME probe fn is correct in both phases: the post-install probe (base, no host-ports) returns [] @@ -63,7 +64,7 @@ def READY_PROBE(domain): # backup-bot would then exec into a not-running app container -> 409). from harness import abra # lazy: recipe_meta is exec'd with `harness` importable at call time - cf = abra.env_get(domain, "COMPOSE_FILE") or "" + cf = abra.env_get(ctx.domain, "COMPOSE_FILE") or "" if "compose.host-ports.yml" in cf: return [{"tcp_host": "127.0.0.1", "tcp_port": 64738, "stable": 3}] return [] diff --git a/tests/n8n/ops.py b/tests/n8n/ops.py index b5081e2..f26d471 100644 --- a/tests/n8n/ops.py +++ b/tests/n8n/ops.py @@ -15,13 +15,13 @@ def _write(domain, val): lifecycle.exec_in_app(domain, ["sh", "-c", f"echo {val} > {MARKER}"]) -def pre_upgrade(domain, meta): - _write(domain, "upgrade-survives") +def pre_upgrade(ctx): + _write(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _write(domain, "original") +def pre_backup(ctx): + _write(ctx.domain, "original") -def pre_restore(domain, meta): - _write(domain, "mutated") # diverge so a successful restore is observable +def pre_restore(ctx): + _write(ctx.domain, "mutated") # diverge so a successful restore is observable diff --git a/tests/plausible/ops.py b/tests/plausible/ops.py index dde93d0..3ecbeae 100644 --- a/tests/plausible/ops.py +++ b/tests/plausible/ops.py @@ -24,17 +24,17 @@ def _seed(domain, value): assert _psql(domain, "SELECT v FROM ci_marker;") == value -def pre_upgrade(domain, meta): - _seed(domain, "upgrade-survives") +def pre_upgrade(ctx): + _seed(ctx.domain, "upgrade-survives") -def pre_backup(domain, meta): - _seed(domain, "original") +def pre_backup(ctx): + _seed(ctx.domain, "original") -def pre_restore(domain, meta): - _psql(domain, "DROP TABLE ci_marker;") - assert _psql(domain, "SELECT to_regclass('public.ci_marker');") in ( +def pre_restore(ctx): + 
_psql(ctx.domain, "DROP TABLE ci_marker;") + assert _psql(ctx.domain, "SELECT to_regclass('public.ci_marker');") in ( "", "NULL", ), "drop did not take" diff --git a/tests/unit/test_canonical.py b/tests/unit/test_canonical.py index d31a193..58896f4 100644 --- a/tests/unit/test_canonical.py +++ b/tests/unit/test_canonical.py @@ -13,6 +13,7 @@ import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) from harness import canonical, warm # noqa: E402 +from harness import meta as harness_meta # noqa: E402 def test_canonical_domain(): @@ -33,11 +34,9 @@ def test_is_enrolled_reads_flag(tmp_path, monkeypatch): tests_dir = tmp_path / "tests" / recipe tests_dir.mkdir(parents=True) (tests_dir / "recipe_meta.py").write_text("WARM_CANONICAL = True\n") - # canonical.is_enrolled builds the path from canonical.__file__/../../tests/; emulate by - # creating the layout under a fake harness dir and pointing __file__ there. - fake_harness = tmp_path / "runner" / "harness" - fake_harness.mkdir(parents=True) - monkeypatch.setattr(canonical, "__file__", str(fake_harness / "canonical.py")) + # is_enrolled reads through the single meta loader (rcust P1); point its tests/ root at the + # temp layout. + monkeypatch.setattr(harness_meta, "TESTS_DIR", str(tmp_path / "tests")) assert canonical.is_enrolled(recipe) is True (tests_dir / "recipe_meta.py").write_text("WARM_CANONICAL = False\n") assert canonical.is_enrolled(recipe) is False @@ -65,9 +64,7 @@ def test_registry_roundtrip(tmp_path, monkeypatch): def test_enrolled_recipes_scans_meta(tmp_path, monkeypatch): # enrolled_recipes() lists recipes whose tests//recipe_meta.py sets WARM_CANONICAL=True. 
- fake_harness = tmp_path / "runner" / "harness" - fake_harness.mkdir(parents=True) - monkeypatch.setattr(canonical, "__file__", str(fake_harness / "canonical.py")) + monkeypatch.setattr(harness_meta, "TESTS_DIR", str(tmp_path / "tests")) for name, body in ( ("aaa", "WARM_CANONICAL = True\n"), ("bbb", "DEPS=['x']\n"), diff --git a/tests/unit/test_conftest_fixtures.py b/tests/unit/test_conftest_fixtures.py new file mode 100644 index 0000000..3a3aed9 --- /dev/null +++ b/tests/unit/test_conftest_fixtures.py @@ -0,0 +1,48 @@ +"""Unit tests for the shared conftest fixtures added/reshaped by the rcust restructure (P2d/P4): +`op_state` (run-scoped op context from $CCCI_OP_STATE_FILE) and `deps` (consolidated dep creds +with attribute sugar). Pure — exercised via request.getfixturevalue with env monkeypatched.""" + +from __future__ import annotations + +import json + +import pytest + + +def test_op_state_fixture_reads_file(tmp_path, monkeypatch, request): + f = tmp_path / "op.json" + f.write_text(json.dumps({"backup": {"snapshot_id": "abc123"}, "upgrade": {"head_ref": "h"}})) + monkeypatch.setenv("CCCI_OP_STATE_FILE", str(f)) + st = request.getfixturevalue("op_state") + assert st["backup"]["snapshot_id"] == "abc123" + assert st["upgrade"]["head_ref"] == "h" + + +def test_op_state_fixture_skips_without_env(monkeypatch, request): + monkeypatch.delenv("CCCI_OP_STATE_FILE", raising=False) + with pytest.raises(pytest.skip.Exception, match="orchestrator"): + request.getfixturevalue("op_state") + + +def test_op_state_fixture_skips_on_missing_file(tmp_path, monkeypatch, request): + monkeypatch.setenv("CCCI_OP_STATE_FILE", str(tmp_path / "nope.json")) + with pytest.raises(pytest.skip.Exception, match="missing"): + request.getfixturevalue("op_state") + + +def test_deps_fixture_entries_expose_attributes(tmp_path, monkeypatch, request): + """`deps` (session-scoped) coerces the run deps file into entries with .domain/.realm/... 
+ attribute sugar while keeping dict-style access (rcust P2d). Single test for the session- + cached fixture (one instantiation).""" + f = tmp_path / "deps.json" + f.write_text( + json.dumps( + {"keycloak": {"recipe": "keycloak", "domain": "kc.x", "client_secret": "s3cret"}} + ) + ) + monkeypatch.setenv("CCCI_DEPS_FILE", str(f)) + deps = request.getfixturevalue("deps") + assert deps["keycloak"].domain == "kc.x" + assert deps["keycloak"]["client_secret"] == "s3cret" + with pytest.raises(AttributeError): + _ = deps["keycloak"].not_a_field diff --git a/tests/unit/test_deps.py b/tests/unit/test_deps.py index d160d2b..1a99a7e 100644 --- a/tests/unit/test_deps.py +++ b/tests/unit/test_deps.py @@ -1,9 +1,9 @@ """Unit tests for runner/harness/deps.py (Phase 2 §4.2 / Q2.3). -Pure-Python: no real deploys. Tests the declarative parts of the dep resolver — declared_deps -reading from `tests//recipe_meta.py`, the per-dep domain derivation, and write/load of the -run state file. The deploy_deps + teardown_deps integration is exercised by real e2e against cc-ci -(Q2.4 acceptance). +Pure-Python: no real deploys. Tests the declarative parts of the dep resolver — DEPS declaration +(read through the single meta loader since rcust P1), the per-dep domain derivation, and write/load +of the run state file. The deploy_deps + teardown_deps integration is exercised by real e2e against +cc-ci (Q2.4 acceptance). 
""" from __future__ import annotations @@ -13,42 +13,23 @@ import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) from harness import deps # noqa: E402 +from harness import meta as meta_mod # noqa: E402 -def test_declared_deps_returns_empty_for_no_meta(monkeypatch, tmp_path): - """A recipe with no recipe_meta.py returns [].""" - fake_recipe = "ccci-no-meta" - # No file at tests//recipe_meta.py -> declared_deps reads nothing -> [] - monkeypatch.chdir(tmp_path) - assert deps.declared_deps(fake_recipe) == [] +def test_declared_deps_empty_for_no_meta(monkeypatch, tmp_path): + """A recipe with no recipe_meta.py declares no deps (rcust P1: DEPS via meta.load).""" + monkeypatch.setattr(meta_mod, "TESTS_DIR", str(tmp_path / "tests")) + assert meta_mod.load("ccci-no-meta").DEPS == [] def test_declared_deps_reads_DEPS_list(tmp_path, monkeypatch): - """A recipe_meta.py with `DEPS = [...]` returns the list.""" - fake_recipe = "ccci-with-deps" - # Build a fake repo layout under tmp_path - recipe_dir = tmp_path / "tests" / fake_recipe + """A recipe_meta.py with `DEPS = [...]` surfaces the list on the loaded meta (the orchestrator + reads meta.DEPS — the successor of the deleted deps.declared_deps loader).""" + recipe_dir = tmp_path / "tests" / "ccci-with-deps" recipe_dir.mkdir(parents=True) (recipe_dir / "recipe_meta.py").write_text('HEALTH_PATH = "/"\nDEPS = ["keycloak", "redis"]\n') - # Patch the deps module's idea of "where the repo is" by monkey-patching __file__ for the - # function indirectly: declared_deps uses `os.path.dirname(__file__), "..", "..", "tests"` — - # which resolves to the real repo's `tests/`. So instead, override that with a symlink/dir - # under tmp_path: deps.__file__ points at the runner module. We can't easily relocate that. - # Instead, mock the path by writing the fake recipe under the REAL tests/ dir. 
- real_tests = os.path.join(os.path.dirname(deps.__file__), "..", "..", "tests") - target_dir = os.path.join(real_tests, fake_recipe) - os.makedirs(target_dir, exist_ok=True) - target_meta = os.path.join(target_dir, "recipe_meta.py") - try: - with open(target_meta, "w") as f: - f.write('DEPS = ["keycloak", "redis"]\n') - result = deps.declared_deps(fake_recipe) - assert result == ["keycloak", "redis"] - finally: - if os.path.exists(target_meta): - os.remove(target_meta) - if os.path.isdir(target_dir): - os.rmdir(target_dir) + monkeypatch.setattr(meta_mod, "TESTS_DIR", str(tmp_path / "tests")) + assert meta_mod.load("ccci-with-deps").DEPS == ["keycloak", "redis"] def test_dep_domain_distinct_per_dep(): diff --git a/tests/unit/test_discovery.py b/tests/unit/test_discovery.py index 68f881a..e170f86 100644 --- a/tests/unit/test_discovery.py +++ b/tests/unit/test_discovery.py @@ -71,17 +71,18 @@ def test_repo_local_wins_when_approved(tmp_path): def test_custom_tests_repo_local_gated(tmp_path, monkeypatch): - # non-lifecycle test_*.py from repo-local only count for approved recipes; lifecycle names excluded + # custom test_*.py from repo-local only count for approved recipes (HC2); placement rule + # (rcust P4): custom tests live under functional/ (or playwright/) — top-level files are + # lifecycle overlays only, so the repo-local custom here sits in functional/. # Use a synthetic recipe name + monkeypatched cc_ci_dir so this is independent of what - # tests// ships (Phase-2 custom-html now also ships functional/ + playwright/, - # which would legitimately appear in custom_tests for "custom-html" — F2-1). + # tests// ships (F2-1). 
fake_recipe = "ccci-hc2-fixture" monkeypatch.setattr(discovery, "cc_ci_dir", lambda r: str(tmp_path / "cc-ci" / r)) (tmp_path / "cc-ci" / fake_recipe).mkdir(parents=True) rl = tmp_path / "repo" - rl.mkdir() - (rl / "test_sso.py").write_text("# repo-local custom\n") - (rl / "test_install.py").write_text("# lifecycle name -> excluded from custom\n") + (rl / "functional").mkdir(parents=True) + (rl / "functional" / "test_sso.py").write_text("# repo-local custom\n") + (rl / "functional" / "test_install.py").write_text("# lifecycle name -> excluded from custom\n") _approve(tmp_path) # not approved -> repo-local custom ignored assert discovery.custom_tests(fake_recipe, str(rl)) == [] diff --git a/tests/unit/test_discovery_phase2.py b/tests/unit/test_discovery_phase2.py index 8c0e06f..882b4e6 100644 --- a/tests/unit/test_discovery_phase2.py +++ b/tests/unit/test_discovery_phase2.py @@ -1,6 +1,6 @@ """Unit tests for Phase-2 discovery additions (plan §4.1). -Proves the `custom_tests` discovery recurses into the per-recipe `functional/` + `playwright/` +Proves the `custom_tests` discovery covers exactly the per-recipe `functional/` + `playwright/` subdirs as well as the top-level dir, while still excluding lifecycle `test_.py` names and honouring the HC2 repo-local approval gate. @@ -27,16 +27,16 @@ def teardown_function(): os.environ.pop("CCCI_REPO_LOCAL_APPROVED_FILE", None) -def test_custom_tests_recurses_functional_and_playwright(tmp_path, monkeypatch): - """A Phase-2 cc-ci recipe layout: functional/test_*.py + playwright/test_*.py + top-level - test_*.py — all are discovered as custom tests; the lifecycle names are excluded.""" +def test_custom_tests_placement_rule_functional_playwright_only(tmp_path, monkeypatch): + """Placement rule (rcust P4): custom tests are discovered ONLY under functional/ + + playwright/. 
A top-level non-lifecycle test_*.py is NOT discovered (top level is reserved + for lifecycle overlays); lifecycle names inside the subdirs stay excluded (defensive).""" # Point cc-ci's per-recipe dir at a fake recipe in tmp_path fake_recipe = "ccci-phase2-fixture" fake_dir = tmp_path / "tests" / fake_recipe (fake_dir / "functional").mkdir(parents=True) (fake_dir / "playwright").mkdir() - # legitimate custom tests at multiple levels - (fake_dir / "test_sso_smoke.py").write_text("# top-level cross-cutting\n") + (fake_dir / "test_sso_smoke.py").write_text("# top-level — NOT discovered since P4\n") (fake_dir / "functional" / "test_health_check.py").write_text("# parity port\n") (fake_dir / "functional" / "test_content_roundtrip.py").write_text("# recipe-specific\n") (fake_dir / "playwright" / "test_login_flow.py").write_text("# UI flow\n") @@ -49,11 +49,11 @@ def test_custom_tests_recurses_functional_and_playwright(tmp_path, monkeypatch): customs = discovery.custom_tests(fake_recipe, None) names = sorted((src, os.path.basename(p)) for src, p in customs) - # Top-level + functional/ + playwright/ all discovered; lifecycle name excluded - assert ("cc-ci", "test_sso_smoke.py") in names + # functional/ + playwright/ discovered; top-level custom + lifecycle name are NOT assert ("cc-ci", "test_health_check.py") in names assert ("cc-ci", "test_content_roundtrip.py") in names assert ("cc-ci", "test_login_flow.py") in names + assert ("cc-ci", "test_sso_smoke.py") not in names assert ("cc-ci", "test_install.py") not in names diff --git a/tests/unit/test_f211_sso_skip.py b/tests/unit/test_f211_sso_skip.py index 4df8d72..bca4297 100644 --- a/tests/unit/test_f211_sso_skip.py +++ b/tests/unit/test_f211_sso_skip.py @@ -30,7 +30,7 @@ def test_sso_dep_unverified_true_when_declared_notready_and_skipped(): def test_sso_dep_unverified_false_when_deps_ready(): - """deps ready (setup_custom_tests succeeded) → SSO tests actually ran → not a failure.""" + """deps ready (dep provisioning 
succeeded) → SSO tests actually ran → not a failure.""" assert not run_recipe_ci.sso_dep_unverified( ["keycloak"], deps_ready=True, requires_deps_skipped=0 ) diff --git a/tests/unit/test_f212_upgrade_convergence.py b/tests/unit/test_f212_upgrade_convergence.py index 5ca9d88..9948f72 100644 --- a/tests/unit/test_f212_upgrade_convergence.py +++ b/tests/unit/test_f212_upgrade_convergence.py @@ -14,6 +14,7 @@ So `-c` + owned-wait is non-vacuous: a genuinely-broken upgrade stays RED. from __future__ import annotations +import dataclasses import os import sys @@ -21,6 +22,7 @@ import pytest sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) from harness import lifecycle as lc # noqa: E402 +from harness import meta as harness_meta # noqa: E402 def _fake_clock(monkeypatch): @@ -31,11 +33,15 @@ def _fake_clock(monkeypatch): return state -_DRIVE_META = { - "READY_PROBE": lambda d: [ - {"host": f"collabora-{d}", "path": "/hosting/discovery", "ok": (200,)} - ] -} +# RecipeMeta (rcust P1: wait_ready_probes reads meta.READY_PROBE off the loaded object); defaults +# + the drive-style probe hook (P3 ctx signature: the probe receives a HookCtx). 
+_DRIVE_META = dataclasses.replace( + harness_meta.load("ccci-no-such-recipe"), + READY_PROBE=lambda ctx: [ + {"host": f"collabora-{ctx.domain}", "path": "/hosting/discovery", "ok": (200,)} + ], +) +_NO_PROBE_META = harness_meta.load("ccci-no-such-recipe") def test_wait_ready_probes_raises_when_never_ready(monkeypatch): @@ -57,7 +63,7 @@ def test_wait_ready_probes_returns_when_ready(monkeypatch): def test_wait_ready_probes_noop_without_probe(monkeypatch): """A recipe with no READY_PROBE is a clean no-op (default behavior preserved for all recipes).""" monkeypatch.setattr(lc, "http_get", lambda *a, **k: 599) # would fail if it were consulted - lc.wait_ready_probes({}, "x.ci.commoninternet.net", timeout=1) # no raise, no call + lc.wait_ready_probes(_NO_PROBE_META, "x.ci.commoninternet.net", timeout=1) # no raise, no call def test_wait_healthy_raises_when_services_never_converge(monkeypatch): diff --git a/tests/unit/test_manifest.py b/tests/unit/test_manifest.py new file mode 100644 index 0000000..e415c5e --- /dev/null +++ b/tests/unit/test_manifest.py @@ -0,0 +1,177 @@ +"""Unit tests for the customization manifest (rcust P5; spec §8 R4 mitigation). + +The manifest is PURE PRESENTATION (must never influence a verdict); these tests pin that it is +COMPLETE (every customization surface a synthetic recipe exercises shows up), DETERMINISTIC +(same inputs -> byte-identical JSON), serializable, and HC2-honoring (unapproved repo-local +contributions are invisible). Pure / tmp-file only. 
Run cold: + cc-ci-run -m pytest tests/unit/test_manifest.py -q +""" + +from __future__ import annotations + +import json +import os +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import discovery, manifest # noqa: E402 +from harness import meta as meta_mod # noqa: E402 + +RECIPE = "ccci-manifest-fixture" + + +def _mk_synthetic(tmp_path, monkeypatch, approved=True): + """A synthetic recipe dir exercising EVERY manifest surface, plus a repo-local tests dir. + + cc-ci side: meta (2 data keys + 1 hook key non-default), ops.py (2 pre-ops), install_steps.sh, + compose.ccci.yml, test_backup.py overlay, 2 functional + 1 playwright custom tests. + repo-local side: test_restore.py overlay + 1 functional custom test (visible iff approved, HC2). + """ + ccci_root = tmp_path / "cc-ci-tests" + d = ccci_root / RECIPE + (d / "functional").mkdir(parents=True) + (d / "playwright").mkdir() + (d / "recipe_meta.py").write_text( + "HTTP_TIMEOUT = 600\n" + "DEPS = ['keycloak']\n" + "def EXTRA_ENV(ctx):\n return {}\n" + "_PRIVATE = 'exempt'\n" + ) + (d / "ops.py").write_text("def pre_upgrade(ctx):\n pass\n\ndef pre_backup(ctx):\n pass\n") + (d / "install_steps.sh").write_text("#!/usr/bin/env bash\n") + (d / "compose.ccci.yml").write_text("version: '3.8'\n") + (d / "test_backup.py").write_text("# lifecycle overlay\n") + (d / "functional" / "test_a.py").write_text("# custom\n") + (d / "functional" / "test_b.py").write_text("# custom\n") + (d / "playwright" / "test_ui.py").write_text("# custom\n") + + rl = tmp_path / "repo-local" + (rl / "functional").mkdir(parents=True) + (rl / "functional" / "test_c.py").write_text("# repo-local custom\n") + (rl / "test_restore.py").write_text("# repo-local lifecycle overlay\n") + + monkeypatch.setattr(discovery, "cc_ci_dir", lambda r: str(ccci_root / r)) + monkeypatch.setattr(meta_mod, "TESTS_DIR", str(ccci_root)) # compose.ccci.yml discovery + approved_file = tmp_path / "approved.txt" + 
approved_file.write_text(f"{RECIPE}\n" if approved else "") + monkeypatch.setenv("CCCI_REPO_LOCAL_APPROVED_FILE", str(approved_file)) + + meta = meta_mod.load(RECIPE, tests_dir=str(ccci_root)) + return meta, str(rl) + + +def test_manifest_complete(tmp_path, monkeypatch): + # Every surface the synthetic recipe customizes appears — nothing silently dropped (R4). + meta, rl = _mk_synthetic(tmp_path, monkeypatch) + m = manifest.build(RECIPE, meta, rl) + assert m["meta_non_default"] == { + "DEPS": ["keycloak"], + "EXTRA_ENV": "", + "HTTP_TIMEOUT": 600, + } + assert m["hooks"] == { + "ops.py": {"cc-ci": ["pre_backup", "pre_upgrade"]}, + "install_steps.sh": "cc-ci", + "compose.ccci.yml": "cc-ci", + } + assert m["overlays"] == {"backup": "cc-ci", "restore": "repo-local"} + assert m["custom_tests"] == { + "cc-ci": {"functional": 2, "playwright": 1}, + "repo-local": {"functional": 1}, + } + assert m["env_overrides"] == [] + + +def test_manifest_deterministic_and_serializable(tmp_path, monkeypatch): + meta, rl = _mk_synthetic(tmp_path, monkeypatch) + a = manifest.build(RECIPE, meta, rl) + b = manifest.build(RECIPE, meta, rl) + assert json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True) + assert json.loads(json.dumps(a)) == a # round-trips: no callables/tuples leak through + + +def test_manifest_zero_config_floor(tmp_path, monkeypatch): + # A recipe with NO customization at all -> every section empty, render says so explicitly. 
+ ccci_root = tmp_path / "cc-ci-tests" + (ccci_root / RECIPE).mkdir(parents=True) + monkeypatch.setattr(discovery, "cc_ci_dir", lambda r: str(ccci_root / r)) + monkeypatch.setattr(meta_mod, "TESTS_DIR", str(ccci_root)) + monkeypatch.setenv("CCCI_REPO_LOCAL_APPROVED_FILE", str(tmp_path / "missing.txt")) + meta = meta_mod.load(RECIPE, tests_dir=str(ccci_root)) + m = manifest.build(RECIPE, meta, None) + assert m == { + "meta_non_default": {}, + "hooks": {}, + "overlays": {}, + "custom_tests": {}, + "env_overrides": [], + } + out = manifest.render(RECIPE, m) + assert f"===== customization manifest: {RECIPE} =====" in out + assert "(none — zero-config floor)" in out + + +def test_manifest_repo_local_hc2_gate(tmp_path, monkeypatch): + # Unapproved recipe -> repo-local overlay + custom tests INVISIBLE (same default-deny as the + # discovery they ride on; the manifest must not advertise code the run will not execute). + meta, rl = _mk_synthetic(tmp_path, monkeypatch, approved=False) + m = manifest.build(RECIPE, meta, rl) + assert m["overlays"] == {"backup": "cc-ci"} # repo-local test_restore.py gone + assert "repo-local" not in m["custom_tests"] + + +def test_manifest_env_overrides_and_ci_flag(tmp_path, monkeypatch): + meta, rl = _mk_synthetic(tmp_path, monkeypatch) + monkeypatch.setenv("CCCI_SKIP_GENERIC_BACKUP", "1") + monkeypatch.setenv("CCCI_SKIP_GENERIC_UPGRADE", "0") # falsy -> not an active override + m = manifest.build(RECIPE, meta, rl) + assert m["env_overrides"] == ["CCCI_SKIP_GENERIC_BACKUP"] + monkeypatch.delenv("DRONE", raising=False) + assert "!!" not in manifest.render(RECIPE, m) # local dev: no CI warning + monkeypatch.setenv("DRONE", "true") # riding a CI run -> loud flag (P2c) + assert "!! 
dev-only override active in CI" in manifest.render(RECIPE, m) + + +def test_manifest_redacts_sensitive_named_values(tmp_path, monkeypatch): + # Meta values are repo-public by construction, but the manifest lands on the dashboard: + # secret-NAMED entries (top-level or nested dict keys, e.g. plausible's + # EXTRA_ENV["SECRET_KEY_BASE"] dummy) render as '' — name shown, value masked. + # Non-sensitive names (incl. KEYCLOAK_* — 'KEY' matches only as a word segment) pass through. + ccci_root = tmp_path / "cc-ci-tests" + d = ccci_root / RECIPE + d.mkdir(parents=True) + (d / "recipe_meta.py").write_text( + "EXTRA_ENV = {\n" + " 'SECRET_KEY_BASE': 'dummy-ci-constant',\n" + " 'API_KEY': 'also-dummy',\n" + " 'KEYCLOAK_URL': 'https://kc.example',\n" + "}\n" + ) + monkeypatch.setattr(discovery, "cc_ci_dir", lambda r: str(ccci_root / r)) + monkeypatch.setattr(meta_mod, "TESTS_DIR", str(ccci_root)) + monkeypatch.setenv("CCCI_REPO_LOCAL_APPROVED_FILE", str(tmp_path / "missing.txt")) + meta = meta_mod.load(RECIPE, tests_dir=str(ccci_root)) + m = manifest.build(RECIPE, meta, None) + assert m["meta_non_default"]["EXTRA_ENV"] == { + "SECRET_KEY_BASE": "", + "API_KEY": "", + "KEYCLOAK_URL": "https://kc.example", + } + out = manifest.render(RECIPE, m) + assert "dummy-ci-constant" not in out and "also-dummy" not in out + assert "SECRET_KEY_BASE" in out # the key NAME stays visible + + +def test_render_lists_every_surface(tmp_path, monkeypatch): + meta, rl = _mk_synthetic(tmp_path, monkeypatch) + out = manifest.render(RECIPE, manifest.build(RECIPE, meta, rl)) + lines = out.splitlines() + assert lines[0] == f"===== customization manifest: {RECIPE} =====" + assert "meta (non-default): DEPS=['keycloak'] EXTRA_ENV='' HTTP_TIMEOUT=600" in lines + assert ( + "hooks: ops.py[pre_backup,pre_upgrade](cc-ci) install_steps.sh(cc-ci) compose.ccci.yml(cc-ci)" + in lines + ) + assert "overlays: test_backup.py(cc-ci) test_restore.py(repo-local)" in lines + assert "custom tests: functional/=2 
playwright/=1 (cc-ci) functional/=1 (repo-local)" in lines + assert "env overrides: (none)" in lines diff --git a/tests/unit/test_meta.py b/tests/unit/test_meta.py new file mode 100644 index 0000000..0ffb689 --- /dev/null +++ b/tests/unit/test_meta.py @@ -0,0 +1,276 @@ +"""Unit tests for the single recipe-meta loader + key registry (rcust P1; spec §8 R1/R6). + +Covers: every in-repo recipe_meta.py loads clean through the registry (THE typo gate), validation +hard-errors (unknown key, wrong type, callable on a data key), the zero-config baseline defaults +(spec §2), the underscore exemption for recipe-private constants, and the registry↔generated-doc +sync (P1.5; drift fails CI). Run: cc-ci-run -m pytest tests/unit/test_meta.py -q +""" + +from __future__ import annotations + +import os +import subprocess +import sys + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import meta as meta_mod # noqa: E402 +from harness.meta import KEYS, MetaError, RecipeMeta # noqa: E402 + +ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +def _recipes_with_meta() -> list[str]: + tests_dir = os.path.join(ROOT, "tests") + return sorted( + n + for n in os.listdir(tests_dir) + if os.path.isfile(os.path.join(tests_dir, n, "recipe_meta.py")) + ) + + +# ---- the typo gate: every in-repo recipe meta must validate against the registry -------------- + + +@pytest.mark.parametrize("recipe", _recipes_with_meta()) +def test_every_recipe_meta_loads_clean(recipe): + """All tests/*/recipe_meta.py in the repo load + validate through the registry. 
A typo'd or + unregistered ALL-CAPS key in any recipe meta fails HERE, at PR time — not silently at run + time (the R6 failure mode this restructure kills).""" + meta = meta_mod.load(recipe) + assert isinstance(meta, RecipeMeta) + # sanity: the 4 base keys always materialize with usable types + assert isinstance(meta.HEALTH_PATH, str) + assert isinstance(meta.HEALTH_OK, tuple) and meta.HEALTH_OK + assert isinstance(meta.DEPLOY_TIMEOUT, int) and isinstance(meta.HTTP_TIMEOUT, int) + + +# ---- zero-config baseline (spec §2) ------------------------------------------------------------ + + +def test_missing_meta_yields_spec_baseline(tmp_path): + meta = meta_mod.load("no-such-recipe", tests_dir=str(tmp_path)) + assert meta.HEALTH_PATH == "/" + assert meta.HEALTH_OK == (200, 301, 302) + assert meta.DEPLOY_TIMEOUT == 600 + assert meta.HTTP_TIMEOUT == 300 + assert meta.BACKUP_CAPABLE is None # None = auto-detect (tri-state, not False) + assert meta.EXPECTED_NA is None + assert meta.READY_PROBE is None + assert meta.UPGRADE_BASE_VERSION is None + assert meta.BACKUP_VERIFY is None + assert meta.UPGRADE_EXTRA_ENV is None + assert meta.EXTRA_ENV == {} + assert meta.DEPS == [] + assert meta.WARM_CANONICAL is False + assert meta.SCREENSHOT is None + assert meta_mod.non_default(meta) == {} + + +def test_registry_field_set_matches_dataclass(): + """The RecipeMeta field set is generated from KEYS — no drift possible, pinned anyway.""" + import dataclasses + + assert [f.name for f in dataclasses.fields(RecipeMeta)] == [k.name for k in KEYS] + # the 14 final keys, no more (the 3 P2-deleted legacy keys are gone from the registry, + # so any recipe_meta still setting them hard-fails the typo gate) + assert len(KEYS) == 14 + assert not [k for k in KEYS if k.deprecated] + for gone in ("CHAOS_BASE_DEPLOY", "OIDC_AT_INSTALL", "SKIP_GENERIC"): + assert gone not in {k.name for k in KEYS} + + +# ---- validation hard errors (locked decision: fail fast at load) ------------------------------- + 
+ +def _write_meta(tmp_path, body: str, recipe: str = "r") -> str: + d = tmp_path / recipe + d.mkdir(exist_ok=True) + (d / "recipe_meta.py").write_text(body) + return recipe + + +def test_unknown_key_raises_with_suggestion(tmp_path): + r = _write_meta(tmp_path, "READINESS_PROBE = None\n") # the R6 typo example + with pytest.raises(MetaError) as ei: + meta_mod.load(r, tests_dir=str(tmp_path)) + msg = str(ei.value) + assert "READINESS_PROBE" in msg and "READY_PROBE" in msg # names the typo + nearest key + + +def test_unknown_key_without_near_match_lists_registry(tmp_path): + r = _write_meta(tmp_path, "TOTALLY_BOGUS_KNOB = 1\n") + with pytest.raises(MetaError) as ei: + meta_mod.load(r, tests_dir=str(tmp_path)) + assert "HEALTH_PATH" in str(ei.value) # registered keys listed for the reader + + +def test_wrong_type_raises(tmp_path): + r = _write_meta(tmp_path, 'DEPLOY_TIMEOUT = "900"\n') + with pytest.raises(MetaError, match="DEPLOY_TIMEOUT"): + meta_mod.load(r, tests_dir=str(tmp_path)) + + +def test_bool_not_accepted_as_int(tmp_path): + r = _write_meta(tmp_path, "DEPLOY_TIMEOUT = True\n") + with pytest.raises(MetaError, match="DEPLOY_TIMEOUT"): + meta_mod.load(r, tests_dir=str(tmp_path)) + + +def test_callable_on_data_key_rejected(tmp_path): + r = _write_meta(tmp_path, "def HEALTH_PATH():\n return '/'\n") + with pytest.raises(MetaError, match="hook-typed"): + meta_mod.load(r, tests_dir=str(tmp_path)) + + +def test_non_callable_on_hook_key_rejected(tmp_path): + r = _write_meta(tmp_path, "READY_PROBE = ['not', 'a', 'callable']\n") + with pytest.raises(MetaError, match="READY_PROBE"): + meta_mod.load(r, tests_dir=str(tmp_path)) + + +def test_underscore_names_are_private_and_exempt(tmp_path): + r = _write_meta( + tmp_path, + "_WELCOME_TEXT_MARKER = 'marker-xyz'\n_MAX_USERS = 42\n" + "EXTRA_ENV = {'WELCOME_TEXT': _WELCOME_TEXT_MARKER, 'USERS': str(_MAX_USERS)}\n", + ) + meta = meta_mod.load(r, tests_dir=str(tmp_path)) + assert meta.EXTRA_ENV == {"WELCOME_TEXT": 
"marker-xyz", "USERS": "42"} + + +def test_lowercase_helpers_ignored(tmp_path): + r = _write_meta( + tmp_path, + "def _helper(d):\n return {'K': d}\n\ndef EXTRA_ENV(ctx):\n return _helper(ctx.domain)\n", + ) + meta = meta_mod.load(r, tests_dir=str(tmp_path)) + ctx = meta_mod.hook_ctx("x.example", meta) + assert meta_mod.extra_env(meta, ctx) == {"K": "x.example"} + + +# ---- normalization + helpers -------------------------------------------------------------------- + + +def test_health_ok_list_normalized_to_tuple(tmp_path): + r = _write_meta(tmp_path, "HEALTH_OK = [200, 302]\n") + assert meta_mod.load(r, tests_dir=str(tmp_path)).HEALTH_OK == (200, 302) + + +def test_extra_env_dict_and_callable_forms(tmp_path): + r = _write_meta(tmp_path, "EXTRA_ENV = {'A': 1}\n") + meta = meta_mod.load(r, tests_dir=str(tmp_path)) + assert meta_mod.extra_env(meta, meta_mod.hook_ctx("d", meta)) == {"A": "1"} # stringified + r2 = _write_meta( + tmp_path, "UPGRADE_EXTRA_ENV = lambda ctx: {'COMPOSE_FILE': ctx.domain}\n", recipe="r2" + ) + meta2 = meta_mod.load(r2, tests_dir=str(tmp_path)) + ctx2 = meta_mod.hook_ctx("dom.x", meta2, op="upgrade") + assert meta_mod.upgrade_extra_env(meta2, ctx2) == {"COMPOSE_FILE": "dom.x"} + assert meta_mod.extra_env(meta2, ctx2) == {} # unset EXTRA_ENV resolves to {} + + +# ---- P3: uniform ctx hook convention ------------------------------------------------------------- + + +def test_hook_ctx_fields(tmp_path): + meta = meta_mod.load("no-such", tests_dir=str(tmp_path)) + ctx = meta_mod.hook_ctx("app.ci.example", meta, op="backup") + assert ctx.domain == "app.ci.example" + assert ctx.base_url == "https://app.ci.example" + assert ctx.meta is meta + assert ctx.op == "backup" + assert meta_mod.hook_ctx("d", meta).op is None + + +def test_hook_ctx_deps_from_run_file(tmp_path, monkeypatch): + import json + + meta = meta_mod.load("no-such", tests_dir=str(tmp_path)) + monkeypatch.delenv("CCCI_DEPS_FILE", raising=False) + assert meta_mod.hook_ctx("d", meta).deps 
is None + f = tmp_path / "deps.json" + f.write_text(json.dumps({"keycloak": {"recipe": "keycloak", "domain": "kc.x"}})) + monkeypatch.setenv("CCCI_DEPS_FILE", str(f)) + deps = meta_mod.hook_ctx("d", meta).deps + assert deps["keycloak"]["domain"] == "kc.x" + f.write_text("{}") # empty dict -> None (deps declared but not provisioned) + assert meta_mod.hook_ctx("d", meta).deps is None + + +def test_legacy_hook_signature_raises_clear_meta_error(tmp_path): + """A pre-restructure hook signature must fail AT LOAD with a migration message — never a + silent TypeError mid-run (P3.4).""" + r = _write_meta(tmp_path, "def READY_PROBE(domain):\n return []\n") + with pytest.raises(MetaError, match="ctx"): + meta_mod.load(r, tests_dir=str(tmp_path)) + r2 = _write_meta(tmp_path, "EXTRA_ENV = lambda domain: {}\n", recipe="r2") + with pytest.raises(MetaError, match="restructure"): + meta_mod.load(r2, tests_dir=str(tmp_path)) + r3 = _write_meta( + tmp_path, "def SCREENSHOT(page, domain, meta):\n return None\n", recipe="r3" + ) + with pytest.raises(MetaError, match="page, ctx"): + meta_mod.load(r3, tests_dir=str(tmp_path)) + + +def test_ctx_hook_signatures_accepted(tmp_path): + r = _write_meta( + tmp_path, + "def READY_PROBE(ctx):\n return []\n" + "def BACKUP_VERIFY(ctx):\n return True\n" + "def SCREENSHOT(page, ctx):\n return None\n" + "def EXTRA_ENV(ctx):\n return {}\n", + ) + meta = meta_mod.load(r, tests_dir=str(tmp_path)) + assert callable(meta.READY_PROBE) and callable(meta.SCREENSHOT) + + +def test_check_hook_signature_for_pre_op_hooks(): + """The orchestrator validates ops.py pre_ hooks with the same checker (legacy + (domain, meta) form names the migration).""" + + def legacy(domain, meta): + pass + + def new(ctx): + pass + + with pytest.raises(MetaError, match="ctx"): + meta_mod.check_hook_signature(legacy, ("ctx",), "tests/x/ops.py::pre_upgrade") + meta_mod.check_hook_signature(new, ("ctx",), "tests/x/ops.py::pre_upgrade") # no raise + + +def 
test_non_default_reports_only_customized_keys(tmp_path): + r = _write_meta(tmp_path, "DEPLOY_TIMEOUT = 1500\nDEPS = ['keycloak']\n") + nd = meta_mod.non_default(meta_mod.load(r, tests_dir=str(tmp_path))) + assert nd == {"DEPLOY_TIMEOUT": 1500, "DEPS": ["keycloak"]} + + +def test_meta_is_frozen(): + import dataclasses + + meta = meta_mod.load("custom-html") + with pytest.raises(dataclasses.FrozenInstanceError): + meta.DEPLOY_TIMEOUT = 1 + + +# ---- doc generation sync (P1.5: the committed §4 table == the registry rendering) --------------- + + +def test_generated_doc_table_in_sync(): + """docs/recipe-customization.md's key reference table is GENERATED from the registry + (scripts/gen-meta-docs.py). If this fails: re-run `python3 scripts/gen-meta-docs.py` and + commit the result — the table must never drift from the registry (R5).""" + gen = os.path.join(ROOT, "scripts", "gen-meta-docs.py") + doc = os.path.join(ROOT, "docs", "recipe-customization.md") + rendered = subprocess.run( + [sys.executable, gen, "--print"], capture_output=True, text=True, check=True + ).stdout + with open(doc) as f: + committed = f.read() + assert rendered.strip() in committed, ( + "docs/recipe-customization.md key table is out of sync with the harness.meta registry — " + "run `python3 scripts/gen-meta-docs.py` and commit" + ) diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index e8cc91d..d251ef0 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -280,6 +280,41 @@ def test_build_results_threads_expected_na(tmp_path): ) # backup_restore declared; functional passed → clean +def test_build_results_threads_customization(tmp_path): + # rcust P5: the run-start customization manifest lands verbatim under "customization"; + # omitted -> explicit None (key always present in the schema). 
+ recs = [ + { + "tier": "install", + "source": "generic", + "file": "g/test_install.py", + "rc": 0, + "junit": _write(tmp_path, "i.xml", JUNIT_PASS), + }, + ] + cust = { + "meta_non_default": {"HTTP_TIMEOUT": 600}, + "hooks": {"install_steps.sh": "cc-ci"}, + "overlays": {}, + "custom_tests": {"cc-ci": {"functional": 2}}, + "env_overrides": [], + } + kwargs = { + "recipe": "hedgedoc", + "version": "1.2.3", + "pr": "7", + "ref": None, + "records": recs, + "results": _results(), + "backup_capable": True, + "clean_teardown": True, + "no_secret_leak": True, + "finished_ts": 0.0, + } + assert R.build_results(**kwargs, customization=cust)["customization"] == cust + assert R.build_results(**kwargs)["customization"] is None + + def test_write_results_roundtrip(tmp_path): data = {"run_id": "42", "level": 3, "stages": []} path = R.write_results(data, runs_dir_override=str(tmp_path)) diff --git a/tests/unit/test_screenshot.py b/tests/unit/test_screenshot.py index f033946..a5cc277 100644 --- a/tests/unit/test_screenshot.py +++ b/tests/unit/test_screenshot.py @@ -11,6 +11,7 @@ import os import sys sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "runner")) +from harness import meta as meta_mod # noqa: E402 from harness import screenshot as S # noqa: E402 @@ -29,3 +30,19 @@ def test_hook_returned_when_callable(): pass assert S._load_screenshot_hook({"SCREENSHOT": hook}) is hook + + +def test_screenshot_reachable_through_real_load_path(tmp_path): + """R2 proof (rcust P1): a recipe SCREENSHOT hook declared in recipe_meta.py arrives at + screenshot._load_screenshot_hook through the REAL orchestrator load path (meta.load — the + object run_recipe_ci passes to capture()). 
Under the old six-loader world the orchestrator's + L1 allowlist dropped SCREENSHOT, so the hook was unreachable (spec §8 R2).""" + d = tmp_path / "shotrecipe" + d.mkdir() + (d / "recipe_meta.py").write_text( + "def SCREENSHOT(page, ctx):\n return None\n", + ) + meta = meta_mod.load("shotrecipe", tests_dir=str(tmp_path)) + hook = S._load_screenshot_hook(meta) + assert callable(hook), "SCREENSHOT hook did not survive the orchestrator load path (R2)" + assert S._load_screenshot_hook(meta_mod.load("no-such", tests_dir=str(tmp_path))) is None