Files
cc-ci/runner/harness/deps.py
autonomic-bot 1adfbd70cb
Some checks failed
continuous-integration/drone/push Build is failing
fix(drone-dep): correct gitea admin create flag + dep deploy counter
Two issues found during first manual harness run:

1. gitea `--must-change-password false` (space form) leaves a pending
   password-change for the ci_admin user, blocking the OAuth2 API call.
   Fix: use `--must-change-password=false` (equals form, required by
   gitea's BoolFlag with default=true).

2. dep deploy_app() calls incremented the DG4.1 "one deploy per run"
   counter, causing a false violation when gitea dep + drone both deploy.
   Fix: lifecycle.deploy_app gains _count_deploy=True param (default
   backward-compat); deps_mod.deploy_deps passes _count_deploy=False so
   only the recipe-under-test counts toward DG4.1.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-11 21:37:45 +00:00

167 lines
7.6 KiB
Python

"""Dependency-resolver harness primitive (Phase 2 §4.2 / Q2.3).
A Phase-2 recipe may declare a set of OTHER recipes it requires to run its tests (e.g.
lasuite-docs requires keycloak as its SSO provider). The orchestrator reads the deps list,
deploys each one BEFORE the recipe-under-test, persists their per-run identity to a JSON file
the recipe's tests can read, and tears them down at the end of the run.
Per Phase-2 DECISIONS:
- Deps are declared on the cc-ci side in `tests/<recipe>/recipe_meta.py` as
`DEPS = ["keycloak", ...]` (a list of recipe names). This keeps the cc-ci surface authoritative
per plan §1.4 (cc-ci is self-contained at runtime).
- Each dep is deployed at a unique per-run domain `<dep[:4]>-<6hex>` (the same naming scheme as
the recipe under test, but the 6hex is derived from `recipe + pr + ref + dep_name` so two deps
of the same kind by different recipes never collide on a host).
- Dep deploys are SEQUENTIAL, never concurrent (per plan §4.2 — heavy deps + recipe under test
must share the single node's MAX_TESTS budget without exceeding it).
- Each dep is undeployed in the orchestrator's `finally`, in **reverse** order so a recipe-under-
test can depend on multiple deps with a dependency chain (a → b → c teardown is c → b → a).
Run state:
- `$CCCI_DEPS_FILE` — JSON file written by the orchestrator after each dep deploys; each entry
carries at least `{"recipe": "<dep-recipe>", "domain": "<dep-domain>"}` (see `write_run_state`
for the two supported file shapes). Tests access it via the `deps` pytest fixture defined in
`tests/conftest.py`.
"""
from __future__ import annotations
import contextlib
import json
import os
from collections.abc import Iterable
from . import lifecycle, naming
from . import meta as meta_mod
def dep_domain(parent_recipe: str, pr: str, ref: str | None, dep_recipe: str) -> str:
    """Return the per-run domain at which `dep_recipe` is deployed for `parent_recipe`.

    The result comes from `naming.app_domain(dep_recipe, pr, <synthetic ref>)`, where the
    synthetic ref is `"<parent_recipe>|<ref or ''>|dep|<dep_recipe>"`. A missing/empty `ref`
    contributes an empty segment.
    """
    # Per the harness notes, naming.app_domain derives the hex suffix from (recipe, pr, ref) and
    # the visible prefix from the recipe argument. Folding both recipe names into the ref is
    # what separates (parent_A, dep_X) from (parent_B, dep_X); passing dep_recipe as the recipe
    # keeps the prefix naming the dep itself (e.g. `keyc-...`).
    ref_segment = ref if ref else ""
    hash_input = "{}|{}|dep|{}".format(parent_recipe, ref_segment, dep_recipe)
    return naming.app_domain(dep_recipe, pr, hash_input)
def write_run_state(deps_state) -> None:
    """Write the deps state file ($CCCI_DEPS_FILE). Two shapes supported (canonical=keyed dict):

    1. **Legacy list-of-entries:** `[{"recipe": "<dep>", "domain": "<d>"}, ...]` (Q2.3 original).
       Still accepted by `load_run_state` for backwards compat — the `deps` fixture flattens.
    2. **NEW per-spec dict (operator-2026-05-28 SSO-dep plan §3.2):**
       `{"<dep_recipe>": {"recipe": "<dep>", "domain": "<d>", "realm": "...",
       "client_id": "...", "client_secret": "...", "admin_user": "...", "admin_password": "..."}}`.
       The per-recipe `install_steps.sh` hook reads this via `jq` to wire OIDC env.

    No-op if `$CCCI_DEPS_FILE` isn't set.

    Raises whatever `json.dumps` raises for a non-serializable `deps_state` (e.g. `TypeError`);
    in that case the existing file is left untouched. `OSError` from opening/writing the file
    propagates as before.
    """
    path = os.environ.get("CCCI_DEPS_FILE")
    if not path:
        return
    # Serialize BEFORE opening: open(..., "w") truncates immediately, so encoding straight into
    # the handle would wipe (or half-write) the previous state whenever the payload turns out
    # not to be JSON-serializable.
    payload = json.dumps(deps_state)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)
def deploy_deps(
    parent_recipe: str,
    pr: str,
    ref: str | None,
    deps: Iterable[str],
    meta_for: dict | None = None,
) -> list[dict]:
    """Deploy each declared dep, sequentially, at its per-run domain.

    `meta_for` maps dep_recipe -> RecipeMeta (HEALTH_PATH / HEALTH_OK / timeouts) so the
    readiness wait uses per-dep config; a dep without an entry is loaded via `meta.load()`.

    The state file (`write_run_state`) is reset before the first deploy and rewritten after
    every dep that becomes healthy. If a later dep fails, the file therefore still lists the
    deps that are already running, so a caller that never received the return value can recover
    them with `load_run_state()` and tear them down.

    Returns the list of state dicts, `{"recipe": <dep>, "domain": <domain>}`, in deploy order.

    Raises whatever `lifecycle.deploy_app` / `lifecycle.wait_healthy` raise. When the readiness
    wait fails, the failing dep is torn down best-effort (`verify=False`) before the exception
    is re-raised; deps deployed earlier in the same call are NOT torn down here.
    """
    meta_for = meta_for or {}
    state: list[dict] = []
    # Start from an empty record so entries left over in the file can never be mistaken for
    # deps of this call (also covers the "no deps declared" case).
    write_run_state(state)
    for dep in deps:
        domain = dep_domain(parent_recipe, pr, ref, dep)
        print(f" dep: deploying {dep} -> {domain}", flush=True)
        dm = meta_for.get(dep) or meta_mod.load(dep)
        deploy_timeout = int(dm.DEPLOY_TIMEOUT)
        # Dep deploys do NOT count toward the DG4.1 "one deploy per run" invariant — that
        # contract covers the recipe-under-test only; each dep is a supporting service, not the
        # subject of the test. Pass _count_deploy=False so the main recipe's single-deploy
        # assertion isn't distorted by the number of deps declared.
        lifecycle.deploy_app(
            dep,
            domain,
            secrets=True,
            deploy_timeout=deploy_timeout,
            meta=dm,
            _count_deploy=False,
        )
        try:
            lifecycle.wait_healthy(
                domain,
                ok_codes=tuple(dm.HEALTH_OK),
                path=dm.HEALTH_PATH,
                deploy_timeout=deploy_timeout,
                http_timeout=int(dm.HTTP_TIMEOUT),
            )
        except Exception:
            # If a dep fails to converge, abort the whole resolve. Clean up the failing dep
            # here (best-effort, errors suppressed) and let the original failure propagate.
            print(f" dep: {dep} ({domain}) failed readiness; tearing down", flush=True)
            with contextlib.suppress(Exception):
                lifecycle.teardown_app(domain, verify=False)
            raise
        state.append({"recipe": dep, "domain": domain})
        print(f" dep: {dep} ready @ {domain}", flush=True)
        # Persist after EVERY successful dep, not only at the end: otherwise a failure on a
        # later dep would leave the earlier ones deployed with no record anywhere.
        write_run_state(state)
    return state
def teardown_deps(state: list[dict] | dict[str, dict] | None) -> None:
    """Undeploy each dep in reverse order. **VERIFY=True (F2-5 fix)**: per plan §9 teardown is
    sacred — a dep that leaks containers/volumes/secrets corrupts the next run that uses the same
    deterministic dep domain.

    `state` may be either shape produced by `load_run_state`: the legacy list of entries, or the
    keyed dict `{"<dep_recipe>": {...entry...}}` (its values are used, in reverse insertion
    order). `None` and empty containers are a no-op. Entries without a `domain` are skipped;
    entries that are not dicts are skipped with a loud log line instead of aborting the loop.

    Failures are LOGGED LOUDLY (not silently suppressed) so a leak is visible in the run output;
    we continue to teardown other deps so one failure doesn't strand the rest; after all attempts
    we **raise** `lifecycle.TeardownError` if any dep failed to fully teardown, so the caller can
    treat the leak as a run failure.
    """
    # Normalise both supported shapes to a plain list of entries. Iterating a dict directly
    # would yield its string keys, which have no `.get`.
    if isinstance(state, dict):
        entries = list(state.values())
    else:
        entries = list(state or [])
    errors: list[str] = []
    for entry in reversed(entries):
        if not isinstance(entry, dict):
            # A malformed entry carries no domain to act on; report it and keep going so the
            # remaining deps still get torn down.
            print(f" !! dep: skipping malformed state entry {entry!r}", flush=True)
            continue
        domain = entry.get("domain")
        if not domain:
            continue
        recipe = entry.get("recipe", "?")
        print(f" dep: tearing down {recipe} @ {domain}", flush=True)
        try:
            lifecycle.teardown_app(domain, verify=True)
        except Exception as e:  # noqa: BLE001 — every failure must be visible, but we want to try the rest first
            msg = f"dep {recipe} @ {domain} teardown failed: {e}"
            print(f" !! {msg}", flush=True)
            errors.append(msg)
    if errors:
        raise lifecycle.TeardownError("dep teardown failures: " + " ; ".join(errors))
def load_run_state() -> list | dict:
    """Read the current run's deps state from `$CCCI_DEPS_FILE`.

    Returns the decoded JSON content when it is a non-empty list or dict (both shapes are
    supported, see `write_run_state`). Returns `[]` when the variable is unset, the file is
    missing/unreadable, the content is not valid JSON, or the payload is empty or is not a
    list/dict — so callers can always iterate the result as a container of entries.
    """
    path = os.environ.get("CCCI_DEPS_FILE")
    if not path:
        return []
    try:
        # No os.path.exists() pre-check: a missing path raises FileNotFoundError, which is an
        # OSError and is handled below. JSON is read as UTF-8 regardless of the host locale.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return []
    # A bare scalar (e.g. `5` or `"x"`) is valid JSON but not a deps state; treat it like an
    # empty file rather than handing callers something they cannot treat as entries.
    if isinstance(data, (list, dict)) and data:
        return data
    return []
def deps_as_dict(state) -> dict[str, dict]:
    """Normalise a deps state into a `recipe -> entry` mapping.

    A dict is assumed to already be in keyed form and is returned as-is (same object). Anything
    else is treated as the legacy list of entries: every element that is a dict with a truthy
    `"recipe"` value is indexed under that recipe name, and a later entry for the same recipe
    replaces an earlier one. A falsy `state` (`None`, `[]`) yields an empty dict.
    """
    if isinstance(state, dict):
        return state
    # Elements that are not dicts, or that lack a recipe name, cannot be keyed and are dropped.
    return {
        item["recipe"]: item
        for item in (state or [])
        if isinstance(item, dict) and item.get("recipe")
    }