feat(harness): P1 — single registry-backed meta loader (rcust)
All checks were successful
continuous-integration/drone/push Build is passing

One loader: runner/harness/meta.py::load(recipe) -> RecipeMeta (frozen dataclass,
attribute access), backed by the declarative KEYS registry (14 final keys + 3
P2-deprecated). The ONLY exec() of tests/<recipe>/recipe_meta.py. Validation per
the locked decision: unknown ALL-CAPS top-level name or type mismatch = MetaError
(hard error at load); underscore-prefixed names recipe-private; callables only on
hook-typed keys.

Migrated all six legacy loaders (spec §4 L1–L6):
- run_recipe_ci.py::_load_meta deleted; orchestrator loads once, passes meta down
- tests/conftest.py::_recipe_meta deleted; meta fixture returns full RecipeMeta (R3)
- lifecycle.py::_recipe_extra_env/_recipe_meta_flag deleted; deploy_app takes meta
- deps.py::declared_deps deleted; callers read meta.DEPS
- canonical.py::is_enrolled reads through meta.load()
- screenshot.py now actually receives SCREENSHOT through the orchestrator path (R2
  fix; proven by unit test through the real load path)

Mumble private constants underscore-prefixed (_WELCOME_TEXT_MARKER/_MAX_USERS) +
importers fixed. New tests/unit/test_meta.py (all-recipes-load-clean typo gate,
MetaError cases, spec §2 baseline defaults, underscore exemption, doc sync). Docs
§4 key table now GENERATED from the registry (scripts/gen-meta-docs.py); drift
fails CI.

Verified on cc-ci: cc-ci-run -m pytest tests/unit -q -> 175 passed; scripts/lint.sh -> PASS.
This commit is contained in:
autonomic-bot
2026-06-10 16:46:58 +00:00
parent 49fb818c60
commit 472a68b32c
18 changed files with 740 additions and 240 deletions

View File

@ -19,6 +19,7 @@ import ssl
import time
from . import abra, lifecycle
from . import meta as meta_mod
# A recipe is backup-capable iff a compose file carries a truthy backupbot.backup label.
_BACKUPBOT_RE = re.compile(r"backupbot\.backup\b[^\n]*\btrue\b", re.IGNORECASE)
@ -28,13 +29,14 @@ def _recipe_dir(recipe: str) -> str:
return abra.recipe_dir(recipe) # the per-run tree inside a CI run ($ABRA_DIR)
def backup_capable(recipe: str, meta: dict | None = None) -> bool:
def backup_capable(recipe: str, meta=None) -> bool:
"""Whether the harness should run the backup/restore tiers (else they are a clean N/A skip, DG3).
`recipe_meta.BACKUP_CAPABLE` (bool) overrides; otherwise auto-detect by scanning the recipe's
compose*.yml for a truthy `backupbot.backup` label (the Co-op Cloud backup convention)."""
if meta and "BACKUP_CAPABLE" in meta:
return bool(meta["BACKUP_CAPABLE"])
`recipe_meta.BACKUP_CAPABLE` (bool) overrides when explicitly set (RecipeMeta default is None =
unset); otherwise auto-detect by scanning the recipe's compose*.yml for a truthy
`backupbot.backup` label (the Co-op Cloud backup convention)."""
if meta is not None and meta.BACKUP_CAPABLE is not None:
return bool(meta.BACKUP_CAPABLE)
for path in glob.glob(os.path.join(_recipe_dir(recipe), "compose*.yml")):
try:
with open(path) as fh:
@ -75,7 +77,7 @@ def served_cert(domain: str, port: int = 443) -> tuple[bool, str]:
return (True, f"CN={cn} SAN={sans}")
def assert_serving(domain: str, meta: dict) -> None:
def assert_serving(domain: str, meta) -> None:
"""The single generic "is the app really serving?" assertion (DG1).
The app-vs-Traefik-fallback proof is steps 1+2 (both load-bearing, verified by the Adversary):
@ -90,14 +92,14 @@ def assert_serving(domain: str, meta: dict) -> None:
Steps 12 are BOUNDED POLLS (no bare sleep), so a state-mutating op (upgrade/restore) that leaves
the app briefly reconverging settles, while a persistent failure still fails within the timeout."""
deadline = time.time() + meta["DEPLOY_TIMEOUT"]
deadline = time.time() + meta.DEPLOY_TIMEOUT
while time.time() < deadline and not lifecycle.services_converged(domain):
time.sleep(5)
assert lifecycle.services_converged(domain), f"{domain}: services did not converge"
path = meta["HEALTH_PATH"]
ok = tuple(meta["HEALTH_OK"])
deadline = time.time() + meta["HTTP_TIMEOUT"]
path = meta.HEALTH_PATH
ok = tuple(meta.HEALTH_OK)
deadline = time.time() + meta.HTTP_TIMEOUT
served = False
status, body = 0, ""
while time.time() < deadline:
@ -141,7 +143,7 @@ def op_state() -> dict:
return {}
def assert_upgraded(domain: str, meta: dict) -> None:
def assert_upgraded(domain: str, meta) -> None:
"""Generic UPGRADE assertion (post-op): the orchestrator already performed the upgrade once via
`abra app deploy --chaos` of the PR-head checkout. Assert it reconverged + still serves AND that
the deployment is genuinely the PR-head code under test (HC1) — non-vacuously (guarding F1d-2).
@ -212,7 +214,7 @@ def assert_backup_artifact(domain: str) -> str:
return snap_id
def assert_restore_healthy(domain: str, meta: dict) -> None:
def assert_restore_healthy(domain: str, meta) -> None:
"""Generic RESTORE assertion (post-op): the orchestrator already restored. Assert the app is
healthy + serving again (assert_serving polls, so the post-restore reconverge settles)."""
assert_serving(domain, meta)
@ -226,7 +228,7 @@ def perform_upgrade(
recipe: str,
head_ref: str | None,
deploy_timeout: int = 900,
meta: dict | None = None,
meta=None,
) -> dict[str, str | None]:
"""Perform the UPGRADE op once, in place, to the PR-HEAD code under test (HC1): re-checkout the
PR head (the prev-tag base deploy reset the recipe working tree), then `abra app deploy --chaos`
@ -244,7 +246,8 @@ def perform_upgrade(
STRICTER convergence+health wait here: services N/N (wait_healthy) + app HEALTH_PATH healthy +
any recipe READY_PROBE (collabora WOPI discovery 200). This bounds readiness by OUR generous
deadline, not abra's impatient one — and is stronger evidence than abra's monitor."""
meta = meta or {}
if meta is None:
meta = meta_mod.load(recipe)
before = lifecycle.deployed_identity(domain)
if head_ref:
lifecycle.recipe_checkout_ref(recipe, head_ref)
@ -253,9 +256,7 @@ def perform_upgrade(
# (target) version, so the base deploys minimally WITHOUT it and the upgrade adds it to COMPOSE_FILE
# here, after the PR-head checkout (which ships the overlay) and before the chaos redeploy that
# picks up the new .env. Dict or callable(domain)->dict. No-op for recipes without it.
upgrade_env = meta.get("UPGRADE_EXTRA_ENV") or {}
if callable(upgrade_env):
upgrade_env = upgrade_env(domain) or {}
upgrade_env = meta_mod.upgrade_extra_env(meta, domain)
for k, v in upgrade_env.items():
print(f" upgrade-env: {k}={v}", flush=True)
abra.env_set(domain, k, v)
@ -266,14 +267,12 @@ def perform_upgrade(
# Own the convergence verification (abra's monitor was skipped via -c).
lifecycle.wait_healthy(
domain,
ok_codes=tuple(meta.get("HEALTH_OK", (200, 301, 302))),
path=meta.get("HEALTH_PATH", "/"),
deploy_timeout=int(meta.get("DEPLOY_TIMEOUT", deploy_timeout)),
http_timeout=int(meta.get("HTTP_TIMEOUT", 300)),
)
lifecycle.wait_ready_probes(
meta, domain, timeout=int(meta.get("DEPLOY_TIMEOUT", deploy_timeout))
ok_codes=tuple(meta.HEALTH_OK),
path=meta.HEALTH_PATH,
deploy_timeout=int(meta.DEPLOY_TIMEOUT),
http_timeout=int(meta.HTTP_TIMEOUT),
)
lifecycle.wait_ready_probes(meta, domain, timeout=int(meta.DEPLOY_TIMEOUT))
after = lifecycle.deployed_identity(domain)
# Evidence (HC1): the chaos-version label = the deployed recipe commit; it should match the
# PR-head we checked out — proving the upgrade deployed the code under test, not a published tag.