From a121d2c069b53e3e1f4ac1e4e9f9bb8686a51b8c Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Mon, 15 Jun 2026 21:01:21 +0000 Subject: [PATCH] =?UTF-8?q?fix(gtea):=20fix=20M2=20blockers=20=E2=80=94=20?= =?UTF-8?q?LFS=20upgrade=20and=20REF=3Dmain=20HC1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocker 1 (LFS roundtrip fails on PR #1): - Add UPGRADE_EXTRA_ENV to gitea recipe_meta.py — after PR-head checkout (compose.lfs.yml now in ABRA_DIR), add compose.lfs.yml to COMPOSE_FILE and set SECRET_LFS_JWT_SECRET_VERSION=v1 so the upgrade chaos redeploy actually runs with LFS enabled. Without this, the base install checks out the 3.5.x tag (compose.lfs.yml removed), EXTRA_ENV sees no LFS, and the upgrade chaos redeploy inherits the no-LFS .env — so the LFS test runs (compose.lfs.yml is restored by recipe_checkout_ref) but LFS is off. - Add abra.secret_generate(domain) in generic.perform_upgrade when upgrade_env is non-empty — generates lfs_jwt_secret before chaos redeploy. Blocker 2 (REF=main upgrade fails HC1): - Always use recipe_head_commit (git rev-parse HEAD) for head_ref instead of using ref directly. When ref="main" (a branch name), the HC1 commit check "head_ref.startswith(chaos_commit)" always fails since "main" ≠ SHA. recipe_head_commit returns the actual SHA after the fetch/checkout. Side-fix (stale creds — build #675): - ops.py pre_install: delete the per-domain creds file before calling _ensure_admin. A fresh install wipes gitea's DB; any creds file from a prior run on the same domain is stale and causes 401s in all API calls. 
Co-Authored-By: Claude Sonnet 4.6 --- machine-docs/BUILDER-INBOX.md | 57 ----------------------------------- runner/harness/generic.py | 5 +++ runner/run_recipe_ci.py | 7 +++-- tests/gitea/ops.py | 5 +++ tests/gitea/recipe_meta.py | 13 ++++++++ 5 files changed, 27 insertions(+), 60 deletions(-) delete mode 100644 machine-docs/BUILDER-INBOX.md diff --git a/machine-docs/BUILDER-INBOX.md b/machine-docs/BUILDER-INBOX.md deleted file mode 100644 index ad13f06..0000000 --- a/machine-docs/BUILDER-INBOX.md +++ /dev/null @@ -1,57 +0,0 @@ -# BUILDER-INBOX — phase gtea - -Adversary → Builder side-channel. Builder: consume this file and delete it. - ---- - -## M2 critical blockers @2026-06-15T20:50Z - -Runs 674 and 676 are complete. Two blockers found, detailed in BACKLOG-gtea.md. - -### Blocker 1 (run 676 — PR #1 LFS): test_lfs_roundtrip FAIL - -`git push` batch endpoint returns "Repository or object not found" → -gitea is running WITHOUT LFS enabled (LFS_START_SERVER=false in app.ini). - -`_lfs_available()` returned True (compose.lfs.yml WAS in the recipe dir at test time). -So the test ran but LFS is not actually working in the container. - -Recipe reflog for run 676: -- 20:35:35 — clone + checkout 357926f2 (PR head, compose.lfs.yml present) -- 20:35:37 — checkout 3.5.2+1.24.2-rootless (abra base-deploy, compose.lfs.yml REMOVED) -- 20:35:58 — checkout 357926f2 again (compose.lfs.yml RESTORED) -- 20:36:36 — test ran, `_lfs_available()` True (file present), push FAILED - -Suspected root cause: `SECRET_LFS_JWT_SECRET_VERSION=v1` is only in the EXTRA_ENV dict -(recipe_meta.py line: `env["SECRET_LFS_JWT_SECRET_VERSION"] = "v1"`). -`abra secret generate` reads the disk .env FILE, NOT the EXTRA_ENV dict. So if the .env file -doesn't have SECRET_LFS_JWT_SECRET_VERSION=v1 uncommented, `abra secret generate` never -creates the `lfs_jwt_secret` Docker secret. Then `docker stack deploy` with compose.lfs.yml -FAILS (external secret not found). 
Abra may silently fall back or retry without the overlay, -deploying gitea WITHOUT compose.lfs.yml → LFS_START_SERVER=false in app.ini. - -To verify: after manual deploy with RECIPE=gitea, PR=1, REF=357926f2: - docker exec CONTAINER grep LFS_START_SERVER /etc/gitea/app.ini - docker secret ls | grep lfs_jwt - -Fix option: in ops.py `pre_install(ctx)`, after creating admin user, call - subprocess.run(["abra", "app", "secret", "generate", ctx.domain, "--all"], ...) -to ensure lfs_jwt_secret is created before deploy. -OR: ensure the harness's secret generation step uses the EXTRA_ENV env vars -(pass them to the subprocess so abra can see SECRET_LFS_JWT_SECRET_VERSION). - -### Blocker 2 (run 674 — main branch): upgrade FAIL - -"upgrade deployed chaos commit 'e6a1cc79', not the intended PR-head 'main'" - -This is the REF=main edge case in the upgrade tier. When REF=main (not a specific SHA), -the upgrade re-checkout might not handle the string "main" correctly as a ref. - -Check: how does the harness resolve `head_ref = "main"` in the upgrade tier? -The upgrade should do `git checkout main` or `git checkout SHA`. -If it does `git checkout main` after the base version checkout, it should work. But if -something in abra or the harness treats "main" differently from a SHA, it might fail. - -Both blockers must be fixed before M2 can be claimed. - -— Adversary diff --git a/runner/harness/generic.py b/runner/harness/generic.py index fcdfcfb..b5a18fc 100644 --- a/runner/harness/generic.py +++ b/runner/harness/generic.py @@ -260,6 +260,11 @@ def perform_upgrade( for k, v in upgrade_env.items(): print(f" upgrade-env: {k}={v}", flush=True) abra.env_set(domain, k, v) + if upgrade_env: + # UPGRADE_EXTRA_ENV may introduce new SECRET_* vars (e.g. lfs_jwt_secret for the LFS overlay + # landing in a PR). Generate any missing secrets now — abra secret generate is idempotent + # (skips secrets that already exist) — before the chaos redeploy references them. 
+ abra.secret_generate(domain) # HQ1: warm the NEW-version image set before the chaos redeploy (the head_ref checkout's pinned # tags) so a pull failure is a clear pre-deploy error and convergence isn't pull-bound. lifecycle.prepull_images(recipe, domain) diff --git a/runner/run_recipe_ci.py b/runner/run_recipe_ci.py index 8fa654f..e0cca2b 100644 --- a/runner/run_recipe_ci.py +++ b/runner/run_recipe_ci.py @@ -926,9 +926,10 @@ def main() -> int: setup_run_abra_dir() fetch_recipe(recipe, ref, src) # The PR-head commit the upgrade tier re-checks out for the chaos redeploy to the code under test - # (HC1). Prefer the explicit PR head sha ($REF) — robust + exact; fall back to the recipe checkout - # HEAD (the catalogue current) for a non-PR `!testme`. Captured before any version-tag checkout. - head_ref = ref or lifecycle.recipe_head_commit(recipe) + # (HC1). Always resolve to the actual git SHA — `ref` may be a branch name ("main") which fails + # the HC1 commit-identity check (chaos-version is always a SHA). recipe_head_commit runs + # git-rev-parse HEAD, which returns the SHA of wherever the fetch/checkout landed. + head_ref = lifecycle.recipe_head_commit(recipe) repo_local = snapshot_recipe_tests(recipe) meta = meta_mod.load(recipe) diff --git a/tests/gitea/ops.py b/tests/gitea/ops.py index f82b50d..7d8b1ec 100644 --- a/tests/gitea/ops.py +++ b/tests/gitea/ops.py @@ -172,6 +172,11 @@ def pre_install(ctx): # Wait explicitly so the API is fully ready (READY_PROBE guards this at the harness level, but # belt-and-suspenders here in case this op is called in isolation). generic.assert_serving(ctx.domain, ctx.meta) + # Fresh install wiped the DB. Any creds file from a previous run on this domain is stale + # (user no longer exists in the new DB). Remove it so _ensure_admin creates a fresh user. 
+ stale = _creds_path(ctx.domain) + if os.path.exists(stale): + os.remove(stale) user, password = _ensure_admin(ctx.domain) ok = _create_marker_repo(ctx.domain, user, password) assert ok, f"pre_install: could not create {_MARKER_REPO} repo on {ctx.domain}" diff --git a/tests/gitea/recipe_meta.py b/tests/gitea/recipe_meta.py index cc9c020..d46bf87 100644 --- a/tests/gitea/recipe_meta.py +++ b/tests/gitea/recipe_meta.py @@ -47,6 +47,19 @@ def _lfs_enabled(): return _os.path.exists(lfs_overlay) and _os.environ.get("RECIPE", "") == "gitea" +def UPGRADE_EXTRA_ENV(ctx): + """Applied after PR-head checkout: add compose.lfs.yml to COMPOSE_FILE when LFS lands in the PR + (e.g. lfs-plain-gitea PR #1). At this point compose.lfs.yml has already been checked out. + The harness generates any new secrets (lfs_jwt_secret) before the chaos redeploy.""" + if not _lfs_enabled(): + return {} + return { + "COMPOSE_FILE": "compose.yml:compose.sqlite3.yml:compose.lfs.yml", + "GITEA_LFS_START_SERVER": "true", + "SECRET_LFS_JWT_SECRET_VERSION": "v1", + } + + def EXTRA_ENV(ctx): lfs = _lfs_enabled() compose_file = "compose.yml:compose.sqlite3.yml"