From 3dd6eb5c21a69f559b5d9d359ba970327d3559dd Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Tue, 9 Jun 2026 18:20:45 +0000 Subject: [PATCH] chore(runner): raise DRONE_RUNNER_CAPACITY 1 -> 2 for parallel recipe CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets two recipes be tested in parallel (operator request — immich + plausible under active dev at once). Safe on the current node: measured a full immich CI stack at ~1GiB with multiple GiB free on the 7.6GiB cpx22, and the janitor is already age-based + run-app-scoped so it never reaps a concurrent in-flight run. Updates the stale '28GiB node' comment. Revert to 1 if OOM/IO contention shows up. Co-Authored-By: Claude Opus 4.8 --- nix/modules/drone-runner.nix | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/nix/modules/drone-runner.nix b/nix/modules/drone-runner.nix index d7b5005..b6f91b4 100644 --- a/nix/modules/drone-runner.nix +++ b/nix/modules/drone-runner.nix @@ -9,13 +9,18 @@ let # MAX_TESTS (plan §4.2/§4.3 resource safety): max CI builds the exec runner runs at once. Drone # queues the rest in its native pending-build queue (no custom queue). THE concurrency cap that - # bounds how many test apps can be live at once — kept LOW (1) on this single 28GiB node since - # recipes are heavy (immich/matrix large volumes). With capacity=1 there is never a concurrent - # in-flight run, so the run-start janitor can safely reap *any* orphan (a SIGKILL'd build runs no - # teardown) and the "at most MAX_TESTS apps live" bound holds exactly. Raise to 2 only if the node - # is shown to handle two light recipes at once (then the janitor MUST stay age-based to avoid - # reaping a concurrent run — see DECISIONS.md "Resource safety"). - maxTests = "1"; + # bounds how many test apps can be live at once. + # + # Raised to 2 (operator request 2026-06-09) so two recipes can be tested in parallel (e.g. immich + # and plausible under active development at once). Verified safe on the current node (Hetzner cpx22, + # ~7.6 GiB / 4 vCPU — NOTE: smaller than the original 28 GiB this was written for): a full immich CI + # stack measured ~1 GiB (server+ML+pg+redis) with multiple GiB free, so two concurrent recipes fit. + # The concurrency PRECONDITION holds: the run-start janitor is age-based (default 2h) + run-app-name + # scoped, so it never reaps a concurrent in-flight run (harness.lifecycle.janitor). TRADE-OFF: with + # capacity>1 a SIGKILL'd build (no teardown) leaves an orphan the run-start sweep can't reap + # immediately (it might be a live run) — bounded instead by the 2h janitor + the /upgrade-all + # start/end reap + sweep-orphans. Revert to "1" if OOM / disk-I/O contention is observed under load. + maxTests = "2"; in { # Drone ships under the Polyform Small Business license (nixpkgs marks it unfree); -- 2.49.0