diff --git a/nix/modules/drone-runner.nix b/nix/modules/drone-runner.nix
index d7b5005..b6f91b4 100644
--- a/nix/modules/drone-runner.nix
+++ b/nix/modules/drone-runner.nix
@@ -9,13 +9,18 @@
 let
   # MAX_TESTS (plan §4.2/§4.3 resource safety): max CI builds the exec runner runs at once. Drone
   # queues the rest in its native pending-build queue (no custom queue). THE concurrency cap that
-  # bounds how many test apps can be live at once — kept LOW (1) on this single 28GiB node since
-  # recipes are heavy (immich/matrix large volumes). With capacity=1 there is never a concurrent
-  # in-flight run, so the run-start janitor can safely reap *any* orphan (a SIGKILL'd build runs no
-  # teardown) and the "at most MAX_TESTS apps live" bound holds exactly. Raise to 2 only if the node
-  # is shown to handle two light recipes at once (then the janitor MUST stay age-based to avoid
-  # reaping a concurrent run — see DECISIONS.md "Resource safety").
-  maxTests = "1";
+  # bounds how many test apps can be live at once.
+  #
+  # Raised to 2 (operator request 2026-06-09) so two recipes can be tested in parallel (e.g. immich
+  # and plausible under active development at once). Verified safe on the current node (Hetzner cpx22,
+  # ~7.6 GiB / 4 vCPU — NOTE: smaller than the original 28 GiB this was written for): a full immich CI
+  # stack measured ~1 GiB (server+ML+pg+redis) with multiple GiB free, so two concurrent recipes fit.
+  # The concurrency PRECONDITION holds: the run-start janitor is age-based (default 2h) + run-app-name
+  # scoped, so it never reaps a concurrent in-flight run (harness.lifecycle.janitor). TRADE-OFF: with
+  # capacity>1 a SIGKILL'd build (no teardown) leaves an orphan the run-start sweep can't reap
+  # immediately (it might be a live run) — bounded instead by the 2h janitor + the /upgrade-all
+  # start/end reap + sweep-orphans. Revert to "1" if OOM / disk-I/O contention is observed under load.
+  maxTests = "2";
 in
 {
   # Drone ships under the Polyform Small Business license (nixpkgs marks it unfree);