diff --git a/.drone.yml b/.drone.yml index 9908c8b..d7c3674 100644 --- a/.drone.yml +++ b/.drone.yml @@ -53,8 +53,8 @@ trigger: event: - custom -concurrency: - limit: 2 +# NB deliberately NO `concurrency.limit` here: DRONE_RUNNER_CAPACITY (nix/modules/drone-runner.nix +# maxTests) is the single concurrency knob (P4 — two knobs in two files drifted). steps: - name: ci diff --git a/nix/modules/drone-runner.nix b/nix/modules/drone-runner.nix index b6f91b4..3c14f01 100644 --- a/nix/modules/drone-runner.nix +++ b/nix/modules/drone-runner.nix @@ -8,18 +8,18 @@ { pkgs, config, lib, ... }: let # MAX_TESTS (plan §4.2/§4.3 resource safety): max CI builds the exec runner runs at once. Drone - # queues the rest in its native pending-build queue (no custom queue). THE concurrency cap that - # bounds how many test apps can be live at once. + # queues the rest in its native pending-build queue (no custom queue). THE SINGLE concurrency + # knob — nothing else caps recipe-ci parallelism (the .drone.yml concurrency.limit was removed: + # one knob, one place). Bounds how many test apps can be live at once. # # Raised to 2 (operator request 2026-06-09) so two recipes can be tested in parallel (e.g. immich # and plausible under active development at once). Verified safe on the current node (Hetzner cpx22, # ~7.6 GiB / 4 vCPU — NOTE: smaller than the original 28 GiB this was written for): a full immich CI # stack measured ~1 GiB (server+ML+pg+redis) with multiple GiB free, so two concurrent recipes fit. - # The concurrency PRECONDITION holds: the run-start janitor is age-based (default 2h) + run-app-name - # scoped, so it never reaps a concurrent in-flight run (harness.lifecycle.janitor). TRADE-OFF: with - # capacity>1 a SIGKILL'd build (no teardown) leaves an orphan the run-start sweep can't reap - # immediately (it might be a live run) — bounded instead by the 2h janitor + the /upgrade-all - # start/end reap + sweep-orphans. Revert to "1" if OOM / disk-I/O contention is observed under load. + # Concurrent-run safety is the harness's job at ANY capacity (docs/concurrency.md): per-run + # ABRA_DIR recipe trees, per-app-domain flocks, and a flock-probe janitor that reaps a crashed + # build's orphan immediately (held lock = live run, never touched). Revert to "1" if OOM / + # disk-I/O contention is observed under load. maxTests = "2"; in {