All checks were successful
continuous-integration/drone/push Build is passing
Remove concurrency.limit from the recipe-ci pipeline (.drone.yml): it duplicated DRONE_RUNNER_CAPACITY (nix/modules/drone-runner.nix maxTests) and the two had to be kept in step by hand (docs/concurrency.md §8.6). maxTests comment updated to state it is the single knob and to describe the new safety model.
59 lines
3.2 KiB
Nix
59 lines
3.2 KiB
Nix
# Drone exec runner (M2). Runs on cc-ci itself (not in a container) so CI pipelines can drive
# host `abra` to deploy real recipes onto the swarm (plan §4.2, §8: exec runner). The Drone
# *server* is deployed separately via abra (scripts/deploy-drone.sh) as a swarm service.
#
# The exec runner is drone-runner-exec (the only exec runner upstream ever shipped; see
# DECISIONS.md "CI engine"). It connects to the server over RPC at drone.ci.commoninternet.net,
# sharing DRONE_RPC_SECRET with the server via the sops-rendered EnvironmentFile.
{ pkgs, config, lib, ... }:

let
  # MAX_TESTS (plan §4.2/§4.3 resource safety): upper bound on the number of CI builds the exec
  # runner executes simultaneously. Anything beyond that waits in Drone's own pending-build
  # queue; there is no custom queue. This is THE SINGLE concurrency knob: the .drone.yml
  # concurrency.limit was removed, so nothing else caps recipe-ci parallelism (one knob, one
  # place). It bounds how many test apps can be live at once.
  #
  # History / rationale for the current value:
  #   * Raised to 2 on operator request (2026-06-09) so that two recipes can be tested in
  #     parallel, e.g. immich and plausible while both are under active development.
  #   * Verified safe on the current node (Hetzner cpx22, ~7.6 GiB / 4 vCPU — NOTE: smaller
  #     than the 28 GiB machine this was originally written for). A full immich CI stack
  #     (server+ML+pg+redis) measured ~1 GiB with multiple GiB still free, so two concurrent
  #     recipes fit.
  #   * Safety of concurrent runs is the harness's responsibility at ANY capacity
  #     (docs/concurrency.md): per-run ABRA_DIR recipe trees, per-app-domain flocks, and a
  #     flock-probe janitor that reaps a crashed build's orphan immediately (a held lock means
  #     a live run and is never touched).
  #   * Revert to "1" if OOM or disk-I/O contention is observed under load.
  maxTests = "2";

  # Names of the unfree packages this module is permitted to pull in.
  unfreeAllowList = [ "drone-runner-exec" ];

  # The runner talks to the Drone server over the network, so the unit both pulls in and is
  # ordered after this target.
  networkOnline = [ "network-online.target" ];

  # Environment handed to the runner process (DRONE_RPC_SECRET is NOT here; it arrives via the
  # EnvironmentFile configured in serviceConfig below).
  runnerEnvironment = {
    # Where and how to reach the Drone server's RPC endpoint.
    DRONE_RPC_HOST = "drone.ci.commoninternet.net";
    DRONE_RPC_PROTO = "https";

    DRONE_RUNNER_NAME = "cc-ci-exec";
    # MAX_TESTS concurrency cap; see the maxTests binding above.
    DRONE_RUNNER_CAPACITY = maxTests;
    # The exec runner needs a writable root directory for build workspaces.
    DRONE_RUNNER_ROOT = "/var/lib/drone-runner";

    # Pipeline commands shell out to abra/docker/git, all of which live in the system path.
    # mkForce makes this definition take priority over any other definition of PATH.
    PATH = lib.mkForce "/run/current-system/sw/bin:/run/wrappers/bin";
  };
in
{
  # Drone is distributed under the Polyform Small Business license, which nixpkgs marks as
  # unfree. Our internal CI use is permitted, so allow exactly the listed package(s) and
  # nothing else.
  nixpkgs.config.allowUnfreePredicate = pkg:
    builtins.elem (lib.getName pkg) unfreeAllowList;

  systemd.services.drone-runner-exec = {
    description = "Drone exec runner (drives host abra/swarm)";

    wantedBy = [ "multi-user.target" ];
    wants = networkOnline;
    after = networkOnline;

    environment = runnerEnvironment;

    serviceConfig = {
      ExecStart = "${pkgs.drone-runner-exec}/bin/drone-runner-exec";

      # Supplies DRONE_RPC_SECRET from the sops-rendered env file (the same secret the server
      # uses).
      EnvironmentFile = config.sops.templates."drone-runner.env".path;

      # Pipelines run as this service's user. Root is required to drive docker/abra and to
      # read the abra config under /root/.abra (same as manual deploys).
      User = "root";

      StateDirectory = "drone-runner";

      # Always bring the runner back, waiting 5 seconds between attempts.
      Restart = "always";
      RestartSec = "5s";
    };
  };
}