Files
cc-ci/nix/modules/swarm.nix
autonomic-bot e73e4393ed fix(2w): docker autoPrune drop --volumes (was failing daily + would wipe warm vols) [WC8]
The autoPrune flags passed '--volumes' WITH '--filter until=24h', which docker
rejects ('until filter not supported with --volumes') — so docker-prune.service
FAILED every day (system 'degraded') and never reclaimed anything (a cause of the
disk creeping to 96%). Worse, '--volumes' prunes volumes with no running
container — which would DELETE Phase-2w DATA-WARM canonical volumes (undeployed by
design). Removed '--volumes': now prunes images/containers/networks/build-cache
older than 24h only; warm volumes survive and are pruned deliberately by the warm
reconcilers (WC8).

Verified: nixos-rebuild switch -> docker-prune.service runs clean, system
'running' (0 failed units), warm keycloak still 200.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 01:26:24 +01:00

52 lines
2.2 KiB
Nix

# Docker + single-node Swarm — the deploy target for recipes under test (M1).
# Traefik (modules/traefik.nix) and abra layer on top; recipes attach to the `proxy`
# overlay network, exactly as a real Co-op Cloud host expects.
{ pkgs, ... }:

{
  # Docker engine: the runtime every recipe under test is deployed onto.
  virtualisation.docker.enable = true;

  # Daily disk reclaim. The cc-ci root is only ~28 GiB and per-run images churn
  # quickly, so sweep images, containers, networks and build cache older than
  # 24h. Volumes are deliberately excluded from this sweep:
  #   * docker refuses `--volumes` together with `--filter until=`; with both
  #     set the prune unit failed every day, left the system degraded and
  #     reclaimed nothing (which is how the disk reached 96%).
  #   * Phase 2w parks DATA-WARM canonical volumes with no container attached
  #     (undeployed by design); `prune --volumes` would delete that known-good
  #     data. Those volumes are cleaned up on purpose by the warm reconcilers
  #     (WC8), never by this blanket sweep.
  # NOTE(review): docker's `until` filter appears to key on creation time, not
  # last use, so a recently pulled but older-built unused image may also be
  # swept — confirm against the docker prune documentation.
  virtualisation.docker.autoPrune = {
    enable = true;
    dates = "daily";
    flags = [
      "--all"
      "--filter"
      "until=24h"
    ];
  };

  # Docker CLI on the system PATH.
  environment.systemPackages = [ pkgs.docker ];

  # HTTP/HTTPS arrive from the gateway over the public interface (enp5s0) and
  # are published by the coop-cloud traefik stack, which is deployed via abra
  # (see docs/install.md).
  networking.firewall.allowedTCPPorts = [
    80
    443
  ];

  # One-shot unit that makes this host a single-node swarm and ensures the
  # shared `proxy` overlay network exists. Each step checks current state
  # first, so running it again on every boot or rebuild is harmless.
  systemd.services.swarm-init = {
    description = "Initialise single-node Docker Swarm + proxy overlay network";

    # Hard dependency on the daemon, and ordered after it.
    wantedBy = [ "multi-user.target" ];
    requires = [ "docker.service" ];
    after = [ "docker.service" ];

    path = [ pkgs.docker ];

    # oneshot + RemainAfterExit: the unit stays reported as active once the
    # script has finished successfully.
    serviceConfig.Type = "oneshot";
    serviceConfig.RemainAfterExit = true;

    script = ''
      set -eu

      # Anything other than "active" (including a failed `docker info`, which
      # is mapped to "error") triggers a swarm init. Advertising on 127.0.0.1
      # is sufficient for a lone node.
      swarm_state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || echo error)"
      case "$swarm_state" in
        active) ;;
        *) docker swarm init --advertise-addr 127.0.0.1 ;;
      esac

      # Create the attachable overlay network only when it is missing.
      docker network inspect proxy >/dev/null 2>&1 \
        || docker network create --driver overlay --attachable proxy
    '';
  };
}