From 7563d4722807abb73f8912efd0f0aa78487ecb19 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Wed, 27 May 2026 17:57:23 +0100 Subject: [PATCH] 1c/W4: serialize abra reconcilers (proxy->drone->bridge->dashboard->backupbot) On a FRESH host the reconcile oneshots ran abra concurrently against an uninitialised ~/.abra and raced on catalogue/recipe init, leaving deploy-proxy/deploy-drone failed after a blank-VM rebuild (observed on the W4 throwaway). Ordering-only `after` chain serializes them so a single nixos-rebuild switch converges. Logically correct too (all need the proxy/abra state first). Co-Authored-By: Claude Opus 4.7 (1M context) --- modules/backupbot.nix | 4 +++- modules/bridge.nix | 5 ++++- modules/dashboard.nix | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/backupbot.nix b/modules/backupbot.nix index a6f6e31..4533671 100644 --- a/modules/backupbot.nix +++ b/modules/backupbot.nix @@ -39,7 +39,9 @@ in { systemd.services.deploy-backupbot = { description = "Reconcile backup-bot-two (volume backups via restic) via abra"; - after = [ "swarm-init.service" "docker.service" "network-online.target" ]; + # Serialized last (chain proxy→drone→bridge→dashboard→backupbot) to avoid the concurrent abra-init + # race on a fresh host (see bridge.nix). Ordering-only; deploy-proxy.service is also listed explicitly, although the chain already orders it first. 
+ after = [ "deploy-dashboard.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; requires = [ "swarm-init.service" "docker.service" ]; wants = [ "network-online.target" ]; wantedBy = [ "multi-user.target" ]; diff --git a/modules/bridge.nix b/modules/bridge.nix index 5a94aba..d3686cb 100644 --- a/modules/bridge.nix +++ b/modules/bridge.nix @@ -104,7 +104,10 @@ in { systemd.services.deploy-bridge = { description = "Reconcile the cc-ci comment-bridge (!testme webhook) swarm service"; - after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; + # Serialized after deploy-drone (chain proxy→drone→bridge→dashboard→backupbot): on a FRESH host the + # abra-driven reconcilers otherwise run concurrently against an uninitialised ~/.abra and race on + # catalogue/recipe init, leaving units failed after a blank-VM rebuild. Ordering-only `after` fixes it. + after = [ "deploy-drone.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; requires = [ "swarm-init.service" "docker.service" ]; wants = [ "network-online.target" ]; wantedBy = [ "multi-user.target" ]; diff --git a/modules/dashboard.nix b/modules/dashboard.nix index 2f4f7bd..52985a0 100644 --- a/modules/dashboard.nix +++ b/modules/dashboard.nix @@ -79,7 +79,9 @@ in { systemd.services.deploy-dashboard = { description = "Reconcile the cc-ci results dashboard (overview + badges) swarm service"; - after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; + # Serialized after deploy-bridge (chain proxy→drone→bridge→dashboard→backupbot) to avoid the + # concurrent abra-init race on a fresh host (see bridge.nix). Ordering-only. 
+ after = [ "deploy-bridge.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; requires = [ "swarm-init.service" "docker.service" ]; wants = [ "network-online.target" ]; wantedBy = [ "multi-user.target" ];