absorb cc-ci-orchestrator NixOS config into nix/

Merging recipe-maintainers/cc-ci-orchestrator (the VM NixOS config repo)
into this repo as nix/ — the next step toward consolidating the two
orchestrator repos into a single cc-ci-orchestrator.

The source repo will be renamed to archived-cc-ci-orchestrator on Gitea.
This repo will be renamed cc-ci-orchestrator.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
autonomic-bot
2026-05-31 00:01:14 +00:00
parent cd8ac99ed9
commit 8ef141f538
2 changed files with 154 additions and 0 deletions

30
nix/README.md Normal file
View File

@ -0,0 +1,30 @@
# cc-ci-orchestrator
NixOS config for the **`cc-ci-orchestrator`** Incus VM (b1, project `terraform-ci`, tailnet
`100.116.55.106`) — the reboot-resilient host for the cc-ci Builder/Adversary loops + watchdog +
orchestrator session, moved off the unstable 905 MiB Pi.
See `cc-ci-plan/plan-orchestrator-migration.md` for the full migration.
## Files
- `configuration.nix` — the VM's NixOS config (channel-based, `nixos-24.11`). Deployed to
`/etc/nixos/configuration.nix` on the VM. Provides: nix-ld (so the standalone Claude Code Bun binary
runs), tmux/git/python/jq + tools, a 4 GB swapfile, direct ssh to cc-ci (the VM is a tailnet peer —
no SOCKS proxy needed, unlike the Pi), an idempotent `claude-install` oneshot, and the
`cc-ci-loops` supervisor service (defined, **enabled in Phase D** once the workspace is staged).
## Deploy (until this is wired to a flake/auto-pull)
```
# copy configuration.nix to /etc/nixos/configuration.nix on the VM, then:
ssh cc-ci-orchestrator 'nixos-rebuild switch' # or run it detached on the VM: see below
```
Over the (currently flaky) Pi→VM link, run the rebuild **detached** on the VM so an ssh/proxy drop
doesn't abort it, e.g. `systemd-run --unit=orch-rebuild --collect nixos-rebuild switch` then poll
`journalctl -u orch-rebuild`.
## Status
- Phase A: VM created (2 GB RAM / 2 vCPU / 30 GB disk), on tailnet, ssh-able. ✅
- Phase B: this config (DRAFT) — nix-ld/claude validation pending on the VM.
- Operator step pending (Phase C): `claude auth login` on the VM (device-code; can't be scripted).
- Secrets to stage (Phase C, out-of-band): `/srv/cc-ci/.testenv`, `~/.ssh/cc-ci-root-ed25519`,
Incus mTLS certs, the sops master age key.

124
nix/configuration.nix Normal file
View File

@ -0,0 +1,124 @@
# cc-ci-orchestrator VM — NixOS config (channel-based: nixos-24.11; deployed to /etc/nixos/configuration.nix)
#
# Purpose: a reboot-resilient host for the cc-ci Builder/Adversary loops + watchdog + the orchestrator
# session, moved off the unstable 905 MiB Pi. See plan-orchestrator-migration.md.
#
# STATUS: DRAFT (Phase B). The nix-ld + claude-install bits need on-VM validation (the standalone
# Claude Code is a Bun ELF binary; NixOS needs nix-ld to run a foreign dynamic binary). The
# cc-ci-loops supervisor service is defined and wanted by multi-user.target; its script runs
# launch.sh only when /srv/cc-ci/cc-ci-plan/launch.sh is present and executable (workspace staged in Phase C/D).
{ config, pkgs, lib, modulesPath, ... }:
{
# Incus VM guest module, resolved from the NixOS module tree (modulesPath).
imports = [
  "${modulesPath}/virtualisation/incus-virtual-machine.nix"
];

# --- base (mirrors the incus-base-vm) ---
# Incus guest agent, required for `incus exec` into this VM.
virtualisation.incus.agent.enable = true;

# cloud-init handles first-boot provisioning, including network configuration.
services.cloud-init = {
  enable = true;
  network.enable = true;
};

# sshd with root login permitted.
services.openssh = {
  enable = true;
  settings.PermitRootLogin = "yes";
};

networking = {
  useDHCP = true;
  nameservers = [ "1.1.1.1" "8.8.8.8" ];
  # Traffic arriving on tailscale0 is trusted; on other interfaces only TCP 22 is opened.
  firewall = {
    enable = true;
    trustedInterfaces = [ "tailscale0" ];
    allowedTCPPorts = [ 22 ];
  };
};

nix.settings.experimental-features = [ "nix-command" "flakes" ];

# Matches the nixos-24.11 channel this config is written against.
system.stateVersion = "24.11";
# --- tailscale ---
# Auto-auth from /etc/ts-auth-key; the tailnet hostname comes from /etc/ts-hostname. Both files are
# written by cloud-init.
#
# /etc/ts-hostname is read at *evaluation* time (on the machine running nixos-rebuild), so a missing
# file would otherwise abort the whole evaluation. When it is absent we emit an evaluation warning
# and omit --hostname instead of failing.
services.tailscale = {
  enable = true;
  authKeyFile = "/etc/ts-auth-key";
  extraUpFlags =
    let
      hostnameFile = /etc/ts-hostname;
      hostnameFlags =
        if builtins.pathExists hostnameFile
        # lib.fileContents = readFile + strip one trailing "\n".
        then [ "--hostname=${lib.fileContents hostnameFile}" ]
        else lib.warn "cc-ci-orchestrator: /etc/ts-hostname not found at evaluation time; omitting tailscale --hostname" [ ];
    in
    hostnameFlags ++ [ "--ssh" ]; # --ssh: allow tailscale-ssh as a fallback path
};
# --- swap (the Pi OOM lesson) ---
# 2 GB of RAM is tight for 3 concurrent claude sessions, so a 4 GB on-disk swapfile acts as a real
# overflow tier. zram is in-RAM and would not add capacity.
swapDevices = [
  {
    device = "/swapfile";
    size = 4096; # 4 GB swapfile
  }
];
# --- nix-ld ---
# The standalone Claude Code CLI is a foreign dynamically-linked ELF (Bun); nix-ld lets it run on
# NixOS. The list below is the set of shared libraries made available to such binaries.
programs.nix-ld = {
  enable = true;
  libraries = [
    pkgs.stdenv.cc.cc.lib # libstdc++ / libgcc_s
    pkgs.zlib
    pkgs.openssl
    pkgs.curl
    pkgs.glibc
  ];
};
# --- system-wide packages used by the loops, launch.sh and the orchestrator ---
environment.systemPackages = [
  # core tooling
  pkgs.git
  pkgs.tmux
  pkgs.python3
  pkgs.jq
  pkgs.curl
  pkgs.cacert

  # text / file utilities
  pkgs.gnused
  pkgs.gawk
  pkgs.coreutils
  pkgs.gnugrep
  pkgs.findutils
  pkgs.util-linux

  # networking: ssh client plus nettools.
  # NOTE(review): the previous comment listed `nc` here, but nc normally comes from a netcat
  # package rather than nettools — confirm whether the loops still need nc on this VM.
  pkgs.nettools
  pkgs.openssh

  # The docker CLI is not required (deploys run on cc-ci); kept because it is handy for probes.
  pkgs.docker-client
];
# --- loops user ---
# Non-root account for running claude (--dangerously-skip-permissions is blocked for root).
users.users.loops = {
  isNormalUser = true;
  home = "/home/loops";
  shell = pkgs.bash;
  extraGroups = [ "wheel" ]; # wheel membership grants sudo
};

security.sudo = {
  # Members of wheel may sudo without a password.
  wheelNeedsPassword = false;

  # Explicit passwordless rule for loops on every command. This overlaps with the wheel setting
  # above (loops is in wheel); both are kept.
  extraRules = [
    {
      users = [ "loops" ];
      commands = [
        {
          command = "ALL";
          options = [ "NOPASSWD" ];
        }
      ];
    }
  ];
};
# --- PATH override ---
# Puts /root/.local/bin (where the standalone claude binary lives) first on PATH. mkForce replaces
# any PATH value other modules would contribute.
# NOTE(review): environment.variables is not root-specific — confirm that forcing this PATH for
# other users (e.g. loops) is intended.
environment.variables.PATH = lib.mkForce (lib.concatStringsSep ":" [
  "/root/.local/bin"
  "/run/current-system/sw/bin"
  "/run/wrappers/bin"
  "/usr/bin"
  "/bin"
]);
# --- root ssh config ---
# Activation script that (re)writes /root/.ssh/config so root reaches cc-ci DIRECTLY at its tailnet
# address 100.90.116.4 (this VM is a tailnet peer, so NO SOCKS proxy is needed — unlike the Pi).
# The aliases cc-ci, cc-nix-test and the bare IP all map to the same host entry.
# The file is overwritten in full each time the script runs. The private key it references
# (/root/.ssh/cc-ci-root-ed25519) is NOT created here; it is staged separately.
system.activationScripts.ccciSshConfig = ''
mkdir -p /root/.ssh && chmod 700 /root/.ssh
cat > /root/.ssh/config <<'SSHCFG'
Host cc-ci cc-nix-test 100.90.116.4
HostName 100.90.116.4
User root
IdentityFile /root/.ssh/cc-ci-root-ed25519
IdentitiesOnly yes
StrictHostKeyChecking accept-new
ServerAliveInterval 30
SSHCFG
chmod 600 /root/.ssh/config
'';
# --- claude-install: idempotent oneshot ---
# Fetches the standalone Claude Code CLI when /root/.local/bin/claude is not already executable.
# The downloaded binary runs via nix-ld. Auth is a one-time operator step (`claude auth login`);
# see the migration plan.
#
# Notes:
#  - HOME is set explicitly: systemd does not export HOME to system units that have no User=,
#    and the sibling cc-ci-loops unit sets it the same way.
#  - There is no ordering on "nix-ld.service": nix-ld is not a systemd unit, so that After=
#    entry had no effect.
#  - Install failure is deliberately non-fatal (best-effort); the unit still ends up active.
systemd.services.claude-install = {
  description = "Install the standalone Claude Code CLI if missing (idempotent)";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
  };
  environment = { HOME = "/root"; };
  path = [ pkgs.curl pkgs.bash pkgs.coreutils pkgs.gnutar pkgs.gzip pkgs.unzip ];
  script = ''
    # Without pipefail a failed download (curl) is masked by bash exiting 0 on empty input,
    # so the failure message below would never be printed.
    set -o pipefail
    if [ ! -x /root/.local/bin/claude ]; then
      echo "installing standalone Claude Code CLI..."
      curl -fsSL https://claude.ai/install.sh | bash || echo "claude install failed (retry next activation)"
    fi
  '';
};
# --- cc-ci-loops supervisor (ENABLED via wantedBy = multi-user.target) ---
# The reboot-resilience fix: runs `launch.sh start` on every boot once the workspace
# (/srv/cc-ci: launch.sh, plan, prompts, the loop clones, secrets) is staged. Mirrors the Pi's
# cc-ci-loops.service.
#
# Behaviour:
#  - workspace not staged (launch.sh missing or not executable): print a notice and succeed.
#  - launch.sh present: run it; a non-zero exit now fails the unit so the failure is visible in
#    `systemctl status`, instead of being reported as "workspace not staged".
systemd.services.cc-ci-loops = {
  description = "cc-ci Builder/Adversary loops + watchdog (launch.sh start, RESUME_PHASE)";
  wantedBy = [ "multi-user.target" ]; # enabled: workspace staged (Phase C/D 2026-05-30)
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "root";
    # Leading "-": a missing /srv/cc-ci is not fatal, so the "not staged yet" branch below can
    # actually run instead of systemd failing the unit before the script starts.
    WorkingDirectory = "-/srv/cc-ci";
  };
  environment = {
    RESUME_PHASE = "1";
    HOME = "/root";
  };
  path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
  script = ''
    # if/else rather than `A && B || C`: with the latter, a failing launch.sh also triggered the
    # "workspace not staged" message and the unit still exited 0.
    if [ -x /srv/cc-ci/cc-ci-plan/launch.sh ]; then
      /srv/cc-ci/cc-ci-plan/launch.sh start
    else
      echo "workspace not staged yet (/srv/cc-ci/cc-ci-plan/launch.sh missing) skipping"
    fi
  '';
};
}