From 8ef141f5380f083c1de2ffa1d4ded15f855efc58 Mon Sep 17 00:00:00 2001 From: autonomic-bot Date: Sun, 31 May 2026 00:01:14 +0000 Subject: [PATCH] absorb cc-ci-orchestrator NixOS config into nix/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merging recipe-maintainers/cc-ci-orchestrator (the VM NixOS config repo) into this repo as nix/ — the next step toward consolidating the two orchestrator repos into a single cc-ci-orchestrator. The source repo will be renamed to archived-cc-ci-orchestrator on Gitea. This repo will be renamed cc-ci-orchestrator. Co-Authored-By: Claude Sonnet 4.6 --- nix/README.md | 30 ++++++++++ nix/configuration.nix | 124 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 nix/README.md create mode 100644 nix/configuration.nix diff --git a/nix/README.md b/nix/README.md new file mode 100644 index 0000000..aeb89de --- /dev/null +++ b/nix/README.md @@ -0,0 +1,30 @@ +# cc-ci-orchestrator + +NixOS config for the **`cc-ci-orchestrator`** Incus VM (b1, project `terraform-ci`, tailnet +`100.116.55.106`) — the reboot-resilient host for the cc-ci Builder/Adversary loops + watchdog + +orchestrator session, moved off the unstable 905 MiB Pi. + +See `cc-ci-plan/plan-orchestrator-migration.md` for the full migration. + +## Files +- `configuration.nix` — the VM's NixOS config (channel-based, `nixos-24.11`). Deployed to + `/etc/nixos/configuration.nix` on the VM. Provides: nix-ld (so the standalone Claude Code Bun binary + runs), tmux/git/python/jq + tools, a 4 GB swapfile, direct ssh to cc-ci (the VM is a tailnet peer — + no SOCKS proxy needed, unlike the Pi), an idempotent `claude-install` oneshot, and the + `cc-ci-loops` supervisor service (defined, **enabled in Phase D** once the workspace is staged). 
+ +## Deploy (until this is wired to a flake/auto-pull) +``` +# copy configuration.nix to the VM, then: +ssh cc-ci-orchestrator 'nixos-rebuild switch' # or run detached: see below +``` +Over the (currently flaky) Pi→VM link, run the rebuild **detached** on the VM so an ssh/proxy drop +doesn't abort it, e.g. `systemd-run --unit=orch-rebuild --collect nixos-rebuild switch` then poll +`journalctl -u orch-rebuild`. + +## Status +- Phase A: VM created (2 GB / 2 vCPU / 30 GB), on tailnet, ssh-able. ✅ +- Phase B: this config (DRAFT) — nix-ld/claude validation pending on the VM. +- Operator step pending (Phase C): `claude auth login` on the VM (device-code; can't be scripted). +- Secrets to stage (Phase C, out-of-band): `/srv/cc-ci/.testenv`, `~/.ssh/cc-ci-root-ed25519`, + Incus mTLS certs, the sops master age key. diff --git a/nix/configuration.nix b/nix/configuration.nix new file mode 100644 index 0000000..8e3b84a --- /dev/null +++ b/nix/configuration.nix @@ -0,0 +1,124 @@ +# cc-ci-orchestrator VM — NixOS config (channel-based: nixos-24.11; deployed to /etc/nixos/configuration.nix) +# +# Purpose: a reboot-resilient host for the cc-ci Builder/Adversary loops + watchdog + the orchestrator +# session, moved off the unstable 905 MiB Pi. See plan-orchestrator-migration.md. +# +# STATUS: DRAFT (Phase B). The nix-ld + claude-install bits need on-VM validation (the standalone +# Claude Code is a Bun ELF binary; NixOS needs nix-ld to run a foreign dynamic binary). The +# cc-ci-loops supervisor service is defined and enabled (wantedBy multi-user.target); it only logs a skip message until the workspace is staged (Phase C/D). +{ config, pkgs, lib, modulesPath, ... 
}:
+{
+  # Base image module for NixOS guests running as Incus virtual machines.
+  imports = [
+    "${modulesPath}/virtualisation/incus-virtual-machine.nix"
+  ];
+
+  # --- base (mirrors the incus-base-vm) ---
+  virtualisation.incus.agent.enable = true; # for `incus exec`
+  services.cloud-init = { enable = true; network.enable = true; };
+  # NOTE(review): PermitRootLogin = "yes" also allows password logins for root when password
+  # authentication is enabled, and port 22 is opened on every interface below — consider
+  # "prohibit-password" once key-based / tailscale access is confirmed.
+  services.openssh = { enable = true; settings.PermitRootLogin = "yes"; };
+  networking = {
+    useDHCP = true;
+    nameservers = [ "1.1.1.1" "8.8.8.8" ];
+    firewall = {
+      enable = true;
+      trustedInterfaces = [ "tailscale0" ];
+      allowedTCPPorts = [ 22 ];
+    };
+  };
+  nix.settings.experimental-features = [ "nix-command" "flakes" ];
+  system.stateVersion = "24.11";
+
+  # --- tailscale (auto-auth from /etc/ts-auth-key, hostname from /etc/ts-hostname; written by cloud-init) ---
+  # /etc/ts-hostname is read at *evaluation* time (builtins.readFile), i.e. on the machine that runs
+  # nixos-rebuild. The pathExists guard keeps the config buildable when the file has not been written
+  # yet: in that case no --hostname flag is passed. When the file exists the flags are unchanged.
+  services.tailscale = {
+    enable = true;
+    authKeyFile = "/etc/ts-auth-key";
+    extraUpFlags =
+      let
+        hostnameFile = /etc/ts-hostname;
+        # Strip the single trailing newline a text file normally ends with.
+        tsHostname = lib.strings.removeSuffix "\n" (builtins.readFile hostnameFile);
+        # lib.optional is lazy in its element, so the file is only read when it exists.
+        hostnameFlag = lib.optional (builtins.pathExists hostnameFile) "--hostname=${tsHostname}";
+      in
+      hostnameFlag ++ [ "--ssh" ]; # --ssh: allow tailscale-ssh as a fallback path
+  };
+
+  # --- swap: the Pi OOM lesson. 2 GB RAM is tight for 3 concurrent claude sessions; 4 GB disk swap
+  #     as a real overflow tier (zram is in-RAM and doesn't add capacity).
+  # Disk-backed swapfile at /swapfile; `size` is 4096 (megabytes), i.e. the 4 GB overflow tier.
+  swapDevices = [ { device = "/swapfile"; size = 4096; } ];
+
+  # --- nix-ld: lets the standalone Claude Code (foreign dynamic ELF / Bun) run on NixOS ---
+  programs.nix-ld = {
+    enable = true;
+    # Shared libraries exposed to foreign binaries through nix-ld.
+    libraries = with pkgs; [
+      stdenv.cc.cc.lib # libstdc++ / libgcc_s
+      zlib
+      openssl
+      curl
+      glibc
+    ];
+  };
+
+  # --- packages the loops + launch.sh + orchestrator need ---
+  environment.systemPackages = with pkgs; [
+    git tmux python3 jq curl cacert
+    gnused gawk coreutils gnugrep findutils util-linux
+    nettools       # ifconfig / netstat / route — does not ship `nc`
+    netcat-openbsd # nc (the original comment expected it from nettools)
+    openssh        # ssh
+    docker-client  # `docker` CLI is not needed (deploys run on cc-ci), but handy for probes
+  ];
+
+  # --- loops user: non-root account for running claude (--dangerously-skip-permissions blocked for root) ---
+  users.users.loops = {
+    isNormalUser = true;
+    home = "/home/loops";
+    shell = pkgs.bash;
+    extraGroups = [ "wheel" ]; # sudo access
+  };
+  security.sudo = {
+    wheelNeedsPassword = false; # passwordless sudo for wheel
+    # Allow loops user to use tmux/claude without a password prompt.
+    # (Overlaps with the wheel setting above, since loops is a wheel member; kept as an explicit rule.)
+    extraRules = [{
+      users = [ "loops" ];
+      commands = [{ command = "ALL"; options = [ "NOPASSWD" ]; }];
+    }];
+  };
+
+  # --- root PATH: ensure ~/.local/bin (where the standalone claude binary lives) is on root's PATH ---
+  # NOTE(review): environment.variables is a system-wide setting, so this forced PATH presumably applies
+  # to every user's shell (including loops), not only root — confirm that is intended.
+  environment.variables.PATH = lib.mkForce "/root/.local/bin:/run/current-system/sw/bin:/run/wrappers/bin:/usr/bin:/bin";
+
+  # --- root ssh config: reach cc-ci DIRECTLY over the VM's own tailscale (this VM is a tailnet peer,
+  #     so NO SOCKS proxy is needed — unlike the Pi). Key staged at /root/.ssh/cc-ci-root-ed25519.
+  # Rewrites /root/.ssh/config on every activation (manual edits to that file are overwritten).
+  system.activationScripts.ccciSshConfig = ''
+    mkdir -p /root/.ssh && chmod 700 /root/.ssh
+    cat > /root/.ssh/config <<'SSHCFG'
+    Host cc-ci cc-nix-test 100.90.116.4
+      HostName 100.90.116.4
+      User root
+      IdentityFile /root/.ssh/cc-ci-root-ed25519
+      IdentitiesOnly yes
+      StrictHostKeyChecking accept-new
+      ServerAliveInterval 30
+    SSHCFG
+    chmod 600 /root/.ssh/config
+  '';
+
+  # --- claude-install: idempotent oneshot — fetch the standalone Claude Code CLI if missing.
+  #     Runs via nix-ld. (Auth is a one-time operator step: `claude auth login` — see migration plan.) ---
+  systemd.services.claude-install = {
+    description = "Install the standalone Claude Code CLI if missing (idempotent)";
+    wantedBy = [ "multi-user.target" ];
+    # NOTE(review): programs.nix-ld does not appear to define a "nix-ld.service" unit; ordering after a
+    # unit that does not exist is a no-op. Kept as-is — confirm and drop if so.
+    after = [ "network-online.target" "nix-ld.service" ];
+    wants = [ "network-online.target" ];
+    serviceConfig = { Type = "oneshot"; RemainAfterExit = true; };
+    # systemd only sets HOME for units that have User= set; this unit has none. The script below
+    # checks /root/.local/bin/claude, so point HOME at /root (presumably the installer targets
+    # $HOME/.local/bin — verify against install.sh). Matches the HOME used by cc-ci-loops.
+    environment.HOME = "/root";
+    path = [ pkgs.curl pkgs.bash pkgs.coreutils pkgs.gnutar pkgs.gzip pkgs.unzip ];
+    script = ''
+      # Without pipefail a failed download (curl) is hidden by bash exiting 0 on empty input.
+      set -o pipefail
+      if [ ! -x /root/.local/bin/claude ]; then
+        echo "installing standalone Claude Code CLI..."
+        # Best-effort on purpose: a failed install is logged but does not fail the unit.
+        curl -fsSL https://claude.ai/install.sh | bash || echo "claude install failed (retry next activation)"
+      fi
+    '';
+  };
+
+  # --- cc-ci-loops supervisor (defined AND enabled: wantedBy = multi-user.target). It expects the
+  #     workspace (/srv/cc-ci: launch.sh, plan, prompts, the loop clones, secrets) to be staged; when
+  #     launch.sh is not there yet it only logs a "skipping" message. This is the reboot-resilience
+  #     fix: it runs launch.sh start on every boot. Mirrors the Pi's cc-ci-loops.service. ---
+  # Oneshot unit that runs `/srv/cc-ci/cc-ci-plan/launch.sh start` as root at boot, with
+  # RESUME_PHASE=1 and HOME=/root. If launch.sh is missing or not executable it logs a skip message
+  # and succeeds; if launch.sh itself fails, the unit fails (so the error shows in `systemctl status`).
+  systemd.services.cc-ci-loops = {
+    description = "cc-ci Builder/Adversary loops + watchdog (launch.sh start, RESUME_PHASE)";
+    wantedBy = [ "multi-user.target" ]; # enabled: workspace staged (Phase C/D 2026-05-30)
+    after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
+    wants = [ "network-online.target" ];
+    serviceConfig = {
+      Type = "oneshot";
+      RemainAfterExit = true;
+      User = "root";
+      # Leading "-": a missing /srv/cc-ci is not fatal, so the "not staged" branch of the script can
+      # actually run instead of systemd failing the unit before the script starts.
+      WorkingDirectory = "-/srv/cc-ci";
+    };
+    environment = { RESUME_PHASE = "1"; HOME = "/root"; };
+    path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
+    script = ''
+      # if/else instead of `A && B || C`: with the latter, a failing launch.sh was reported as
+      # "workspace not staged" and the unit still exited 0.
+      if [ -x /srv/cc-ci/cc-ci-plan/launch.sh ]; then
+        /srv/cc-ci/cc-ci-plan/launch.sh start
+      else
+        echo "workspace not staged yet (/srv/cc-ci/cc-ci-plan/launch.sh missing) — skipping"
+      fi
+    '';
+  };
+}