Replace the boot-fragile busybox-crond-in-tmux (phase 5 §4) with a systemd service+timer. Service is timer-triggered only (not wantedBy multi-user.target) so it never runs on boot/activation; mirrors the cc-ci-loops env fix (CLAUDE_BIN + /home/loops/.local/bin on PATH). Timer fires Sundays 02:00 UTC, Persistent=true so a missed run (box down) fires once on next boot. Runs launch-upgrader.py start -> cc-ci-upgrader agent -> /upgrade-all DEFAULT (opens recipe PRs, never merges). Activate via nixos-rebuild + retire the old Monday crond after the phase-5 T0-fire verification completes. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
216 lines
9.6 KiB
Nix
216 lines
9.6 KiB
Nix
# cc-ci-orchestrator-hetzner — NixOS config for the Hetzner loops runtime host.
|
|
#
|
|
# Purpose: run the cc-ci Builder/Adversary/Watchdog loops + orchestrator/assistant sessions
|
|
# on a Hetzner cpx11 (2 vCPU / 2 GB dedicated AMD / 40 GB NVMe), replacing the slow b1 Incus VM.
|
|
#
|
|
# Provision with terraform/ then converge with: nixos-rebuild switch --flake .#cc-ci-orchestrator-hetzner
|
|
# See terraform/README.md for the full Stage 2 procedure.
|
|
{ config, pkgs, lib, ... }:
|
|
{
|
|
# hardware.nix is the nixos-infect generated hardware-configuration.nix (see README Stage 2a).
|
|
|
|
# SSH daemon. NOTE(review): root login is fully permitted ("yes"); if only key-based
# root access is intended, "prohibit-password" would be the stricter value — confirm.
services.openssh.enable = true;
services.openssh.settings.PermitRootLogin = "yes";
|
|
|
|
# Public keys accepted for root over SSH — carried over from the current orchestrator
# VM's /root/.ssh/authorized_keys.
users.users.root.openssh.authorizedKeys = {
  keys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOk8NaeBdPbS2gfUvbny8h0AkZlVjGYHzx4QPXSJ38gd claude@claude-vm"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJVlfoLBPseQ9fA9534KmRg2KWcksKZGzAJIpHJ2JpsI mfowler.email@protonmail.com"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAcyTGb/wVgdhg5oBCZZvBaR1RuUQRY/3WHnOQpNDCsp claude-cc-ci-sandbox@20260526"
  ];
};
|
|
# Addressing comes from DHCP; the resolvers are pinned explicitly.
networking.useDHCP = true;
networking.nameservers = [ "1.1.1.1" "8.8.8.8" ];

# Firewall: only SSH (22) is opened generally. tailscale0 is a trusted interface, so
# port 80 needs no entry here — nginx binds on the tailnet address for oc.commoninternet.net.
networking.firewall.enable = true;
networking.firewall.trustedInterfaces = [ "tailscale0" ];
networking.firewall.allowedTCPPorts = [ 22 ];
|
|
# NixOS release this host was first installed with.
system.stateVersion = "24.11";

# Enable the `nix` CLI and flakes (the host is converged with `nixos-rebuild --flake`).
nix.settings.experimental-features = [
  "nix-command"
  "flakes"
];
|
|
|
|
# Tailscale. The auth key is read from /etc/ts-auth-key, which is placed by hand during
# Stage 2 and is deliberately not tracked in git.
services.tailscale.enable = true;
services.tailscale.authKeyFile = "/etc/ts-auth-key";
services.tailscale.extraUpFlags = [ "--hostname=cc-ci-orchestrator" ];
|
|
|
|
# 4 GB swap file on disk — a memory safety net for claude sessions
# (2 GB of RAM is tight once 3+ sessions are running). `size` is in MiB.
swapDevices = [
  {
    device = "/swapfile";
    size = 4096;
  }
];
|
|
|
|
# nix-ld — allows the standalone Claude Code CLI (a foreign dynamically-linked ELF / Bun
# binary) to run on NixOS by exposing the shared libraries listed below.
programs.nix-ld = {
  enable = true;
  libraries = [
    pkgs.stdenv.cc.cc.lib
    pkgs.zlib
    pkgs.openssl
    pkgs.curl
    pkgs.glibc
  ];
};
|
|
|
|
# System-wide tooling available to every user and session.
environment.systemPackages = [
  # general tooling
  pkgs.git
  pkgs.tmux
  pkgs.python3
  pkgs.jq
  pkgs.curl
  pkgs.cacert
  # core userland
  pkgs.gnused
  pkgs.gawk
  pkgs.coreutils
  pkgs.gnugrep
  pkgs.findutils
  pkgs.util-linux
  # networking
  pkgs.nettools
  pkgs.openssh
  # key management (same toolchain as cc-ci server)
  pkgs.age
  pkgs.sops
];
|
|
|
|
# loops user — claude sessions run as non-root (--dangerously-skip-permissions is
# blocked for root).
users.users.loops.isNormalUser = true;
users.users.loops.home = "/home/loops";
users.users.loops.shell = pkgs.bash;
users.users.loops.extraGroups = [ "wheel" ];

# Passwordless sudo: for the wheel group as a whole, and explicitly for loops on ALL commands.
security.sudo.wheelNeedsPassword = false;
security.sudo.extraRules = [
  {
    users = [ "loops" ];
    commands = [
      {
        command = "ALL";
        options = [ "NOPASSWD" ];
      }
    ];
  }
];
|
|
|
|
# Force PATH so that /home/loops/.local/bin (claude + opencode) comes first.
# The opencode binary is installed there by hand (not yet in nixpkgs); to re-install:
#   curl -sL https://github.com/anomalyco/opencode/releases/download/v1.15.13/opencode-linux-x64.tar.gz \
#     | tar -xz -C /home/loops/.local/bin opencode && chmod +x /home/loops/.local/bin/opencode
environment.variables.PATH = lib.mkForce (lib.concatStringsSep ":" [
  "/home/loops/.local/bin"
  "/run/current-system/sw/bin"
  "/run/wrappers/bin"
  "/usr/bin"
  "/bin"
]);
|
|
|
|
# SSH config for the loops user — points to the cc-ci Hetzner server via tailnet.
# HostName is updated post-cutover to the Hetzner cc-ci tailnet IP.
#
# The snippet is ordered after the "users"/"groups" activation snippets: it chowns to
# loops:users, so the account must already exist. Without that ordering a first activation
# can leave a root-owned, mode-600 config that the `if [ ! -f ... ]` guard below would
# never repair on later activations.
system.activationScripts.loopsSshConfig = lib.stringAfter [ "users" "groups" ] ''
  mkdir -p /home/loops/.ssh && chown loops:users /home/loops/.ssh && chmod 700 /home/loops/.ssh
  # Only write if not already present (preserves manual customisation).
  if [ ! -f /home/loops/.ssh/config ]; then
  cat > /home/loops/.ssh/config <<'SSHCFG'
  Host cc-ci
  HostName REPLACE_WITH_CC_CI_HETZNER_TAILNET_IP
  User root
  IdentityFile /home/loops/.ssh/cc-ci-root-ed25519
  IdentitiesOnly yes
  StrictHostKeyChecking accept-new
  ServerAliveInterval 30
  SSHCFG
  chmod 600 /home/loops/.ssh/config
  chown loops:users /home/loops/.ssh/config
  fi
'';
|
|
|
|
# claude-install — fetch the standalone Claude Code CLI for the loops user if missing.
# Best-effort: a failed install is logged but does not fail the unit.
# NOTE(review): with RemainAfterExit=true the unit stays "active" after a run, so the
# "retry on next activation" in the message may in practice mean "next boot" — confirm.
systemd.services.claude-install = {
  description = "Install Claude Code CLI for loops user (idempotent)";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
  };
  environment = { HOME = "/home/loops"; };
  path = [ pkgs.curl pkgs.bash pkgs.coreutils pkgs.gnutar pkgs.gzip ];
  script = ''
    # Without pipefail the pipeline's status is bash's alone: a failed download hands bash
    # empty input, bash exits 0, and the failure message below would never be printed.
    set -o pipefail
    if [ ! -x "$HOME/.local/bin/claude" ]; then
      echo "installing Claude Code CLI for loops user..."
      curl -fsSL https://claude.ai/install.sh | bash || echo "install failed — retry on next activation"
    fi
  '';
};
|
|
|
|
# opencode web server — a single shared instance that every agent session attaches to.
# The web UI is published at http://oc.commoninternet.net through the nginx vhost
# (tailscale-only). TINFOIL_API_KEY and other credentials come from /srv/cc-ci/.testenv,
# loaded as the unit's EnvironmentFile at startup.
# NOTE(review): WorkingDirectory is under /srv/cc-ci-orch while the other units in this
# file use /srv/cc-ci — confirm this path is intentional.
systemd.services.opencode-web =
  let
    loopsHome = "/home/loops";
    listenHost = "127.0.0.1";
    listenPort = "4096";
  in
  {
    description = "opencode web server for cc-ci agents (tinfoil/deepseek backend)";
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" "tailscaled.service" ];

    environment.HOME = loopsHome;
    path = with pkgs; [ bash coreutils git python3 openssh tmux nettools ];

    serviceConfig = {
      Type = "simple";
      User = "loops";
      Group = "users";
      WorkingDirectory = "/srv/cc-ci-orch/cc-ci";
      EnvironmentFile = "/srv/cc-ci/.testenv";
      # Remove /tmp/opencode before each start.
      ExecStartPre = "${pkgs.coreutils}/bin/rm -rf /tmp/opencode";
      ExecStart = "${loopsHome}/.local/bin/opencode serve --hostname ${listenHost} --port ${listenPort}";
      Restart = "on-failure";
      RestartSec = "5s";
    };
  };
|
|
|
|
# nginx — reverse proxy from oc.commoninternet.net to the local opencode web server.
# The vhost listens on the tailscale address only, so it is reachable solely on the tailnet.
# DNS: add A record oc.commoninternet.net → 100.84.190.30 (operator step).
# NOTE(review): nginx binds a specific tailnet IP; if that address is not yet assigned
# when nginx starts, the bind could fail — confirm start ordering on a cold boot.
services.nginx =
  let
    tailnetAddr = "100.84.190.30";
    upstream = "http://127.0.0.1:4096";
  in
  {
    enable = true;
    recommendedProxySettings = true;
    virtualHosts."oc.commoninternet.net" = {
      # Tailscale interface only — never the public IP.
      listen = [
        {
          addr = tailnetAddr;
          port = 80;
          ssl = false;
        }
      ];
      locations."/".proxyPass = upstream;
      locations."/".proxyWebsockets = true;
    };
  };
|
|
|
|
# cc-ci-loops supervisor — workspace staged 2026-05-31, so ENABLED for reboot-resilience.
systemd.services.cc-ci-loops = {
  description = "cc-ci Builder/Adversary loops + watchdog (launch.sh start)";
  wantedBy = [ "multi-user.target" ]; # enabled after workspace staged (Hetzner cutover)
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
    WorkingDirectory = "/srv/cc-ci/cc-ci";
    # Append one line to REBOOTS.md per genuine reboot (boot_id-gated; not on manual restart).
    ExecStartPre = "${pkgs.bash}/bin/bash /srv/cc-ci/cc-ci-plan/reboot-log.sh";
  };
  # CLAUDE_BIN points at the standalone CLI installed by claude-install.service; the loops
  # backend defaults to claude (persisted in .loop-backend). Without this, launch.py's preflight
  # `which(claude)` fails because the systemd `path` below has no /home/loops/.local/bin.
  environment = {
    RESUME_PHASE = "1";
    HOME = "/home/loops";
    CLAUDE_BIN = "/home/loops/.local/bin/claude";
  };
  path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
  script = ''
    # Put the standalone claude/opencode binaries on PATH. On a cold boot this is the env the
    # tmux server (and thus every agent session) inherits, so bare `claude` resolves everywhere.
    export PATH="/home/loops/.local/bin:$PATH"

    # Explicit if/else rather than `test && start || echo`: with the chained form a FAILING
    # `launch.sh start` also fell through to the echo, logging "workspace not staged" and
    # reporting the unit as successful. Now a launch failure fails the unit, and only a
    # missing/non-executable launch.sh is treated as "not staged yet".
    if [ -x /srv/cc-ci/cc-ci-plan/launch.sh ]; then
      /srv/cc-ci/cc-ci-plan/launch.sh start
    else
      echo "workspace not staged yet — skipping loop start"
    fi
  '';
};
|
|
|
|
# Weekly recipe upgrade — runs /upgrade-all over every enrolled recipe (opens recipe PRs
# verified by !testme, never merges). Replaces the boot-fragile busybox-crond-in-tmux from
# phase 5 §4 with a reboot-safe systemd timer. The service is timer-triggered only (NOT
# wantedBy multi-user.target) so it never runs on boot/activation — only on the schedule.
systemd.services.cc-ci-upgrade-all = {
  description = "cc-ci weekly /upgrade-all run (recipe upgrade survey + PRs, never merges)";
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot"; # launch-upgrader.py spawns the cc-ci-upgrader tmux session and returns
    # A oneshot unit without RemainAfterExit stops as soon as the script returns, and the
    # default KillMode=control-group then kills whatever is left in the unit's cgroup. If no
    # tmux server is already running, the one spawned here would live in this cgroup and be
    # killed with it. KillMode=process limits the kill to the main process.
    # (RemainAfterExit is not an option: a still-active unit would not be re-run by the timer.)
    KillMode = "process";
    User = "loops";
    Group = "users";
    WorkingDirectory = "/srv/cc-ci";
  };
  # Same environment fix as cc-ci-loops: CLAUDE_BIN plus /home/loops/.local/bin on PATH.
  environment = {
    HOME = "/home/loops";
    CLAUDE_BIN = "/home/loops/.local/bin/claude";
  };
  path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
  script = ''
    export PATH="/home/loops/.local/bin:$PATH"
    # Make sure the log directory exists; otherwise the >> redirect below fails before
    # launch-upgrader.py is even started.
    ${pkgs.coreutils}/bin/mkdir -p /srv/cc-ci/.cc-ci-logs
    python3 /srv/cc-ci/cc-ci-plan/launch-upgrader.py start >> /srv/cc-ci/.cc-ci-logs/upgrader-cron.log 2>&1
  '';
};
|
|
|
|
# Schedule for the cc-ci-upgrade-all service: once a week, Sunday 02:00 UTC.
systemd.timers.cc-ci-upgrade-all = {
  description = "Weekly trigger for cc-ci-upgrade-all (Sundays 02:00 UTC)";
  wantedBy = [ "timers.target" ];
  timerConfig.OnCalendar = "Sun *-*-* 02:00:00 UTC";
  # Catch-up: if the box was down at the scheduled time, run once on next boot.
  timerConfig.Persistent = true;
};
|
|
}
|