Compare commits

...

2 Commits

Author SHA1 Message Date
b08ebea876 nix: add cc-ci-hetzner host (cpx32, nixos-infect hardware) + root SSH keys
Created by assistant + patched with root authorized keys so nixos-rebuild
does not lock out SSH access.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 02:59:43 +00:00
4c7150d502 terraform: provision cc-ci on Hetzner Cloud via nixos-infect
Adds terraform/ (hcloud provider, cpx32/nbg1/debian-12) and a new
nix/hosts/cc-ci-hetzner/ flake host to provision the cc-ci server on
Hetzner Cloud as an alternative to the Incus cc-nix-test VM.

Stage 1 (Terraform): creates a cpx32 server (4 vCPU / 8 GB / x86 AMD,
Nuremberg), runs nixos-infect (pinned rev 40f62a6, 2026-03-22) to convert
Debian 12 → NixOS 24.11, and reboots into bare NixOS.

Stage 2 (manual, per terraform/README.md): clone cc-ci --recursive,
provision the bootstrap age key, then `nixos-rebuild switch --flake
.#cc-ci-hetzner`.

Verified (throwaway run 2026-05-31, server 134464512, 168.119.126.100):
- terraform apply: cpx32 in nbg1 created in 17 s
- nixos-infect: NixOS 24.11.719113.50ab793786d9 (same nixpkgs pin as flake)
- nixos-rebuild build --flake .#cc-ci-hetzner: exit 0 on server
  (131 derivations; all cc-ci modules: tailscale, drone, drone-runner,
  bridge, dashboard, harness, swarm, abra, proxy, secrets)
- terraform plan: no changes (idempotent)
- terraform destroy: server + SSH key removed

Age key step (plan §4 Stage 2): operator-pending. Full switch/convergence
requires bootstrap age key at /var/lib/sops-nix/key.txt. Flake builds
without it; activation needs it.

No secrets committed: HCLOUD_TOKEN via env, tfstate gitignored,
networking.nix contains throwaway IP (update per README for production).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 01:11:56 +00:00
12 changed files with 430 additions and 0 deletions

View File

@ -39,6 +39,17 @@
];
};
# Hetzner Cloud host (cpx32, nbg1). Provisions via `terraform/` + nixos-infect.
# Used in parallel with cc-ci (Incus) during transition; becomes canonical after cutover.
# See terraform/README.md for the full apply + Stage 2 (nixos-rebuild switch) workflow.
nixosConfigurations.cc-ci-hetzner = nixpkgs.lib.nixosSystem {
inherit system;
modules = [
sops-nix.nixosModules.sops
./nix/hosts/cc-ci-hetzner/configuration.nix
];
};
devShells.${system} = {
# Devshell for working on the harness/bridge locally (tools + lint toolchain).
default = pkgs.mkShell {

View File

@ -0,0 +1,75 @@
# cc-ci on Hetzner Cloud — NixOS configuration.
# Extends the shared cc-ci modules (same services as the Incus host) with
# Hetzner-specific hardware + networking. Run in parallel with the Incus cc-ci
# host during transition; make this the canonical cc-ci after cutover (plan §7).
#
# To apply after `terraform apply` + nixos-infect:
# git clone --recursive https://git.autonomic.zone/recipe-maintainers/cc-ci.git /etc/cc-ci
# install -m600 <age-private-key> /var/lib/sops-nix/key.txt
# nixos-rebuild switch --flake /etc/cc-ci#cc-ci-hetzner
{ pkgs, lib, ... }:
{
imports = [
./hardware.nix
./networking.nix
../../modules/packages.nix
../../modules/secrets.nix
../../modules/swarm.nix
../../modules/docker-prune.nix
../../modules/abra.nix
../../modules/proxy.nix
../../modules/drone.nix
../../modules/drone-runner.nix
../../modules/bridge.nix
../../modules/dashboard.nix
../../modules/backupbot.nix
../../modules/harness.nix
../../modules/warm-keycloak.nix
../../modules/nightly-sweep.nix
];
# Timezone (same as Incus host — see configuration.nix there for rationale).
time.timeZone = "UTC";
environment.etc."timezone".text = "UTC\n";
# Tailscale — keeps the orchestrator→cc-ci access path unchanged (direct peer).
# On the Hetzner host the auth key is also seeded via /etc/ts-auth-key.
services.tailscale = {
enable = true;
authKeyFile = "/etc/ts-auth-key";
extraUpFlags = [ "--hostname=cc-ci" ];
};
# SSH — root login with keys only. Port 22 is reachable on the public interface
# (it is listed in networking.firewall.allowedTCPPorts), so password-based root
# login is refused; the keys in users.users.root.openssh.authorizedKeys.keys
# remain the access path, over tailscale or the public IP.
services.openssh = {
  enable = true;
  settings.PermitRootLogin = "prohibit-password";
};
# Root SSH authorized keys — preserved across nixos-rebuild switches.
users.users.root.openssh.authorizedKeys.keys = [
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOk8NaeBdPbS2gfUvbny8h0AkZlVjGYHzx4QPXSJ38gd claude@claude-vm"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJVlfoLBPseQ9fA9534KmRg2KWcksKZGzAJIpHJ2JpsI mfowler.email@protonmail.com"
"ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAcyTGb/wVgdhg5oBCZZvBaR1RuUQRY/3WHnOQpNDCsp claude-cc-ci-sandbox@20260526"
];
# Firewall — Hetzner has a public IP, so open 80+443 for Traefik.
# Tailscale interface is trusted (no port restrictions for orchestrator access).
# Plan §6: v1 keeps the sops wildcard cert; evaluate ACME-on-public-IP as follow-up.
networking.firewall = {
enable = true;
trustedInterfaces = [ "tailscale0" ];
allowedTCPPorts = [ 22 80 443 ];
};
environment.systemPackages = with pkgs; [
curl
git
jq
openssh
];
nix.settings.experimental-features = [ "nix-command" "flakes" ];
system.stateVersion = "24.11";
}

View File

@ -0,0 +1,35 @@
# Hardware configuration for cc-ci on Hetzner Cloud (cpx32: AMD 4 vCPU / 8 GB / x86_64).
# Generated by nixos-infect from a Debian 12 base image, then committed here.
#
# nixos-infect uses GRUB + EFI on Hetzner (not systemd-boot), with a qemu-guest profile
# because Hetzner Cloud uses KVM virtualisation.
#
# IMPORTANT: networking.nix (a sibling file, imported alongside this one by configuration.nix) contains the server's static public IP.
# When provisioning a new server via `terraform apply`, copy the fresh networking.nix
# from /etc/nixos/networking.nix on the new host and commit it here before rebuilding.
{ modulesPath, ... }:
{
imports = [ (modulesPath + "/profiles/qemu-guest.nix") ];
boot.loader = {
efi.efiSysMountPoint = "/boot/efi";
grub = {
efiSupport = true;
efiInstallAsRemovable = true;
device = "nodev";
};
};
fileSystems."/boot/efi" = {
device = "/dev/disk/by-uuid/D978-69EE";
fsType = "vfat";
};
boot.initrd.availableKernelModules = [ "ata_piix" "uhci_hcd" "xen_blkfront" "vmw_pvscsi" ];
boot.initrd.kernelModules = [ "nvme" ];
fileSystems."/" = {
device = "/dev/sda1";
fsType = "ext4";
};
}

View File

@ -0,0 +1,40 @@
# Hetzner static networking — generated by nixos-infect at provision time.
#
# This file is server-specific: the IP, gateway, and MAC address are tied to a
# particular Hetzner instance. When provisioning a new server:
# 1. After `terraform apply` + nixos-infect completes, run:
# ssh root@<new-ip> 'cat /etc/nixos/networking.nix'
# 2. Replace this file's contents with the output and commit.
# 3. Then: `nixos-rebuild switch --flake .#cc-ci-hetzner --target-host root@<new-ip>`
#
# Current instance: 91.98.47.73 (fsn1, Hetzner server 134485294, provisioned 2026-05-31).
{ lib, ... }: {
networking = {
nameservers = [
"185.12.64.1"
"185.12.64.2"
];
defaultGateway = "172.31.1.1";
defaultGateway6 = {
address = "";
interface = "eth0";
};
dhcpcd.enable = false;
usePredictableInterfaceNames = lib.mkForce false;
interfaces = {
eth0 = {
ipv4.addresses = [
{ address = "91.98.47.73"; prefixLength = 32; }
];
ipv6.addresses = [
{ address = "fe80::9000:8ff:fe04:152e"; prefixLength = 64; }
];
ipv4.routes = [{ address = "172.31.1.1"; prefixLength = 32; }];
ipv6.routes = [{ address = ""; prefixLength = 128; }];
};
};
};
services.udev.extraRules = ''
ATTR{address}=="92:00:08:04:15:2e", NAME="eth0"
'';
}

19
terraform/.gitignore vendored Normal file
View File

@ -0,0 +1,19 @@
# Terraform state — may contain secrets; NEVER commit
*.tfstate
*.tfstate.*
*.tfstate.backup
# Variable files with secret values — NEVER commit.
# The broad *.tfvars patterns cover arbitrarily named files (e.g. prod.tfvars);
# the narrower patterns are kept for explicitness.
*.tfvars
*.tfvars.json
*.auto.tfvars
*.auto.tfvars.json
terraform.tfvars
# Terraform working directory (downloaded providers, modules)
.terraform/
# Crash logs
crash.log
crash.*.log
# NOTE: .terraform.lock.hcl (provider lock file) IS committed — it pins provider SHAs
# for reproducibility, analogous to flake.lock.

23
terraform/.terraform.lock.hcl generated Normal file
View File

@ -0,0 +1,23 @@
# This file is maintained automatically by "tofu init".
# Manual edits may be lost in future updates.
provider "registry.opentofu.org/hetznercloud/hcloud" {
version = "1.64.0"
constraints = "1.64.0"
hashes = [
"h1:FUkTfFrWlmv0JhsbjQvTk3zY7A2Q0LuoSs0PKEzaLpk=",
"zh:5bf7f8f429b1a8f485988d199f46295676a6cdf7d84ad11f1f4613faecfa89d5",
"zh:63b3d182474dd5afd0d5ab3f5f66228b752504436bcb2f4721bd6f1233d0f2ae",
"zh:6867da2d89d297b6760d80dde373e74df511bea72f7daccf6a944a9de4b4d4ed",
"zh:766fdcea1b03038a92414eafaa430b9ac0c57b36ce4c1573e6e291431659d528",
"zh:7f3186dfcae4028eac4f2c9c2c382b49c1fad0b63d0471b50748ee6817fbd8d2",
"zh:bb8a33b6ff9a4d3bce87628c49b08a4780e2c034762f40112058d96f5a4e52bd",
"zh:cc93751c7c90a37f180cf3e5439ed34f3154e60de5920a13d153d93954938239",
"zh:d6e2abf05a0eb8fe0544eb099960a4962db61532e7757016ccacbf0b83bcd1ae",
"zh:da9e3adedd8d33623aac4929fa8b1210f98d2931d5737c201da0dda992dd25ab",
"zh:dffc931aec4d7b0733690e115b1aabdf5c157b7d347a09a9d149ee6b7e9d8ce3",
"zh:e565dea4f28182099a271f794e3b781f069ea54976f5f05dbb79a1c2b6627459",
"zh:e79411287af28ccf6187bd418b7ea2ee217e642026392ddc8027bf3e3287fb80",
"zh:f5102d7141a04c193dffbb5cbc3f7e3588c41b87e11877d2e20d57ea5ef64123",
]
}

100
terraform/README.md Normal file
View File

@ -0,0 +1,100 @@
# cc-ci Hetzner Cloud Terraform
Provisions the cc-ci NixOS server on Hetzner Cloud (cpx32, 4 vCPU / 8 GB, x86 AMD, nbg1).
Stage 1 (Terraform): creates the server, runs nixos-infect to convert Debian 12 → NixOS.
Stage 2 (manual): clone the flake + apply the cc-ci config.
## Prerequisites (Class-A1 inputs — provide at apply time, NEVER commit)
| Input | How to provide |
|---|---|
| `HCLOUD_TOKEN` | `export HCLOUD_TOKEN=<token>` in shell before `tofu apply` |
| SSH key pair | Generate once: `ssh-keygen -t ed25519 -f ~/.ssh/cc-ci-hetzner`; pass pubkey via `TF_VAR_ssh_public_key="$(cat ~/.ssh/cc-ci-hetzner.pub)"` |
| Bootstrap age key | Provision to `/var/lib/sops-nix/key.txt` on the server (Stage 2; see `docs/install.md`) |
## Stage 1 — Provision server + nixos-infect
```bash
cd terraform/
# Provide secrets via environment
export HCLOUD_TOKEN=<your-token>
export TF_VAR_ssh_public_key="$(cat ~/.ssh/cc-ci-hetzner.pub)"
# Download providers (uses .terraform.lock.hcl — pinned, reproducible)
tofu init # or: terraform init
# Preview
tofu plan
# Apply — creates the server (cpx32 by default) in nbg1, runs nixos-infect on first boot
tofu apply
# Note the output IP:
# server_ipv4 = "x.x.x.x"
# ssh_connect = "ssh root@x.x.x.x"
```
nixos-infect runs on first boot and **reboots the server** into NixOS (~5 min total).
Wait for the reboot to complete, then verify:
```bash
# Check NixOS is up:
ssh root@<ip> 'nixos-version'
# Inspect infect log if needed:
ssh root@<ip> 'cat /var/log/nixos-infect.log'
```
After the reboot the server runs bare NixOS (infect-generated config). Proceed to Stage 2.
## Stage 2 — Apply the cc-ci flake config
Follows the D8 install flow documented in `docs/install.md` exactly:
```bash
# On the Hetzner server (ssh root@<ip>):
# 1. Clone the flake (--recursive brings cc-ci-secrets submodule)
git clone --recursive https://git.autonomic.zone/recipe-maintainers/cc-ci.git /etc/cc-ci
cd /etc/cc-ci
# 2. Provision the bootstrap age key (the one irreducible out-of-band secret)
mkdir -p /var/lib/sops-nix
install -m 0600 /dev/stdin /var/lib/sops-nix/key.txt <<'EOF'
<paste bootstrap age private key here — see docs/install.md>
EOF
# 3. Apply the cc-ci Hetzner host config
nixos-rebuild switch --flake .#cc-ci-hetzner
# 4. Verify (all units green, reconcile oneshots converged)
systemctl --failed
```
## Variables
| Variable | Default | Description |
|---|---|---|
| `server_type` | `cpx32` | x86 only. `cpx32`=AMD 4vCPU/8GB (replaces `cpx31`, retired in some DCs), `cx33`=Intel 4vCPU/8GB. Never `cax*` (ARM). |
| `location` | `nbg1` | Hetzner datacenter. |
| `image` | `debian-12` | Base image; nixos-infect converts it to NixOS. debian-12 preferred. |
| `server_name` | `cc-ci` | Hetzner server name. |
| `ssh_public_key` | (required) | Public key registered for root access. |
Override via env: `TF_VAR_location=hel1 tofu apply`.
## Teardown (throwaway verification run)
```bash
tofu destroy # removes server + SSH key; billing stops immediately
```
## Notes
- `.terraform.lock.hcl` is committed (pins provider SHAs — analogous to flake.lock).
- `*.tfstate`, `*.tfvars`, `.terraform/` are gitignored — never commit state or secrets.
- `cpx31` is retired in some Hetzner DCs; `cpx32` (equivalent AMD, 4 vCPU / 8 GB) is the default.
`cx33` (Intel, same spec) is also available. Both are x86_64 — compatible with the `x86_64-linux` flake.
- The Hetzner server has a public IPv4 — future: point `*.ci.commoninternet.net` A record directly
at it and drop the gateway/MagicDNS path (see plan §6 + `DECISIONS.md`).

32
terraform/main.tf Normal file
View File

@ -0,0 +1,32 @@
# SSH public key registered with Hetzner Cloud. The key material comes from
# var.ssh_public_key; the server resource references this key by id.
resource "hcloud_ssh_key" "cc_ci" {
name = "cc-ci-deploy"
public_key = var.ssh_public_key
# Labels identify the resource as belonging to cc-ci and managed by Terraform.
labels = {
project = "cc-ci"
managed = "terraform"
}
}
# The cc-ci server. Name, type, image and location are all taken from variables.
resource "hcloud_server" "cc_ci" {
name = var.server_name
server_type = var.server_type
image = var.image
location = var.location
ssh_keys = [hcloud_ssh_key.cc_ci.id]
# Stage 1: cloud-init runs nixos-infect on first boot, converting the base image
# (var.image, debian-12 by default) to NixOS, then reboots. See user-data.sh for
# the pinned infect revision.
user_data = file("${path.module}/user-data.sh")
# Public IPv4 only; IPv6 is disabled for this server.
public_net {
ipv4_enabled = true
ipv6_enabled = false
}
labels = {
project = "cc-ci"
managed = "terraform"
stage = "infect"
}
}

19
terraform/outputs.tf Normal file
View File

@ -0,0 +1,19 @@
output "server_ipv4" {
description = "Public IPv4 address of the cc-ci Hetzner server"
value = hcloud_server.cc_ci.ipv4_address
}
output "server_id" {
description = "Hetzner internal server ID"
value = hcloud_server.cc_ci.id
}
output "ssh_connect" {
description = "SSH command to connect as root"
value = "ssh root@${hcloud_server.cc_ci.ipv4_address}"
}
# Convenience command for post-mortem inspection of the Stage 1 conversion.
# The value is a ready-to-run SSH command, not a bare path.
output "nixos_infect_log" {
  description = "SSH command that prints the nixos-infect log (/var/log/nixos-infect.log on the server)"
  value       = "ssh root@${hcloud_server.cc_ci.ipv4_address} 'cat /var/log/nixos-infect.log'"
}

25
terraform/user-data.sh Normal file
View File

@ -0,0 +1,25 @@
#!/usr/bin/env bash
# Stage 1 — convert Debian 12 → NixOS via nixos-infect (pinned revision).
#
# nixos-infect generates /etc/nixos/{configuration.nix,hardware-configuration.nix,networking.nix}
# with Hetzner-correct bootloader (GRUB, not systemd-boot) and networking, then reboots into NixOS.
#
# After the reboot:
# - SSH as root is available (key registered with Hetzner survives infect)
# - Run Stage 2 per terraform/README.md: clone cc-ci + cc-ci-secrets, provision the bootstrap
# age key, then `nixos-rebuild switch --flake .#cc-ci-hetzner`
#
# Logs are written to /var/log/nixos-infect.log on the server for post-mortem inspection.
# The server reboots automatically at the end of infect — wait ~5 min before sshing in.
# Abort on any failing command, unset variable, or failing pipeline stage.
set -euo pipefail
# Pinned nixos-infect revision (2026-03-22: "fixes errors for non efi systems").
# Update deliberately; verify Hetzner still supported before bumping.
INFECT_SHA="40f62a680bb0e8f2f607d79abfaaecd99d59401c"
# Settings exported into the environment of the nixos-infect process started below.
export NIX_CHANNEL="nixos-24.11"
export PROVIDER="hetzner" # tells nixos-infect to use GRUB + Hetzner networking
export NIXOS_IMPORT="" # no extra imports at infect time; we apply the real flake in Stage 2
# Fetch the script at the pinned revision and run it with tracing (-x). Its
# stdout and stderr are merged and copied to /var/log/nixos-infect.log via tee.
# curl -f makes an HTTP error fail the pipeline instead of piping an error page to bash.
curl -fsSL "https://raw.githubusercontent.com/elitak/nixos-infect/${INFECT_SHA}/nixos-infect" \
| bash -x 2>&1 | tee /var/log/nixos-infect.log

37
terraform/variables.tf Normal file
View File

@ -0,0 +1,37 @@
variable "location" {
description = "Hetzner datacenter (nbg1=Nuremberg, fsn1=Falkenstein, hel1=Helsinki, ash=Ashburn, hil=Hillsboro)"
type = string
default = "nbg1"
}
variable "server_type" {
description = <<-EOT
Hetzner server type. Must be x86 — the flake is x86_64-linux; NEVER use cax* (ARM).
cpx32 = AMD 4 vCPU / 8 GB (default; replaces cpx31 which is retired in some DCs).
cx33 = Intel 4 vCPU / 8 GB (alternative).
EOT
type = string
default = "cpx32"
validation {
condition = !startswith(var.server_type, "cax")
error_message = "ARM server types (cax*) are not supported — the cc-ci flake is x86_64-linux only."
}
}
variable "image" {
description = "Base OS image. nixos-infect supports debian-12 and ubuntu-24.04. debian-12 preferred."
type = string
default = "debian-12"
}
variable "ssh_public_key" {
description = "SSH public key content (the full line, e.g. 'ssh-ed25519 AAAA... comment'). Registered with Hetzner for root access post-infect. Pass via TF_VAR_ssh_public_key or terraform.tfvars (gitignored)."
type = string
}
variable "server_name" {
description = "Hetzner server name and initial NixOS hostname"
type = string
default = "cc-ci"
}

14
terraform/versions.tf Normal file
View File

@ -0,0 +1,14 @@
# Tooling and provider pins for the cc-ci Hetzner configuration.
terraform {
  # The server_type validation in variables.tf calls startswith(), which is
  # not available before Terraform 1.3; older CLIs fail with an unknown-function
  # error. OpenTofu releases satisfy this constraint as well.
  required_version = ">= 1.3"

  required_providers {
    hcloud = {
      source = "hetznercloud/hcloud"
      # Exact pin; must match the version recorded in .terraform.lock.hcl.
      version = "1.64.0"
    }
  }
}
# The hcloud provider reads HCLOUD_TOKEN from the environment automatically.
# Never put the token value in any .tf file or .tfvars — keep it in the shell
# environment (export HCLOUD_TOKEN=...). No `hcloud_token` variable is declared
# in this configuration, so setting TF_VAR_hcloud_token would have no effect.
provider "hcloud" {}