# Patch summary: route TLS for the bluesky-pds recipe through a Caddy sidecar.
#
# Files touched by this diff:
#   .env.sample  - sample DOMAIN changes from pds.example.com to
#                  bluesky-pds.example.com.
#   Caddyfile    - new file. Global on_demand_tls block asks
#                  http://app:3000/tls-check; the site block for {$DOMAIN} and
#                  *.{$DOMAIN} enables on-demand TLS and reverse-proxies to
#                  app:3000.
#   README.md    - subdomain handles are described as the default, the
#                  "domain handles are recommended" paragraph is removed, and a
#                  "TLS architecture (Caddy sidecar)" section is added.
#   abra.sh      - exports CADDYFILE_VERSION=v1 next to ENTRYPOINT_VERSION.
#   compose.yml  - the app service moves from the proxy network to a new
#                  internal network and loses its Traefik HTTP router labels;
#                  restart_policy gains max_attempts: 5; a backupbot.backup
#                  label is added; a new caddy service (image caddy:2) joins
#                  both networks and carries Traefik TCP router labels with
#                  tls.passthrough=true targeting port 443; the internal
#                  network, caddy_data volume and caddyfile config are declared.
#
# NOTE(review): the version label changes to 0.1.0+v0.4 while the image tag in
#   the same file is ghcr.io/bluesky-social/pds:0.4 (no "v") - confirm the "v"
#   prefix is intended.
# NOTE(review): README examples keep using pds.example.com while .env.sample
#   now uses bluesky-pds.example.com - confirm the mismatch is intentional.
# NOTE(review): the traefik.swarm.network and ruleSyntax=v3 labels look like
#   they presume a Traefik v3 deployment, yet the README states no changes to
#   the Traefik recipe are needed - TODO confirm against the deployed Traefik.
# NOTE(review): the README context line shows the TXT value as `did=did:plc:`
#   with nothing after the colon; it may be truncated in this copy - verify
#   against the target file before applying.
# NOTE(review): several diff lines appear joined onto single physical lines
#   here (hunk headers occur mid-line); the original newlines presumably need
#   restoring before this patch can be applied.
diff --git a/.env.sample b/.env.sample index 2337c74..e95dbc9 100644 --- a/.env.sample +++ b/.env.sample @@ -1,6 +1,6 @@ TYPE=bluesky-pds -DOMAIN=pds.example.com +DOMAIN=bluesky-pds.example.com LETS_ENCRYPT_ENV=production # Blob upload limit in bytes (default: 100MB) diff --git a/Caddyfile b/Caddyfile new file mode 100644 index 0000000..1a18550 --- /dev/null +++ b/Caddyfile @@ -0,0 +1,12 @@ +{ + on_demand_tls { + ask http://app:3000/tls-check + } +} + +{$DOMAIN}, *.{$DOMAIN} { + tls { + on_demand + } + reverse_proxy app:3000 +} diff --git a/README.md b/README.md index 159881a..da4fd9f 100644 --- a/README.md +++ b/README.md @@ -77,17 +77,42 @@ abra app run YOURAPPDOMAIN app -- \ User handles on a PDS can work in two ways: -1. **Subdomain handles** (e.g. `user.pds.example.com`): Requires a wildcard DNS - record (`*.pds.example.com`) pointing to your server, and wildcard TLS - certificates (which require DNS challenge configuration in Traefik). +1. **Subdomain handles** (e.g. `user.pds.example.com`): The default. Requires a + wildcard DNS record (`*.pds.example.com`) pointing to your server. TLS is + handled automatically by the Caddy sidecar (see below). 2. **Domain handles** (e.g. `user.com`): Users can use their own domain as a handle by adding a DNS TXT record at `_atproto.user.com` with the value `did=did:plc:`. This works without any additional server configuration. -Domain handles are recommended for most deployments as they don't require -wildcard TLS configuration. +## TLS architecture (Caddy sidecar) + +This recipe uses a **Caddy sidecar** for TLS instead of letting Traefik terminate +TLS directly. This is needed because Bluesky subdomain handles require TLS +certificates for each `user.pds.example.com` subdomain, and Traefik cannot issue +on-demand per-subdomain certificates. + +The architecture: + +1. 
**Traefik** receives TLS connections on port 443 and does **TCP passthrough** + (no TLS termination) for traffic matching `DOMAIN` and `*.DOMAIN`, forwarding + the raw TLS stream to Caddy. +2. **Caddy** terminates TLS using **on-demand certificates** — it automatically + obtains a Let's Encrypt certificate for each subdomain the first time a + connection arrives, using the TLS-ALPN-01 challenge. +3. **Caddy** reverse proxies the decrypted HTTP traffic to the PDS on port 3000. + +This matches how the [upstream PDS](https://github.com/bluesky-social/pds) is +designed to work (it ships with Caddy), adapted for Co-op Cloud's Traefik-based +routing. The PDS exposes a `/tls-check` endpoint that Caddy consults before +issuing a certificate, preventing abuse. + +**Note:** The first request to a new subdomain handle may take 10-30 seconds while +Caddy obtains the TLS certificate from Let's Encrypt. Subsequent requests are instant. + +No changes to the Traefik recipe are needed — the TCP passthrough is configured +entirely via deploy labels on the Caddy service in this recipe's `compose.yml`. 
## DNS setup diff --git a/abra.sh b/abra.sh index e6c130b..8a81819 100644 --- a/abra.sh +++ b/abra.sh @@ -1 +1,2 @@ export ENTRYPOINT_VERSION=v1 +export CADDYFILE_VERSION=v1 diff --git a/compose.yml b/compose.yml index 6be5b78..b14d4a2 100644 --- a/compose.yml +++ b/compose.yml @@ -5,7 +5,7 @@ services: app: image: ghcr.io/bluesky-social/pds:0.4 networks: - - proxy + - internal environment: - PDS_HOSTNAME=${DOMAIN} - PDS_DATA_DIRECTORY=/pds @@ -39,15 +39,11 @@ services: deploy: restart_policy: condition: on-failure + max_attempts: 5 labels: - - "traefik.enable=true" - - "traefik.docker.network=proxy" - - "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000" - - "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`)" - - "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure" - - "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}" - "coop-cloud.${STACK_NAME}.timeout=${TIMEOUT:-120}" - - "coop-cloud.${STACK_NAME}.version=0.1.0+0.4" + - "coop-cloud.${STACK_NAME}.version=0.1.0+v0.4" + - "backupbot.backup=true" healthcheck: test: ["CMD", "wget", "-q", "--spider", "http://localhost:3000/xrpc/_health"] interval: 30s @@ -55,12 +51,39 @@ services: retries: 5 start_period: 30s + caddy: + image: caddy:2 + networks: + - proxy + - internal + environment: + - DOMAIN=${DOMAIN} + configs: + - source: caddyfile + target: /etc/caddy/Caddyfile + volumes: + - caddy_data:/data + deploy: + restart_policy: + condition: on-failure + max_attempts: 5 + labels: + - "traefik.enable=true" + - "traefik.swarm.network=proxy" + - "traefik.tcp.routers.${STACK_NAME}.rule=HostSNI(`${DOMAIN}`) || HostSNIRegexp(`^.+\\.${DOMAIN}$$`)" + - "traefik.tcp.routers.${STACK_NAME}.ruleSyntax=v3" + - "traefik.tcp.routers.${STACK_NAME}.entrypoints=web-secure" + - "traefik.tcp.routers.${STACK_NAME}.tls.passthrough=true" + - "traefik.tcp.services.${STACK_NAME}.loadbalancer.server.port=443" + networks: proxy: external: true + internal: volumes: pds_data: + caddy_data: 
secrets: pds_jwt_secret: @@ -78,3 +101,6 @@ configs: name: ${STACK_NAME}_entrypoint_${ENTRYPOINT_VERSION} file: entrypoint.sh.tmpl template_driver: golang + caddyfile: + name: ${STACK_NAME}_caddyfile_${CADDYFILE_VERSION} + file: Caddyfile