first pass at trimming monitoring stack

This commit is contained in:
decentral1se 2022-03-17 11:58:48 +01:00
parent 4b2b931eb8
commit efa3216198
No known key found for this signature in database
GPG Key ID: 03789458B3D0C410
11 changed files with 76 additions and 137 deletions

View File

@ -1,29 +0,0 @@
# Sample environment for the monitoring stack (Prometheus, Loki, Grafana).
TYPE=monitoring
# Hostnames for Grafana and Prometheus; the README asks for names that
# resolve to the Docker swarm host.
GRAFANA_DOMAIN=grafana.example.com
PROMETHEUS_DOMAIN=prometheus.example.com
LETS_ENCRYPT_ENV=production
# Uncomment both lines to collect traefik metrics: prometheus.yml.tmpl only
# emits its 'traefik' scrape job when TRAEFIK_METRICS_ENABLED is "1", and that
# job targets tasks.<TRAEFIK_SERVICE_NAME> on port 8082.
#TRAEFIK_METRICS_ENABLED=1
#TRAEFIK_SERVICE_NAME=traefik_app
# grafana SMTP configuration (optional)
#GF_SMTP_HOST=changeme
#GF_SMTP_ENABLED=1
#GF_SMTP_FROM_ADDRESS=grafana@example.com
#GF_SMTP_SKIP_VERIFY=1
# Additional grafana settings (unlikely to require editing)
GF_SECURITY_ALLOW_EMBEDDING=1
GF_INSTALL_PLUGINS=grafana-piechart-panel
# Built from GRAFANA_DOMAIN above.
GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
# Loki stores logs in object storage; fill these in with your
# minio configuration (or any s3-compatible object store).
# The loki config template reads these values when it is rendered.
#LOKI_AWS_ENDPOINT=https://minio/
#LOKI_AWS_REGION=eu-west-1
#LOKI_ACCESS_KEY_ID=
#LOKI_SECRET_ACCESS_KEY=
#LOKI_BUCKET_NAMES=loki

View File

@ -1,29 +0,0 @@
# Monitoring
A server and application monitoring stack based on Prometheus, Loki and Grafana.
<!-- metadata -->
* **Category**: Utilities
* **Status**: 2, beta
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki), upstream
* **Healthcheck**: 3
* **Backups**: No
* **Email**: 3
* **Tests**: No
* **SSO**: 1
<!-- endmetadata -->
## Basic usage
1. Set up Docker Swarm and [`abra`]
2. Deploy [`coop-cloud/traefik`]
3. Deploy [`coop-cloud/minio`]
4. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
to save secrets in `pass`)
5. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
your Docker swarm box
6. `abra app YOURAPPDOMAIN deploy`
7. Open the configured domain in your browser to finish set-up
[`abra`]: https://git.autonomic.zone/autonomic-cooperative/abra
[`coop-cloud/traefik`]: https://git.autonomic.zone/coop-cloud/traefik
[`coop-cloud/minio`]: https://git.autonomic.zone/coop-cloud/minio

View File

@ -1,9 +0,0 @@
# Version suffixes for the stack's Docker configs. The compose file appends
# each one to the matching config name (for example the prometheus_yml config
# is named <stack>_prometheus_yml_<PROMETHEUS_YML_VERSION>).
# NOTE(review): presumably a suffix is bumped whenever the corresponding file
# changes so that a new config object is created - confirm.
export PROMETHEUS_YML_VERSION=v2
export PROMTAIL_YML_VERSION=v1
export LOKI_YML_VERSION=v1
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1
export GRAFANA_DATASOURCES_YML_VERSION=v1
export GRAFANA_DASHBOARDS_YML_VERSION=v1
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1

View File

@ -3,7 +3,7 @@ version: "3.8"
services:
app:
image: grafana/grafana
image: grafana/grafana:8.4.4
volumes:
- grafana-data:/var/lib/grafana:rw
configs:
@ -19,6 +19,7 @@ services:
target: /var/lib/grafana/dashboards/traefik.json
networks:
- proxy
- internal
environment:
- GF_SMTP_HOST
- GF_SMTP_ENABLED
@ -42,38 +43,16 @@ services:
retries: 3
start_period: 10s
# Previous definition of the loki service (removed side of this diff).
loki:
image: grafana/loki:2.0.0
# Points loki at the config file mounted by the configs entry below.
command: -config.file=/etc/loki/local-config.yaml
# Attached to the 'loki' and 'api' networks in this older layout.
networks:
- loki
- api
deploy:
endpoint_mode: dnsrr
# Port 3100 is published in host mode.
ports:
- target: 3100
published: 3100
protocol: tcp
mode: host
configs:
- source: loki_yml
target: /etc/loki/local-config.yaml
volumes:
- loki-data:/loki
prometheus:
image: prom/prometheus:latest
image: prom/prometheus:v2.34.0
volumes:
- prometheus-data:/prometheus:rw
configs:
- source: prometheus_yml
target: /etc/prometheus/prometheus.yml
networks:
api:
aliases:
- prometheus_api
exporters: ~
proxy: ~
- proxy
- internal
deploy:
restart_policy:
condition: on-failure
@ -91,14 +70,31 @@ services:
retries: 3
start_period: 10s
# Loki log store. Its configuration is the templated loki_yml config, which
# Docker renders per task before mounting it at the path given to
# -config.file.
loki:
  image: grafana/loki:2.0.0
  command: -config.file=/etc/loki/local-config.yaml
  networks:
    - internal
  # The loki_yml template looks these up with the template env function,
  # which reads the service's own environment, so they must be passed
  # through here (same pass-through style as the app service's GF_* entries).
  environment:
    - LOKI_AWS_ENDPOINT
    - LOKI_AWS_REGION
    - LOKI_ACCESS_KEY_ID
    - LOKI_BUCKET_NAMES
  deploy:
    endpoint_mode: dnsrr
  # Long-form port entry: 3100 is published directly on the host
  # (mode: host) rather than through the ingress mesh.
  ports:
    - target: 3100
      published: 3100
      protocol: tcp
      mode: host
  configs:
    - source: loki_yml
      target: /etc/loki/local-config.yaml
  volumes:
    - loki-data:/loki
  # Object-store secret key; the loki_yml template reads it by this name.
  secrets:
    - loki_aws_secret_access_key
configs:
prometheus_yml:
template_driver: golang
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
file: prometheus.yml.tmpl
promtail_yml:
name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION}
file: promtail.yml
loki_yml:
template_driver: golang
name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
@ -127,3 +123,9 @@ volumes:
networks:
# Network created outside this stack (external: true); the app and
# prometheus services attach to it.
proxy:
external: true
# Stack-local network joined by the app, prometheus and loki services.
internal:
secrets:
  # Secret key for loki's object store. It is created outside this stack
  # (external: true) and looked up by its full, versioned name.
  loki_aws_secret_access_key:
    external: true
    # The version suffix must come from the variable the env file defines,
    # SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION; a misspelt variable
    # interpolates to an empty string and the name no longer matches the
    # secret that was created.
    name: ${STACK_NAME}_loki_aws_secret_access_key_${SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION}

41
monitoring/env Normal file
View File

@ -0,0 +1,41 @@
# Environment for the monitoring stack deployment.
TYPE=monitoring
# Hostnames for Grafana and Prometheus.
GRAFANA_DOMAIN=g.monitor.autonomic.zone
PROMETHEUS_DOMAIN=p.monitor.autonomic.zone
LETS_ENCRYPT_ENV=production
# Settings for collecting traefik metrics.
# NOTE(review): this change also removes the 'traefik' scrape job from
# prometheus.yml.tmpl, the only visible reader of these two variables, so
# uncommenting them may no longer have any effect - confirm.
#TRAEFIK_METRICS_ENABLED=1
#TRAEFIK_SERVICE_NAME=traefik_app
# grafana SMTP configuration (optional)
#GF_SMTP_HOST=changeme
#GF_SMTP_ENABLED=1
#GF_SMTP_FROM_ADDRESS=grafana@example.com
#GF_SMTP_SKIP_VERIFY=1
# Additional grafana settings (unlikely to require editing)
GF_SECURITY_ALLOW_EMBEDDING=1
GF_INSTALL_PLUGINS=grafana-piechart-panel
# Built from GRAFANA_DOMAIN above.
GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
# Loki stores logs in object storage; these point it at the minio
# (or other s3-compatible) bucket. The secret access key is deliberately not
# set here: the loki config template reads it from the
# loki_aws_secret_access_key Docker secret.
# NOTE(review): the access key ID below is committed in plain text - confirm
# that is acceptable for this repository.
LOKI_AWS_ENDPOINT=https://minio.autonomic.zone
LOKI_AWS_REGION=eu-west-1
LOKI_ACCESS_KEY_ID=bush-debrief-approval-robust-scraggly-molecule
LOKI_BUCKET_NAMES=loki
# NOTE(d1): abra.sh env vars, while we deploy things manually
# The *_VERSION values are suffixes appended to Docker config names in the
# compose file.
PROMETHEUS_YML_VERSION=v2
PROMTAIL_YML_VERSION=v1
LOKI_YML_VERSION=v1
NODE_EXPORTER_ENTRYPOINT_VERSION=v1
GRAFANA_DATASOURCES_YML_VERSION=v1
GRAFANA_DASHBOARDS_YML_VERSION=v1
GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1
GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1
# Version suffix for the loki_aws_secret_access_key Docker secret.
SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION=v1

View File

@ -1,3 +1,4 @@
---
apiVersion: 1
providers:

View File

@ -1,3 +1,4 @@
---
apiVersion: 1
datasources:

View File

@ -55,7 +55,7 @@ storage_config:
# Object-store settings, filled in when Docker renders this config template.
# Template function arguments must be quoted string literals: a bare word is
# parsed as a function name, which makes the whole template fail to parse.
endpoint: {{ env "LOKI_AWS_ENDPOINT" }}
region: {{ env "LOKI_AWS_REGION" }}
access_key_id: {{ env "LOKI_ACCESS_KEY_ID" }}
# Read from the Docker secret. The earlier env-based secret_access_key line is
# dropped: it duplicated this key and read a variable that is no longer set.
secret_access_key: {{ secret "loki_aws_secret_access_key" }}
bucketnames: {{ env "LOKI_BUCKET_NAMES" }}
insecure: false
sse_encryption: false

View File

@ -1,9 +0,0 @@
#!/bin/sh -e
# Entrypoint wrapper for node_exporter.
# Writes a single node_meta sample, labelled with the swarm node id (taken
# from NODE_ID in the environment) and the node name (read from
# /etc/nodename), to a .prom file, then replaces this shell with
# /bin/node_exporter, forwarding every argument unchanged.
# NOTE(review): presumably the .prom file is read by node_exporter's textfile
# collector - confirm against the arguments the service passes.
textfile_dir=/etc/node-exporter

NODE_NAME=$(cat /etc/nodename)

mkdir -p "$textfile_dir"
echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > "$textfile_dir/node-meta.prom"

exec /bin/node_exporter "$@"

View File

@ -25,34 +25,3 @@ scrape_configs:
static_configs:
- targets:
- localhost:9090
# Scrape jobs removed by this change.
# node-exporter: targets come from a DNS A-record lookup of
# tasks.node_exporter and are scraped on port 9100 every 10s.
# http://node_exporter:9100/metrics
- job_name: node-exporter
scrape_interval: 10s
metrics_path: "/metrics"
dns_sd_configs:
- names:
- 'tasks.node_exporter'
type: 'A'
port: 9100
# cadvisor: same DNS-based discovery against tasks.cadvisor, port 8080,
# scraped every 30s.
- job_name: 'cadvisor'
scrape_interval: 30s
metrics_path: '/metrics'
dns_sd_configs:
- names:
- 'tasks.cadvisor'
type: 'A'
port: 8080
# traefik: this job is only rendered when TRAEFIK_METRICS_ENABLED is "1";
# the lookup name is built from TRAEFIK_SERVICE_NAME and the port is 8082.
{{ if eq (env "TRAEFIK_METRICS_ENABLED") "1" }}
- job_name: 'traefik'
scrape_interval: 30s
metrics_path: '/metrics'
dns_sd_configs:
- names:
- 'tasks.{{ (env "TRAEFIK_SERVICE_NAME") }}'
type: 'A'
port: 8082
{{ end }}

View File

@ -1,3 +1,4 @@
---
server:
http_listen_port: 9080
grpc_listen_port: 0