From efa3216198f98635b69fc88001dcbb4b24edad41 Mon Sep 17 00:00:00 2001 From: decentral1se Date: Thu, 17 Mar 2022 11:58:48 +0100 Subject: [PATCH] first pass at trimming monitoring stack --- monitoring/.env.sample | 29 ------------- monitoring/README.md | 29 ------------- monitoring/abra.sh | 9 ---- monitoring/compose.yml | 60 +++++++++++++------------- monitoring/env | 41 ++++++++++++++++++ monitoring/grafana-dashboards.yml | 1 + monitoring/grafana-datasources.yml | 1 + monitoring/loki.yml.tmpl | 2 +- monitoring/node-exporter-entrypoint.sh | 9 ---- monitoring/prometheus.yml.tmpl | 31 ------------- monitoring/promtail.yml | 1 + 11 files changed, 76 insertions(+), 137 deletions(-) delete mode 100644 monitoring/.env.sample delete mode 100644 monitoring/README.md delete mode 100644 monitoring/abra.sh create mode 100644 monitoring/env delete mode 100644 monitoring/node-exporter-entrypoint.sh diff --git a/monitoring/.env.sample b/monitoring/.env.sample deleted file mode 100644 index 3822cbc..0000000 --- a/monitoring/.env.sample +++ /dev/null @@ -1,29 +0,0 @@ -TYPE=monitoring - -GRAFANA_DOMAIN=grafana.example.com -PROMETHEUS_DOMAIN=prometheus.example.com - -LETS_ENCRYPT_ENV=production - -# Edit this in order to allow collection of traefik metrics -#TRAEFIK_METRICS_ENABLED=1 -#TRAEFIK_SERVICE_NAME=traefik_app - -# grafana SMTP configuration (optional) -#GF_SMTP_HOST=changeme -#GF_SMTP_ENABLED=1 -#GF_SMTP_FROM_ADDRESS=grafana@example.com -#GF_SMTP_SKIP_VERIFY=1 - -# Additional grafana settings (unlikely to require editing) -GF_SECURITY_ALLOW_EMBEDDING=1 -GF_INSTALL_PLUGINS=grafana-piechart-panel -GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN} - -# Loki stores logs in object storage, fill these up with your -# minio configuration (or any s3-compatible object store) -#LOKI_AWS_ENDPOINT=https://minio/ -#LOKI_AWS_REGION=eu-west-1 -#LOKI_ACCESS_KEY_ID= -#LOKI_SECRET_ACCESS_KEY= -#LOKI_BUCKET_NAMES=loki diff --git a/monitoring/README.md b/monitoring/README.md deleted file mode 
100644 index 4d74a62..0000000 --- a/monitoring/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Monitoring - -A server and application monitoring stack based on Prometheus, Loki and Grafana. - - -* **Category**: Utilities -* **Status**: 2, beta -* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki), upstream -* **Healthcheck**: 3 -* **Backups**: No -* **Email**: 3 -* **Tests**: No -* **SSO**: 1 - - -## Basic usage - -1. Set up Docker Swarm and [`abra`] -2. Deploy [`coop-cloud/traefik`] -3. Deploy [`coop-cloud/minio`] -4. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like - to save secrets in `pass`) -5. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to - your Docker swarm box -6. `abra app YOURAPPDOMAIN deploy` -7. Open the configured domain in your browser to finish set-up - -[`abra`]: https://git.autonomic.zone/autonomic-cooperative/abra -[`coop-cloud/traefik`]: https://git.autonomic.zone/coop-cloud/traefik diff --git a/monitoring/abra.sh b/monitoring/abra.sh deleted file mode 100644 index 74bf30a..0000000 --- a/monitoring/abra.sh +++ /dev/null @@ -1,9 +0,0 @@ -export PROMETHEUS_YML_VERSION=v2 -export PROMTAIL_YML_VERSION=v1 -export LOKI_YML_VERSION=v1 -export NODE_EXPORTER_ENTRYPOINT_VERSION=v1 -export GRAFANA_DATASOURCES_YML_VERSION=v1 -export GRAFANA_DASHBOARDS_YML_VERSION=v1 -export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1 -export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1 -export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1 diff --git a/monitoring/compose.yml b/monitoring/compose.yml index cb2faa0..a99cbc5 100644 --- a/monitoring/compose.yml +++ b/monitoring/compose.yml @@ -3,7 +3,7 @@ version: "3.8" services: app: - image: grafana/grafana + image: grafana/grafana:8.4.4 volumes: - grafana-data:/var/lib/grafana:rw configs: @@ -19,6 
+19,7 @@ services: target: /var/lib/grafana/dashboards/traefik.json networks: - proxy + - internal environment: - GF_SMTP_HOST - GF_SMTP_ENABLED @@ -42,38 +43,16 @@ services: retries: 3 start_period: 10s - loki: - image: grafana/loki:2.0.0 - command: -config.file=/etc/loki/local-config.yaml - networks: - - loki - - api - deploy: - endpoint_mode: dnsrr - ports: - - target: 3100 - published: 3100 - protocol: tcp - mode: host - configs: - - source: loki_yml - target: /etc/loki/local-config.yaml - volumes: - - loki-data:/loki - prometheus: - image: prom/prometheus:latest + image: prom/prometheus:v2.34.0 volumes: - prometheus-data:/prometheus:rw configs: - source: prometheus_yml target: /etc/prometheus/prometheus.yml networks: - api: - aliases: - - prometheus_api - exporters: ~ - proxy: ~ + - proxy + - internal deploy: restart_policy: condition: on-failure @@ -91,14 +70,31 @@ services: retries: 3 start_period: 10s + loki: + image: grafana/loki:2.0.0 + command: -config.file=/etc/loki/local-config.yaml + networks: + - internal + deploy: + endpoint_mode: dnsrr + ports: + - target: 3100 + published: 3100 + protocol: tcp + mode: host + configs: + - source: loki_yml + target: /etc/loki/local-config.yaml + volumes: + - loki-data:/loki + secrets: + - loki_aws_secret_access_key + configs: prometheus_yml: template_driver: golang name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION} file: prometheus.yml.tmpl - promtail_yml: - name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION} - file: promtail.yml loki_yml: template_driver: golang name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION} @@ -127,3 +123,9 @@ volumes: networks: proxy: external: true + internal: + +secrets: + loki_aws_secret_access_key: + external: true + name: ${STACK_NAME}_loki_aws_secret_access_key_${SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION} diff --git a/monitoring/env b/monitoring/env new file mode 100644 index 0000000..8d90a3c --- /dev/null +++ b/monitoring/env @@ -0,0 +1,41 @@ +TYPE=monitoring + 
+GRAFANA_DOMAIN=g.monitor.autonomic.zone +PROMETHEUS_DOMAIN=p.monitor.autonomic.zone + +LETS_ENCRYPT_ENV=production + +# Edit this in order to allow collection of traefik metrics +#TRAEFIK_METRICS_ENABLED=1 +#TRAEFIK_SERVICE_NAME=traefik_app + +# grafana SMTP configuration (optional) +#GF_SMTP_HOST=changeme +#GF_SMTP_ENABLED=1 +#GF_SMTP_FROM_ADDRESS=grafana@example.com +#GF_SMTP_SKIP_VERIFY=1 + +# Additional grafana settings (unlikely to require editing) +GF_SECURITY_ALLOW_EMBEDDING=1 +GF_INSTALL_PLUGINS=grafana-piechart-panel +GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN} + +# Loki stores logs in object storage, fill these up with your +# minio configuration (or any s3-compatible object store) +LOKI_AWS_ENDPOINT=https://minio.autonomic.zone +LOKI_AWS_REGION=eu-west-1 +LOKI_ACCESS_KEY_ID=bush-debrief-approval-robust-scraggly-molecule +LOKI_BUCKET_NAMES=loki + +# NOTE(d1): abra.sh env vars, while we deploy things manually +PROMETHEUS_YML_VERSION=v2 +PROMTAIL_YML_VERSION=v1 +LOKI_YML_VERSION=v1 +NODE_EXPORTER_ENTRYPOINT_VERSION=v1 +GRAFANA_DATASOURCES_YML_VERSION=v1 +GRAFANA_DASHBOARDS_YML_VERSION=v1 +GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1 +GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1 +GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1 + +SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION=v1 diff --git a/monitoring/grafana-dashboards.yml b/monitoring/grafana-dashboards.yml index 8411cca..73888c4 100644 --- a/monitoring/grafana-dashboards.yml +++ b/monitoring/grafana-dashboards.yml @@ -1,3 +1,4 @@ +--- apiVersion: 1 providers: diff --git a/monitoring/grafana-datasources.yml b/monitoring/grafana-datasources.yml index a6361fc..21e1a7b 100644 --- a/monitoring/grafana-datasources.yml +++ b/monitoring/grafana-datasources.yml @@ -1,3 +1,4 @@ +--- apiVersion: 1 datasources: diff --git a/monitoring/loki.yml.tmpl b/monitoring/loki.yml.tmpl index a899331..12987ab 100644 --- a/monitoring/loki.yml.tmpl +++ b/monitoring/loki.yml.tmpl @@ -55,7 +55,7 @@ storage_config: endpoint: {{ env LOKI_AWS_ENDPOINT 
}} region: {{ env LOKI_AWS_REGION }} access_key_id: {{ env LOKI_ACCESS_KEY_ID }} - secret_access_key: {{ env LOKI_SECRET_ACCESS_KEY }} + secret_access_key: {{ secret "loki_aws_secret_access_key" }} bucketnames: {{ env_LOKI_BUCKET_NAMES }} insecure: false sse_encryption: false diff --git a/monitoring/node-exporter-entrypoint.sh b/monitoring/node-exporter-entrypoint.sh deleted file mode 100644 index 72e2f0b..0000000 --- a/monitoring/node-exporter-entrypoint.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -e - -NODE_NAME=$(cat /etc/nodename) -mkdir -p /etc/node-exporter -echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom - -set -- /bin/node_exporter "$@" - -exec "$@" diff --git a/monitoring/prometheus.yml.tmpl b/monitoring/prometheus.yml.tmpl index 2547b27..eccae65 100644 --- a/monitoring/prometheus.yml.tmpl +++ b/monitoring/prometheus.yml.tmpl @@ -25,34 +25,3 @@ scrape_configs: static_configs: - targets: - localhost:9090 - - # http://node_exporter:9100/metrics - - job_name: node-exporter - scrape_interval: 10s - metrics_path: "/metrics" - dns_sd_configs: - - names: - - 'tasks.node_exporter' - type: 'A' - port: 9100 - - - - job_name: 'cadvisor' - scrape_interval: 30s - metrics_path: '/metrics' - dns_sd_configs: - - names: - - 'tasks.cadvisor' - type: 'A' - port: 8080 - -{{ if eq (env "TRAEFIK_METRICS_ENABLED") "1" }} - - job_name: 'traefik' - scrape_interval: 30s - metrics_path: '/metrics' - dns_sd_configs: - - names: - - 'tasks.{{ (env "TRAEFIK_SERVICE_NAME") }}' - type: 'A' - port: 8082 -{{ end }} diff --git a/monitoring/promtail.yml b/monitoring/promtail.yml index ed06e8c..30a5996 100644 --- a/monitoring/promtail.yml +++ b/monitoring/promtail.yml @@ -1,3 +1,4 @@ +--- server: http_listen_port: 9080 grpc_listen_port: 0