Add Alertmanager to the monitoring stack.

Adds an `alertmanager` service with an SMTP mail receiver, wires Prometheus
to it at alertmanager:9093, and documents the new SMTP password secret.
Note: prometheus.yml.tmpl no longer lists any rule_files, so alerting rules
must be added separately before any alert can fire.

diff --git a/monitoring/README.md b/monitoring/README.md
index b9a0631..4bc1202 100644
--- a/monitoring/README.md
+++ b/monitoring/README.md
@@ -13,6 +13,8 @@ printf <...> | docker secret create gp_monitoring_grafana_oauth_client_secret_v1
 pwgen -s 64 1; ./scripts/genpw.py # input password & get hashed output for secret
 printf <...> | docker secret create gp_monitoring_prometheus_admin_password_v1 -
 
+printf <...> | docker secret create gp_monitoring_alertmanager_smtp_password_v1 -
+
 set -a && source env && set +a
 docker context use monitor.autonomic.zone
 
diff --git a/monitoring/alertmanager.yml.tmpl b/monitoring/alertmanager.yml.tmpl
new file mode 100644
index 0000000..7dee9f2
--- /dev/null
+++ b/monitoring/alertmanager.yml.tmpl
@@ -0,0 +1,19 @@
+# Alertmanager configuration, rendered by Docker's golang config template
+# driver (env values come from the service environment, the password from a
+# Docker secret). Templated values are quoted so characters that are special
+# in YAML cannot change how the rendered file parses.
+global:
+  smtp_from: '{{ env "SMTP_FROM" }}'
+  smtp_smarthost: '{{ env "SMTP_HOST" }}'
+  smtp_auth_username: '{{ env "SMTP_FROM" }}'
+  smtp_auth_password: '{{ secret "alertmanager_smtp_password" }}'
+
+# Alertmanager refuses to start without a root route; send everything to the
+# single mail receiver.
+route:
+  receiver: "kaboom mailer"
+
+receivers:
+  - name: "kaboom mailer"
+    email_configs:
+      - to: '{{ env "SMTP_TO" }}'
diff --git a/monitoring/compose.yml b/monitoring/compose.yml
index 636cde7..2b64ea0 100644
--- a/monitoring/compose.yml
+++ b/monitoring/compose.yml
@@ -83,6 +83,28 @@ services:
         - "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
         - "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
 
+  # Alertmanager: Prometheus targets it as alertmanager:9093 (see
+  # prometheus.yml.tmpl); it mails alerts using the SMTP_* settings from `env`.
+  alertmanager:
+    image: prom/alertmanager:v0.23.0
+    volumes:
+      # Persist runtime state; the mount point must match --storage.path below.
+      - alertmanager-data:/alertmanager
+    command:
+      - "--config.file=/etc/alertmanager/config.yml"
+      - "--storage.path=/alertmanager"
+    networks:
+      - internal
+    secrets:
+      - alertmanager_smtp_password
+    configs:
+      - source: alertmanager_config
+        target: /etc/alertmanager/config.yml
+    environment:
+      - SMTP_FROM
+      - SMTP_HOST
+      - SMTP_TO
+
   loki:
     image: grafana/loki:2.0.0
     command: -config.file=/etc/loki/local-config.yaml
@@ -125,6 +147,10 @@ configs:
     template_driver: golang
     name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
     file: loki.yml.tmpl
+  alertmanager_config:
+    template_driver: golang
+    name: ${STACK_NAME}_alertmanager_config_${ALERTMANAGER_CONFIG_VERSION}
+    file: alertmanager.yml.tmpl
   grafana_datasources_yml:
     name: ${STACK_NAME}_grafana_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
     file: grafana-datasources.yml
@@ -145,6 +171,7 @@ volumes:
   prometheus-data:
   grafana-data:
   loki-data:
+  alertmanager-data:
 
 networks:
   proxy:
@@ -164,3 +191,6 @@ secrets:
   prometheus_admin_password:
     external: true
     name: ${STACK_NAME}_prometheus_admin_password_${SECRET_PROMETHEUS_ADMIN_PASSWORD_VERSION}
+  alertmanager_smtp_password:
+    external: true
+    name: ${STACK_NAME}_alertmanager_smtp_password_${SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION}
diff --git a/monitoring/env b/monitoring/env
index 3360c27..4c2f9c9 100644
--- a/monitoring/env
+++ b/monitoring/env
@@ -41,6 +41,7 @@ GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
 GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1
 GRAFANA_CUSTOM_INI_VERSION=v1
 PROMETHEUS_WEB_YML_VERSION=v1
+ALERTMANAGER_CONFIG_VERSION=v1
 
 KEYCLOAK_AUTH_URL="https://id.autonomic.zone/auth/realms/autonomic/protocol/openid-connect/auth"
 KEYCLOAK_API_URL="https://id.autonomic.zone/auth/realms/autonomic/protocol/openid-connect/userinfo"
@@ -50,3 +51,8 @@ SECRET_LOKI_AWS_SECRET_ACCESS_KEY_VERSION=v1
 SECRET_GRAFANA_ADMIN_PASSWORD_VERSION=v1
 SECRET_GRAFANA_OAUTH_CLIENT_SECRET_VERSION=v1
 SECRET_PROMETHEUS_ADMIN_PASSWORD_VERSION=v1
+SECRET_ALERTMANAGER_SMTP_PASSWORD_VERSION=v1
+
+SMTP_FROM=noreply@autonomic.zone
+SMTP_HOST=mail.gandi.net:465
+SMTP_TO=kaboom@autonomic.zone
diff --git a/monitoring/prometheus.yml.tmpl b/monitoring/prometheus.yml.tmpl
index eccae65..a701f55 100644
--- a/monitoring/prometheus.yml.tmpl
+++ b/monitoring/prometheus.yml.tmpl
@@ -1,27 +1,15 @@
 global:
-  scrape_interval: 30s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
-  evaluation_interval: 30s # Evaluate rules every 15 seconds. The default is every 1 minute.
-  # scrape_timeout is set to the global default (10s).
+  scrape_interval: 30s
+  evaluation_interval: 30s
 
-# Alertmanager configuration
 alerting:
   alertmanagers:
   - static_configs:
     - targets:
-      # - alertmanager:9093
+      - alertmanager:9093
 
-# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
-rule_files:
-  # - "first_rules.yml"
-  # - "second_rules.yml"
-
-# A scrape configuration containing exactly one endpoint to scrape:
-# Here it's Prometheus itself.
 scrape_configs:
-  # The job name is added as a label `job=` to any timeseries scraped from this config.
   - job_name: 'prometheus'
-    # metrics_path defaults to '/metrics'
-    # scheme defaults to 'http'.
     static_configs:
       - targets:
         - localhost:9090