Initial packaging of a prometheus / loki / grafana monitoring system
This is a work in progress
This commit is contained in:
parent
2cbc1a6de1
commit
d5607c39cd
14
.env.sample
14
.env.sample
@ -1,6 +1,14 @@
|
|||||||
TYPE=monitoring
|
TYPE=monitoring
|
||||||
|
|
||||||
DOMAIN=monitoring.example.com
|
GRAFANA_DOMAIN=grafana.example.com
|
||||||
## Domain aliases
|
PROMETHEUS_DOMAIN=prometheus.example.com
|
||||||
#EXTRA_DOMAINS=', `www.monitoring.example.com`'
|
|
||||||
LETS_ENCRYPT_ENV=production
|
LETS_ENCRYPT_ENV=production
|
||||||
|
|
||||||
|
#GF_SMTP_HOST
|
||||||
|
#GF_SMTP_ENABLED
|
||||||
|
#GF_SMTP_FROM_ADDRESS
|
||||||
|
#GF_SMTP_SKIP_VERIFY
|
||||||
|
#GF_SECURITY_ALLOW_EMBEDDING
|
||||||
|
#GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||||
|
#GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||||
|
18
README.md
18
README.md
@ -3,14 +3,14 @@
|
|||||||
A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
||||||
|
|
||||||
<!-- metadata -->
|
<!-- metadata -->
|
||||||
* **Category**:
|
* **Category**: Monitoring
|
||||||
* **Status**:
|
* **Status**: ❷💛
|
||||||
* **Image**: [`monitoring`](https://hub.docker.com/r/monitoring/monitoring)
|
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki)
|
||||||
* **Healthcheck**:
|
* **Healthcheck**: ❶💚
|
||||||
* **Backups**:
|
* **Backups**: ❌
|
||||||
* **Email**:
|
* **Email**: ❶💚
|
||||||
* **Tests**:
|
* **Tests**: ❌
|
||||||
* **SSO**:
|
* **SSO**: ❸🍎
|
||||||
<!-- endmetadata -->
|
<!-- endmetadata -->
|
||||||
|
|
||||||
## Basic usage
|
## Basic usage
|
||||||
@ -19,7 +19,7 @@ A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
|||||||
2. Deploy [`coop-cloud/traefik`]
|
2. Deploy [`coop-cloud/traefik`]
|
||||||
3. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
|
3. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
|
||||||
to save secrets in `pass`)
|
to save secrets in `pass`)
|
||||||
4. `abra app YOURAPPDOMAIN config` - be sure to change `DOMAIN` to something that resolves to
|
4. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
|
||||||
your Docker swarm box
|
your Docker swarm box
|
||||||
5. `abra app YOURAPPDOMAIN deploy`
|
5. `abra app YOURAPPDOMAIN deploy`
|
||||||
6. Open the configured domain in your browser to finish set-up
|
6. Open the configured `GRAFANA_DOMAIN` (or `PROMETHEUS_DOMAIN`) in your browser to finish set-up
|
||||||
|
4
abra.sh
Normal file
4
abra.sh
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
export PROMETHEUS_YML_VERSION=v1
|
||||||
|
export PROMTAIL_YML_VERSION=v1
|
||||||
|
export LOKI_YML_VERSION=v1
|
||||||
|
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1
|
161
compose.yml
161
compose.yml
@ -2,30 +2,167 @@
|
|||||||
version: "3.8"
|
version: "3.8"
|
||||||
|
|
||||||
services:
|
services:
|
||||||
app:
|
node_exporter:
|
||||||
image: nginx:1.19.2
|
image: prom/node-exporter:v1.0.1
|
||||||
|
user: root
|
||||||
|
environment:
|
||||||
|
- NODE_ID={{.Node.ID}}
|
||||||
|
volumes:
|
||||||
|
- /proc:/host/proc:ro
|
||||||
|
- /sys:/host/sys:ro
|
||||||
|
- /:/rootfs:ro
|
||||||
|
- /etc/hostname:/etc/nodename
|
||||||
|
command:
|
||||||
|
- '--path.sysfs=/host/sys'
|
||||||
|
- '--path.procfs=/host/proc'
|
||||||
|
- '--path.rootfs=/rootfs'
|
||||||
|
- '--collector.textfile.directory=/etc/node-exporter/'
|
||||||
|
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
|
||||||
|
- '--no-collector.ipvs'
|
||||||
|
configs:
|
||||||
|
- source: node_exporter_entrypoint_sh
|
||||||
|
target: /entrypoint.sh
|
||||||
|
entrypoint: ['/bin/sh', '-e', '/entrypoint.sh']
|
||||||
networks:
|
networks:
|
||||||
- proxy
|
- exporters
|
||||||
|
deploy:
|
||||||
|
mode: global
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
|
||||||
|
cadvisor:
|
||||||
|
image: google/cadvisor:latest
|
||||||
|
command: -logtostderr -docker_only
|
||||||
|
volumes:
|
||||||
|
- /var/lib/docker/:/var/lib/docker
|
||||||
|
- /dev/disk/:/dev/disk
|
||||||
|
- /sys:/sys
|
||||||
|
- /var/run:/var/run
|
||||||
|
- /:/rootfs
|
||||||
|
networks:
|
||||||
|
- exporters
|
||||||
|
deploy:
|
||||||
|
mode: global
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
|
||||||
|
loki:
|
||||||
|
image: grafana/loki:2.0.0
|
||||||
|
command: -config.file=/etc/loki/local-config.yaml
|
||||||
|
networks:
|
||||||
|
- loki
|
||||||
|
- api
|
||||||
|
deploy:
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
ports:
|
||||||
|
- target: 3100
|
||||||
|
published: 3100
|
||||||
|
protocol: tcp
|
||||||
|
mode: host
|
||||||
|
configs:
|
||||||
|
- source: loki_yml
|
||||||
|
target: /etc/loki/local-config.yaml
|
||||||
|
volumes:
|
||||||
|
- loki-data:/loki
|
||||||
|
|
||||||
|
promtail:
|
||||||
|
image: grafana/promtail:2.0.0
|
||||||
|
volumes:
|
||||||
|
- /var/log:/var/log
|
||||||
|
command: -config.file=/etc/promtail/config.yml
|
||||||
|
networks:
|
||||||
|
- loki
|
||||||
|
deploy:
|
||||||
|
mode: global
|
||||||
|
endpoint_mode: dnsrr
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: prom/prometheus:latest
|
||||||
|
volumes:
|
||||||
|
- prometheus-data:/prometheus:rw
|
||||||
|
configs:
|
||||||
|
- source: prometheus_yml
|
||||||
|
target: /etc/prometheus/prometheus.yml
|
||||||
|
networks:
|
||||||
|
api:
|
||||||
|
aliases:
|
||||||
|
- prometheus_api
|
||||||
|
exporters: ~
|
||||||
|
proxy: ~
|
||||||
deploy:
|
deploy:
|
||||||
restart_policy:
|
restart_policy:
|
||||||
condition: on-failure
|
condition: on-failure
|
||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=80"
|
- "traefik.http.services.${STACK_NAME}-prometheus.loadbalancer.server.port=9090"
|
||||||
- "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})"
|
- "traefik.http.routers.${STACK_NAME}-prometheus.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||||
- "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
|
- "traefik.http.routers.${STACK_NAME}-prometheus.entrypoints=web-secure"
|
||||||
- "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
- "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
|
||||||
## Redirect from EXTRA_DOMAINS to DOMAIN
|
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||||
#- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
|
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
|
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
|
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost"]
|
test: ["CMD", "curl", "-f", "http://localhost:9090"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 10
|
retries: 10
|
||||||
start_period: 1m
|
start_period: 1m
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
image: grafana/grafana
|
||||||
|
volumes:
|
||||||
|
- grafana-data:/var/lib/grafana:rw
|
||||||
|
networks:
|
||||||
|
- api
|
||||||
|
- proxy
|
||||||
|
environment:
|
||||||
|
- GF_SMTP_HOST
|
||||||
|
- GF_SMTP_ENABLED
|
||||||
|
- GF_SMTP_FROM_ADDRESS
|
||||||
|
- GF_SMTP_SKIP_VERIFY
|
||||||
|
- GF_SECURITY_ALLOW_EMBEDDING
|
||||||
|
- GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||||
|
- GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||||
|
deploy:
|
||||||
|
labels:
|
||||||
|
- "traefik.enable=true"
|
||||||
|
- "traefik.http.services.${STACK_NAME}-grafana.loadbalancer.server.port=3000"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||||
|
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||||
|
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||||
|
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||||
|
|
||||||
|
configs:
|
||||||
|
prometheus_yml:
|
||||||
|
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
|
||||||
|
file: prometheus.yml
|
||||||
|
promtail_yml:
|
||||||
|
name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION}
|
||||||
|
file: promtail.yml
|
||||||
|
loki_yml:
|
||||||
|
name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
|
||||||
|
file: loki.yml
|
||||||
|
node_exporter_entrypoint_sh:
|
||||||
|
name: ${STACK_NAME}_node_exporter_entrypoint_${NODE_EXPORTER_ENTRYPOINT_VERSION}
|
||||||
|
file: node-exporter-entrypoint.sh
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
prometheus-data:
|
||||||
|
grafana-data:
|
||||||
|
loki-data:
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
|
api:
|
||||||
|
driver: overlay
|
||||||
|
exporters:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
|
loki:
|
||||||
|
driver: overlay
|
||||||
|
attachable: true
|
||||||
proxy:
|
proxy:
|
||||||
external: true
|
external: true
|
||||||
|
71
loki.yml
Normal file
71
loki.yml
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
auth_enabled: false
|
||||||
|
|
||||||
|
server:
|
||||||
|
http_listen_port: 3100
|
||||||
|
|
||||||
|
distributor:
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: memberlist
|
||||||
|
|
||||||
|
ingester:
|
||||||
|
lifecycler:
|
||||||
|
ring:
|
||||||
|
kvstore:
|
||||||
|
store: memberlist
|
||||||
|
replication_factor: 1
|
||||||
|
final_sleep: 0s
|
||||||
|
chunk_idle_period: 5m
|
||||||
|
chunk_retain_period: 30s
|
||||||
|
|
||||||
|
memberlist:
|
||||||
|
abort_if_cluster_join_fails: false
|
||||||
|
|
||||||
|
# Expose this port on all distributor, ingester
|
||||||
|
# and querier replicas.
|
||||||
|
bind_port: 7946
|
||||||
|
|
||||||
|
# You can use a headless k8s service for all distributor,
|
||||||
|
# ingester and querier components.
|
||||||
|
join_members:
|
||||||
|
- loki:7946
|
||||||
|
|
||||||
|
max_join_backoff: 1m
|
||||||
|
max_join_retries: 10
|
||||||
|
min_join_backoff: 1s
|
||||||
|
|
||||||
|
schema_config:
|
||||||
|
configs:
|
||||||
|
- from: 2020-11-25
|
||||||
|
store: boltdb-shipper
|
||||||
|
object_store: aws
|
||||||
|
schema: v11
|
||||||
|
index:
|
||||||
|
prefix: index_
|
||||||
|
period: 24h
|
||||||
|
|
||||||
|
storage_config:
|
||||||
|
boltdb_shipper:
|
||||||
|
active_index_directory: /loki/index
|
||||||
|
cache_location: /loki/index_cache
|
||||||
|
resync_interval: 5s
|
||||||
|
shared_store: aws
|
||||||
|
|
||||||
|
aws:
|
||||||
|
endpoint:
|
||||||
|
region:
|
||||||
|
access_key_id:
|
||||||
|
secret_access_key:
|
||||||
|
bucketnames:
|
||||||
|
insecure: false
|
||||||
|
sse_encryption: false
|
||||||
|
http_config:
|
||||||
|
idle_conn_timeout: 90s
|
||||||
|
response_header_timeout: 0s
|
||||||
|
insecure_skip_verify: false
|
||||||
|
s3forcepathstyle: true
|
||||||
|
|
||||||
|
limits_config:
|
||||||
|
enforce_metric_name: false
|
||||||
|
reject_old_samples: true
|
||||||
|
reject_old_samples_max_age: 168h
|
9
node-exporter-entrypoint.sh
Normal file
9
node-exporter-entrypoint.sh
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh -e
|
||||||
|
|
||||||
|
NODE_NAME=$(cat /etc/nodename)
|
||||||
|
mkdir -p /etc/node-exporter
|
||||||
|
echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom
|
||||||
|
|
||||||
|
set -- /bin/node_exporter "$@"
|
||||||
|
|
||||||
|
exec "$@"
|
56
prometheus.yml
Normal file
56
prometheus.yml
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 30s # Set the scrape interval to every 30 seconds. Default is every 1 minute.
|
||||||
|
evaluation_interval: 30s # Evaluate rules every 30 seconds. The default is every 1 minute.
|
||||||
|
# scrape_timeout is set to the global default (10s).
|
||||||
|
|
||||||
|
# Alertmanager configuration
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets:
|
||||||
|
# - alertmanager:9093
|
||||||
|
|
||||||
|
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||||
|
rule_files:
|
||||||
|
# - "first_rules.yml"
|
||||||
|
# - "second_rules.yml"
|
||||||
|
|
||||||
|
# Scrape configurations: one job per group of monitored targets.
|
||||||
|
# The first job scrapes Prometheus itself.
|
||||||
|
scrape_configs:
|
||||||
|
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||||
|
- job_name: 'prometheus'
|
||||||
|
# metrics_path defaults to '/metrics'
|
||||||
|
# scheme defaults to 'http'.
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- localhost:9090
|
||||||
|
|
||||||
|
# http://node_exporter:9100/metrics
|
||||||
|
- job_name: node-exporter
|
||||||
|
scrape_interval: 10s
|
||||||
|
metrics_path: "/metrics"
|
||||||
|
dns_sd_configs:
|
||||||
|
- names:
|
||||||
|
- 'tasks.node_exporter'
|
||||||
|
type: 'A'
|
||||||
|
port: 9100
|
||||||
|
|
||||||
|
|
||||||
|
- job_name: 'cadvisor'
|
||||||
|
scrape_interval: 30s
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
dns_sd_configs:
|
||||||
|
- names:
|
||||||
|
- 'tasks.cadvisor'
|
||||||
|
type: 'A'
|
||||||
|
port: 8080
|
||||||
|
|
||||||
|
- job_name: 'traefik'
|
||||||
|
scrape_interval: 30s
|
||||||
|
metrics_path: '/metrics'
|
||||||
|
dns_sd_configs:
|
||||||
|
- names:
|
||||||
|
- 'tasks.traefik_app'
|
||||||
|
type: 'A'
|
||||||
|
port: 8082
|
18
promtail.yml
Normal file
18
promtail.yml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
server:
|
||||||
|
http_listen_port: 9080
|
||||||
|
grpc_listen_port: 0
|
||||||
|
|
||||||
|
positions:
|
||||||
|
filename: /tmp/positions.yaml
|
||||||
|
|
||||||
|
clients:
|
||||||
|
- url: http://loki:3100/loki/api/v1/push
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: system
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- localhost
|
||||||
|
labels:
|
||||||
|
job: varlogs
|
||||||
|
__path__: /var/log/*log
|
Loading…
x
Reference in New Issue
Block a user