Initial packaging of a prometheus / loki / grafana monitoring system
This is a work in progress
This commit is contained in:
parent
2cbc1a6de1
commit
d5607c39cd
14
.env.sample
14
.env.sample
@ -1,6 +1,14 @@
|
||||
TYPE=monitoring
|
||||
|
||||
DOMAIN=monitoring.example.com
|
||||
## Domain aliases
|
||||
#EXTRA_DOMAINS=', `www.monitoring.example.com`'
|
||||
GRAFANA_DOMAIN=grafana.example.com
|
||||
PROMETHEUS_DOMAIN=prometheus.example.com
|
||||
|
||||
LETS_ENCRYPT_ENV=production
|
||||
|
||||
#GF_SMTP_HOST
|
||||
#GF_SMTP_ENABLED
|
||||
#GF_SMTP_FROM_ADDRESS
|
||||
#GF_SMTP_SKIP_VERIFY
|
||||
#GF_SECURITY_ALLOW_EMBEDDING
|
||||
#GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||
#GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||
|
18
README.md
18
README.md
@ -3,14 +3,14 @@
|
||||
A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
||||
|
||||
<!-- metadata -->
|
||||
* **Category**:
|
||||
* **Status**:
|
||||
* **Image**: [`monitoring`](https://hub.docker.com/r/monitoring/monitoring)
|
||||
* **Healthcheck**:
|
||||
* **Backups**:
|
||||
* **Email**:
|
||||
* **Tests**:
|
||||
* **SSO**:
|
||||
* **Category**: Monitoring
|
||||
* **Status**: ❷💛
|
||||
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki)
|
||||
* **Healthcheck**: ❶💚
|
||||
* **Backups**: ❌
|
||||
* **Email**: ❶💚
|
||||
* **Tests**: ❌
|
||||
* **SSO**: ❸🍎
|
||||
<!-- endmetadata -->
|
||||
|
||||
## Basic usage
|
||||
@ -19,7 +19,7 @@ A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
||||
2. Deploy [`coop-cloud/traefik`]
|
||||
3. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
|
||||
to save secrets in `pass`)
|
||||
4. `abra app YOURAPPDOMAIN config` - be sure to change `DOMAIN` to something that resolves to
|
||||
4. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
|
||||
your Docker swarm box
|
||||
5. `abra app YOURAPPDOMAIN deploy`
|
||||
6. Open the configured domain in your browser to finish set-up
|
||||
|
4
abra.sh
Normal file
4
abra.sh
Normal file
@ -0,0 +1,4 @@
|
||||
export PROMETHEUS_YML_VERSION=v1
|
||||
export PROMTAIL_YML_VERSION=v1
|
||||
export LOKI_YML_VERSION=v1
|
||||
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1
|
161
compose.yml
161
compose.yml
@ -2,30 +2,167 @@
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
app:
|
||||
image: nginx:1.19.2
|
||||
node_exporter:
|
||||
image: prom/node-exporter:v1.0.1
|
||||
user: root
|
||||
environment:
|
||||
- NODE_ID={{.Node.ID}}
|
||||
volumes:
|
||||
- /proc:/host/proc:ro
|
||||
- /sys:/host/sys:ro
|
||||
- /:/rootfs:ro
|
||||
- /etc/hostname:/etc/nodename
|
||||
command:
|
||||
- '--path.sysfs=/host/sys'
|
||||
- '--path.procfs=/host/proc'
|
||||
- '--path.rootfs=/rootfs'
|
||||
- '--collector.textfile.directory=/etc/node-exporter/'
|
||||
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
|
||||
- '--no-collector.ipvs'
|
||||
configs:
|
||||
- source: node_exporter_entrypoint_sh
|
||||
target: /entrypoint.sh
|
||||
entrypoint: ['/bin/sh', '-e', '/entrypoint.sh']
|
||||
networks:
|
||||
- proxy
|
||||
- exporters
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
|
||||
cadvisor:
|
||||
image: google/cadvisor:latest
|
||||
command: -logtostderr -docker_only
|
||||
volumes:
|
||||
- /var/lib/docker/:/var/lib/docker
|
||||
- /dev/disk/:/dev/disk
|
||||
- /sys:/sys
|
||||
- /var/run:/var/run
|
||||
- /:/rootfs
|
||||
networks:
|
||||
- exporters
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
|
||||
loki:
|
||||
image: grafana/loki:2.0.0
|
||||
command: -config.file=/etc/loki/local-config.yaml
|
||||
networks:
|
||||
- loki
|
||||
- api
|
||||
deploy:
|
||||
endpoint_mode: dnsrr
|
||||
ports:
|
||||
- target: 3100
|
||||
published: 3100
|
||||
protocol: tcp
|
||||
mode: host
|
||||
configs:
|
||||
- source: loki_yml
|
||||
target: /etc/loki/local-config.yaml
|
||||
volumes:
|
||||
- loki-data:/loki
|
||||
|
||||
promtail:
|
||||
image: grafana/promtail:2.0.0
|
||||
volumes:
|
||||
- /var/log:/var/log
|
||||
command: -config.file=/etc/promtail/config.yml
|
||||
networks:
|
||||
- loki
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
volumes:
|
||||
- prometheus-data:/prometheus:rw
|
||||
configs:
|
||||
- source: prometheus_yml
|
||||
target: /etc/prometheus/prometheus.yml
|
||||
networks:
|
||||
api:
|
||||
aliases:
|
||||
- prometheus_api
|
||||
exporters: ~
|
||||
proxy: ~
|
||||
deploy:
|
||||
restart_policy:
|
||||
condition: on-failure
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.${STACK_NAME}.loadbalancer.server.port=80"
|
||||
- "traefik.http.routers.${STACK_NAME}.rule=Host(`${DOMAIN}`${EXTRA_DOMAINS})"
|
||||
- "traefik.http.routers.${STACK_NAME}.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
## Redirect from EXTRA_DOMAINS to DOMAIN
|
||||
#- "traefik.http.routers.${STACK_NAME}.middlewares=${STACK_NAME}-redirect"
|
||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLForceHost=true"
|
||||
#- "traefik.http.middlewares.${STACK_NAME}-redirect.headers.SSLHost=${DOMAIN}"
|
||||
- "traefik.http.services.${STACK_NAME}-prometheus.loadbalancer.server.port=9090"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
|
||||
# certresolver belongs to the "tls" section of the prometheus router declared above;
# the router name must match so the certificate resolver is attached to that router.
- "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost"]
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9090"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 1m
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana:rw
|
||||
networks:
|
||||
- api
|
||||
- proxy
|
||||
environment:
|
||||
- GF_SMTP_HOST
|
||||
- GF_SMTP_ENABLED
|
||||
- GF_SMTP_FROM_ADDRESS
|
||||
- GF_SMTP_SKIP_VERIFY
|
||||
- GF_SECURITY_ALLOW_EMBEDDING
|
||||
- GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||
- GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||
deploy:
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.${STACK_NAME}-grafana.loadbalancer.server.port=3000"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
|
||||
# certresolver belongs to the "tls" section of the grafana router declared above;
# the router name must match so the certificate resolver is attached to that router.
- "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||
|
||||
configs:
|
||||
prometheus_yml:
|
||||
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
|
||||
file: prometheus.yml
|
||||
promtail_yml:
|
||||
name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION}
|
||||
file: promtail.yml
|
||||
loki_yml:
|
||||
name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
|
||||
file: loki.yml
|
||||
node_exporter_entrypoint_sh:
|
||||
name: ${STACK_NAME}_node_exporter_entrypoint_${NODE_EXPORTER_ENTRYPOINT_VERSION}
|
||||
file: node-exporter-entrypoint.sh
|
||||
|
||||
volumes:
|
||||
prometheus-data:
|
||||
grafana-data:
|
||||
loki-data:
|
||||
|
||||
networks:
|
||||
api:
|
||||
driver: overlay
|
||||
exporters:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
loki:
|
||||
driver: overlay
|
||||
attachable: true
|
||||
proxy:
|
||||
external: true
|
||||
|
71
loki.yml
Normal file
71
loki.yml
Normal file
@ -0,0 +1,71 @@
|
||||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
|
||||
distributor:
|
||||
ring:
|
||||
kvstore:
|
||||
store: memberlist
|
||||
|
||||
ingester:
|
||||
lifecycler:
|
||||
ring:
|
||||
kvstore:
|
||||
store: memberlist
|
||||
replication_factor: 1
|
||||
final_sleep: 0s
|
||||
chunk_idle_period: 5m
|
||||
chunk_retain_period: 30s
|
||||
|
||||
memberlist:
|
||||
abort_if_cluster_join_fails: false
|
||||
|
||||
# Expose this port on all distributor, ingester
|
||||
# and querier replicas.
|
||||
bind_port: 7946
|
||||
|
||||
# You can use a headless k8s service for all distributor,
|
||||
# ingester and querier components.
|
||||
join_members:
|
||||
- loki:7946
|
||||
|
||||
max_join_backoff: 1m
|
||||
max_join_retries: 10
|
||||
min_join_backoff: 1s
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-11-25
|
||||
store: boltdb-shipper
|
||||
object_store: aws
|
||||
schema: v11
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
|
||||
storage_config:
|
||||
boltdb_shipper:
|
||||
active_index_directory: /loki/index
|
||||
cache_location: /loki/index_cache
|
||||
resync_interval: 5s
|
||||
shared_store: aws
|
||||
|
||||
aws:
|
||||
endpoint:
|
||||
region:
|
||||
access_key_id:
|
||||
secret_access_key:
|
||||
bucketnames:
|
||||
insecure: false
|
||||
sse_encryption: false
|
||||
http_config:
|
||||
idle_conn_timeout: 90s
|
||||
response_header_timeout: 0s
|
||||
insecure_skip_verify: false
|
||||
s3forcepathstyle: true
|
||||
|
||||
limits_config:
|
||||
enforce_metric_name: false
|
||||
reject_old_samples: true
|
||||
reject_old_samples_max_age: 168h
|
9
node-exporter-entrypoint.sh
Normal file
9
node-exporter-entrypoint.sh
Normal file
@ -0,0 +1,9 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
NODE_NAME=$(cat /etc/nodename)
|
||||
mkdir -p /etc/node-exporter
|
||||
echo "node_meta{node_id=\"$NODE_ID\", container_label_com_docker_swarm_node_id=\"$NODE_ID\", node_name=\"$NODE_NAME\"} 1" > /etc/node-exporter/node-meta.prom
|
||||
|
||||
set -- /bin/node_exporter "$@"
|
||||
|
||||
exec "$@"
|
56
prometheus.yml
Normal file
56
prometheus.yml
Normal file
@ -0,0 +1,56 @@
|
||||
global:
|
||||
scrape_interval: 30s # Scrape targets every 30 seconds. Default is every 1 minute.
|
||||
evaluation_interval: 30s # Evaluate rules every 30 seconds. The default is every 1 minute.
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Alertmanager configuration
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets:
|
||||
# - alertmanager:9093
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
|
||||
# Scrape configurations. The first job scrapes Prometheus itself;
|
||||
# the jobs after it (node-exporter, cadvisor, traefik) find their targets through DNS service discovery.
|
||||
scrape_configs:
|
||||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
|
||||
- job_name: 'prometheus'
|
||||
# metrics_path defaults to '/metrics'
|
||||
# scheme defaults to 'http'.
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
# http://node_exporter:9100/metrics
|
||||
- job_name: node-exporter
|
||||
scrape_interval: 10s
|
||||
metrics_path: "/metrics"
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- 'tasks.node_exporter'
|
||||
type: 'A'
|
||||
port: 9100
|
||||
|
||||
|
||||
- job_name: 'cadvisor'
|
||||
scrape_interval: 30s
|
||||
metrics_path: '/metrics'
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- 'tasks.cadvisor'
|
||||
type: 'A'
|
||||
port: 8080
|
||||
|
||||
- job_name: 'traefik'
|
||||
scrape_interval: 30s
|
||||
metrics_path: '/metrics'
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- 'tasks.traefik_app'
|
||||
type: 'A'
|
||||
port: 8082
|
18
promtail.yml
Normal file
18
promtail.yml
Normal file
@ -0,0 +1,18 @@
|
||||
server:
|
||||
http_listen_port: 9080
|
||||
grpc_listen_port: 0
|
||||
|
||||
positions:
|
||||
filename: /tmp/positions.yaml
|
||||
|
||||
clients:
|
||||
- url: http://loki:3100/loki/api/v1/push
|
||||
|
||||
scrape_configs:
|
||||
- job_name: system
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost
|
||||
labels:
|
||||
job: varlogs
|
||||
__path__: /var/log/*log
|
Loading…
x
Reference in New Issue
Block a user