Compare commits
16 Commits
f080ffd38a
...
main
Author | SHA1 | Date |
---|---|---|
3wc | 02427c526a | |
3wc | ca3d131c6b | |
3wc | 4dd02ceba3 | |
3wc | d65b8cab7e | |
decentral1se | bc708f87e9 | |
decentral1se | 339b090570 | |
3wc | f8f2dc5a2d | |
3wc | 127cc1d601 | |
mirsal | c68bdc911b | |
mirsal | b592c3c0fd | |
mirsal | 688e6c35d3 | |
mirsal | aa6ad85f4b | |
mirsal | bc7048d45d | |
mirsal | 8cd59e5d21 | |
mirsal | c4614ad874 | |
mirsal | ba3c204351 |
24
.drone.yml
24
.drone.yml
|
@ -3,10 +3,12 @@ kind: pipeline
|
|||
name: deploy to swarm-test.autonomic.zone
|
||||
steps:
|
||||
- name: deployment
|
||||
image: decentral1se/stack-ssh-deploy:latest
|
||||
image: git.coopcloud.tech/coop-cloud/stack-ssh-deploy:latest
|
||||
settings:
|
||||
host: swarm-test.autonomic.zone
|
||||
stack: monitoring
|
||||
networks:
|
||||
- proxy
|
||||
purge: true
|
||||
deploy_key:
|
||||
from_secret: drone_ssh_swarm_test
|
||||
|
@ -34,23 +36,17 @@ trigger:
|
|||
|
||||
---
|
||||
kind: pipeline
|
||||
name: recipe release
|
||||
name: generate recipe catalogue
|
||||
steps:
|
||||
- name: release a new version
|
||||
image: decentral1se/drone-abra:latest
|
||||
settings:
|
||||
command: recipe monitoring release
|
||||
deploy_key:
|
||||
from_secret: abra_bot_deploy_key
|
||||
|
||||
- name: trigger downstream builds
|
||||
image: plugins/downstream
|
||||
settings:
|
||||
server: https://drone.autonomic.zone
|
||||
server: https://build.coopcloud.tech
|
||||
token:
|
||||
from_secret: decentral1se_token
|
||||
from_secret: drone_abra-bot_token
|
||||
fork: true
|
||||
repositories:
|
||||
- coop-cloud/auto-apps-json
|
||||
depends_on:
|
||||
- release a new version
|
||||
- coop-cloud/auto-recipes-catalogue-json
|
||||
|
||||
trigger:
|
||||
event: tag
|
||||
|
|
33
.env.sample
33
.env.sample
|
@ -5,16 +5,25 @@ PROMETHEUS_DOMAIN=prometheus.example.com
|
|||
|
||||
LETS_ENCRYPT_ENV=production
|
||||
|
||||
#GF_SMTP_HOST
|
||||
#GF_SMTP_ENABLED
|
||||
#GF_SMTP_FROM_ADDRESS
|
||||
#GF_SMTP_SKIP_VERIFY
|
||||
#GF_SECURITY_ALLOW_EMBEDDING
|
||||
#GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||
#GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||
# Uncomment and edit these to allow collection of Traefik metrics
|
||||
#TRAEFIK_METRICS_ENABLED=1
|
||||
#TRAEFIK_SERVICE_NAME=traefik_app
|
||||
|
||||
LOKI_AWS_ENDPOINT
|
||||
LOKI_AWS_REGION
|
||||
LOKI_ACCESS_KEY_ID
|
||||
LOKI_SECRET_ACCESS_KEY
|
||||
LOKI_BUCKET_NAMES
|
||||
# grafana SMTP configuration (optional)
|
||||
#GF_SMTP_HOST=changeme
|
||||
#GF_SMTP_ENABLED=1
|
||||
#GF_SMTP_FROM_ADDRESS=grafana@example.com
|
||||
#GF_SMTP_SKIP_VERIFY=1
|
||||
|
||||
# Additional grafana settings (unlikely to require editing)
|
||||
GF_SECURITY_ALLOW_EMBEDDING=1
|
||||
GF_INSTALL_PLUGINS=grafana-piechart-panel
|
||||
GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
|
||||
|
||||
# Loki stores logs in object storage; fill these in with your
|
||||
# minio configuration (or any s3-compatible object store)
|
||||
#LOKI_AWS_ENDPOINT=https://minio/
|
||||
#LOKI_AWS_REGION=eu-west-1
|
||||
#LOKI_ACCESS_KEY_ID=
|
||||
#LOKI_SECRET_ACCESS_KEY=
|
||||
#LOKI_BUCKET_NAMES=loki
|
||||
|
|
20
README.md
20
README.md
|
@ -3,14 +3,14 @@
|
|||
A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
||||
|
||||
<!-- metadata -->
|
||||
* **Category**: Monitoring
|
||||
* **Status**: ❷💛
|
||||
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki)
|
||||
* **Healthcheck**: ❶💚
|
||||
* **Backups**: ❌
|
||||
* **Email**: ❶💚
|
||||
* **Tests**: ❌
|
||||
* **SSO**: ❸🍎
|
||||
* **Category**: Utilities
|
||||
* **Status**: 2, beta
|
||||
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki), upstream
|
||||
* **Healthcheck**: 3
|
||||
* **Backups**: No
|
||||
* **Email**: 3
|
||||
* **Tests**: No
|
||||
* **SSO**: 1
|
||||
<!-- endmetadata -->
|
||||
|
||||
## Basic usage
|
||||
|
@ -20,9 +20,9 @@ A server and application monitoring stack based on Prometheus, Loki and Grafana.
|
|||
3. Deploy [`coop-cloud/minio`]
|
||||
4. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
|
||||
to save secrets in `pass`)
|
||||
5. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
|
||||
5. `abra app config YOURAPPDOMAIN` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
|
||||
your Docker swarm box
|
||||
6. `abra app YOURAPPDOMAIN deploy`
|
||||
6. `abra app deploy YOURAPPDOMAIN`
|
||||
7. Open the configured domain in your browser to finish set-up
|
||||
|
||||
[`abra`]: https://git.autonomic.zone/autonomic-cooperative/abra
|
||||
|
|
7
abra.sh
7
abra.sh
|
@ -1,4 +1,9 @@
|
|||
export PROMETHEUS_YML_VERSION=v1
|
||||
export PROMETHEUS_YML_VERSION=v2
|
||||
export PROMTAIL_YML_VERSION=v1
|
||||
export LOKI_YML_VERSION=v1
|
||||
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1
|
||||
export GRAFANA_DATASOURCES_YML_VERSION=v1
|
||||
export GRAFANA_DASHBOARDS_YML_VERSION=v1
|
||||
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1
|
||||
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
|
||||
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1
|
||||
|
|
75
compose.yml
75
compose.yml
|
@ -13,22 +13,22 @@ services:
|
|||
- /:/rootfs:ro
|
||||
- /etc/hostname:/etc/nodename
|
||||
command:
|
||||
- '--path.sysfs=/host/sys'
|
||||
- '--path.procfs=/host/proc'
|
||||
- '--path.rootfs=/rootfs'
|
||||
- '--collector.textfile.directory=/etc/node-exporter/'
|
||||
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
|
||||
- '--no-collector.ipvs'
|
||||
- "--path.sysfs=/host/sys"
|
||||
- "--path.procfs=/host/proc"
|
||||
- "--path.rootfs=/rootfs"
|
||||
- "--collector.textfile.directory=/etc/node-exporter/"
|
||||
- "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
|
||||
- "--no-collector.ipvs"
|
||||
configs:
|
||||
- source: node_exporter_entrypoint_sh
|
||||
target: /entrypoint.sh
|
||||
entrypoint: ['/bin/sh', '-e', '/entrypoint.sh']
|
||||
entrypoint: ["/bin/sh", "-e", "/entrypoint.sh"]
|
||||
networks:
|
||||
- exporters
|
||||
deploy:
|
||||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
|
||||
|
||||
cadvisor:
|
||||
image: google/cadvisor:latest
|
||||
command: -logtostderr -docker_only
|
||||
|
@ -74,7 +74,7 @@ services:
|
|||
mode: global
|
||||
endpoint_mode: dnsrr
|
||||
|
||||
prometheus:
|
||||
app:
|
||||
image: prom/prometheus:latest
|
||||
volumes:
|
||||
- prometheus-data:/prometheus:rw
|
||||
|
@ -92,26 +92,33 @@ services:
|
|||
condition: on-failure
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.${STACK_NAME}-prometheus.loadbalancer.server.port=9090"
|
||||
- "traefik.http.services.${STACK_NAME}_prometheus.loadbalancer.server.port=9090"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.rule=Host(`${PROMETHEUS_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||
- "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:9090"]
|
||||
interval: 30s
|
||||
test: "wget -q http://localhost:9090/graph -O/dev/null"
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 1m
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
volumes:
|
||||
- grafana-data:/var/lib/grafana:rw
|
||||
configs:
|
||||
- source: grafana_datasources_yml
|
||||
target: /etc/grafana/provisioning/datasources/datasources.yml
|
||||
- source: grafana_dashboards_yml
|
||||
target: /etc/grafana/provisioning/dashboards/dashboards.yml
|
||||
- source: grafana_swarm_dashboard_json
|
||||
target: /var/lib/grafana/dashboards/docker-swarm-nodes.json
|
||||
- source: grafana_stacks_dashboard_json
|
||||
target: /var/lib/grafana/dashboards/docker-swarm-stacks.json
|
||||
- source: grafana_traefik_dashboard_json
|
||||
target: /var/lib/grafana/dashboards/traefik.json
|
||||
networks:
|
||||
- api
|
||||
- proxy
|
||||
|
@ -130,16 +137,19 @@ services:
|
|||
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
|
||||
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.rule=Host(`${GRAFANA_DOMAIN}`)"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
|
||||
- "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
|
||||
healthcheck:
|
||||
test: "wget -q http://localhost:3000/ -O/dev/null"
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 10s
|
||||
|
||||
configs:
|
||||
prometheus_yml:
|
||||
template_driver: golang
|
||||
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
|
||||
file: prometheus.yml
|
||||
file: prometheus.yml.tmpl
|
||||
promtail_yml:
|
||||
name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION}
|
||||
file: promtail.yml
|
||||
|
@ -147,6 +157,21 @@ configs:
|
|||
template_driver: golang
|
||||
name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
|
||||
file: loki.yml.tmpl
|
||||
grafana_datasources_yml:
|
||||
name: ${STACK_NAME}_grafana_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
|
||||
file: grafana-datasources.yml
|
||||
grafana_dashboards_yml:
|
||||
name: ${STACK_NAME}_grafana_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
|
||||
file: grafana-dashboards.yml
|
||||
grafana_swarm_dashboard_json:
|
||||
name: ${STACK_NAME}_grafana_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
|
||||
file: grafana-swarm-dashboard.json
|
||||
grafana_stacks_dashboard_json:
|
||||
name: ${STACK_NAME}_grafana_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
|
||||
file: grafana-stacks-dashboard.json
|
||||
grafana_traefik_dashboard_json:
|
||||
name: ${STACK_NAME}_grafana_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
|
||||
file: grafana-traefik-dashboard.json
|
||||
node_exporter_entrypoint_sh:
|
||||
name: ${STACK_NAME}_node_exporter_entrypoint_${NODE_EXPORTER_ENTRYPOINT_VERSION}
|
||||
file: node-exporter-entrypoint.sh
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'default-dashboard-provider'
|
||||
orgId: 1
|
||||
folder: 'default-dashboards'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: true
|
|
@ -0,0 +1,10 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: false
|
|
@ -0,0 +1,745 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Simple docker swarm monitoring with relation to individual stacks.",
|
||||
"editable": true,
|
||||
"gnetId": 7007,
|
||||
"graphTooltip": 1,
|
||||
"id": 2,
|
||||
"iteration": 1626744694222,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 1,
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 10,
|
||||
"interval": null,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"mean"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.0.6",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(time() - min(container_start_time_seconds{container_label_com_docker_stack_namespace=~\"$stack\"}))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Stack Uptime",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus",
|
||||
"decimals": null,
|
||||
"fill": 5,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 20,
|
||||
"x": 4,
|
||||
"y": 0
|
||||
},
|
||||
"height": "270px",
|
||||
"hiddenSeries": false,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"hideEmpty": false,
|
||||
"hideZero": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "8.0.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
|
||||
"refId": "A",
|
||||
"step": 2
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "CPU Usage per Container",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"cacheTimeout": null,
|
||||
"datasource": "Prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"text": "N/A"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 3
|
||||
},
|
||||
"id": 3,
|
||||
"interval": null,
|
||||
"links": [],
|
||||
"maxDataPoints": 100,
|
||||
"options": {
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"text": {},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "8.0.6",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "count(rate(container_last_seen{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval]))",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"refId": "B",
|
||||
"step": 4
|
||||
}
|
||||
],
|
||||
"title": "Containers",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus",
|
||||
"fill": 3,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"height": "270px",
|
||||
"hiddenSeries": false,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"hideZero": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "8.0.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": true,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(container_memory_rss{container_label_com_docker_stack_namespace=~\"$stack\"}) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
|
||||
"metric": "container_memory_rss",
|
||||
"refId": "A",
|
||||
"step": 2
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Memory Usage per Container",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"height": "270px",
|
||||
"hiddenSeries": false,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"hideEmpty": false,
|
||||
"hideZero": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "null as zero",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "8.0.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
|
||||
"refId": "A",
|
||||
"step": 2
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Incoming Network Traffic per Container",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus",
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"height": "270px",
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": true,
|
||||
"hideEmpty": false,
|
||||
"hideZero": true,
|
||||
"max": true,
|
||||
"min": true,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"sort": "current",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"percentage": false,
|
||||
"pluginVersion": "8.0.6",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
|
||||
"refId": "A",
|
||||
"step": 2
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Outgoing Network Traffic per Container",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "Bps",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"schemaVersion": 30,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"prometheus",
|
||||
"cAdvisor",
|
||||
"node-exporter",
|
||||
"alertmanager"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "pretix",
|
||||
"value": "pretix"
|
||||
},
|
||||
"datasource": "Prometheus",
|
||||
"definition": "",
|
||||
"description": null,
|
||||
"error": null,
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Stack",
|
||||
"multi": false,
|
||||
"name": "stack",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "query_result(count(container_last_seen{container_label_com_docker_stack_namespace =~\".+\"}) by (container_label_com_docker_stack_namespace))",
|
||||
"refId": "Prometheus-stack-Variable-Query"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "/container_label_com_docker_stack_namespace=\"(.*)\"/",
|
||||
"skipUrlSync": false,
|
||||
"sort": 2,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"auto": true,
|
||||
"auto_count": 50,
|
||||
"auto_min": "50s",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "auto",
|
||||
"value": "$__auto_interval_interval"
|
||||
},
|
||||
"description": null,
|
||||
"error": null,
|
||||
"hide": 0,
|
||||
"label": "Interval",
|
||||
"name": "interval",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "auto",
|
||||
"value": "$__auto_interval_interval"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "30s",
|
||||
"value": "30s"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "1m",
|
||||
"value": "1m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "2m",
|
||||
"value": "2m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "3m",
|
||||
"value": "3m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "7m",
|
||||
"value": "7m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "10m",
|
||||
"value": "10m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "30m",
|
||||
"value": "30m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "1h",
|
||||
"value": "1h"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "6h",
|
||||
"value": "6h"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "12h",
|
||||
"value": "12h"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "1d",
|
||||
"value": "1d"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "7d",
|
||||
"value": "7d"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "14d",
|
||||
"value": "14d"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "30d",
|
||||
"value": "30d"
|
||||
}
|
||||
],
|
||||
"query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
|
||||
"refresh": 2,
|
||||
"skipUrlSync": false,
|
||||
"type": "interval"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-30m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "Swarm Stack Monitoring",
|
||||
"uid": "KdVoGQm7z",
|
||||
"version": 1
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -46,11 +46,13 @@ scrape_configs:
|
|||
type: 'A'
|
||||
port: 8080
|
||||
|
||||
{{ if eq (env "TRAEFIK_METRICS_ENABLED") "1" }}
|
||||
- job_name: 'traefik'
|
||||
scrape_interval: 30s
|
||||
metrics_path: '/metrics'
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- 'tasks.traefik_app'
|
||||
- 'tasks.{{ (env "TRAEFIK_SERVICE_NAME") }}'
|
||||
type: 'A'
|
||||
port: 8082
|
||||
{{ end }}
|
Loading…
Reference in New Issue