Compare commits

...

16 Commits

Author SHA1 Message Date
3wc 02427c526a Switch to self-hosted stack-ssh-deploy image [mass update] 2023-01-21 11:49:56 -08:00
3wc ca3d131c6b Fix CI by adding networks: [mass update] 2023-01-20 11:58:41 -08:00
3wc 4dd02ceba3 Automatically generate catalogue on release [mass update]
Re: coop-cloud/recipes-catalogue-json#4
2023-01-20 10:27:11 -08:00
3wc d65b8cab7e Update abra syntax in examples (finally) [mass update] 2023-01-19 16:02:28 -08:00
decentral1se bc708f87e9
autoformatter 2021-12-21 00:59:02 +01:00
decentral1se 339b090570
use convention 2021-12-21 00:58:57 +01:00
3wc f8f2dc5a2d Change category
[ci skip]
2021-11-23 13:43:29 +02:00
3wc 127cc1d601 Goodbye, emojis! 😢
[ci skip]
2021-11-23 12:19:05 +02:00
mirsal c68bdc911b Provision a traefik dashboard in grafana 2021-08-10 01:16:08 +00:00
mirsal b592c3c0fd .env.sample: Comment out loki config 2021-08-10 01:07:44 +00:00
mirsal 688e6c35d3 prometheus: Enable collection of traefik metrics 2021-08-10 01:06:41 +00:00
mirsal aa6ad85f4b .env.sample: Add comments and more helpful defaults 2021-08-09 22:25:14 +00:00
mirsal bc7048d45d Provision a docker swarm stack dashboard in grafana 2021-07-20 01:38:04 +00:00
mirsal 8cd59e5d21 Provision a docker swarm node dashboard in grafana 2021-07-20 01:20:53 +00:00
mirsal c4614ad874 Add a prometheus datasource to grafana 2021-07-20 01:02:49 +00:00
mirsal ba3c204351 fix: Update traefik labels and healthchecks
* Remove unnecessary traefik labels
* Fix typos in traefik labels
* Fix prometheus healthcheck command
* Use more sensible values for prometheus healthchecks
* Add healthchecks to the grafana service
2021-07-20 00:35:33 +00:00
11 changed files with 4133 additions and 63 deletions

View File

@ -3,10 +3,12 @@ kind: pipeline
name: deploy to swarm-test.autonomic.zone
steps:
- name: deployment
image: decentral1se/stack-ssh-deploy:latest
image: git.coopcloud.tech/coop-cloud/stack-ssh-deploy:latest
settings:
host: swarm-test.autonomic.zone
stack: monitoring
networks:
- proxy
purge: true
deploy_key:
from_secret: drone_ssh_swarm_test
@ -34,23 +36,17 @@ trigger:
---
kind: pipeline
name: recipe release
name: generate recipe catalogue
steps:
- name: release a new version
image: decentral1se/drone-abra:latest
settings:
command: recipe monitoring release
deploy_key:
from_secret: abra_bot_deploy_key
- name: trigger downstream builds
image: plugins/downstream
settings:
server: https://drone.autonomic.zone
server: https://build.coopcloud.tech
token:
from_secret: decentral1se_token
from_secret: drone_abra-bot_token
fork: true
repositories:
- coop-cloud/auto-apps-json
depends_on:
- release a new version
- coop-cloud/auto-recipes-catalogue-json
trigger:
event: tag

View File

@ -5,16 +5,25 @@ PROMETHEUS_DOMAIN=prometheus.example.com
LETS_ENCRYPT_ENV=production
#GF_SMTP_HOST
#GF_SMTP_ENABLED
#GF_SMTP_FROM_ADDRESS
#GF_SMTP_SKIP_VERIFY
#GF_SECURITY_ALLOW_EMBEDDING
#GF_INSTALL_PLUGINS=grafana-piechart-panel
#GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
# Uncomment and edit these to enable collection of traefik metrics
#TRAEFIK_METRICS_ENABLED=1
#TRAEFIK_SERVICE_NAME=traefik_app
LOKI_AWS_ENDPOINT
LOKI_AWS_REGION
LOKI_ACCESS_KEY_ID
LOKI_SECRET_ACCESS_KEY
LOKI_BUCKET_NAMES
# grafana SMTP configuration (optional)
#GF_SMTP_HOST=changeme
#GF_SMTP_ENABLED=1
#GF_SMTP_FROM_ADDRESS=grafana@example.com
#GF_SMTP_SKIP_VERIFY=1
# Additional grafana settings (unlikely to require editing)
GF_SECURITY_ALLOW_EMBEDDING=1
GF_INSTALL_PLUGINS=grafana-piechart-panel
GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}
# Loki stores logs in object storage; fill these in with your
# minio configuration (or that of any S3-compatible object store)
#LOKI_AWS_ENDPOINT=https://minio/
#LOKI_AWS_REGION=eu-west-1
#LOKI_ACCESS_KEY_ID=
#LOKI_SECRET_ACCESS_KEY=
#LOKI_BUCKET_NAMES=loki

View File

@ -3,14 +3,14 @@
A server and application monitoring stack based on Prometheus, Loki and Grafana.
<!-- metadata -->
* **Category**: Monitoring
* **Status**: ❷💛
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki)
* **Healthcheck**: ❶💚
* **Backups**:
* **Email**: ❶💚
* **Tests**:
* **SSO**: ❸🍎
* **Category**: Utilities
* **Status**: 2, beta
* **Images**: [`prom/prometheus`](https://hub.docker.com/r/prom/prometheus) [`grafana/grafana`](https://hub.docker.com/r/grafana/grafana) [`grafana/loki`](https://hub.docker.com/r/grafana/loki), upstream
* **Healthcheck**: 3
* **Backups**: No
* **Email**: 3
* **Tests**: No
* **SSO**: 1
<!-- endmetadata -->
## Basic usage
@ -20,9 +20,9 @@ A server and application monitoring stack based on Prometheus, Loki and Grafana.
3. Deploy [`coop-cloud/minio`]
4. `abra app new monitoring --secrets` (optionally with `--pass` if you'd like
to save secrets in `pass`)
5. `abra app YOURAPPDOMAIN config` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
5. `abra app config YOURAPPDOMAIN` - be sure to change `GRAFANA_DOMAIN` and `PROMETHEUS_DOMAIN` to something that resolves to
your Docker swarm box
6. `abra app YOURAPPDOMAIN deploy`
6. `abra app deploy YOURAPPDOMAIN`
7. Open the configured domain in your browser to finish set-up
[`abra`]: https://git.autonomic.zone/autonomic-cooperative/abra

View File

@ -1,4 +1,9 @@
export PROMETHEUS_YML_VERSION=v1
export PROMETHEUS_YML_VERSION=v2
export PROMTAIL_YML_VERSION=v1
export LOKI_YML_VERSION=v1
export NODE_EXPORTER_ENTRYPOINT_VERSION=v1
export GRAFANA_DATASOURCES_YML_VERSION=v1
export GRAFANA_DASHBOARDS_YML_VERSION=v1
export GRAFANA_SWARM_DASHBOARD_JSON_VERSION=v1
export GRAFANA_STACKS_DASHBOARD_JSON_VERSION=v1
export GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION=v1

View File

@ -13,22 +13,22 @@ services:
- /:/rootfs:ro
- /etc/hostname:/etc/nodename
command:
- '--path.sysfs=/host/sys'
- '--path.procfs=/host/proc'
- '--path.rootfs=/rootfs'
- '--collector.textfile.directory=/etc/node-exporter/'
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
- '--no-collector.ipvs'
- "--path.sysfs=/host/sys"
- "--path.procfs=/host/proc"
- "--path.rootfs=/rootfs"
- "--collector.textfile.directory=/etc/node-exporter/"
- "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)"
- "--no-collector.ipvs"
configs:
- source: node_exporter_entrypoint_sh
target: /entrypoint.sh
entrypoint: ['/bin/sh', '-e', '/entrypoint.sh']
entrypoint: ["/bin/sh", "-e", "/entrypoint.sh"]
networks:
- exporters
deploy:
mode: global
endpoint_mode: dnsrr
cadvisor:
image: google/cadvisor:latest
command: -logtostderr -docker_only
@ -74,7 +74,7 @@ services:
mode: global
endpoint_mode: dnsrr
prometheus:
app:
image: prom/prometheus:latest
volumes:
- prometheus-data:/prometheus:rw
@ -92,26 +92,33 @@ services:
condition: on-failure
labels:
- "traefik.enable=true"
- "traefik.http.services.${STACK_NAME}-prometheus.loadbalancer.server.port=9090"
- "traefik.http.services.${STACK_NAME}_prometheus.loadbalancer.server.port=9090"
- "traefik.http.routers.${STACK_NAME}-prometheus.rule=Host(`${PROMETHEUS_DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-prometheus.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-prometheus.tls=true"
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.rule=Host(`${PROMETHEUS_DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-prometheus-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
- "traefik.http.routers.${STACK_NAME}-prometheus.tls.certresolver=${LETS_ENCRYPT_ENV}"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9090"]
interval: 30s
test: "wget -q http://localhost:9090/graph -O/dev/null"
interval: 5s
timeout: 10s
retries: 10
start_period: 1m
retries: 3
start_period: 10s
grafana:
image: grafana/grafana
volumes:
- grafana-data:/var/lib/grafana:rw
configs:
- source: grafana_datasources_yml
target: /etc/grafana/provisioning/datasources/datasources.yml
- source: grafana_dashboards_yml
target: /etc/grafana/provisioning/dashboards/dashboards.yml
- source: grafana_swarm_dashboard_json
target: /var/lib/grafana/dashboards/docker-swarm-nodes.json
- source: grafana_stacks_dashboard_json
target: /var/lib/grafana/dashboards/docker-swarm-stacks.json
- source: grafana_traefik_dashboard_json
target: /var/lib/grafana/dashboards/traefik.json
networks:
- api
- proxy
@ -130,16 +137,19 @@ services:
- "traefik.http.routers.${STACK_NAME}-grafana.rule=Host(`${GRAFANA_DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-grafana.entrypoints=web-secure"
- "traefik.http.routers.${STACK_NAME}-grafana.tls=true"
- "traefik.http.routers.${STACK_NAME}-tls.certresolver=${LETS_ENCRYPT_ENV}"
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.scheme=https"
- "traefik.http.middlewares.${STACK_NAME}-http-to-https.redirectscheme.permanent=true"
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.rule=Host(`${GRAFANA_DOMAIN}`)"
- "traefik.http.routers.${STACK_NAME}-grafana-redirect.middlewares=${STACK_NAME}-http-to-https@docker"
- "traefik.http.routers.${STACK_NAME}-grafana.tls.certresolver=${LETS_ENCRYPT_ENV}"
healthcheck:
test: "wget -q http://localhost:3000/ -O/dev/null"
interval: 5s
timeout: 10s
retries: 3
start_period: 10s
configs:
prometheus_yml:
template_driver: golang
name: ${STACK_NAME}_prometheus_yml_${PROMETHEUS_YML_VERSION}
file: prometheus.yml
file: prometheus.yml.tmpl
promtail_yml:
name: ${STACK_NAME}_promtail_yml_${PROMTAIL_YML_VERSION}
file: promtail.yml
@ -147,6 +157,21 @@ configs:
template_driver: golang
name: ${STACK_NAME}_loki_yml_${LOKI_YML_VERSION}
file: loki.yml.tmpl
grafana_datasources_yml:
name: ${STACK_NAME}_grafana_datasources_yml_${GRAFANA_DATASOURCES_YML_VERSION}
file: grafana-datasources.yml
grafana_dashboards_yml:
name: ${STACK_NAME}_grafana_dashboards_yml_${GRAFANA_DASHBOARDS_YML_VERSION}
file: grafana-dashboards.yml
grafana_swarm_dashboard_json:
name: ${STACK_NAME}_grafana_swarm_dashboard_json_${GRAFANA_SWARM_DASHBOARD_JSON_VERSION}
file: grafana-swarm-dashboard.json
grafana_stacks_dashboard_json:
name: ${STACK_NAME}_grafana_stacks_dashboard_json_${GRAFANA_STACKS_DASHBOARD_JSON_VERSION}
file: grafana-stacks-dashboard.json
grafana_traefik_dashboard_json:
name: ${STACK_NAME}_grafana_traefik_dashboard_json_${GRAFANA_TRAEFIK_DASHBOARD_JSON_VERSION}
file: grafana-traefik-dashboard.json
node_exporter_entrypoint_sh:
name: ${STACK_NAME}_node_exporter_entrypoint_${NODE_EXPORTER_ENTRYPOINT_VERSION}
file: node-exporter-entrypoint.sh

13
grafana-dashboards.yml Normal file
View File

@ -0,0 +1,13 @@
# Grafana dashboard provisioning.
#
# Declares a single file-based provider that loads every dashboard JSON file
# found under /var/lib/grafana/dashboards (the directory the stack's dashboard
# configs are mounted into).
apiVersion: 1

providers:
  - name: 'default-dashboard-provider'
    orgId: 1
    # NOTE(review): Grafana's provisioning docs say `folder` should be left
    # unset when `foldersFromFilesStructure` is enabled (see `options` below).
    # Confirm which of the two behaviours is actually intended.
    folder: 'default-dashboards'
    type: file
    disableDeletion: false
    # Interval, in seconds, at which the dashboard directory is rescanned.
    updateIntervalSeconds: 10
    # NOTE(review): edits made in the UI are presumably overwritten when the
    # provisioned file changes - confirm this is acceptable.
    allowUiUpdates: true
    options:
      path: /var/lib/grafana/dashboards
      foldersFromFilesStructure: true

10
grafana-datasources.yml Normal file
View File

@ -0,0 +1,10 @@
# Grafana datasource provisioning.
#
# Registers the stack's Prometheus instance as the default datasource. The
# provisioned dashboards refer to it by the name "Prometheus", so the `name`
# below must not be changed without updating them.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    # "proxy": queries are issued by the Grafana server, not by the browser.
    access: proxy
    orgId: 1
    # NOTE(review): assumes a host called `prometheus` is resolvable from the
    # grafana container. Confirm this against the compose service name and
    # network aliases of the Prometheus service.
    url: http://prometheus:9090
    isDefault: true
    # Managed by provisioning only; not editable from the Grafana UI.
    editable: false

View File

@ -0,0 +1,745 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Simple docker swarm monitoring with relation to individual stacks.",
"editable": true,
"gnetId": 7007,
"graphTooltip": 1,
"id": 2,
"iteration": 1626744694222,
"links": [],
"panels": [
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 3,
"w": 4,
"x": 0,
"y": 0
},
"id": 10,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"mean"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.6",
"targets": [
{
"expr": "(time() - min(container_start_time_seconds{container_label_com_docker_stack_namespace=~\"$stack\"}))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A"
}
],
"title": "Stack Uptime",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"decimals": null,
"fill": 5,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 20,
"x": 4,
"y": 0
},
"height": "270px",
"hiddenSeries": false,
"id": 1,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.6",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_cpu_usage_seconds_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU Usage per Container",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"cacheTimeout": null,
"datasource": "Prometheus",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [
{
"options": {
"match": "null",
"result": {
"text": "N/A"
}
},
"type": "special"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 3
},
"id": 3,
"interval": null,
"links": [],
"maxDataPoints": 100,
"options": {
"colorMode": "none",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.0.6",
"targets": [
{
"expr": "count(rate(container_last_seen{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval]))",
"format": "time_series",
"intervalFactor": 2,
"refId": "B",
"step": 4
}
],
"title": "Containers",
"type": "stat"
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 3,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 7
},
"height": "270px",
"hiddenSeries": false,
"id": 5,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideZero": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.6",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_rss{container_label_com_docker_stack_namespace=~\"$stack\"}) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
"metric": "container_memory_rss",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory Usage per Container",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 14
},
"height": "270px",
"hiddenSeries": false,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null as zero",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.6",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_receive_bytes_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Incoming Network Traffic per Container",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 21
},
"height": "270px",
"hiddenSeries": false,
"id": 8,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": true,
"max": true,
"min": true,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.0.6",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_transmit_bytes_total{container_label_com_docker_stack_namespace=~\"$stack\"}[$interval])) by (container_label_com_docker_swarm_service_name, container_label_com_docker_swarm_task_id)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ container_label_com_docker_swarm_service_name }} - {{ container_label_com_docker_swarm_task_id }}",
"refId": "A",
"step": 2
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Outgoing Network Traffic per Container",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "Bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"schemaVersion": 30,
"style": "dark",
"tags": [
"prometheus",
"cAdvisor",
"node-exporter",
"alertmanager"
],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": false,
"text": "pretix",
"value": "pretix"
},
"datasource": "Prometheus",
"definition": "",
"description": null,
"error": null,
"hide": 0,
"includeAll": false,
"label": "Stack",
"multi": false,
"name": "stack",
"options": [],
"query": {
"query": "query_result(count(container_last_seen{container_label_com_docker_stack_namespace =~\".+\"}) by (container_label_com_docker_stack_namespace))",
"refId": "Prometheus-stack-Variable-Query"
},
"refresh": 1,
"regex": "/container_label_com_docker_stack_namespace=\"(.*)\"/",
"skipUrlSync": false,
"sort": 2,
"tagValuesQuery": "",
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"auto": true,
"auto_count": 50,
"auto_min": "50s",
"current": {
"selected": false,
"text": "auto",
"value": "$__auto_interval_interval"
},
"description": null,
"error": null,
"hide": 0,
"label": "Interval",
"name": "interval",
"options": [
{
"selected": true,
"text": "auto",
"value": "$__auto_interval_interval"
},
{
"selected": false,
"text": "30s",
"value": "30s"
},
{
"selected": false,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "2m",
"value": "2m"
},
{
"selected": false,
"text": "3m",
"value": "3m"
},
{
"selected": false,
"text": "5m",
"value": "5m"
},
{
"selected": false,
"text": "7m",
"value": "7m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
},
{
"selected": false,
"text": "1h",
"value": "1h"
},
{
"selected": false,
"text": "6h",
"value": "6h"
},
{
"selected": false,
"text": "12h",
"value": "12h"
},
{
"selected": false,
"text": "1d",
"value": "1d"
},
{
"selected": false,
"text": "7d",
"value": "7d"
},
{
"selected": false,
"text": "14d",
"value": "14d"
},
{
"selected": false,
"text": "30d",
"value": "30d"
}
],
"query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
"refresh": 2,
"skipUrlSync": false,
"type": "interval"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Swarm Stack Monitoring",
"uid": "KdVoGQm7z",
"version": 1
}

1729
grafana-swarm-dashboard.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -46,11 +46,13 @@ scrape_configs:
type: 'A'
port: 8080
{{ if eq (env "TRAEFIK_METRICS_ENABLED") "1" }}
- job_name: 'traefik'
scrape_interval: 30s
metrics_path: '/metrics'
dns_sd_configs:
- names:
- 'tasks.traefik_app'
- 'tasks.{{ (env "TRAEFIK_SERVICE_NAME") }}'
type: 'A'
port: 8082
{{ end }}