add uptime alert
This commit is contained in:
parent
3e3f45958c
commit
57e6238d72
1 changed file with 62 additions and 50 deletions
|
|
@@ -1,4 +1,53 @@
|
||||||
prometheus_agent:
|
prometheus_agent:
|
||||||
|
tls:
|
||||||
|
mode: stunnel
|
||||||
|
manage: yes
|
||||||
|
pki: ~
|
||||||
|
agents:
|
||||||
|
nodeexporter:
|
||||||
|
args:
|
||||||
|
'web.listen-address': "[::1]:9100"
|
||||||
|
proxy:
|
||||||
|
mappings: {}
|
||||||
|
blackbox:
|
||||||
|
enable: False
|
||||||
|
args:
|
||||||
|
"web.listen-address": "[::1]:9115"
|
||||||
|
"config.file": "/etc/prometheus/blackbox.yml"
|
||||||
|
config:
|
||||||
|
modules:
|
||||||
|
http_2xx:
|
||||||
|
prober: http
|
||||||
|
http:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
http_post_2xx:
|
||||||
|
prober: http
|
||||||
|
http:
|
||||||
|
method: POST
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
tcp_connect:
|
||||||
|
prober: tcp
|
||||||
|
tcp:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
ssh_banner:
|
||||||
|
prober: tcp
|
||||||
|
tcp:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
query_response:
|
||||||
|
- expect: "^SSH-2.0-"
|
||||||
|
icmp:
|
||||||
|
icmp:
|
||||||
|
preferred_ip_protocol: ip4
|
||||||
|
prober: icmp
|
||||||
|
jobs: {}
|
||||||
|
promtail:
|
||||||
|
enable: False
|
||||||
|
scrape_timeout: ~
|
||||||
|
scrape_interval: ~
|
||||||
|
metrics_path: ~
|
||||||
|
scrapers: {}
|
||||||
|
ansible_groups_as_labels: True
|
||||||
|
labels: {}
|
||||||
alerts:
|
alerts:
|
||||||
NodeDown:
|
NodeDown:
|
||||||
group: nodeexporter
|
group: nodeexporter
|
||||||
|
|
@@ -198,53 +247,16 @@ prometheus_agent:
|
||||||
annotations:
|
annotations:
|
||||||
title: '{%raw%}{{ $labels.instance }}: Not all systemd services are running{%endraw%}'
|
title: '{%raw%}{{ $labels.instance }}: Not all systemd services are running{%endraw%}'
|
||||||
description: "Service not running"
|
description: "Service not running"
|
||||||
|
Uptime:
|
||||||
tls:
|
group: nodeexporter
|
||||||
mode: stunnel
|
enabled: True
|
||||||
manage: yes
|
alert: uptime
|
||||||
pki: ~
|
expr: |
|
||||||
agents:
|
sum(node_time_seconds{job="node", instance="{{ inventory_hostname }}"}) -
|
||||||
nodeexporter:
|
sum(node_boot_time_seconds{job="node", instance="{{ inventory_hostname }}"}) > 3600*24*120
|
||||||
args:
|
for: 5m
|
||||||
'web.listen-address': "[::1]:9100"
|
labels:
|
||||||
proxy:
|
severity: critical
|
||||||
mappings: {}
|
annotations:
|
||||||
blackbox:
|
title: '{%raw%}{{ $labels.instance }}: Uptime{%endraw%}'
|
||||||
enable: False
|
description: "Uptime is more than 120 days, please reboot soon"
|
||||||
args:
|
|
||||||
"web.listen-address": "[::1]:9115"
|
|
||||||
"config.file": "/etc/prometheus/blackbox.yml"
|
|
||||||
config:
|
|
||||||
modules:
|
|
||||||
http_2xx:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
http_post_2xx:
|
|
||||||
prober: http
|
|
||||||
http:
|
|
||||||
method: POST
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
tcp_connect:
|
|
||||||
prober: tcp
|
|
||||||
tcp:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
ssh_banner:
|
|
||||||
prober: tcp
|
|
||||||
tcp:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
query_response:
|
|
||||||
- expect: "^SSH-2.0-"
|
|
||||||
icmp:
|
|
||||||
icmp:
|
|
||||||
preferred_ip_protocol: ip4
|
|
||||||
prober: icmp
|
|
||||||
jobs: {}
|
|
||||||
promtail:
|
|
||||||
enable: False
|
|
||||||
scrape_timeout: ~
|
|
||||||
scrape_interval: ~
|
|
||||||
metrics_path: ~
|
|
||||||
scrapers: {}
|
|
||||||
ansible_groups_as_labels: True
|
|
||||||
labels: {}
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue