Initial commit
This commit is contained in:
commit
0f748dd02a
4 changed files with 136 additions and 0 deletions
49
defaults/main.yml
Normal file
49
defaults/main.yml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
---
# Default variables for the Alertmanager role.
#
# prometheus_alertmanager.args   - command-line flags. The service-config
#                                  template emits every key as "--<key>" and,
#                                  when the value is truthy, appends
#                                  "='<value>'".
# prometheus_alertmanager.config - serialized with to_nice_yaml and written to
#                                  /etc/prometheus/alertmanager.yml.
prometheus_alertmanager:
  args:
    "web.listen-address": "[::1]:9093"
  config:
    global:
      # The smarthost and SMTP sender used for mail notifications.
      # NOTE(review): assumes a local MTA listening on port 25; override
      # this when mail is relayed through another host.
      smtp_smarthost: 'localhost:25'
      smtp_from: 'alertmanager@{{ inventory_hostname }}'
    templates:
      - '/etc/prometheus/alertmanager_templates/*.tmpl'
    route:
      # The labels by which incoming alerts are grouped together. For
      # example, multiple alerts coming in for cluster=A and
      # alertname=LatencyHigh would be batched into a single group.
      group_by: ['alertname', 'cluster', 'service']
      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' before sending the initial notification. This
      # ensures that alerts for the same group that start firing shortly
      # after one another are batched together in the first notification.
      group_wait: 30s
      # Once the first notification has been sent, wait 'group_interval'
      # before sending a batch of new alerts that started firing for that
      # group.
      group_interval: 5m
      # If an alert has successfully been sent, wait 'repeat_interval'
      # before resending it.
      repeat_interval: 3h
      # The default receiver.
      receiver: mail-default
      # All the above attributes are inherited by all child routes and can
      # be overwritten on each.
      # The child route trees.
      routes: []
    # Inhibition rules allow muting a set of alerts while another alert is
    # firing. We use this to mute any warning-level notifications if the
    # same alert is already critical.
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        # Apply inhibition only if these labels are equal on both alerts.
        equal: ['alertname', 'cluster', 'service']
    receivers:
      - name: 'blackhole'
      - name: 'mail-default'
        email_configs:
          - to: 'root@localhost'
            # send_resolved is a per-notifier option, so it belongs on the
            # email config entry rather than on the receiver itself.
            send_resolved: true
|
||||
4
handlers/main.yml
Normal file
4
handlers/main.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
---
# Handler notified by the tasks that write Alertmanager's service
# configuration and its main configuration file.
- name: restart alertmanager
  service:
    state: restarted
    name: prometheus-alertmanager
|
||||
18
tasks/main.yml
Normal file
18
tasks/main.yml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
---
# Installs the prometheus-alertmanager package and writes its two
# configuration files. Either file changing notifies the
# "restart alertmanager" handler.

- name: install alertmanager
  apt:
    pkg: prometheus-alertmanager

# Renders the daemon's command-line arguments from the
# prometheus-alertmanager.j2 template.
- name: write alertmanager service config
  template:
    src: prometheus-alertmanager.j2
    dest: /etc/default/prometheus-alertmanager
  notify: restart alertmanager

# Serializes prometheus_alertmanager.config as YAML into the main
# configuration file.
- name: write alertmanager config
  copy:
    content: "{{ prometheus_alertmanager.config|to_nice_yaml(indent=2) }}"
    dest: /etc/prometheus/alertmanager.yml
    owner: root
    group: root
    # Quoted so the mode is passed as an octal string and is never
    # reinterpreted as a plain integer by the YAML parser.
    mode: "0644"
  notify: restart alertmanager
|
||||
65
templates/prometheus-alertmanager.j2
Normal file
65
templates/prometheus-alertmanager.j2
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
{#- Builds ARGS from prometheus_alertmanager.args: every key is emitted as a
    "--key" flag and, when its value is truthy, followed by ='value'. -#}
# Set the command-line arguments to pass to the server.
ARGS="{% for flag, value in prometheus_alertmanager.args.items() %} --{{ flag }}{% if value %}='{{ value }}'{% endif %} {% endfor %}"
|
||||
|
||||
# The alert manager supports the following options:
|
||||
|
||||
# --config.file="/etc/prometheus/alertmanager.yml"
|
||||
# Alertmanager configuration file name.
|
||||
# --storage.path="/var/lib/prometheus/alertmanager/"
|
||||
# Base path for data storage.
|
||||
# --data.retention=120h
|
||||
# How long to keep data for.
|
||||
# --alerts.gc-interval=30m
|
||||
# Interval between alert GC.
|
||||
# --log.level=info
|
||||
# Only log messages with the given severity or above.
|
||||
# --web.external-url=WEB.EXTERNAL-URL
|
||||
# The URL under which Alertmanager is externally reachable (for example,
|
||||
# if Alertmanager is served via a reverse proxy). Used for generating
|
||||
# relative and absolute links back to Alertmanager itself. If the URL has
|
||||
# a path portion, it will be used to prefix all HTTP endpoints served by
|
||||
# Alertmanager. If omitted, relevant URL components will be derived
|
||||
# automatically.
|
||||
# --web.route-prefix=WEB.ROUTE-PREFIX
|
||||
# Prefix for the internal routes of web endpoints. Defaults to path of
|
||||
# --web.external-url.
|
||||
# --web.listen-address=":9093"
|
||||
# Address to listen on for the web interface and API.
|
||||
# --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
|
||||
# Path to static UI directory.
|
||||
# --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
|
||||
# Path to default notification template.
|
||||
# --cluster.listen-address="0.0.0.0:9094"
|
||||
# Listen address for cluster.
|
||||
# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
|
||||
# Explicit address to advertise in cluster.
|
||||
# --cluster.peer=CLUSTER.PEER ...
|
||||
# Initial peers (may be repeated).
|
||||
# --cluster.peer-timeout=15s
|
||||
# Time to wait between peers to send notifications.
|
||||
# --cluster.gossip-interval=200ms
|
||||
# Interval between sending gossip messages. By lowering this value (more
|
||||
# frequent) gossip messages are propagated across the cluster more
|
||||
# quickly at the expense of increased bandwidth.
|
||||
# --cluster.pushpull-interval=1m0s
|
||||
# Interval for gossip state syncs. Setting this interval lower (more
|
||||
# frequent) will increase convergence speeds across larger clusters at
|
||||
# the expense of increased bandwidth usage.
|
||||
# --cluster.tcp-timeout=10s Timeout for establishing a stream connection
|
||||
# with a remote node for a full state sync, and for stream read and write
|
||||
# operations.
|
||||
# --cluster.probe-timeout=500ms
|
||||
# Timeout to wait for an ack from a probed node before assuming it is
|
||||
# unhealthy. This should be set to 99-percentile of RTT (round-trip time)
|
||||
# on your network.
|
||||
# --cluster.probe-interval=1s
|
||||
# Interval between random node probes. Setting this lower (more frequent)
|
||||
# will cause the cluster to detect failed nodes more quickly at the
|
||||
# expense of increased bandwidth usage.
|
||||
# --cluster.settle-timeout=1m0s
|
||||
# Maximum time to wait for cluster connections to settle before
|
||||
# evaluating notifications.
|
||||
# --cluster.reconnect-interval=10s
|
||||
# Interval between attempting to reconnect to lost peers.
|
||||
# --cluster.reconnect-timeout=6h0m0s
|
||||
# Length of time to attempt to reconnect to a lost peer.
|
||||
Loading…
Add table
Add a link
Reference in a new issue