Initial commit
This commit is contained in:
commit
0f748dd02a
4 changed files with 136 additions and 0 deletions
49
defaults/main.yml
Normal file
49
defaults/main.yml
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
prometheus_alertmanager:
|
||||||
|
args:
|
||||||
|
"web.listen-address": "[::1]:9093"
|
||||||
|
config:
|
||||||
|
global:
|
||||||
|
# The smarthost and SMTP sender used for mail notifications.
|
||||||
|
smtp_from: 'alertmanager@{{ inventory_hostname }}'
|
||||||
|
templates:
|
||||||
|
- '/etc/prometheus/alertmanager_templates/*.tmpl'
|
||||||
|
route:
|
||||||
|
# The labels by which incoming alerts are grouped together. For example,
|
||||||
|
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
|
||||||
|
# be batched into a single group.
|
||||||
|
group_by: ['alertname', 'cluster', 'service']
|
||||||
|
# When a new group of alerts is created by an incoming alert, wait at
|
||||||
|
# least 'group_wait' to send the initial notification.
|
||||||
|
# This ensures that multiple alerts for the same group that start
|
||||||
|
# firing shortly after one another are batched together on the first
|
||||||
|
# notification.
|
||||||
|
group_wait: 30s
|
||||||
|
# When the first notification was sent, wait 'group_interval' to send a batch
|
||||||
|
# of new alerts that started firing for that group.
|
||||||
|
group_interval: 5m
|
||||||
|
# If an alert has successfully been sent, wait 'repeat_interval' to
|
||||||
|
# resend them.
|
||||||
|
repeat_interval: 3h
|
||||||
|
# A default receiver
|
||||||
|
receiver: mail-default
|
||||||
|
# All the above attributes are inherited by all child routes and can
|
||||||
|
# be overwritten on each.
|
||||||
|
# The child route trees.
|
||||||
|
routes: []
|
||||||
|
# Inhibition rules allow muting a set of alerts given that another alert is
|
||||||
|
# firing.
|
||||||
|
# We use this to mute any warning-level notifications if the same alert is
|
||||||
|
# already critical.
|
||||||
|
inhibit_rules:
|
||||||
|
- source_match:
|
||||||
|
severity: 'critical'
|
||||||
|
target_match:
|
||||||
|
severity: 'warning'
|
||||||
|
# Apply inhibition only if alertname, cluster and service are all the same.
|
||||||
|
equal: ['alertname', 'cluster', 'service']
|
||||||
|
receivers:
  # Receiver with no notification integrations configured.
  - name: "blackhole"
  # Default receiver referenced by route.receiver: mails root on this host.
  - name: 'mail-default'
    email_configs:
      - to: 'root@localhost'
        # send_resolved is an option of each email_configs entry, not of the
        # receiver itself, so it has to live here.
        send_resolved: true
|
||||||
4
handlers/main.yml
Normal file
4
handlers/main.yml
Normal file
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Handler: restarts the prometheus-alertmanager service. Triggered by tasks
# that declare `notify: restart alertmanager`.
- name: restart alertmanager
  service:
    state: restarted
    name: prometheus-alertmanager
|
||||||
18
tasks/main.yml
Normal file
18
tasks/main.yml
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Install the prometheus-alertmanager package through apt.
- name: install alertmanager
  apt:
    pkg: prometheus-alertmanager
    # `present` is the apt module's default state; spelled out for clarity.
    state: present
|
||||||
|
|
||||||
|
# Render the ARGS environment file for the service from the role template
# and restart alertmanager whenever the rendered file changes.
- name: write alertmanager service config
  template:
    src: prometheus-alertmanager.j2
    dest: /etc/default/prometheus-alertmanager
  notify: restart alertmanager
|
||||||
|
|
||||||
|
# Serialise prometheus_alertmanager.config to YAML and write it as the
# alertmanager configuration file; restart alertmanager when it changes.
- name: write alertmanager config
  copy:
    dest: /etc/prometheus/alertmanager.yml
    content: "{{ prometheus_alertmanager.config|to_nice_yaml(indent=2) }}"
    owner: root
    group: root
    # Quoted so the mode is passed as an octal string rather than being
    # subject to YAML integer typing.
    mode: "0644"
  notify: restart alertmanager
|
||||||
65
templates/prometheus-alertmanager.j2
Normal file
65
templates/prometheus-alertmanager.j2
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
# Set the command-line arguments to pass to the server.
|
||||||
|
# Each key of prometheus_alertmanager.args is rendered as "--<key>"; when its
# value is truthy and not an empty mapping, "='<value>'" is appended.
ARGS="{% for i in prometheus_alertmanager.args %} --{{ i }}{% if prometheus_alertmanager.args[i] and prometheus_alertmanager.args[i] != {} %}='{{ prometheus_alertmanager.args[i] }}'{% endif %} {% endfor %}"
|
||||||
|
|
||||||
|
# The alert manager supports the following options:
|
||||||
|
|
||||||
|
# --config.file="/etc/prometheus/alertmanager.yml"
|
||||||
|
# Alertmanager configuration file name.
|
||||||
|
# --storage.path="/var/lib/prometheus/alertmanager/"
|
||||||
|
# Base path for data storage.
|
||||||
|
# --data.retention=120h
|
||||||
|
# How long to keep data for.
|
||||||
|
# --alerts.gc-interval=30m
|
||||||
|
# Interval between alert GC.
|
||||||
|
# --log.level=info
|
||||||
|
# Only log messages with the given severity or above.
|
||||||
|
# --web.external-url=WEB.EXTERNAL-URL
|
||||||
|
# The URL under which Alertmanager is externally reachable (for example,
|
||||||
|
# if Alertmanager is served via a reverse proxy). Used for generating
|
||||||
|
# relative and absolute links back to Alertmanager itself. If the URL has
|
||||||
|
# a path portion, it will be used to prefix all HTTP endpoints served by
|
||||||
|
# Alertmanager. If omitted, relevant URL components will be derived
|
||||||
|
# automatically.
|
||||||
|
# --web.route-prefix=WEB.ROUTE-PREFIX
|
||||||
|
# Prefix for the internal routes of web endpoints. Defaults to path of
|
||||||
|
# --web.external-url.
|
||||||
|
# --web.listen-address=":9093"
|
||||||
|
# Address to listen on for the web interface and API.
|
||||||
|
# --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
|
||||||
|
# Path to static UI directory.
|
||||||
|
# --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
|
||||||
|
# Path to default notification template.
|
||||||
|
# --cluster.listen-address="0.0.0.0:9094"
|
||||||
|
# Listen address for cluster.
|
||||||
|
# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
|
||||||
|
# Explicit address to advertise in cluster.
|
||||||
|
# --cluster.peer=CLUSTER.PEER ...
|
||||||
|
# Initial peers (may be repeated).
|
||||||
|
# --cluster.peer-timeout=15s
|
||||||
|
# Time to wait between peers to send notifications.
|
||||||
|
# --cluster.gossip-interval=200ms
|
||||||
|
# Interval between sending gossip messages. By lowering this value (more
|
||||||
|
# frequent) gossip messages are propagated across the cluster more
|
||||||
|
# quickly at the expense of increased bandwidth.
|
||||||
|
# --cluster.pushpull-interval=1m0s
|
||||||
|
# Interval for gossip state syncs. Setting this interval lower (more
|
||||||
|
# frequent) will increase convergence speeds across larger clusters at
|
||||||
|
# the expense of increased bandwidth usage.
|
||||||
|
# --cluster.tcp-timeout=10s Timeout for establishing a stream connection
|
||||||
|
# with a remote node for a full state sync, and for stream read and write
|
||||||
|
# operations.
|
||||||
|
# --cluster.probe-timeout=500ms
|
||||||
|
# Timeout to wait for an ack from a probed node before assuming it is
|
||||||
|
# unhealthy. This should be set to 99-percentile of RTT (round-trip time)
|
||||||
|
# on your network.
|
||||||
|
# --cluster.probe-interval=1s
|
||||||
|
# Interval between random node probes. Setting this lower (more frequent)
|
||||||
|
# will cause the cluster to detect failed nodes more quickly at the
|
||||||
|
# expense of increased bandwidth usage.
|
||||||
|
# --cluster.settle-timeout=1m0s
|
||||||
|
# Maximum time to wait for cluster connections to settle before
|
||||||
|
# evaluating notifications.
|
||||||
|
# --cluster.reconnect-interval=10s
|
||||||
|
# Interval between attempting to reconnect to lost peers.
|
||||||
|
# --cluster.reconnect-timeout=6h0m0s
|
||||||
|
# Length of time to attempt to reconnect to a lost peer.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue