Previously prometheus/alertmanager were unconditionally reloaded every 10 minutes. Reloading alertmanager just as it is about to deliver an alert notification seems to make it count successful notification deliveries as failed, increasing alertmanager_notifications_failed_total. As a result, reloading every 10 minutes caused spurious AlertmanagerNotifications alerts in our setup. In addition, config updates are now atomic regardless of whether /tmp is a tmpfs.
24 lines
669 B
Bash
24 lines
669 B
Bash
#!/bin/bash
#
# Rebuild /etc/prometheus/prometheus.yml from the fragments below
# /etc/prometheus/conf.d and reload the services only when the generated
# configuration differs from the one currently installed.
#
# Layout of the generated file, in order:
#   1. every conf.d/*.conf fragment, verbatim
#   2. an "alerting:" / "alertmanagers:" section followed by the optional
#      conf.d/alerting/*.conf fragments, each line prefixed by sed
#   3. a "scrape_configs:" section followed by conf.d/scrape_configs/*.conf
#
# The new file is staged next to its destination, so the final mv stays
# inside one directory.

set -euo pipefail

readonly live_config=/etc/prometheus/prometheus.yml
readonly staged_config="${live_config}.new"

# Remove the staging file on every exit path. Without this, a failure while
# generating it (set -e aborts on the first failing cat) would leave a
# half-written prometheus.yml.new behind.
trap 'rm -f -- "$staged_config"' EXIT

{
  cat /etc/prometheus/conf.d/*.conf

  echo "alerting:"
  echo " alertmanagers:"
  # Alerting fragments are optional: if cat fails (for instance because the
  # glob matches nothing), pipefail makes the pipeline fail and an empty
  # line is emitted instead of aborting the script.
  cat /etc/prometheus/conf.d/alerting/*.conf 2> /dev/null | sed "s/^/ /" || echo ""

  echo "scrape_configs:"
  cat /etc/prometheus/conf.d/scrape_configs/*.conf
} > "$staged_config"

chmod 0644 "$staged_config"

# cmp -s prints nothing and only reports through its exit status: 0 means
# the files are identical. Any other status (different content, or no live
# config yet) is treated as "changed".
if cmp -s "$staged_config" "$live_config"; then
  # Nothing changed: skip the reloads. The EXIT trap discards the staged copy.
  exit 0
fi

mv "$staged_config" "$live_config"
/usr/bin/systemctl reload prometheus
# Best effort: a failing alertmanager reload must not fail the script.
# NOTE(review): presumably alertmanager is not present on every host - confirm.
/usr/bin/systemctl reload prometheus-alertmanager || true