Alert riemann configuration

This commit is contained in:
Yann Esposito 2015-03-23 16:13:11 +01:00
parent 5ec807a002
commit 5156ee738e

69
riemann.config Normal file
View file

@ -0,0 +1,69 @@
; -*- mode: clojure; -*-
; vim: filetype=clojure
; Send Riemann's own log output to riemann.log.
(logging/init {:file "riemann.log"})

; Start the TCP (5555), UDP (5555) and websocket (5556) listeners, all bound
; to the local interface only.
(let [bind-opts {:host "127.0.0.1"}]
  (doseq [start-server [tcp-server udp-server ws-server]]
    (start-server bind-opts)))

; Sweep the index for expired events every 5 seconds.
(periodically-expire 5)
(let [idx (index)]
  ; Every inbound event flows through the streams declared here.
  (streams
    ; Events that arrive without a :ttl get one of 60.
    (default :ttl 60
      ; Index all events immediately.
      idx

      ; Write expired events to the log.
      (expired #(info "expired" %)))))
;; -- ALERTING
; Stream that e-mails the sysadmin team. The mail stream is wrapped, innermost
; first, in a rollup (5 per 3600) and then a throttle (1000 per 3600) so that a
; burst of alerts cannot flood the mailbox.
; NOTE(review): the exact rollup/throttle semantics (how surplus events are
; batched or dropped) come from the Riemann streams API -- confirm against the
; deployed version.
(def notify-sysadmin-team
  (let [send-mail (mailer {:from "riemann@vigiglobe.com"})]
    (->> (send-mail "dreamteam@vigiglobe.com")
         (rollup 5 3600)
         (throttle 1000 3600))))
;; Basic alerting: notify on every single critical event. You generally don't
;; want this, because it is far too noisy.
; (streams (where (state "critical") notify-sysadmin-team))
(defn critial-fraction
  "Collapses a collection of events into one summary event.

  The summary's :metric is the share of `events` whose :state is critical
  (0 when `events` is empty). Its :state is critical when that share is at
  least `critial-threshold`, warning when it is at least `warn-threshold`,
  and ok otherwise. :service is `service-name` prefixed with fail, :host is
  the fixed string MCP, and :time is taken from the first event (nil when
  `events` is empty)."
  [service-name warn-threshold critial-threshold events]
  (let [total    (count events)
        critical (->> events
                      (filter (comp #{"critical"} :state))
                      count)
        ; Guard the division: an empty window has a share of 0.
        share    (if (zero? total)
                   0
                   (/ critical total))
        ; Check the stricter threshold first so it wins over the warning one.
        state    (cond
                   (>= share critial-threshold) "critical"
                   (>= share warn-threshold)    "warning"
                   :else                        "ok")]
    {:service (str "fail " service-name)
     :host    "MCP"
     :metric  share
     :time    (-> events first :time)
     :state   state}))
(defn alert-if-critical-too-long-with-thresholds
  "Registers a stream that watches events of `service-name` in fixed time
  windows of `nb-sec` seconds. Each window is condensed by `critial-fraction`
  (using `warn-threshold` and `critial-threshold`) into one summary event;
  summaries whose state is critical are indexed and handed to
  `notify-sysadmin-team`."
  [service-name nb-sec warn-threshold critial-threshold]
  ; NOTE(review): `(index)` is called again on every invocation, in addition
  ; to the call in the main streams block. Depending on the Riemann version
  ; this may create a separate index each time -- confirm that is intended.
  (let [idx (index)]
    (streams
      (where (service service-name)
        (fixed-time-window nb-sec
          ; Turn the vector of events in the window into one summary event.
          (smap (fn [window-events]
                  (critial-fraction service-name
                                    warn-threshold
                                    critial-threshold
                                    window-events))
            ; Only summaries that reached the critical threshold go further.
            (where (state "critical")
              idx
              notify-sysadmin-team)))))))
(defn alert-if-critical-too-long
  "Same as `alert-if-critical-too-long-with-thresholds`, with the default
  thresholds: 0.3 for warning and 0.9 for critical."
  [service-name nb-sec]
  (let [default-warn     0.3
        default-critical 0.9]
    (alert-if-critical-too-long-with-thresholds service-name
                                                nb-sec
                                                default-warn
                                                default-critical)))
; ------------------------------------------------------------------------------
; For each service below, notify the sysadmin team when, with the default
; thresholds, the fraction of critical events in a 15-second window reaches
; the critical threshold.
(doseq [service-name ["supercell prod twitter nb"
                      "supercell prod facebook nb"]]
  (alert-if-critical-too-long service-name 15))