another helper
This commit is contained in:
parent
25c01cacec
commit
0f6cea8409
1 changed files with 33 additions and 9 deletions
|
@ -16,13 +16,16 @@
|
|||
(let [index (index)]
|
||||
; Inbound events will be passed to these streams:
|
||||
(streams
|
||||
(default :ttl 60
|
||||
; Index all events immediately.
|
||||
index
|
||||
(where (service #"e.+ .+x bytes")
|
||||
(scale 1/1048576 index)
|
||||
(else
|
||||
(default :ttl 60
|
||||
; Index all events immediately.
|
||||
index
|
||||
|
||||
; Log expired events.
|
||||
(expired
|
||||
(fn [event] (info "expired" event))))))
|
||||
; Log expired events.
|
||||
(expired
|
||||
(fn [event] (info "expired" event))))))))
|
||||
|
||||
;; -- ALERTING
|
||||
(def notify-sysadmin-team
|
||||
|
@ -57,13 +60,34 @@
|
|||
index
|
||||
notify-sysadmin-team)))))))
|
||||
|
||||
(defn alert-if-critical-too-long-with-thresholds-for-host
  "Hands `streams` a pipeline for events that satisfy both the `service`
  clause (against `service-name`) and the `host` clause (against `host-regex`).

  Matching events are grouped by `fixed-time-window` using `nb-sec` as the
  window argument. Each window is mapped through `critial-fraction` together
  with `service-name`, `warn-threshold` and `critial-threshold`. When the
  result has state \"critical\" it is sent to the index and to
  `notify-sysadmin-team`.

  The spelling `critial` is intentional here: it matches the helper
  `critial-fraction` that this function calls."
  [service-name host-regex nb-sec warn-threshold critial-threshold]
  (let [event-index   (index)
        ;; Turns one window's worth of events into a single summary value.
        summarize     (fn [window-events]
                        (critial-fraction service-name
                                          warn-threshold
                                          critial-threshold
                                          window-events))
        ;; Only summaries in state "critical" are indexed and notified.
        when-critical (where (state "critical")
                        event-index
                        notify-sysadmin-team)]
    (streams
      (where (and (service service-name)
                  (host host-regex))
        (fixed-time-window nb-sec
          (smap summarize when-critical))))))
|
||||
|
||||
(defn alert-if-critical-too-long
  "Convenience wrapper: delegates to
  `alert-if-critical-too-long-with-thresholds`, forwarding `service-name` and
  `nb-sec` and supplying 0.3 and 0.9 as the two trailing arguments
  (presumably the warn and critical thresholds -- confirm against that
  function's parameter list)."
  [service-name nb-sec]
  (alert-if-critical-too-long-with-thresholds
    service-name
    nb-sec
    0.3
    0.9))
|
||||
|
||||
|
||||
; ------------------------------------------------------------------------------
|
||||
|
||||
; NOTE(review): this region appears to come from a rendered diff, so removed
; and added lines are interleaved. The 15-second and 30-second calls for the
; same service are probably the old and new versions of one line -- confirm
; which set is live before relying on it.
; Alert everybody if 'supercell prod twitter nb' is critical for 15 seconds
|
||||
(alert-if-critical-too-long "supercell prod twitter nb" 15)
|
||||
; Alert everybody if 'supercell prod twitter nb' is critical for 30 seconds
|
||||
(alert-if-critical-too-long "supercell prod twitter nb" 30)
|
||||
; Same wrapper, applied to the facebook and haarp services with nb-sec = 30.
(alert-if-critical-too-long "supercell prod facebook nb" 30)
|
||||
(alert-if-critical-too-long "haarp nb" 30)
|
||||
|
||||
(alert-if-critical-too-long "supercell prod facebook nb" 15)
|
||||
; Host-restricted variant. Arguments in signature order: service-name,
; host-regex, nb-sec, warn-threshold, critial-threshold.
(alert-if-critical-too-long-with-thresholds-for-host
|
||||
"kafka message/sec" "tornado" 10 0.1 0.9)
|
||||
|
||||
; Checks for the "cpu", "load", "memory" and "disk /" services, all with nb-sec = 120.
(alert-if-critical-too-long "cpu" 120)
|
||||
(alert-if-critical-too-long "load" 120)
|
||||
|
||||
(alert-if-critical-too-long "memory" 120)
|
||||
(alert-if-critical-too-long "disk /" 120)
|
||||
|
|
Loading…
Reference in a new issue