nezha/service/dao/alertsentinel.go

113 lines
2.6 KiB
Go
Raw Normal View History

package dao
2020-12-19 23:18:27 -05:00
import (
"fmt"
2021-01-05 20:35:04 -05:00
"log"
2020-12-19 23:18:27 -05:00
"sync"
"time"
"github.com/naiba/nezha/model"
)
// Alert rule state shared by the functions in this file.
var (
	// alertsLock guards alerts and alertsStore.
	alertsLock sync.RWMutex
	// alerts is the in-memory list of alert rules.
	alerts []model.AlertRule
	// alertsStore holds the recorded snapshots, keyed first by alert rule ID
	// and then by server ID; each entry is a list of snapshots.
	alertsStore map[uint64]map[uint64][][]interface{}
)
// NotificationHistory pairs a time span with a point in time.
// NOTE(review): nothing in the visible part of this file references this
// type; presumably it is meant to track when a notification was last sent
// and for how long it stays suppressed — confirm against its users.
type NotificationHistory struct {
// Duration is a time span. NOTE(review): exact meaning not shown here — confirm.
Duration time.Duration
// Until is a point in time. NOTE(review): exact meaning not shown here — confirm.
Until time.Time
}
func AlertSentinelStart() {
2020-12-19 23:18:27 -05:00
alertsStore = make(map[uint64]map[uint64][][]interface{})
2020-12-21 03:34:21 -05:00
notificationsLock.Lock()
if err := DB.Find(&notifications).Error; err != nil {
2020-12-19 23:18:27 -05:00
panic(err)
}
2020-12-21 03:34:21 -05:00
notificationsLock.Unlock()
alertsLock.Lock()
if err := DB.Find(&alerts).Error; err != nil {
2020-12-19 23:18:27 -05:00
panic(err)
}
for i := 0; i < len(alerts); i++ {
alertsStore[alerts[i].ID] = make(map[uint64][][]interface{})
}
2020-12-21 03:34:21 -05:00
alertsLock.Unlock()
2020-12-19 23:18:27 -05:00
time.Sleep(time.Second * 10)
2021-01-05 20:35:04 -05:00
var lastPrint time.Time
var checkCount uint64
for {
startedAt := time.Now()
checkStatus()
checkCount++
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
if Conf.Debug {
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
}
2021-01-05 20:35:04 -05:00
checkCount = 0
lastPrint = startedAt
}
time.Sleep(time.Until(startedAt.Add(time.Second * SnapshotDelay)))
2021-01-05 20:35:04 -05:00
}
2020-12-19 23:18:27 -05:00
}
func OnRefreshOrAddAlert(alert model.AlertRule) {
alertsLock.Lock()
defer alertsLock.Unlock()
delete(alertsStore, alert.ID)
2020-12-21 03:34:21 -05:00
var isEdit bool
2020-12-19 23:18:27 -05:00
for i := 0; i < len(alerts); i++ {
if alerts[i].ID == alert.ID {
alerts[i] = alert
2020-12-21 03:34:21 -05:00
isEdit = true
2020-12-19 23:18:27 -05:00
}
}
2020-12-21 03:34:21 -05:00
if !isEdit {
alerts = append(alerts, alert)
}
2020-12-19 23:18:27 -05:00
alertsStore[alert.ID] = make(map[uint64][][]interface{})
}
func OnDeleteAlert(id uint64) {
alertsLock.Lock()
defer alertsLock.Unlock()
delete(alertsStore, id)
for i := 0; i < len(alerts); i++ {
if alerts[i].ID == id {
alerts = append(alerts[:i], alerts[i+1:]...)
i--
2020-12-19 23:18:27 -05:00
}
}
}
func checkStatus() {
alertsLock.RLock()
defer alertsLock.RUnlock()
ServerLock.RLock()
defer ServerLock.RUnlock()
2020-12-19 23:18:27 -05:00
2021-01-05 20:35:04 -05:00
for _, alert := range alerts {
2020-12-21 10:56:08 -05:00
// 跳过未启用
2021-01-05 20:35:04 -05:00
if alert.Enable == nil || !*alert.Enable {
2020-12-21 10:56:08 -05:00
continue
}
for _, server := range ServerList {
2020-12-19 23:18:27 -05:00
// 监测点
2021-01-05 20:35:04 -05:00
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
ID][server.ID], alert.Snapshot(server))
2020-12-19 23:18:27 -05:00
// 发送通知
2021-01-05 20:35:04 -05:00
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
2020-12-19 23:18:27 -05:00
if desc != "" {
2021-01-16 01:11:51 -05:00
message := fmt.Sprintf("报警规则:%s服务器%s(%s)%s逮到咯快去看看", alert.Name, server.Name, server.Host.IP, desc)
go SendNotification(message, true)
2020-12-19 23:18:27 -05:00
}
// 清理旧数据
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {
alertsStore[alert.ID][server.ID] = alertsStore[alert.ID][server.ID][len(alertsStore[alert.ID][server.ID])-max:]
2020-12-19 23:18:27 -05:00
}
}
}
}