2020-12-19 23:18:27 -05:00
|
|
|
|
package alertmanager
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"crypto/md5"
|
|
|
|
|
"encoding/hex"
|
|
|
|
|
"fmt"
|
2021-01-05 20:35:04 -05:00
|
|
|
|
"log"
|
2020-12-19 23:18:27 -05:00
|
|
|
|
"sync"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/naiba/nezha/model"
|
|
|
|
|
"github.com/naiba/nezha/service/dao"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// firstNotificationDelay is the back-off interval stored when an alert is
// notified for the first time; later repeats double it.
const firstNotificationDelay = 15 * time.Minute
|
|
|
|
|
|
|
|
|
|
// 通知方式
|
|
|
|
|
var notifications []model.Notification
|
|
|
|
|
var notificationsLock sync.RWMutex
|
|
|
|
|
|
|
|
|
|
// 报警规则
|
|
|
|
|
var alertsLock sync.RWMutex
|
|
|
|
|
var alerts []model.AlertRule
|
|
|
|
|
var alertsStore map[uint64]map[uint64][][]interface{}
|
|
|
|
|
|
|
|
|
|
// NotificationHistory is the per-alert back-off state that checkStatus keeps
// in dao.Cache so that a persisting alert is not re-sent on every check.
type NotificationHistory struct {
	// Duration is the current back-off interval. It starts at
	// firstNotificationDelay and is doubled after every repeat, capped at
	// 24 hours.
	Duration time.Duration
	// Until is the instant after which the next repeat may be sent.
	Until time.Time
}
|
|
|
|
|
|
|
|
|
|
func Start() {
|
|
|
|
|
alertsStore = make(map[uint64]map[uint64][][]interface{})
|
2020-12-21 03:34:21 -05:00
|
|
|
|
notificationsLock.Lock()
|
|
|
|
|
if err := dao.DB.Find(¬ifications).Error; err != nil {
|
2020-12-19 23:18:27 -05:00
|
|
|
|
panic(err)
|
|
|
|
|
}
|
2020-12-21 03:34:21 -05:00
|
|
|
|
notificationsLock.Unlock()
|
|
|
|
|
alertsLock.Lock()
|
|
|
|
|
if err := dao.DB.Find(&alerts).Error; err != nil {
|
2020-12-19 23:18:27 -05:00
|
|
|
|
panic(err)
|
|
|
|
|
}
|
|
|
|
|
for i := 0; i < len(alerts); i++ {
|
|
|
|
|
alertsStore[alerts[i].ID] = make(map[uint64][][]interface{})
|
|
|
|
|
}
|
2020-12-21 03:34:21 -05:00
|
|
|
|
alertsLock.Unlock()
|
2020-12-19 23:18:27 -05:00
|
|
|
|
|
|
|
|
|
time.Sleep(time.Second * 10)
|
2021-01-05 20:35:04 -05:00
|
|
|
|
var lastPrint time.Time
|
|
|
|
|
var checkCount uint64
|
|
|
|
|
for {
|
|
|
|
|
startedAt := time.Now()
|
|
|
|
|
checkStatus()
|
|
|
|
|
checkCount++
|
|
|
|
|
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
|
|
|
|
|
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
|
|
|
|
|
checkCount = 0
|
|
|
|
|
lastPrint = startedAt
|
|
|
|
|
}
|
|
|
|
|
time.Sleep(time.Until(startedAt.Add(time.Second * dao.SnapshotDelay)))
|
|
|
|
|
}
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func OnRefreshOrAddAlert(alert model.AlertRule) {
|
|
|
|
|
alertsLock.Lock()
|
|
|
|
|
defer alertsLock.Unlock()
|
|
|
|
|
delete(alertsStore, alert.ID)
|
2020-12-21 03:34:21 -05:00
|
|
|
|
var isEdit bool
|
2020-12-19 23:18:27 -05:00
|
|
|
|
for i := 0; i < len(alerts); i++ {
|
|
|
|
|
if alerts[i].ID == alert.ID {
|
|
|
|
|
alerts[i] = alert
|
2020-12-21 03:34:21 -05:00
|
|
|
|
isEdit = true
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-12-21 03:34:21 -05:00
|
|
|
|
if !isEdit {
|
|
|
|
|
alerts = append(alerts, alert)
|
|
|
|
|
}
|
2020-12-19 23:18:27 -05:00
|
|
|
|
alertsStore[alert.ID] = make(map[uint64][][]interface{})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func OnDeleteAlert(id uint64) {
|
|
|
|
|
alertsLock.Lock()
|
|
|
|
|
defer alertsLock.Unlock()
|
|
|
|
|
delete(alertsStore, id)
|
|
|
|
|
for i := 0; i < len(alerts); i++ {
|
|
|
|
|
if alerts[i].ID == id {
|
|
|
|
|
alerts = append(alerts[:i], alerts[i+1:]...)
|
2021-01-06 08:20:02 -05:00
|
|
|
|
i--
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func OnRefreshOrAddNotification(n model.Notification) {
|
|
|
|
|
notificationsLock.Lock()
|
|
|
|
|
defer notificationsLock.Unlock()
|
2020-12-21 03:34:21 -05:00
|
|
|
|
var isEdit bool
|
2020-12-19 23:18:27 -05:00
|
|
|
|
for i := 0; i < len(notifications); i++ {
|
|
|
|
|
if notifications[i].ID == n.ID {
|
|
|
|
|
notifications[i] = n
|
2020-12-21 03:34:21 -05:00
|
|
|
|
isEdit = true
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
2020-12-21 03:34:21 -05:00
|
|
|
|
if !isEdit {
|
|
|
|
|
notifications = append(notifications, n)
|
|
|
|
|
}
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func OnDeleteNotification(id uint64) {
|
|
|
|
|
notificationsLock.Lock()
|
|
|
|
|
defer notificationsLock.Unlock()
|
|
|
|
|
for i := 0; i < len(notifications); i++ {
|
|
|
|
|
if notifications[i].ID == id {
|
|
|
|
|
notifications = append(notifications[:i], notifications[i+1:]...)
|
2021-01-06 08:20:02 -05:00
|
|
|
|
i--
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func checkStatus() {
|
|
|
|
|
alertsLock.RLock()
|
|
|
|
|
defer alertsLock.RUnlock()
|
|
|
|
|
dao.ServerLock.RLock()
|
|
|
|
|
defer dao.ServerLock.RUnlock()
|
|
|
|
|
|
2021-01-05 20:35:04 -05:00
|
|
|
|
for _, alert := range alerts {
|
2020-12-21 10:56:08 -05:00
|
|
|
|
// 跳过未启用
|
2021-01-05 20:35:04 -05:00
|
|
|
|
if alert.Enable == nil || !*alert.Enable {
|
2020-12-21 10:56:08 -05:00
|
|
|
|
continue
|
|
|
|
|
}
|
2020-12-19 23:18:27 -05:00
|
|
|
|
for _, server := range dao.ServerList {
|
|
|
|
|
// 监测点
|
2021-01-05 20:35:04 -05:00
|
|
|
|
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
|
|
|
|
|
ID][server.ID], alert.Snapshot(server))
|
2020-12-19 23:18:27 -05:00
|
|
|
|
// 发送通知
|
2021-01-05 20:35:04 -05:00
|
|
|
|
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
|
2020-12-19 23:18:27 -05:00
|
|
|
|
if desc != "" {
|
|
|
|
|
nID := getNotificationHash(server, desc)
|
|
|
|
|
var flag bool
|
|
|
|
|
if cacheN, has := dao.Cache.Get(nID); has {
|
|
|
|
|
nHistory := cacheN.(NotificationHistory)
|
2020-12-20 08:19:27 -05:00
|
|
|
|
// 每次提醒都增加一倍等待时间,最后每天最多提醒一次
|
|
|
|
|
if time.Now().After(nHistory.Until) {
|
2020-12-19 23:18:27 -05:00
|
|
|
|
flag = true
|
|
|
|
|
nHistory.Duration *= 2
|
2020-12-20 08:19:27 -05:00
|
|
|
|
if nHistory.Duration > time.Hour*24 {
|
|
|
|
|
nHistory.Duration = time.Hour * 24
|
|
|
|
|
}
|
2020-12-19 23:18:27 -05:00
|
|
|
|
nHistory.Until = time.Now().Add(nHistory.Duration)
|
2021-01-05 20:35:04 -05:00
|
|
|
|
// 缓存有效期加 10 分钟
|
|
|
|
|
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
} else {
|
2020-12-20 08:19:27 -05:00
|
|
|
|
// 新提醒直接通知
|
2020-12-19 23:18:27 -05:00
|
|
|
|
flag = true
|
|
|
|
|
dao.Cache.Set(nID, NotificationHistory{
|
|
|
|
|
Duration: firstNotificationDelay,
|
|
|
|
|
Until: time.Now().Add(firstNotificationDelay),
|
2021-01-05 22:26:30 -05:00
|
|
|
|
}, firstNotificationDelay+time.Minute*10)
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
if flag {
|
2021-01-06 08:20:02 -05:00
|
|
|
|
message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),%s,逮到咯,快去看看!", alert.Name, server.Name, server.Host.IP, desc)
|
2020-12-19 23:18:27 -05:00
|
|
|
|
go sendNotification(message)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// 清理旧数据
|
2021-01-06 08:20:02 -05:00
|
|
|
|
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {
|
|
|
|
|
alertsStore[alert.ID][server.ID] = alertsStore[alert.ID][server.ID][len(alertsStore[alert.ID][server.ID])-max:]
|
2020-12-19 23:18:27 -05:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func sendNotification(desc string) {
|
|
|
|
|
notificationsLock.RLock()
|
|
|
|
|
defer notificationsLock.RUnlock()
|
|
|
|
|
for i := 0; i < len(notifications); i++ {
|
|
|
|
|
notifications[i].Send(desc)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func getNotificationHash(server *model.Server, desc string) string {
|
|
|
|
|
return hex.EncodeToString(md5.New().Sum([]byte(fmt.Sprintf("%d::%s", server.ID, desc))))
|
|
|
|
|
}
|