// Mirror of https://github.com/nezhahq/nezha.git (synced 2025-01-23 13:18:13 -05:00).
// 133 lines, 3.4 KiB, Go.
package dao
|
||
|
||
import (
|
||
"fmt"
|
||
"log"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/naiba/nezha/model"
|
||
"github.com/naiba/nezha/pkg/utils"
|
||
)
|
||
|
||
// Result states recorded per (alert rule, server) pair in alertsPrevState.
const (
	// _RuleCheckNoData is 0, which is also the value a lookup in
	// alertsPrevState yields for a server that has no recorded state yet.
	_RuleCheckNoData = iota
	// _RuleCheckFail is stored when the latest rule check did not pass.
	_RuleCheckFail
	// _RuleCheckPass is stored when the latest rule check passed.
	_RuleCheckPass
)
|
||
|
||
// 报警规则
|
||
var alertsLock sync.RWMutex
|
||
var alerts []*model.AlertRule
|
||
var alertsStore map[uint64]map[uint64][][]interface{}
|
||
var alertsPrevState map[uint64]map[uint64]uint
|
||
|
||
// NotificationHistory pairs a duration with a point in time.
//
// NOTE(review): nothing in this file uses the type; presumably it tracks how
// long a notification stays muted and when that mute expires — confirm
// against the notification sending code.
type NotificationHistory struct {
	Duration time.Duration // length of the interval
	Until    time.Time     // instant the interval runs until
}
|
||
|
||
func AlertSentinelStart() {
|
||
alertsStore = make(map[uint64]map[uint64][][]interface{})
|
||
alertsPrevState = make(map[uint64]map[uint64]uint)
|
||
notificationsLock.Lock()
|
||
if err := DB.Find(¬ifications).Error; err != nil {
|
||
panic(err)
|
||
}
|
||
notificationsLock.Unlock()
|
||
alertsLock.Lock()
|
||
if err := DB.Find(&alerts).Error; err != nil {
|
||
panic(err)
|
||
}
|
||
for i := 0; i < len(alerts); i++ {
|
||
alertsStore[alerts[i].ID] = make(map[uint64][][]interface{})
|
||
alertsPrevState[alerts[i].ID] = make(map[uint64]uint)
|
||
}
|
||
alertsLock.Unlock()
|
||
|
||
time.Sleep(time.Second * 10)
|
||
var lastPrint time.Time
|
||
var checkCount uint64
|
||
for {
|
||
startedAt := time.Now()
|
||
checkStatus()
|
||
checkCount++
|
||
if lastPrint.Before(startedAt.Add(-1 * time.Hour)) {
|
||
if Conf.Debug {
|
||
log.Println("报警规则检测每小时", checkCount, "次", startedAt, time.Now())
|
||
}
|
||
checkCount = 0
|
||
lastPrint = startedAt
|
||
}
|
||
time.Sleep(time.Until(startedAt.Add(time.Second * 3))) // 3秒钟检查一次
|
||
}
|
||
}
|
||
|
||
func OnRefreshOrAddAlert(alert model.AlertRule) {
|
||
alertsLock.Lock()
|
||
defer alertsLock.Unlock()
|
||
delete(alertsStore, alert.ID)
|
||
delete(alertsPrevState, alert.ID)
|
||
var isEdit bool
|
||
for i := 0; i < len(alerts); i++ {
|
||
if alerts[i].ID == alert.ID {
|
||
alerts[i] = &alert
|
||
isEdit = true
|
||
}
|
||
}
|
||
if !isEdit {
|
||
alerts = append(alerts, &alert)
|
||
}
|
||
alertsStore[alert.ID] = make(map[uint64][][]interface{})
|
||
alertsPrevState[alert.ID] = make(map[uint64]uint)
|
||
}
|
||
|
||
func OnDeleteAlert(id uint64) {
|
||
alertsLock.Lock()
|
||
defer alertsLock.Unlock()
|
||
delete(alertsStore, id)
|
||
delete(alertsPrevState, id)
|
||
for i := 0; i < len(alerts); i++ {
|
||
if alerts[i].ID == id {
|
||
alerts = append(alerts[:i], alerts[i+1:]...)
|
||
i--
|
||
}
|
||
}
|
||
}
|
||
|
||
func checkStatus() {
|
||
alertsLock.RLock()
|
||
defer alertsLock.RUnlock()
|
||
ServerLock.RLock()
|
||
defer ServerLock.RUnlock()
|
||
|
||
for _, alert := range alerts {
|
||
// 跳过未启用
|
||
if alert.Enable == nil || !*alert.Enable {
|
||
continue
|
||
}
|
||
for _, server := range ServerList {
|
||
// 监测点
|
||
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
|
||
ID][server.ID], alert.Snapshot(server))
|
||
// 发送通知,分为触发报警和恢复通知
|
||
max, passed := alert.Check(alertsStore[alert.ID][server.ID])
|
||
if !passed {
|
||
alertsPrevState[alert.ID][server.ID] = _RuleCheckFail
|
||
message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),逮到咯,快去看看!", alert.Name, server.Name, utils.IPDesensitize(server.Host.IP))
|
||
go SendNotification(message, true)
|
||
} else {
|
||
if alertsPrevState[alert.ID][server.ID] == _RuleCheckFail {
|
||
message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),已恢复正常", alert.Name, server.Name, utils.IPDesensitize(server.Host.IP))
|
||
go SendNotification(message, true)
|
||
}
|
||
alertsPrevState[alert.ID][server.ID] = _RuleCheckPass
|
||
}
|
||
// 清理旧数据
|
||
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {
|
||
alertsStore[alert.ID][server.ID] = alertsStore[alert.ID][server.ID][len(alertsStore[alert.ID][server.ID])-max:]
|
||
}
|
||
}
|
||
}
|
||
}
|