2020-12-19 10:11:16 -05:00
|
|
|
package model
|
|
|
|
|
|
|
|
import (
|
2024-11-28 06:38:54 -05:00
|
|
|
"github.com/nezhahq/nezha/pkg/utils"
|
2020-12-19 10:11:16 -05:00
|
|
|
"gorm.io/gorm"
|
|
|
|
)
|
|
|
|
|
2022-09-12 16:01:08 -04:00
|
|
|
// Values accepted by AlertRule.TriggerMode.
const (
	// ModeAlwaysTrigger is trigger mode 0: always trigger (the default).
	ModeAlwaysTrigger = 0

	// ModeOnetimeTrigger is trigger mode 1: trigger once.
	ModeOnetimeTrigger = 1
)
|
|
|
|
|
2020-12-19 10:11:16 -05:00
|
|
|
type AlertRule struct {
|
|
|
|
Common
|
2024-11-16 07:57:03 -05:00
|
|
|
Name string `json:"name"`
|
2024-10-25 20:16:57 -04:00
|
|
|
RulesRaw string `json:"-"`
|
|
|
|
Enable *bool `json:"enable,omitempty"`
|
2024-11-16 07:57:03 -05:00
|
|
|
TriggerMode uint8 `gorm:"default:0" json:"trigger_mode"` // 触发模式: 0-始终触发(默认) 1-单次触发
|
|
|
|
NotificationGroupID uint64 `json:"notification_group_id"` // 该报警规则所在的通知组
|
2024-10-25 20:16:57 -04:00
|
|
|
FailTriggerTasksRaw string `gorm:"default:'[]'" json:"-"`
|
|
|
|
RecoverTriggerTasksRaw string `gorm:"default:'[]'" json:"-"`
|
2024-12-19 10:21:15 -05:00
|
|
|
Rules []*Rule `gorm:"-" json:"rules"`
|
2024-10-25 20:16:57 -04:00
|
|
|
FailTriggerTasks []uint64 `gorm:"-" json:"fail_trigger_tasks"` // 失败时执行的触发任务id
|
|
|
|
RecoverTriggerTasks []uint64 `gorm:"-" json:"recover_trigger_tasks"` // 恢复时执行的触发任务id
|
2020-12-19 10:11:16 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *AlertRule) BeforeSave(tx *gorm.DB) error {
|
2022-09-13 23:14:23 -04:00
|
|
|
if data, err := utils.Json.Marshal(r.Rules); err != nil {
|
2020-12-19 10:11:16 -05:00
|
|
|
return err
|
2022-09-13 23:14:23 -04:00
|
|
|
} else {
|
|
|
|
r.RulesRaw = string(data)
|
|
|
|
}
|
|
|
|
if data, err := utils.Json.Marshal(r.FailTriggerTasks); err != nil {
|
|
|
|
return err
|
|
|
|
} else {
|
|
|
|
r.FailTriggerTasksRaw = string(data)
|
|
|
|
}
|
|
|
|
if data, err := utils.Json.Marshal(r.RecoverTriggerTasks); err != nil {
|
|
|
|
return err
|
|
|
|
} else {
|
|
|
|
r.RecoverTriggerTasksRaw = string(data)
|
2020-12-19 10:11:16 -05:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *AlertRule) AfterFind(tx *gorm.DB) error {
|
2022-09-13 23:14:23 -04:00
|
|
|
var err error
|
|
|
|
if err = utils.Json.Unmarshal([]byte(r.RulesRaw), &r.Rules); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = utils.Json.Unmarshal([]byte(r.FailTriggerTasksRaw), &r.FailTriggerTasks); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = utils.Json.Unmarshal([]byte(r.RecoverTriggerTasksRaw), &r.RecoverTriggerTasks); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
2020-12-19 23:18:27 -05:00
|
|
|
}
|
|
|
|
|
2021-11-06 04:00:08 -04:00
|
|
|
func (r *AlertRule) Enabled() bool {
|
|
|
|
return r.Enable != nil && *r.Enable
|
|
|
|
}
|
|
|
|
|
2024-10-26 11:57:47 -04:00
|
|
|
// Snapshot 对传入的Server进行该报警规则下所有type的检查 返回每项检查结果
|
2024-12-26 10:38:40 -05:00
|
|
|
func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []bool {
|
2024-12-21 11:05:41 -05:00
|
|
|
point := make([]bool, len(r.Rules))
|
|
|
|
|
|
|
|
for i, rule := range r.Rules {
|
|
|
|
point[i] = rule.Snapshot(cycleTransferStats, server, db)
|
2020-12-19 23:18:27 -05:00
|
|
|
}
|
|
|
|
return point
|
|
|
|
}
|
|
|
|
|
2024-10-26 11:57:47 -04:00
|
|
|
// Check 传入包含当前报警规则下所有type检查结果 返回报警持续时间与是否通过报警检查(通过则返回true)
|
|
|
|
func (r *AlertRule) Check(points [][]bool) (maxDuration int, passed bool) {
|
2024-12-27 11:07:07 -05:00
|
|
|
var hasPassedRule bool
|
2024-10-26 11:57:47 -04:00
|
|
|
|
2024-12-26 10:52:44 -05:00
|
|
|
for ruleId, rule := range r.Rules {
|
2024-10-26 11:57:47 -04:00
|
|
|
if rule.IsTransferDurationRule() {
|
2021-07-15 23:14:07 -04:00
|
|
|
// 循环区间流量报警
|
2024-10-26 11:57:47 -04:00
|
|
|
if maxDuration < 1 {
|
|
|
|
maxDuration = 1
|
2021-07-14 11:53:37 -04:00
|
|
|
}
|
2024-12-27 11:07:07 -05:00
|
|
|
if hasPassedRule {
|
|
|
|
continue
|
|
|
|
}
|
2024-12-26 11:07:44 -05:00
|
|
|
// 只要最后一次检查超出了规则范围 就认为检查未通过
|
2024-12-27 11:07:07 -05:00
|
|
|
if len(points) > 0 && points[len(points)-1][ruleId] {
|
|
|
|
hasPassedRule = true
|
2021-07-14 11:53:37 -04:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// 常规报警
|
2024-10-26 11:57:47 -04:00
|
|
|
duration := int(rule.Duration)
|
|
|
|
if duration > maxDuration {
|
|
|
|
maxDuration = duration
|
2021-07-14 11:53:37 -04:00
|
|
|
}
|
2024-12-27 11:07:07 -05:00
|
|
|
if hasPassedRule {
|
|
|
|
continue
|
|
|
|
}
|
2024-10-26 11:57:47 -04:00
|
|
|
if len(points) < duration {
|
2021-07-14 11:53:37 -04:00
|
|
|
continue
|
|
|
|
}
|
2024-10-26 11:57:47 -04:00
|
|
|
total, fail := 0.0, 0.0
|
2024-12-26 10:52:44 -05:00
|
|
|
for timeTick := len(points) - duration; timeTick < len(points); timeTick++ {
|
2021-07-14 11:53:37 -04:00
|
|
|
total++
|
2024-12-26 10:52:44 -05:00
|
|
|
if !points[timeTick][ruleId] {
|
2021-07-14 11:53:37 -04:00
|
|
|
fail++
|
|
|
|
}
|
|
|
|
}
|
2022-04-11 10:51:02 -04:00
|
|
|
// 当70%以上的采样点未通过规则判断时 才认为当前检查未通过
|
2024-12-27 11:07:07 -05:00
|
|
|
if fail/total <= 0.7 {
|
|
|
|
hasPassedRule = true
|
2020-12-19 23:18:27 -05:00
|
|
|
}
|
|
|
|
}
|
2020-12-21 09:51:23 -05:00
|
|
|
}
|
2024-12-26 11:07:44 -05:00
|
|
|
|
|
|
|
// 仅当所有检查均未通过时 才触发告警
|
2024-12-27 11:07:07 -05:00
|
|
|
return maxDuration, hasPassedRule
|
2020-12-19 10:11:16 -05:00
|
|
|
}
|