WIP: 补全各模块的通知分组设置

This commit is contained in:
Akkia 2022-04-15 03:13:53 +08:00
parent 27cd794142
commit 322467673f
No known key found for this signature in database
GPG Key ID: DABE9A4AB2DD7EF3
11 changed files with 103 additions and 73 deletions

View File

@ -211,14 +211,15 @@ func (ma *memberAPI) addOrEditServer(c *gin.Context) {
}
// monitorForm carries the fields submitted when a service monitor is added or
// edited; addOrEditMonitor copies them onto the monitor model.
// NOTE(review): this is a diff hunk rendered without +/- markers — the first
// run of fields appears to be the pre-change list and the second run the
// post-change list (which adds NotificationTag). Confirm against the real file.
type monitorForm struct {
ID uint64
Name string
Target string
Type uint8
Cover uint8
Notify string
SkipServersRaw string
Duration uint64
ID uint64
Name string
Target string
Type uint8
Cover uint8
// Notify is compared against "on" by the handler to obtain a boolean.
Notify string
// NotificationTag is the notification group name; the handler falls back to
// "default" when it is empty.
NotificationTag string
SkipServersRaw string
Duration uint64
}
func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
@ -233,10 +234,15 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
m.SkipServersRaw = mf.SkipServersRaw
m.Cover = mf.Cover
m.Notify = mf.Notify == "on"
m.NotificationTag = mf.NotificationTag
m.Duration = mf.Duration
err = m.InitSkipServers()
}
if err == nil {
// 旧版本服务监控可能不存在通知组 为其添加默认的通知组
if m.NotificationTag == "" {
m.NotificationTag = "default"
}
if m.ID == 0 {
err = singleton.DB.Create(&m).Error
} else {
@ -429,10 +435,11 @@ func (ma *memberAPI) addOrEditNotification(c *gin.Context) {
}
// alertRuleForm carries the fields submitted when an alert rule is added or
// edited; addOrEditAlertRule copies them onto the alert rule model.
// NOTE(review): this is a diff hunk rendered without +/- markers — the first
// run of fields appears to be the pre-change list and the second run the
// post-change list (which adds NotificationTag). Confirm against the real file.
type alertRuleForm struct {
ID uint64
Name string
RulesRaw string
Enable string
ID uint64
Name string
RulesRaw string
// NotificationTag is copied verbatim to the rule's NotificationTag.
NotificationTag string
// Enable is compared against "on" by the handler to obtain a boolean.
Enable string
}
func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) {
@ -472,6 +479,7 @@ func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) {
if err == nil {
r.Name = arf.Name
r.RulesRaw = arf.RulesRaw
r.NotificationTag = arf.NotificationTag
enable := arf.Enable == "on"
r.Enable = &enable
r.ID = arf.ID
@ -525,14 +533,15 @@ func (ma *memberAPI) logout(c *gin.Context) {
}
type settingForm struct {
Title string
Admin string
Theme string
CustomCode string
ViewPassword string
IgnoredIPNotification string
GRPCHost string
Cover uint8
Title string
Admin string
Theme string
CustomCode string
ViewPassword string
IgnoredIPNotification string
IPChangeNotificationTag string // IP变更提醒的通知组
GRPCHost string
Cover uint8
EnableIPChangeNotification string
EnablePlainIPInNotification string
@ -552,6 +561,7 @@ func (ma *memberAPI) updateSetting(c *gin.Context) {
singleton.Conf.Cover = sf.Cover
singleton.Conf.GRPCHost = sf.GRPCHost
singleton.Conf.IgnoredIPNotification = sf.IgnoredIPNotification
singleton.Conf.IPChangeNotificationTag = sf.IPChangeNotificationTag
singleton.Conf.Site.Brand = sf.Title
singleton.Conf.Site.Theme = sf.Theme
singleton.Conf.Site.CustomCode = sf.CustomCode

View File

@ -20,10 +20,11 @@ type CycleTransferStats struct {
// AlertRule is the persisted alert rule model.
// NOTE(review): this is a diff hunk rendered without +/- markers — the first
// run of fields appears to be the pre-change list and the second run the
// post-change list (which adds NotificationTag). Confirm against the real file.
type AlertRule struct {
Common
Name string
RulesRaw string
Enable *bool
Rules []Rule `gorm:"-" json:"-"`
Name string
RulesRaw string
Enable *bool
NotificationTag string // notification group this alert rule belongs to
// Rules is excluded from both gorm persistence and JSON output by its tags.
Rules []Rule `gorm:"-" json:"-"`
}
func (r *AlertRule) BeforeSave(tx *gorm.DB) error {

View File

@ -71,13 +71,12 @@ type Config struct {
ProxyGRPCPort uint
TLS bool
// IP变更提醒
EnableIPChangeNotification bool
IPChangeNotificationTag string
EnablePlainIPInNotification bool
// IP变更提醒
Cover uint8 // 覆盖范围0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;
IgnoredIPNotification string // 特定服务器IP多个服务器用逗号分隔
Cover uint8 // 覆盖范围0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;
IgnoredIPNotification string // 特定服务器IP多个服务器用逗号分隔
v *viper.Viper
IgnoredIPNotificationServerIDs map[uint64]bool // [ServerID] -> bool(值为true代表当前ServerID在特定服务器列表内

View File

@ -38,13 +38,14 @@ const (
type Monitor struct {
Common
Name string
Type uint8
Target string
SkipServersRaw string
Duration uint64
Notify bool
Cover uint8
Name string
Type uint8
Target string
SkipServersRaw string
Duration uint64
Notify bool
NotificationTag string // 当前服务监控所属的通知组
Cover uint8
SkipServers map[uint64]bool `gorm:"-" json:"-"`
CronJobID cron.EntryID `gorm:"-" json:"-"`

View File

@ -12,6 +12,10 @@
<label>规则</label>
<textarea name="RulesRaw"></textarea>
</div>
<div class="field">
<label>通知方式组</label>
<input type="text" name="NotificationTag">
</div>
<div class="field">
<div class="ui rule-enable checkbox">
<input name="Enable" type="checkbox" tabindex="0" class="hidden">

View File

@ -57,6 +57,7 @@
<tr>
<th>ID</th>
<th>名称</th>
<th>通知方式组</th>
<th>规则</th>
<th>启用</th>
<th>管理</th>
@ -67,6 +68,7 @@
<tr>
<td>{{$rule.ID}}</td>
<td>{{$rule.Name}}</td>
<td>{{$rule.NotificationTag}}</td>
<td>{{$rule.RulesRaw}}</td>
<td>{{$rule.Enable}}</td>
<td>

View File

@ -52,6 +52,10 @@
<input type="text" name="IgnoredIPNotification" placeholder="服务器ID 以逗号隔开 1001,1002,1003"
value="{{.Conf.IgnoredIPNotification}}">
</div>
<div class="field">
<label>提醒发送指定的通知分组</label>
<input type="text" name="IPChangeNotificationTag" placeholder="" value="{{.Conf.IPChangeNotificationTag}}">
</div>
<div class="field">
<div class="ui nf-ssl checkbox ip-change">
<input name="EnableIPChangeNotification" type="checkbox" tabindex="0" class="hidden">

View File

@ -21,11 +21,13 @@ type NotificationHistory struct {
}
// Alert rules
// NOTE(review): this is a diff hunk rendered without +/- markers — the
// standalone var lines appear to be the pre-change form and the grouped var
// block the post-change form of the same declarations.
var AlertsLock sync.RWMutex
var Alerts []*model.AlertRule
var alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> check results of the corresponding alert rule
var alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> previous alert state of the corresponding alert rule
var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> cycle transfer statistics of the corresponding alert rule
var (
AlertsLock sync.RWMutex
Alerts []*model.AlertRule
alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> check results of the corresponding alert rule
alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> previous alert state of the corresponding alert rule
AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> cycle transfer statistics of the corresponding alert rule
)
// addCycleTransferStatsInfo 向AlertsCycleTransferStatsStore中添加周期流量报警统计信息
func addCycleTransferStatsInfo(alert *model.AlertRule) {
@ -62,10 +64,15 @@ func AlertSentinelStart() {
if err := DB.Find(&Alerts).Error; err != nil {
panic(err)
}
for i := 0; i < len(Alerts); i++ {
alertsStore[Alerts[i].ID] = make(map[uint64][][]interface{})
alertsPrevState[Alerts[i].ID] = make(map[uint64]uint)
addCycleTransferStatsInfo(Alerts[i])
for _, alert := range Alerts {
// 旧版本可能不存在通知组 为其添加默认值
if alert.NotificationTag == "" {
alert.NotificationTag = "default"
DB.Save(alert)
}
alertsStore[alert.ID] = make(map[uint64][][]interface{})
alertsPrevState[alert.ID] = make(map[uint64]uint)
addCycleTransferStatsInfo(alert)
}
AlertsLock.Unlock()
@ -143,11 +150,11 @@ func checkStatus() {
if !passed {
alertsPrevState[alert.ID][server.ID] = _RuleCheckFail
message := fmt.Sprintf("[主机故障] %s(%s) 规则:%s", server.Name, IPDesensitize(server.Host.IP), alert.Name)
go SendNotification(message, true)
go SendNotification(alert.NotificationTag, message, true)
} else {
if alertsPrevState[alert.ID][server.ID] == _RuleCheckFail {
message := fmt.Sprintf("[主机恢复] %s(%s) 规则:%s", server.Name, IPDesensitize(server.Host.IP), alert.Name)
go SendNotification(message, true)
go SendNotification(alert.NotificationTag, message, true)
}
alertsPrevState[alert.ID][server.ID] = _RuleCheckPass
}

View File

@ -35,12 +35,12 @@ func LoadNotifications() {
if err := DB.Find(&notifications).Error; err != nil {
panic(err)
}
for _, n := range notifications {
for i := range notifications {
// 旧版本的Tag可能不存在 自动设置为默认值
if n.Tag == "" {
SetDefaultNotificationTagInDB(&n)
if notifications[i].Tag == "" {
SetDefaultNotificationTagInDB(&notifications[i])
}
AddNotificationToList(&n)
AddNotificationToList(&notifications[i])
}
}
@ -70,23 +70,16 @@ func OnRefreshOrAddNotification(n *model.Notification) {
// AddNotificationToList registers notification n in the in-memory lookup maps.
//
// It stores n under NotificationList[n.Tag][n.ID], creating the per-tag group
// on first use, and records the ID -> tag mapping in NotificationIDToTag.
// The write lock is held for the whole operation.
func AddNotificationToList(n *model.Notification) {
	notificationsLock.Lock()
	defer notificationsLock.Unlock()

	// Fetch the group for this tag once; build and publish it if it is missing.
	group, exists := NotificationList[n.Tag]
	if !exists {
		group = make(map[uint64]*model.Notification)
		NotificationList[n.Tag] = group
	}

	group[n.ID] = n
	NotificationIDToTag[n.ID] = n.Tag
}
// UpdateNotificationInList replaces the stored entry for notification n under
// its current tag in NotificationList.
//
// The write lock is held for the duration of the update. If no group exists
// yet for n.Tag, one is created first: indexing a missing key yields a nil
// inner map, and assigning into a nil map panics at run time.
func UpdateNotificationInList(n *model.Notification) {
	notificationsLock.Lock()
	defer notificationsLock.Unlock()

	group := NotificationList[n.Tag]
	if group == nil {
		// Mirror AddNotificationToList: lazily create the per-tag group.
		group = make(map[uint64]*model.Notification)
		NotificationList[n.Tag] = group
	}
	group[n.ID] = n
}
@ -137,10 +130,14 @@ func SendNotification(notificationTag string, desc string, mutable bool) {
// 向该通知方式组的所有通知方式发出通知
notificationsLock.RLock()
defer notificationsLock.RUnlock()
for _, n := range NotificationList[notificationTag] {
log.Println("尝试通知", n.Name)
}
for _, n := range NotificationList[notificationTag] {
if err := n.Send(desc); err != nil {
log.Println("NEZHA>> 发送通知失败:", err)
log.Println("NEZHA>> 向 ", n.Name, " 发送通知失败:", err)
} else {
log.Println("NEZHA>> 向 ", n.Name, " 发送通知成功:")
}
}
}

View File

@ -149,18 +149,23 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
var err error
ss.monitorsLock.Lock()
defer ss.monitorsLock.Unlock()
for i := 0; i < len(monitors); i++ {
task := *monitors[i]
for _, monitor := range monitors {
// 旧版本可能不存在通知组 为其设置默认组
if monitor.NotificationTag == "" {
monitor.NotificationTag = "default"
DB.Save(monitor)
}
task := *monitor
// 通过cron定时将服务监控任务传递给任务调度管道
monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
monitor.CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
ss.dispatchBus <- task
})
if err != nil {
panic(err)
}
ss.monitors[monitors[i].ID] = monitors[i]
ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize)
ss.serviceStatusToday[monitors[i].ID] = &_TodayStatsOfMonitor{}
ss.monitors[monitor.ID] = monitor
ss.serviceCurrentStatusData[monitor.ID] = make([]model.MonitorHistory, _CurrentStatusSize)
ss.serviceStatusToday[monitor.ID] = &_TodayStatsOfMonitor{}
}
year, month, day := time.Now().Date()
@ -356,7 +361,7 @@ func (ss *ServiceSentinel) worker() {
isNeedSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify
ss.lastStatus[mh.MonitorID] = stateStr
if isNeedSendNotification {
go SendNotification(fmt.Sprintf("[服务%s] %s", stateStr, ss.monitors[mh.MonitorID].Name), true)
go SendNotification(ss.monitors[mh.MonitorID].NotificationTag, fmt.Sprintf("[服务%s] %s", stateStr, ss.monitors[mh.MonitorID].Name), true)
}
ss.monitorsLock.RUnlock()
}
@ -400,7 +405,7 @@ func (ss *ServiceSentinel) worker() {
if errMsg != "" {
ss.monitorsLock.RLock()
if ss.monitors[mh.MonitorID].Notify {
go SendNotification(fmt.Sprintf("[SSL] %s %s", ss.monitors[mh.MonitorID].Name, errMsg), true)
go SendNotification(ss.monitors[mh.MonitorID].NotificationTag, fmt.Sprintf("[SSL] %s %s", ss.monitors[mh.MonitorID].Name, errMsg), true)
}
ss.monitorsLock.RUnlock()
}

View File

@ -107,22 +107,22 @@ func CleanMonitorHistory() {
var specialServerIDs []uint64
var alerts []model.AlertRule
DB.Find(&alerts)
for i := 0; i < len(alerts); i++ {
for j := 0; j < len(alerts[i].Rules); j++ {
for _, alert := range alerts {
for _, rule := range alert.Rules {
// 是不是流量记录规则
if !alerts[i].Rules[j].IsTransferDurationRule() {
if !rule.IsTransferDurationRule() {
continue
}
dataCouldRemoveBefore := alerts[i].Rules[j].GetTransferDurationStart()
dataCouldRemoveBefore := rule.GetTransferDurationStart()
// 判断规则影响的机器范围
if alerts[i].Rules[j].Cover == model.RuleCoverAll {
if rule.Cover == model.RuleCoverAll {
// 更新全局可以清理的数据点
if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) {
allServerKeep = dataCouldRemoveBefore
}
} else {
// 更新特定机器可以清理数据点
for id := range alerts[i].Rules[j].Ignore {
for id := range rule.Ignore {
if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) {
specialServerKeep[id] = dataCouldRemoveBefore
specialServerIDs = append(specialServerIDs, id)