From 322467673f4da7a1f5c0802287335fe7c8e54ddd Mon Sep 17 00:00:00 2001 From: Akkia Date: Fri, 15 Apr 2022 03:13:53 +0800 Subject: [PATCH] =?UTF-8?q?WIP:=20=E8=A1=A5=E5=85=A8=E5=90=84=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=E7=9A=84=E9=80=9A=E7=9F=A5=E5=88=86=E7=BB=84=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/dashboard/controller/member_api.go | 50 +++++++++++-------- model/alertrule.go | 9 ++-- model/config.go | 7 ++- model/monitor.go | 15 +++--- resource/template/component/rule.html | 4 ++ resource/template/dashboard/notification.html | 2 + resource/template/dashboard/setting.html | 4 ++ service/singleton/alertsentinel.go | 29 +++++++---- service/singleton/notification.go | 23 ++++----- service/singleton/servicesentinel.go | 21 +++++--- service/singleton/singleton.go | 12 ++--- 11 files changed, 103 insertions(+), 73 deletions(-) diff --git a/cmd/dashboard/controller/member_api.go b/cmd/dashboard/controller/member_api.go index cf5a03f..5c4e292 100644 --- a/cmd/dashboard/controller/member_api.go +++ b/cmd/dashboard/controller/member_api.go @@ -211,14 +211,15 @@ func (ma *memberAPI) addOrEditServer(c *gin.Context) { } type monitorForm struct { - ID uint64 - Name string - Target string - Type uint8 - Cover uint8 - Notify string - SkipServersRaw string - Duration uint64 + ID uint64 + Name string + Target string + Type uint8 + Cover uint8 + Notify string + NotificationTag string + SkipServersRaw string + Duration uint64 } func (ma *memberAPI) addOrEditMonitor(c *gin.Context) { @@ -233,10 +234,15 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) { m.SkipServersRaw = mf.SkipServersRaw m.Cover = mf.Cover m.Notify = mf.Notify == "on" + m.NotificationTag = mf.NotificationTag m.Duration = mf.Duration err = m.InitSkipServers() } if err == nil { + // 旧版本服务监控可能不存在通知组 为其添加默认的通知组 + if m.NotificationTag == "" { + m.NotificationTag = "default" + } if m.ID == 0 { err = 
singleton.DB.Create(&m).Error } else { @@ -429,10 +435,11 @@ func (ma *memberAPI) addOrEditNotification(c *gin.Context) { } type alertRuleForm struct { - ID uint64 - Name string - RulesRaw string - Enable string + ID uint64 + Name string + RulesRaw string + NotificationTag string + Enable string } func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) { @@ -472,6 +479,7 @@ func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) { if err == nil { r.Name = arf.Name r.RulesRaw = arf.RulesRaw + r.NotificationTag = arf.NotificationTag enable := arf.Enable == "on" r.Enable = &enable r.ID = arf.ID @@ -525,14 +533,15 @@ func (ma *memberAPI) logout(c *gin.Context) { } type settingForm struct { - Title string - Admin string - Theme string - CustomCode string - ViewPassword string - IgnoredIPNotification string - GRPCHost string - Cover uint8 + Title string + Admin string + Theme string + CustomCode string + ViewPassword string + IgnoredIPNotification string + IPChangeNotificationTag string // IP变更提醒的通知组 + GRPCHost string + Cover uint8 EnableIPChangeNotification string EnablePlainIPInNotification string @@ -552,6 +561,7 @@ func (ma *memberAPI) updateSetting(c *gin.Context) { singleton.Conf.Cover = sf.Cover singleton.Conf.GRPCHost = sf.GRPCHost singleton.Conf.IgnoredIPNotification = sf.IgnoredIPNotification + singleton.Conf.IPChangeNotificationTag = sf.IPChangeNotificationTag singleton.Conf.Site.Brand = sf.Title singleton.Conf.Site.Theme = sf.Theme singleton.Conf.Site.CustomCode = sf.CustomCode diff --git a/model/alertrule.go b/model/alertrule.go index 321480d..032e616 100644 --- a/model/alertrule.go +++ b/model/alertrule.go @@ -20,10 +20,11 @@ type CycleTransferStats struct { type AlertRule struct { Common - Name string - RulesRaw string - Enable *bool - Rules []Rule `gorm:"-" json:"-"` + Name string + RulesRaw string + Enable *bool + NotificationTag string // 该报警规则所在的通知组 + Rules []Rule `gorm:"-" json:"-"` } func (r *AlertRule) BeforeSave(tx *gorm.DB) error { diff --git 
a/model/config.go b/model/config.go index 35cdd3d..be32ef0 100644 --- a/model/config.go +++ b/model/config.go @@ -71,13 +71,12 @@ type Config struct { ProxyGRPCPort uint TLS bool + // IP变更提醒 EnableIPChangeNotification bool IPChangeNotificationTag string EnablePlainIPInNotification bool - - // IP变更提醒 - Cover uint8 // 覆盖范围(0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;) - IgnoredIPNotification string // 特定服务器IP(多个服务器用逗号分隔) + Cover uint8 // 覆盖范围(0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;) + IgnoredIPNotification string // 特定服务器IP(多个服务器用逗号分隔) v *viper.Viper IgnoredIPNotificationServerIDs map[uint64]bool // [ServerID] -> bool(值为true代表当前ServerID在特定服务器列表内) diff --git a/model/monitor.go b/model/monitor.go index 309de37..0230eeb 100644 --- a/model/monitor.go +++ b/model/monitor.go @@ -38,13 +38,14 @@ const ( type Monitor struct { Common - Name string - Type uint8 - Target string - SkipServersRaw string - Duration uint64 - Notify bool - Cover uint8 + Name string + Type uint8 + Target string + SkipServersRaw string + Duration uint64 + Notify bool + NotificationTag string // 当前服务监控所属的通知组 + Cover uint8 SkipServers map[uint64]bool `gorm:"-" json:"-"` CronJobID cron.EntryID `gorm:"-" json:"-"` diff --git a/resource/template/component/rule.html b/resource/template/component/rule.html index a9dd8ea..02b9637 100644 --- a/resource/template/component/rule.html +++ b/resource/template/component/rule.html @@ -12,6 +12,10 @@ +
+ + +
diff --git a/resource/template/dashboard/notification.html b/resource/template/dashboard/notification.html index c48f94d..d012b8c 100644 --- a/resource/template/dashboard/notification.html +++ b/resource/template/dashboard/notification.html @@ -57,6 +57,7 @@ ID 名称 + 通知方式组 规则 启用 管理 @@ -67,6 +68,7 @@ {{$rule.ID}} {{$rule.Name}} + {{$rule.NotificationTag}} {{$rule.RulesRaw}} {{$rule.Enable}} diff --git a/resource/template/dashboard/setting.html b/resource/template/dashboard/setting.html index 601921b..d499070 100644 --- a/resource/template/dashboard/setting.html +++ b/resource/template/dashboard/setting.html @@ -52,6 +52,10 @@
+
+ + +
diff --git a/service/singleton/alertsentinel.go b/service/singleton/alertsentinel.go index 8902700..a97397a 100644 --- a/service/singleton/alertsentinel.go +++ b/service/singleton/alertsentinel.go @@ -21,11 +21,13 @@ type NotificationHistory struct { } // 报警规则 -var AlertsLock sync.RWMutex -var Alerts []*model.AlertRule -var alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> 对应报警规则的检查结果 -var alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> 对应报警规则的上一次报警状态 -var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> 对应报警规则的周期流量统计 +var ( + AlertsLock sync.RWMutex + Alerts []*model.AlertRule + alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> 对应报警规则的检查结果 + alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> 对应报警规则的上一次报警状态 + AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> 对应报警规则的周期流量统计 +) // addCycleTransferStatsInfo 向AlertsCycleTransferStatsStore中添加周期流量报警统计信息 func addCycleTransferStatsInfo(alert *model.AlertRule) { @@ -62,10 +64,15 @@ func AlertSentinelStart() { if err := DB.Find(&Alerts).Error; err != nil { panic(err) } - for i := 0; i < len(Alerts); i++ { - alertsStore[Alerts[i].ID] = make(map[uint64][][]interface{}) - alertsPrevState[Alerts[i].ID] = make(map[uint64]uint) - addCycleTransferStatsInfo(Alerts[i]) + for _, alert := range Alerts { + // 旧版本可能不存在通知组 为其添加默认值 + if alert.NotificationTag == "" { + alert.NotificationTag = "default" + DB.Save(alert) + } + alertsStore[alert.ID] = make(map[uint64][][]interface{}) + alertsPrevState[alert.ID] = make(map[uint64]uint) + addCycleTransferStatsInfo(alert) } AlertsLock.Unlock() @@ -143,11 +150,11 @@ func checkStatus() { if !passed { alertsPrevState[alert.ID][server.ID] = _RuleCheckFail message := fmt.Sprintf("[主机故障] %s(%s) 规则:%s", server.Name, IPDesensitize(server.Host.IP), alert.Name) - go SendNotification(message, true) + go SendNotification(alert.NotificationTag, 
message, true) } else { if alertsPrevState[alert.ID][server.ID] == _RuleCheckFail { message := fmt.Sprintf("[主机恢复] %s(%s) 规则:%s", server.Name, IPDesensitize(server.Host.IP), alert.Name) - go SendNotification(message, true) + go SendNotification(alert.NotificationTag, message, true) } alertsPrevState[alert.ID][server.ID] = _RuleCheckPass } diff --git a/service/singleton/notification.go b/service/singleton/notification.go index caf3e6d..73efde3 100644 --- a/service/singleton/notification.go +++ b/service/singleton/notification.go @@ -35,12 +35,12 @@ func LoadNotifications() { if err := DB.Find(&notifications).Error; err != nil { panic(err) } - for _, n := range notifications { + for i := range notifications { // 旧版本的Tag可能不存在 自动设置为默认值 - if n.Tag == "" { - SetDefaultNotificationTagInDB(&n) + if notifications[i].Tag == "" { + SetDefaultNotificationTagInDB(&notifications[i]) } - AddNotificationToList(&n) + AddNotificationToList(&notifications[i]) } } @@ -70,23 +70,16 @@ func OnRefreshOrAddNotification(n *model.Notification) { // AddNotificationToList 添加通知方式到map中 func AddNotificationToList(n *model.Notification) { - notificationsLock.Lock() - defer notificationsLock.Unlock() - // 当前 Tag 不存在,创建对应该 Tag 的 子 map 后再添加 if _, ok := NotificationList[n.Tag]; !ok { NotificationList[n.Tag] = make(map[uint64]*model.Notification) } NotificationList[n.Tag][n.ID] = n NotificationIDToTag[n.ID] = n.Tag - } // UpdateNotificationInList 在 map 中更新通知方式 func UpdateNotificationInList(n *model.Notification) { - notificationsLock.Lock() - defer notificationsLock.Unlock() - NotificationList[n.Tag][n.ID] = n } @@ -137,10 +130,14 @@ func SendNotification(notificationTag string, desc string, mutable bool) { // 向该通知方式组的所有通知方式发出通知 notificationsLock.RLock() defer notificationsLock.RUnlock() - + for _, n := range NotificationList[notificationTag] { + log.Println("尝试通知", n.Name) + } for _, n := range NotificationList[notificationTag] { if err := n.Send(desc); err != nil { - log.Println("NEZHA>> 发送通知失败:", err) +
log.Println("NEZHA>> 向 ", n.Name, " 发送通知失败:", err) + } else { + log.Println("NEZHA>> 向 ", n.Name, " 发送通知成功:") } } } diff --git a/service/singleton/servicesentinel.go b/service/singleton/servicesentinel.go index cb897d5..23ccd76 100644 --- a/service/singleton/servicesentinel.go +++ b/service/singleton/servicesentinel.go @@ -149,18 +149,23 @@ func (ss *ServiceSentinel) loadMonitorHistory() { var err error ss.monitorsLock.Lock() defer ss.monitorsLock.Unlock() - for i := 0; i < len(monitors); i++ { - task := *monitors[i] + for _, monitor := range monitors { + // 旧版本可能不存在通知组 为其设置默认组 + if monitor.NotificationTag == "" { + monitor.NotificationTag = "default" + DB.Save(monitor) + } + task := *monitor // 通过cron定时将服务监控任务传递给任务调度管道 - monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() { + monitor.CronJobID, err = Cron.AddFunc(task.CronSpec(), func() { ss.dispatchBus <- task }) if err != nil { panic(err) } - ss.monitors[monitors[i].ID] = monitors[i] - ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize) - ss.serviceStatusToday[monitors[i].ID] = &_TodayStatsOfMonitor{} + ss.monitors[monitor.ID] = monitor + ss.serviceCurrentStatusData[monitor.ID] = make([]model.MonitorHistory, _CurrentStatusSize) + ss.serviceStatusToday[monitor.ID] = &_TodayStatsOfMonitor{} } year, month, day := time.Now().Date() @@ -356,7 +361,7 @@ func (ss *ServiceSentinel) worker() { isNeedSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify ss.lastStatus[mh.MonitorID] = stateStr if isNeedSendNotification { - go SendNotification(fmt.Sprintf("[服务%s] %s", stateStr, ss.monitors[mh.MonitorID].Name), true) + go SendNotification(ss.monitors[mh.MonitorID].NotificationTag, fmt.Sprintf("[服务%s] %s", stateStr, ss.monitors[mh.MonitorID].Name), true) } ss.monitorsLock.RUnlock() } @@ -400,7 +405,7 @@ func (ss *ServiceSentinel) worker() { if errMsg != "" { ss.monitorsLock.RLock() if 
ss.monitors[mh.MonitorID].Notify { - go SendNotification(fmt.Sprintf("[SSL] %s %s", ss.monitors[mh.MonitorID].Name, errMsg), true) + go SendNotification(ss.monitors[mh.MonitorID].NotificationTag, fmt.Sprintf("[SSL] %s %s", ss.monitors[mh.MonitorID].Name, errMsg), true) } ss.monitorsLock.RUnlock() } diff --git a/service/singleton/singleton.go b/service/singleton/singleton.go index 341bd09..5e5fc8c 100644 --- a/service/singleton/singleton.go +++ b/service/singleton/singleton.go @@ -107,22 +107,22 @@ func CleanMonitorHistory() { var specialServerIDs []uint64 var alerts []model.AlertRule DB.Find(&alerts) - for i := 0; i < len(alerts); i++ { - for j := 0; j < len(alerts[i].Rules); j++ { + for _, alert := range alerts { + for _, rule := range alert.Rules { // 是不是流量记录规则 - if !alerts[i].Rules[j].IsTransferDurationRule() { + if !rule.IsTransferDurationRule() { continue } - dataCouldRemoveBefore := alerts[i].Rules[j].GetTransferDurationStart() + dataCouldRemoveBefore := rule.GetTransferDurationStart() // 判断规则影响的机器范围 - if alerts[i].Rules[j].Cover == model.RuleCoverAll { + if rule.Cover == model.RuleCoverAll { // 更新全局可以清理的数据点 if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) { allServerKeep = dataCouldRemoveBefore } } else { // 更新特定机器可以清理数据点 - for id := range alerts[i].Rules[j].Ignore { + for id := range rule.Ignore { if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) { specialServerKeep[id] = dataCouldRemoveBefore specialServerIDs = append(specialServerIDs, id)