添加代码注释+修正一个typo

This commit is contained in:
Akkia 2022-04-11 22:51:02 +08:00
parent 91a1e3fe22
commit 707985e5c8
No known key found for this signature in database
GPG Key ID: 464BA42A151C1E0F
11 changed files with 68 additions and 32 deletions

View File

@ -63,19 +63,20 @@ func initSystem() {
loadServers() //加载服务器列表 loadServers() //加载服务器列表
loadCrons() //加载计划任务 loadCrons() //加载计划任务
// 清理 服务请求记录 和 流量记录 的旧数据 // 每天的3:30 对 监控记录 和 流量记录 进行清理
_, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory) _, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory)
if err != nil { if err != nil {
panic(err) panic(err)
} }
// 流量记录打点 // 每小时对流量记录进行打点
_, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage) _, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
if err != nil { if err != nil {
panic(err) panic(err)
} }
} }
// recordTransferHourlyUsage 对流量记录进行打点
func recordTransferHourlyUsage() { func recordTransferHourlyUsage() {
singleton.ServerLock.Lock() singleton.ServerLock.Lock()
defer singleton.ServerLock.Unlock() defer singleton.ServerLock.Unlock()
@ -102,8 +103,9 @@ func recordTransferHourlyUsage() {
log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error) log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error)
} }
// cleanMonitorHistory 清理无效或过时的 监控记录 和 流量记录
func cleanMonitorHistory() { func cleanMonitorHistory() {
// 清理无效数据 // 清理超过 30 天或所属监控项已被删除的监控记录,以及所属服务器已被删除的流量记录
singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30)) singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30))
singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)") singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
// 计算可清理流量记录的时长 // 计算可清理流量记录的时长
@ -146,6 +148,7 @@ func cleanMonitorHistory() {
} }
} }
// loadServers 加载服务器列表并根据ID排序
func loadServers() { func loadServers() {
var servers []model.Server var servers []model.Server
singleton.DB.Find(&servers) singleton.DB.Find(&servers)
@ -159,6 +162,7 @@ func loadServers() {
singleton.ReSortServer() singleton.ReSortServer()
} }
// loadCrons 加载计划任务
func loadCrons() { func loadCrons() {
var crons []model.Cron var crons []model.Cron
singleton.DB.Find(&crons) singleton.DB.Find(&crons)
@ -172,6 +176,7 @@ func loadCrons() {
crIgnoreMap[cr.Servers[j]] = true crIgnoreMap[cr.Servers[j]] = true
} }
// 注册计划任务
cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr)) cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr))
if err == nil { if err == nil {
singleton.Crons[cr.ID] = &cr singleton.Crons[cr.ID] = &cr
@ -192,7 +197,7 @@ func loadCrons() {
func main() { func main() {
cleanMonitorHistory() cleanMonitorHistory()
go rpc.ServeRPC(singleton.Conf.GRPCPort) go rpc.ServeRPC(singleton.Conf.GRPCPort)
serviceSentinelDispatchBus := make(chan model.Monitor) serviceSentinelDispatchBus := make(chan model.Monitor) // 用于传递服务监控任务信息的channel
go rpc.DispatchTask(serviceSentinelDispatchBus) go rpc.DispatchTask(serviceSentinelDispatchBus)
go rpc.DispatchKeepalive() go rpc.DispatchKeepalive()
go singleton.AlertSentinelStart() go singleton.AlertSentinelStart()

View File

@ -43,6 +43,7 @@ func (r *AlertRule) Enabled() bool {
return r.Enable != nil && *r.Enable return r.Enable != nil && *r.Enable
} }
// Snapshot 对传入的Server进行该报警规则下所有type的检查 返回包含每项检查结果的空接口
func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} { func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} {
var point []interface{} var point []interface{}
for i := 0; i < len(r.Rules); i++ { for i := 0; i < len(r.Rules); i++ {
@ -51,9 +52,10 @@ func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Ser
return point return point
} }
// Check 传入包含当前报警规则下所有type检查结果的空接口 返回报警持续时间与是否通过报警检查(通过则返回true)
func (r *AlertRule) Check(points [][]interface{}) (int, bool) { func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
var max int var max int // 报警持续时间
var count int var count int // 检查未通过的个数
for i := 0; i < len(r.Rules); i++ { for i := 0; i < len(r.Rules); i++ {
if r.Rules[i].IsTransferDurationRule() { if r.Rules[i].IsTransferDurationRule() {
// 循环区间流量报警 // 循环区间流量报警
@ -83,11 +85,13 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
fail++ fail++
} }
} }
// 当70%以上的采样点未通过规则判断时 才认为当前检查未通过
if fail/total > 0.7 { if fail/total > 0.7 {
count++ count++
break break
} }
} }
} }
// 仅当所有检查均未通过时 返回false
return max, count != len(r.Rules) return max, count != len(r.Rules)
} }

View File

@ -48,8 +48,9 @@ func (c *AgentConfig) Save() error {
return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm) return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm)
} }
// Config 站点配置
type Config struct { type Config struct {
Debug bool Debug bool // debug模式开关
Site struct { Site struct {
Brand string // 站点名称 Brand string // 站点名称
CookieName string // 浏览器 Cookie 名称 CookieName string // 浏览器 Cookie 名称
@ -73,13 +74,14 @@ type Config struct {
EnablePlainIPInNotification bool EnablePlainIPInNotification bool
// IP变更提醒 // IP变更提醒
Cover uint8 // 覆盖范围 Cover uint8 // 覆盖范围(0:提醒未被 IgnoredIPNotification 包含的所有服务器;1:仅提醒被 IgnoredIPNotification 包含的服务器)
IgnoredIPNotification string // 特定服务器 IgnoredIPNotification string // 特定服务器 ID(多个服务器用逗号分隔)
v *viper.Viper v *viper.Viper
IgnoredIPNotificationServerIDs map[uint64]bool IgnoredIPNotificationServerIDs map[uint64]bool // [ServerID] -> bool(值为true代表当前ServerID在特定服务器列表内)
} }
// Read 读取配置文件并应用
func (c *Config) Read(path string) error { func (c *Config) Read(path string) error {
c.v = viper.New() c.v = viper.New()
c.v.SetConfigFile(path) c.v.SetConfigFile(path)
@ -101,6 +103,7 @@ func (c *Config) Read(path string) error {
return nil return nil
} }
// updateIgnoredIPNotificationID 更新用于判断服务器ID是否属于特定服务器的map
func (c *Config) updateIgnoredIPNotificationID() { func (c *Config) updateIgnoredIPNotificationID() {
c.IgnoredIPNotificationServerIDs = make(map[uint64]bool) c.IgnoredIPNotificationServerIDs = make(map[uint64]bool)
splitedIDs := strings.Split(c.IgnoredIPNotification, ",") splitedIDs := strings.Split(c.IgnoredIPNotification, ",")
@ -112,6 +115,7 @@ func (c *Config) updateIgnoredIPNotificationID() {
} }
} }
// Save 保存配置文件
func (c *Config) Save() error { func (c *Config) Save() error {
c.updateIgnoredIPNotificationID() c.updateIgnoredIPNotificationID()
data, err := yaml.Marshal(c) data, err := yaml.Marshal(c)

View File

@ -22,7 +22,7 @@ type Cron struct {
PushSuccessful bool // 推送成功的通知 PushSuccessful bool // 推送成功的通知
LastExecutedAt time.Time // 最后一次执行时间 LastExecutedAt time.Time // 最后一次执行时间
LastResult bool // 最后一次执行结果 LastResult bool // 最后一次执行结果
Cover uint8 Cover uint8 // 计划任务覆盖范围 (0:仅覆盖特定服务器 1:仅忽略特定服务器)
CronJobID cron.EntryID `gorm:"-"` CronJobID cron.EntryID `gorm:"-"`
ServersRaw string ServersRaw string

View File

@ -56,6 +56,7 @@ func (m *Monitor) PB() *pb.Task {
} }
} }
// CronSpec 返回服务监控请求间隔对应的 cron 表达式
func (m *Monitor) CronSpec() string { func (m *Monitor) CronSpec() string {
if m.Duration == 0 { if m.Duration == 0 {
// 默认间隔 30 秒 // 默认间隔 30 秒
@ -76,6 +77,7 @@ func (m *Monitor) AfterFind(tx *gorm.DB) error {
return nil return nil
} }
// IsServiceSentinelNeeded 判断该任务类型是否需要进行服务监控 需要则返回true
func IsServiceSentinelNeeded(t uint64) bool { func IsServiceSentinelNeeded(t uint64) bool {
return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade
} }

View File

@ -4,6 +4,7 @@ import (
pb "github.com/naiba/nezha/proto" pb "github.com/naiba/nezha/proto"
) )
// MonitorHistory 历史监控记录
type MonitorHistory struct { type MonitorHistory struct {
Common Common
MonitorID uint64 MonitorID uint64

View File

@ -159,10 +159,12 @@ func (u *Rule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server,
return nil return nil
} }
// IsTransferDurationRule 判断该规则是否属于周期流量规则 属于则返回true
func (rule Rule) IsTransferDurationRule() bool { func (rule Rule) IsTransferDurationRule() bool {
return strings.HasSuffix(rule.Type, "_cycle") return strings.HasSuffix(rule.Type, "_cycle")
} }
// GetTransferDurationStart 获取周期流量的起始时间
func (rule Rule) GetTransferDurationStart() time.Time { func (rule Rule) GetTransferDurationStart() time.Time {
// Accept uppercase and lowercase // Accept uppercase and lowercase
unit := strings.ToLower(rule.CycleUnit) unit := strings.ToLower(rule.CycleUnit)
@ -202,6 +204,7 @@ func (rule Rule) GetTransferDurationStart() time.Time {
return startTime return startTime
} }
// GetTransferDurationEnd 获取周期流量结束时间
func (rule Rule) GetTransferDurationEnd() time.Time { func (rule Rule) GetTransferDurationEnd() time.Time {
// Accept uppercase and lowercase // Accept uppercase and lowercase
unit := strings.ToLower(rule.CycleUnit) unit := strings.ToLower(rule.CycleUnit)

View File

@ -23,10 +23,11 @@ type NotificationHistory struct {
// 报警规则 // 报警规则
var AlertsLock sync.RWMutex var AlertsLock sync.RWMutex
var Alerts []*model.AlertRule var Alerts []*model.AlertRule
var alertsStore map[uint64]map[uint64][][]interface{} var alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> 对应报警规则的检查结果
var alertsPrevState map[uint64]map[uint64]uint var alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> 对应报警规则的上一次报警状态
var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> 对应报警规则的周期流量统计
// addCycleTransferStatsInfo 向AlertsCycleTransferStatsStore中添加周期流量报警统计信息
func addCycleTransferStatsInfo(alert *model.AlertRule) { func addCycleTransferStatsInfo(alert *model.AlertRule) {
if !alert.Enabled() { if !alert.Enabled() {
return return
@ -52,6 +53,7 @@ func addCycleTransferStatsInfo(alert *model.AlertRule) {
} }
} }
// AlertSentinelStart 报警器启动
func AlertSentinelStart() { func AlertSentinelStart() {
alertsStore = make(map[uint64]map[uint64][][]interface{}) alertsStore = make(map[uint64]map[uint64][][]interface{})
alertsPrevState = make(map[uint64]map[uint64]uint) alertsPrevState = make(map[uint64]map[uint64]uint)
@ -120,6 +122,7 @@ func OnDeleteAlert(id uint64) {
delete(AlertsCycleTransferStatsStore, id) delete(AlertsCycleTransferStatsStore, id)
} }
// checkStatus 检查报警规则并发送报警
func checkStatus() { func checkStatus() {
AlertsLock.RLock() AlertsLock.RLock()
defer AlertsLock.RUnlock() defer AlertsLock.RUnlock()

View File

@ -16,6 +16,7 @@ const firstNotificationDelay = time.Minute * 15
var notifications []model.Notification var notifications []model.Notification
var notificationsLock sync.RWMutex var notificationsLock sync.RWMutex
// LoadNotifications 加载通知方式到 singleton.notifications 变量
func LoadNotifications() { func LoadNotifications() {
notificationsLock.Lock() notificationsLock.Lock()
if err := DB.Find(&notifications).Error; err != nil { if err := DB.Find(&notifications).Error; err != nil {

View File

@ -24,12 +24,14 @@ type ReportData struct {
Reporter uint64 Reporter uint64
} }
// _TodayStatsOfMonitor 今日监控记录
type _TodayStatsOfMonitor struct { type _TodayStatsOfMonitor struct {
Up int Up int // 今日在线计数
Down int Down int // 今日离线计数
Delay float32 Delay float32 // 今日平均延迟
} }
// NewServiceSentinel 创建服务监控器
func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) { func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
ServiceSentinelShared = &ServiceSentinel{ ServiceSentinelShared = &ServiceSentinel{
serviceReportChannel: make(chan ReportData, 200), serviceReportChannel: make(chan ReportData, 200),
@ -46,6 +48,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
monthlyStatus: make(map[uint64]*model.ServiceItemResponse), monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
dispatchBus: serviceSentinelDispatchBus, dispatchBus: serviceSentinelDispatchBus,
} }
// 加载历史记录
ServiceSentinelShared.loadMonitorHistory() ServiceSentinelShared.loadMonitorHistory()
year, month, day := time.Now().Date() year, month, day := time.Now().Date()
@ -72,6 +75,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06") ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
} }
// 启动服务监控器
go ServiceSentinelShared.worker() go ServiceSentinelShared.worker()
// 每日将游标往后推一天 // 每日将游标往后推一天
@ -88,20 +92,20 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
type ServiceSentinel struct { type ServiceSentinel struct {
serviceResponseDataStoreLock sync.RWMutex serviceResponseDataStoreLock sync.RWMutex
monitorsLock sync.RWMutex monitorsLock sync.RWMutex
serviceReportChannel chan ReportData serviceReportChannel chan ReportData // 服务状态汇报管道
serviceStatusToday map[uint64]*_TodayStatsOfMonitor serviceStatusToday map[uint64]*_TodayStatsOfMonitor // [monitor_id] -> _TodayStatsOfMonitor
serviceCurrentStatusIndex map[uint64]int serviceCurrentStatusIndex map[uint64]int // [monitor_id] -> 该监控ID对应的 serviceCurrentStatusData 的最新索引下标
serviceCurrentStatusData map[uint64][]model.MonitorHistory serviceCurrentStatusData map[uint64][]model.MonitorHistory // [monitor_id] -> []model.MonitorHistory
latestDate map[uint64]string latestDate map[uint64]string // 最近一次更新时间
lastStatus map[uint64]string lastStatus map[uint64]string
serviceResponseDataStoreCurrentUp map[uint64]uint64 serviceResponseDataStoreCurrentUp map[uint64]uint64 // [monitor_id] -> 当前服务在线计数
serviceResponseDataStoreCurrentDown map[uint64]uint64 serviceResponseDataStoreCurrentDown map[uint64]uint64 // [monitor_id] -> 当前服务离线计数
monitors map[uint64]*model.Monitor monitors map[uint64]*model.Monitor // [monitor_id] -> model.Monitor
sslCertCache map[uint64]string sslCertCache map[uint64]string
// 30天数据缓存 // 30天数据缓存
monthlyStatusLock sync.Mutex monthlyStatusLock sync.Mutex
monthlyStatus map[uint64]*model.ServiceItemResponse monthlyStatus map[uint64]*model.ServiceItemResponse // [monitor_id] -> model.ServiceItemResponse
// 服务监控调度计划任务 // 服务监控任务调度管道
dispatchBus chan<- model.Monitor dispatchBus chan<- model.Monitor
} }
@ -120,6 +124,7 @@ func (ss *ServiceSentinel) refreshMonthlyServiceStatus() {
} }
} }
// Dispatch 将传入的 ReportData 传给 服务状态汇报管道
func (ss *ServiceSentinel) Dispatch(r ReportData) { func (ss *ServiceSentinel) Dispatch(r ReportData) {
ss.serviceReportChannel <- r ss.serviceReportChannel <- r
} }
@ -137,6 +142,7 @@ func (ss *ServiceSentinel) Monitors() []*model.Monitor {
return monitors return monitors
} }
// loadMonitorHistory 加载服务监控器的历史状态信息
func (ss *ServiceSentinel) loadMonitorHistory() { func (ss *ServiceSentinel) loadMonitorHistory() {
var monitors []*model.Monitor var monitors []*model.Monitor
DB.Find(&monitors) DB.Find(&monitors)
@ -146,6 +152,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
ss.monitors = make(map[uint64]*model.Monitor) ss.monitors = make(map[uint64]*model.Monitor)
for i := 0; i < len(monitors); i++ { for i := 0; i < len(monitors); i++ {
task := *monitors[i] task := *monitors[i]
// 通过cron定时将服务监控任务传递给任务调度管道
monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() { monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
ss.dispatchBus <- task ss.dispatchBus <- task
}) })
@ -171,7 +178,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
} }
} }
// 加载历史记录 // 加载服务监控历史记录
var mhs []model.MonitorHistory var mhs []model.MonitorHistory
DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs) DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs)
for i := 0; i < len(mhs); i++ { for i := 0; i < len(mhs); i++ {
@ -266,6 +273,7 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse {
return ss.monthlyStatus return ss.monthlyStatus
} }
// getStateStr 根据服务在线率返回对应的状态字符串
func getStateStr(percent uint64) string { func getStateStr(percent uint64) string {
if percent == 0 { if percent == 0 {
return "无数据" return "无数据"
@ -279,7 +287,9 @@ func getStateStr(percent uint64) string {
return "故障" return "故障"
} }
// worker 服务监控的实际工作流程
func (ss *ServiceSentinel) worker() { func (ss *ServiceSentinel) worker() {
// 从服务状态汇报管道获取汇报的服务数据
for r := range ss.serviceReportChannel { for r := range ss.serviceReportChannel {
if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 { if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 {
log.Printf("NEZAH>> 错误的服务监控上报 %+v", r) log.Printf("NEZAH>> 错误的服务监控上报 %+v", r)
@ -355,7 +365,7 @@ func (ss *ServiceSentinel) worker() {
// SSL 证书报警 // SSL 证书报警
var errMsg string var errMsg string
if strings.HasPrefix(mh.Data, "SSL证书错误") { if strings.HasPrefix(mh.Data, "SSL证书错误") {
// 排除 i/o timeont、connection timeout、EOF 错误 // 排除 i/o timeout、connection timeout、EOF 错误
if !strings.HasSuffix(mh.Data, "timeout") && if !strings.HasSuffix(mh.Data, "timeout") &&
!strings.HasSuffix(mh.Data, "EOF") && !strings.HasSuffix(mh.Data, "EOF") &&
!strings.HasSuffix(mh.Data, "timed out") { !strings.HasSuffix(mh.Data, "timed out") {

View File

@ -23,14 +23,15 @@ var (
DB *gorm.DB DB *gorm.DB
Loc *time.Location Loc *time.Location
ServerList map[uint64]*model.Server ServerList map[uint64]*model.Server // [ServerID] -> model.Server
SecretToID map[string]uint64 SecretToID map[string]uint64 // [ServerSecret] -> ServerID
ServerLock sync.RWMutex ServerLock sync.RWMutex
SortedServerList []*model.Server SortedServerList []*model.Server // 用于存储服务器列表的 slice(按照服务器 ID 排序)
SortedServerLock sync.RWMutex SortedServerLock sync.RWMutex
) )
// Init 初始化时区为上海时区
func Init() { func Init() {
var err error var err error
Loc, err = time.LoadLocation("Asia/Shanghai") Loc, err = time.LoadLocation("Asia/Shanghai")
@ -39,6 +40,7 @@ func Init() {
} }
} }
// ReSortServer 对服务器列表进行排序(DisplayIndex 相同时按服务器 ID 升序,即 ID 越小越靠前)
func ReSortServer() { func ReSortServer() {
ServerLock.RLock() ServerLock.RLock()
defer ServerLock.RUnlock() defer ServerLock.RUnlock()
@ -50,6 +52,7 @@ func ReSortServer() {
SortedServerList = append(SortedServerList, s) SortedServerList = append(SortedServerList, s)
} }
// 排序的具体实现(DisplayIndex 相同时按服务器 ID 升序,即 ID 越小越靠前)
sort.SliceStable(SortedServerList, func(i, j int) bool { sort.SliceStable(SortedServerList, func(i, j int) bool {
if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex { if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
return SortedServerList[i].ID < SortedServerList[j].ID return SortedServerList[i].ID < SortedServerList[j].ID