mirror of
https://github.com/nezhahq/nezha.git
synced 2025-02-02 01:28:13 -05:00
添加代码注释+修正一个typo
This commit is contained in:
parent
91a1e3fe22
commit
707985e5c8
@ -63,19 +63,20 @@ func initSystem() {
|
|||||||
loadServers() //加载服务器列表
|
loadServers() //加载服务器列表
|
||||||
loadCrons() //加载计划任务
|
loadCrons() //加载计划任务
|
||||||
|
|
||||||
// 清理 服务请求记录 和 流量记录 的旧数据
|
// 每天的3:30 对 监控记录 和 流量记录 进行清理
|
||||||
_, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory)
|
_, err := singleton.Cron.AddFunc("0 30 3 * * *", cleanMonitorHistory)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// 流量记录打点
|
// 每小时对流量记录进行打点
|
||||||
_, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
|
_, err = singleton.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// recordTransferHourlyUsage 对流量记录进行打点
|
||||||
func recordTransferHourlyUsage() {
|
func recordTransferHourlyUsage() {
|
||||||
singleton.ServerLock.Lock()
|
singleton.ServerLock.Lock()
|
||||||
defer singleton.ServerLock.Unlock()
|
defer singleton.ServerLock.Unlock()
|
||||||
@ -102,8 +103,9 @@ func recordTransferHourlyUsage() {
|
|||||||
log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error)
|
log.Println("NEZHA>> Cron 流量统计入库", len(txs), singleton.DB.Create(txs).Error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cleanMonitorHistory 清理无效或过时的 监控记录 和 流量记录
|
||||||
func cleanMonitorHistory() {
|
func cleanMonitorHistory() {
|
||||||
// 清理无效数据
|
// 清理已被删除的服务器的监控记录与流量记录
|
||||||
singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30))
|
singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30))
|
||||||
singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
|
singleton.DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
|
||||||
// 计算可清理流量记录的时长
|
// 计算可清理流量记录的时长
|
||||||
@ -146,6 +148,7 @@ func cleanMonitorHistory() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//loadServers 加载服务器列表并根据ID排序
|
||||||
func loadServers() {
|
func loadServers() {
|
||||||
var servers []model.Server
|
var servers []model.Server
|
||||||
singleton.DB.Find(&servers)
|
singleton.DB.Find(&servers)
|
||||||
@ -159,6 +162,7 @@ func loadServers() {
|
|||||||
singleton.ReSortServer()
|
singleton.ReSortServer()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadCrons 加载计划任务
|
||||||
func loadCrons() {
|
func loadCrons() {
|
||||||
var crons []model.Cron
|
var crons []model.Cron
|
||||||
singleton.DB.Find(&crons)
|
singleton.DB.Find(&crons)
|
||||||
@ -172,6 +176,7 @@ func loadCrons() {
|
|||||||
crIgnoreMap[cr.Servers[j]] = true
|
crIgnoreMap[cr.Servers[j]] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 注册计划任务
|
||||||
cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr))
|
cr.CronJobID, err = singleton.Cron.AddFunc(cr.Scheduler, singleton.CronTrigger(cr))
|
||||||
if err == nil {
|
if err == nil {
|
||||||
singleton.Crons[cr.ID] = &cr
|
singleton.Crons[cr.ID] = &cr
|
||||||
@ -192,7 +197,7 @@ func loadCrons() {
|
|||||||
func main() {
|
func main() {
|
||||||
cleanMonitorHistory()
|
cleanMonitorHistory()
|
||||||
go rpc.ServeRPC(singleton.Conf.GRPCPort)
|
go rpc.ServeRPC(singleton.Conf.GRPCPort)
|
||||||
serviceSentinelDispatchBus := make(chan model.Monitor)
|
serviceSentinelDispatchBus := make(chan model.Monitor) // 用于传递服务监控任务信息的channel
|
||||||
go rpc.DispatchTask(serviceSentinelDispatchBus)
|
go rpc.DispatchTask(serviceSentinelDispatchBus)
|
||||||
go rpc.DispatchKeepalive()
|
go rpc.DispatchKeepalive()
|
||||||
go singleton.AlertSentinelStart()
|
go singleton.AlertSentinelStart()
|
||||||
|
@ -43,6 +43,7 @@ func (r *AlertRule) Enabled() bool {
|
|||||||
return r.Enable != nil && *r.Enable
|
return r.Enable != nil && *r.Enable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Snapshot 对传入的Server进行该报警规则下所有type的检查 返回包含每项检查结果的空接口
|
||||||
func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} {
|
func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server, db *gorm.DB) []interface{} {
|
||||||
var point []interface{}
|
var point []interface{}
|
||||||
for i := 0; i < len(r.Rules); i++ {
|
for i := 0; i < len(r.Rules); i++ {
|
||||||
@ -51,9 +52,10 @@ func (r *AlertRule) Snapshot(cycleTransferStats *CycleTransferStats, server *Ser
|
|||||||
return point
|
return point
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check 传入包含当前报警规则下所有type检查结果的空接口 返回报警持续时间与是否通过报警检查(通过则返回true)
|
||||||
func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
|
func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
|
||||||
var max int
|
var max int // 报警持续时间
|
||||||
var count int
|
var count int // 检查未通过的个数
|
||||||
for i := 0; i < len(r.Rules); i++ {
|
for i := 0; i < len(r.Rules); i++ {
|
||||||
if r.Rules[i].IsTransferDurationRule() {
|
if r.Rules[i].IsTransferDurationRule() {
|
||||||
// 循环区间流量报警
|
// 循环区间流量报警
|
||||||
@ -83,11 +85,13 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
|
|||||||
fail++
|
fail++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 当70%以上的采样点未通过规则判断时 才认为当前检查未通过
|
||||||
if fail/total > 0.7 {
|
if fail/total > 0.7 {
|
||||||
count++
|
count++
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// 仅当所有检查均未通过时 返回false
|
||||||
return max, count != len(r.Rules)
|
return max, count != len(r.Rules)
|
||||||
}
|
}
|
||||||
|
@ -48,8 +48,9 @@ func (c *AgentConfig) Save() error {
|
|||||||
return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm)
|
return ioutil.WriteFile(c.v.ConfigFileUsed(), data, os.ModePerm)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Config 站点配置
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Debug bool
|
Debug bool // debug模式开关
|
||||||
Site struct {
|
Site struct {
|
||||||
Brand string // 站点名称
|
Brand string // 站点名称
|
||||||
CookieName string // 浏览器 Cookie 名称
|
CookieName string // 浏览器 Cookie 名称
|
||||||
@ -73,13 +74,14 @@ type Config struct {
|
|||||||
EnablePlainIPInNotification bool
|
EnablePlainIPInNotification bool
|
||||||
|
|
||||||
// IP变更提醒
|
// IP变更提醒
|
||||||
Cover uint8 // 覆盖范围
|
Cover uint8 // 覆盖范围(0:提醒未被 IgnoredIPNotification 包含的所有服务器; 1:仅提醒被 IgnoredIPNotification 包含的服务器;)
|
||||||
IgnoredIPNotification string // 特定服务器
|
IgnoredIPNotification string // 特定服务器IP(多个服务器用逗号分隔)
|
||||||
|
|
||||||
v *viper.Viper
|
v *viper.Viper
|
||||||
IgnoredIPNotificationServerIDs map[uint64]bool
|
IgnoredIPNotificationServerIDs map[uint64]bool // [ServerID] -> bool(值为true代表当前ServerID在特定服务器列表内)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read 读取配置文件并应用
|
||||||
func (c *Config) Read(path string) error {
|
func (c *Config) Read(path string) error {
|
||||||
c.v = viper.New()
|
c.v = viper.New()
|
||||||
c.v.SetConfigFile(path)
|
c.v.SetConfigFile(path)
|
||||||
@ -101,6 +103,7 @@ func (c *Config) Read(path string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// updateIgnoredIPNotificationID 更新用于判断服务器ID是否属于特定服务器的map
|
||||||
func (c *Config) updateIgnoredIPNotificationID() {
|
func (c *Config) updateIgnoredIPNotificationID() {
|
||||||
c.IgnoredIPNotificationServerIDs = make(map[uint64]bool)
|
c.IgnoredIPNotificationServerIDs = make(map[uint64]bool)
|
||||||
splitedIDs := strings.Split(c.IgnoredIPNotification, ",")
|
splitedIDs := strings.Split(c.IgnoredIPNotification, ",")
|
||||||
@ -112,6 +115,7 @@ func (c *Config) updateIgnoredIPNotificationID() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save 保存配置文件
|
||||||
func (c *Config) Save() error {
|
func (c *Config) Save() error {
|
||||||
c.updateIgnoredIPNotificationID()
|
c.updateIgnoredIPNotificationID()
|
||||||
data, err := yaml.Marshal(c)
|
data, err := yaml.Marshal(c)
|
||||||
|
@ -22,7 +22,7 @@ type Cron struct {
|
|||||||
PushSuccessful bool // 推送成功的通知
|
PushSuccessful bool // 推送成功的通知
|
||||||
LastExecutedAt time.Time // 最后一次执行时间
|
LastExecutedAt time.Time // 最后一次执行时间
|
||||||
LastResult bool // 最后一次执行结果
|
LastResult bool // 最后一次执行结果
|
||||||
Cover uint8
|
Cover uint8 // 计划任务覆盖范围 (0:仅覆盖特定服务器 1:仅忽略特定服务器)
|
||||||
|
|
||||||
CronJobID cron.EntryID `gorm:"-"`
|
CronJobID cron.EntryID `gorm:"-"`
|
||||||
ServersRaw string
|
ServersRaw string
|
||||||
|
@ -56,6 +56,7 @@ func (m *Monitor) PB() *pb.Task {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// CronSpec 返回服务监控请求间隔对应的 cron 表达式
|
||||||
func (m *Monitor) CronSpec() string {
|
func (m *Monitor) CronSpec() string {
|
||||||
if m.Duration == 0 {
|
if m.Duration == 0 {
|
||||||
// 默认间隔 30 秒
|
// 默认间隔 30 秒
|
||||||
@ -76,6 +77,7 @@ func (m *Monitor) AfterFind(tx *gorm.DB) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsServiceSentinelNeeded 判断该任务类型是否需要进行服务监控 需要则返回true
|
||||||
func IsServiceSentinelNeeded(t uint64) bool {
|
func IsServiceSentinelNeeded(t uint64) bool {
|
||||||
return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade
|
return t != TaskTypeCommand && t != TaskTypeTerminal && t != TaskTypeUpgrade
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
pb "github.com/naiba/nezha/proto"
|
pb "github.com/naiba/nezha/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MonitorHistory 历史监控记录
|
||||||
type MonitorHistory struct {
|
type MonitorHistory struct {
|
||||||
Common
|
Common
|
||||||
MonitorID uint64
|
MonitorID uint64
|
||||||
|
@ -159,10 +159,12 @@ func (u *Rule) Snapshot(cycleTransferStats *CycleTransferStats, server *Server,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsTransferDurationRule 判断该规则是否属于周期流量规则 属于则返回true
|
||||||
func (rule Rule) IsTransferDurationRule() bool {
|
func (rule Rule) IsTransferDurationRule() bool {
|
||||||
return strings.HasSuffix(rule.Type, "_cycle")
|
return strings.HasSuffix(rule.Type, "_cycle")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTransferDurationStart 获取周期流量的起始时间
|
||||||
func (rule Rule) GetTransferDurationStart() time.Time {
|
func (rule Rule) GetTransferDurationStart() time.Time {
|
||||||
// Accept uppercase and lowercase
|
// Accept uppercase and lowercase
|
||||||
unit := strings.ToLower(rule.CycleUnit)
|
unit := strings.ToLower(rule.CycleUnit)
|
||||||
@ -202,6 +204,7 @@ func (rule Rule) GetTransferDurationStart() time.Time {
|
|||||||
return startTime
|
return startTime
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetTransferDurationEnd 获取周期流量结束时间
|
||||||
func (rule Rule) GetTransferDurationEnd() time.Time {
|
func (rule Rule) GetTransferDurationEnd() time.Time {
|
||||||
// Accept uppercase and lowercase
|
// Accept uppercase and lowercase
|
||||||
unit := strings.ToLower(rule.CycleUnit)
|
unit := strings.ToLower(rule.CycleUnit)
|
||||||
|
@ -23,10 +23,11 @@ type NotificationHistory struct {
|
|||||||
// 报警规则
|
// 报警规则
|
||||||
var AlertsLock sync.RWMutex
|
var AlertsLock sync.RWMutex
|
||||||
var Alerts []*model.AlertRule
|
var Alerts []*model.AlertRule
|
||||||
var alertsStore map[uint64]map[uint64][][]interface{}
|
var alertsStore map[uint64]map[uint64][][]interface{} // [alert_id][server_id] -> 对应报警规则的检查结果
|
||||||
var alertsPrevState map[uint64]map[uint64]uint
|
var alertsPrevState map[uint64]map[uint64]uint // [alert_id][server_id] -> 对应报警规则的上一次报警状态
|
||||||
var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats
|
var AlertsCycleTransferStatsStore map[uint64]*model.CycleTransferStats // [alert_id] -> 对应报警规则的周期流量统计
|
||||||
|
|
||||||
|
// addCycleTransferStatsInfo 向AlertsCycleTransferStatsStore中添加周期流量报警统计信息
|
||||||
func addCycleTransferStatsInfo(alert *model.AlertRule) {
|
func addCycleTransferStatsInfo(alert *model.AlertRule) {
|
||||||
if !alert.Enabled() {
|
if !alert.Enabled() {
|
||||||
return
|
return
|
||||||
@ -52,6 +53,7 @@ func addCycleTransferStatsInfo(alert *model.AlertRule) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AlertSentinelStart 报警器启动
|
||||||
func AlertSentinelStart() {
|
func AlertSentinelStart() {
|
||||||
alertsStore = make(map[uint64]map[uint64][][]interface{})
|
alertsStore = make(map[uint64]map[uint64][][]interface{})
|
||||||
alertsPrevState = make(map[uint64]map[uint64]uint)
|
alertsPrevState = make(map[uint64]map[uint64]uint)
|
||||||
@ -120,6 +122,7 @@ func OnDeleteAlert(id uint64) {
|
|||||||
delete(AlertsCycleTransferStatsStore, id)
|
delete(AlertsCycleTransferStatsStore, id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkStatus 检查报警规则并发送报警
|
||||||
func checkStatus() {
|
func checkStatus() {
|
||||||
AlertsLock.RLock()
|
AlertsLock.RLock()
|
||||||
defer AlertsLock.RUnlock()
|
defer AlertsLock.RUnlock()
|
||||||
|
@ -16,6 +16,7 @@ const firstNotificationDelay = time.Minute * 15
|
|||||||
var notifications []model.Notification
|
var notifications []model.Notification
|
||||||
var notificationsLock sync.RWMutex
|
var notificationsLock sync.RWMutex
|
||||||
|
|
||||||
|
// LoadNotifications 加载通知方式到 singleton.notifications 变量
|
||||||
func LoadNotifications() {
|
func LoadNotifications() {
|
||||||
notificationsLock.Lock()
|
notificationsLock.Lock()
|
||||||
if err := DB.Find(¬ifications).Error; err != nil {
|
if err := DB.Find(¬ifications).Error; err != nil {
|
||||||
|
@ -24,12 +24,14 @@ type ReportData struct {
|
|||||||
Reporter uint64
|
Reporter uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// _TodayStatsOfMonitor 今日监控记录
|
||||||
type _TodayStatsOfMonitor struct {
|
type _TodayStatsOfMonitor struct {
|
||||||
Up int
|
Up int // 今日在线计数
|
||||||
Down int
|
Down int // 今日离线计数
|
||||||
Delay float32
|
Delay float32 // 今日平均延迟
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewServiceSentinel 创建服务监控器
|
||||||
func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
||||||
ServiceSentinelShared = &ServiceSentinel{
|
ServiceSentinelShared = &ServiceSentinel{
|
||||||
serviceReportChannel: make(chan ReportData, 200),
|
serviceReportChannel: make(chan ReportData, 200),
|
||||||
@ -46,6 +48,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
|||||||
monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
|
monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
|
||||||
dispatchBus: serviceSentinelDispatchBus,
|
dispatchBus: serviceSentinelDispatchBus,
|
||||||
}
|
}
|
||||||
|
// 加载历史记录
|
||||||
ServiceSentinelShared.loadMonitorHistory()
|
ServiceSentinelShared.loadMonitorHistory()
|
||||||
|
|
||||||
year, month, day := time.Now().Date()
|
year, month, day := time.Now().Date()
|
||||||
@ -72,6 +75,7 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
|||||||
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
|
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 启动服务监控器
|
||||||
go ServiceSentinelShared.worker()
|
go ServiceSentinelShared.worker()
|
||||||
|
|
||||||
// 每日将游标往后推一天
|
// 每日将游标往后推一天
|
||||||
@ -88,20 +92,20 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
|||||||
type ServiceSentinel struct {
|
type ServiceSentinel struct {
|
||||||
serviceResponseDataStoreLock sync.RWMutex
|
serviceResponseDataStoreLock sync.RWMutex
|
||||||
monitorsLock sync.RWMutex
|
monitorsLock sync.RWMutex
|
||||||
serviceReportChannel chan ReportData
|
serviceReportChannel chan ReportData // 服务状态汇报管道
|
||||||
serviceStatusToday map[uint64]*_TodayStatsOfMonitor
|
serviceStatusToday map[uint64]*_TodayStatsOfMonitor // [monitor_id] -> _TodayStatsOfMonitor
|
||||||
serviceCurrentStatusIndex map[uint64]int
|
serviceCurrentStatusIndex map[uint64]int // [monitor_id] -> 该监控ID对应的 serviceCurrentStatusData 的最新索引下标
|
||||||
serviceCurrentStatusData map[uint64][]model.MonitorHistory
|
serviceCurrentStatusData map[uint64][]model.MonitorHistory // [monitor_id] -> []model.MonitorHistory
|
||||||
latestDate map[uint64]string
|
latestDate map[uint64]string // 最近一次更新时间
|
||||||
lastStatus map[uint64]string
|
lastStatus map[uint64]string
|
||||||
serviceResponseDataStoreCurrentUp map[uint64]uint64
|
serviceResponseDataStoreCurrentUp map[uint64]uint64 // [monitor_id] -> 当前服务在线计数
|
||||||
serviceResponseDataStoreCurrentDown map[uint64]uint64
|
serviceResponseDataStoreCurrentDown map[uint64]uint64 // [monitor_id] -> 当前服务离线计数
|
||||||
monitors map[uint64]*model.Monitor
|
monitors map[uint64]*model.Monitor // [monitor_id] -> model.Monitor
|
||||||
sslCertCache map[uint64]string
|
sslCertCache map[uint64]string
|
||||||
// 30天数据缓存
|
// 30天数据缓存
|
||||||
monthlyStatusLock sync.Mutex
|
monthlyStatusLock sync.Mutex
|
||||||
monthlyStatus map[uint64]*model.ServiceItemResponse
|
monthlyStatus map[uint64]*model.ServiceItemResponse // [monitor_id] -> model.ServiceItemResponse
|
||||||
// 服务监控调度计划任务
|
// 服务监控任务调度管道
|
||||||
dispatchBus chan<- model.Monitor
|
dispatchBus chan<- model.Monitor
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -120,6 +124,7 @@ func (ss *ServiceSentinel) refreshMonthlyServiceStatus() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dispatch 将传入的 ReportData 传给 服务状态汇报管道
|
||||||
func (ss *ServiceSentinel) Dispatch(r ReportData) {
|
func (ss *ServiceSentinel) Dispatch(r ReportData) {
|
||||||
ss.serviceReportChannel <- r
|
ss.serviceReportChannel <- r
|
||||||
}
|
}
|
||||||
@ -137,6 +142,7 @@ func (ss *ServiceSentinel) Monitors() []*model.Monitor {
|
|||||||
return monitors
|
return monitors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LoadStats 加载服务监控器的历史状态信息
|
||||||
func (ss *ServiceSentinel) loadMonitorHistory() {
|
func (ss *ServiceSentinel) loadMonitorHistory() {
|
||||||
var monitors []*model.Monitor
|
var monitors []*model.Monitor
|
||||||
DB.Find(&monitors)
|
DB.Find(&monitors)
|
||||||
@ -146,6 +152,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
|
|||||||
ss.monitors = make(map[uint64]*model.Monitor)
|
ss.monitors = make(map[uint64]*model.Monitor)
|
||||||
for i := 0; i < len(monitors); i++ {
|
for i := 0; i < len(monitors); i++ {
|
||||||
task := *monitors[i]
|
task := *monitors[i]
|
||||||
|
// 通过cron定时将服务监控任务传递给任务调度管道
|
||||||
monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
|
monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
|
||||||
ss.dispatchBus <- task
|
ss.dispatchBus <- task
|
||||||
})
|
})
|
||||||
@ -171,7 +178,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 加载历史记录
|
// 加载服务监控历史记录
|
||||||
var mhs []model.MonitorHistory
|
var mhs []model.MonitorHistory
|
||||||
DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs)
|
DB.Where("created_at >= ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs)
|
||||||
for i := 0; i < len(mhs); i++ {
|
for i := 0; i < len(mhs); i++ {
|
||||||
@ -266,6 +273,7 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse {
|
|||||||
return ss.monthlyStatus
|
return ss.monthlyStatus
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getStateStr 根据服务在线率返回对应的状态字符串
|
||||||
func getStateStr(percent uint64) string {
|
func getStateStr(percent uint64) string {
|
||||||
if percent == 0 {
|
if percent == 0 {
|
||||||
return "无数据"
|
return "无数据"
|
||||||
@ -279,7 +287,9 @@ func getStateStr(percent uint64) string {
|
|||||||
return "故障"
|
return "故障"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// worker 服务监控的实际工作流程
|
||||||
func (ss *ServiceSentinel) worker() {
|
func (ss *ServiceSentinel) worker() {
|
||||||
|
// 从服务状态汇报管道获取汇报的服务数据
|
||||||
for r := range ss.serviceReportChannel {
|
for r := range ss.serviceReportChannel {
|
||||||
if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 {
|
if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 {
|
||||||
log.Printf("NEZAH>> 错误的服务监控上报 %+v", r)
|
log.Printf("NEZAH>> 错误的服务监控上报 %+v", r)
|
||||||
@ -355,7 +365,7 @@ func (ss *ServiceSentinel) worker() {
|
|||||||
// SSL 证书报警
|
// SSL 证书报警
|
||||||
var errMsg string
|
var errMsg string
|
||||||
if strings.HasPrefix(mh.Data, "SSL证书错误:") {
|
if strings.HasPrefix(mh.Data, "SSL证书错误:") {
|
||||||
// 排除 i/o timeont、connection timeout、EOF 错误
|
// 排除 i/o timeout、connection timeout、EOF 错误
|
||||||
if !strings.HasSuffix(mh.Data, "timeout") &&
|
if !strings.HasSuffix(mh.Data, "timeout") &&
|
||||||
!strings.HasSuffix(mh.Data, "EOF") &&
|
!strings.HasSuffix(mh.Data, "EOF") &&
|
||||||
!strings.HasSuffix(mh.Data, "timed out") {
|
!strings.HasSuffix(mh.Data, "timed out") {
|
||||||
|
@ -23,14 +23,15 @@ var (
|
|||||||
DB *gorm.DB
|
DB *gorm.DB
|
||||||
Loc *time.Location
|
Loc *time.Location
|
||||||
|
|
||||||
ServerList map[uint64]*model.Server
|
ServerList map[uint64]*model.Server // [ServerID] -> model.Server
|
||||||
SecretToID map[string]uint64
|
SecretToID map[string]uint64 // [ServerSecret] -> ServerID
|
||||||
ServerLock sync.RWMutex
|
ServerLock sync.RWMutex
|
||||||
|
|
||||||
SortedServerList []*model.Server
|
SortedServerList []*model.Server // 用于存储服务器列表的 slice,按照服务器 ID 排序
|
||||||
SortedServerLock sync.RWMutex
|
SortedServerLock sync.RWMutex
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Init 初始化时区为上海时区
|
||||||
func Init() {
|
func Init() {
|
||||||
var err error
|
var err error
|
||||||
Loc, err = time.LoadLocation("Asia/Shanghai")
|
Loc, err = time.LoadLocation("Asia/Shanghai")
|
||||||
@ -39,6 +40,7 @@ func Init() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReSortServer 根据服务器ID 对服务器列表进行排序(ID越大越靠前)
|
||||||
func ReSortServer() {
|
func ReSortServer() {
|
||||||
ServerLock.RLock()
|
ServerLock.RLock()
|
||||||
defer ServerLock.RUnlock()
|
defer ServerLock.RUnlock()
|
||||||
@ -50,6 +52,7 @@ func ReSortServer() {
|
|||||||
SortedServerList = append(SortedServerList, s)
|
SortedServerList = append(SortedServerList, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 按照服务器 ID 排序的具体实现(ID越大越靠前)
|
||||||
sort.SliceStable(SortedServerList, func(i, j int) bool {
|
sort.SliceStable(SortedServerList, func(i, j int) bool {
|
||||||
if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
|
if SortedServerList[i].DisplayIndex == SortedServerList[j].DisplayIndex {
|
||||||
return SortedServerList[i].ID < SortedServerList[j].ID
|
return SortedServerList[i].ID < SortedServerList[j].ID
|
||||||
|
Loading…
Reference in New Issue
Block a user