From eae12d8df26f818036a04f5291e5e60c1937e3e7 Mon Sep 17 00:00:00 2001 From: naiba Date: Fri, 25 Oct 2024 00:13:45 +0800 Subject: [PATCH] refactor: rename monitor -> service --- cmd/dashboard/controller/common_page.go | 35 +-- cmd/dashboard/controller/controller.go | 8 +- cmd/dashboard/controller/member_api.go | 115 -------- cmd/dashboard/controller/member_page.go | 10 - .../controller/{monitor.go => service.go} | 103 ++++--- cmd/dashboard/main.go | 6 +- cmd/dashboard/rpc/rpc.go | 10 +- model/alertrule.go | 13 - model/api.go | 18 -- model/monitor_history.go | 21 -- model/{monitor.go => service.go} | 16 +- model/{monitor_api.go => service_api.go} | 38 ++- model/service_history.go | 20 ++ service/singleton/api.go | 271 ------------------ service/singleton/servicesentinel.go | 220 +++++++------- service/singleton/singleton.go | 13 +- 16 files changed, 260 insertions(+), 657 deletions(-) rename cmd/dashboard/controller/{monitor.go => service.go} (51%) delete mode 100644 model/monitor_history.go rename model/{monitor.go => service.go} (91%) rename model/{monitor_api.go => service_api.go} (57%) create mode 100644 model/service_history.go delete mode 100644 service/singleton/api.go diff --git a/cmd/dashboard/controller/common_page.go b/cmd/dashboard/controller/common_page.go index ef702f2..3de8ecc 100644 --- a/cmd/dashboard/controller/common_page.go +++ b/cmd/dashboard/controller/common_page.go @@ -8,7 +8,6 @@ import ( "github.com/gin-gonic/gin" "github.com/gorilla/websocket" "github.com/hashicorp/go-uuid" - "github.com/jinzhu/copier" "github.com/naiba/nezha/model" "github.com/naiba/nezha/pkg/utils" @@ -24,7 +23,6 @@ type commonPage struct { func (cp *commonPage) serve() { cr := cp.r.Group("") - cr.GET("/service", cp.service) // TODO: 界面直接跳转使用该接口 cr.GET("/network/:id", cp.network) cr.GET("/network", cp.network) @@ -32,34 +30,9 @@ func (cp *commonPage) serve() { cr.GET("/file/:id", cp.fm) } -func (p *commonPage) service(c *gin.Context) { - res, _, _ := requestGroup.Do("servicePage", func() (interface{}, error) { - singleton.AlertsLock.RLock() - defer singleton.AlertsLock.RUnlock() - var stats map[uint64]model.ServiceItemResponse - var statsStore map[uint64]model.CycleTransferStats - copier.Copy(&stats, singleton.ServiceSentinelShared.LoadStats()) - copier.Copy(&statsStore, singleton.AlertsCycleTransferStatsStore) - for k, service := range stats { - if !service.Monitor.EnableShowInService { - delete(stats, k) - } - } - return []interface { - }{ - stats, statsStore, - }, nil - }) - c.HTML(http.StatusOK, "", gin.H{ - // "Title": singleton.Localizer.MustLocalize(&i18n.LocalizeConfig{MessageID: "ServicesStatus"}), - "Services": res.([]interface{})[0], - "CycleTransferStats": res.([]interface{})[1], - }) -} - func (cp *commonPage) network(c *gin.Context) { var ( - monitorHistory *model.MonitorHistory + monitorHistory *model.ServiceHistory servers []model.Server serverIdsWithMonitor []uint64 monitorInfos = []byte("{}") @@ -68,7 +41,7 @@ func (cp *commonPage) network(c *gin.Context) { if len(singleton.SortedServerList) > 0 { id = singleton.SortedServerList[0].ID } - if err := singleton.DB.Model(&model.MonitorHistory{}).Select("monitor_id, server_id"). + if err := singleton.DB.Model(&model.ServiceHistory{}).Select("monitor_id, server_id"). Where("monitor_id != 0 and server_id != 0").Limit(1).First(&monitorHistory).Error; err != nil { // mygin.ShowErrorPage(c, mygin.ErrInfo{ // Code: http.StatusForbidden, @@ -114,12 +87,10 @@ func (cp *commonPage) network(c *gin.Context) { return } } - monitorHistories := singleton.MonitorAPI.GetMonitorHistories(map[string]any{"server_id": id}) - monitorInfos, _ = utils.Json.Marshal(monitorHistories) _, isMember := c.Get(model.CtxKeyAuthorizedUser) var isViewPasswordVerfied bool - if err := singleton.DB.Model(&model.MonitorHistory{}). + if err := singleton.DB.Model(&model.ServiceHistory{}). Select("distinct(server_id)"). Where("server_id != 0"). Find(&serverIdsWithMonitor). diff --git a/cmd/dashboard/controller/controller.go b/cmd/dashboard/controller/controller.go index 21fd7a3..122b06f 100644 --- a/cmd/dashboard/controller/controller.go +++ b/cmd/dashboard/controller/controller.go @@ -63,10 +63,10 @@ func routers(r *gin.Engine) { auth.POST("/user", commonHandler(createUser)) auth.POST("/batch-delete/user", commonHandler(batchDeleteUser)) - auth.GET("/monitor", commonHandler(listMonitor)) - auth.POST("/monitor", commonHandler(createMonitor)) - auth.PATCH("/monitor/:id", commonHandler(updateMonitor)) - auth.POST("/batch-delete/monitor", commonHandler(batchDeleteMonitor)) + auth.GET("/service", commonHandler(listService)) + auth.POST("/service", commonHandler(createService)) + auth.PATCH("/service/:id", commonHandler(updateService)) + auth.POST("/batch-delete/service", commonHandler(batchDeleteService)) auth.POST("/server-group", commonHandler(createServerGroup)) auth.PATCH("/server-group/:id", commonHandler(updateServerGroup)) diff --git a/cmd/dashboard/controller/member_api.go b/cmd/dashboard/controller/member_api.go index 7b625a5..409ead3 100644 --- a/cmd/dashboard/controller/member_api.go +++ b/cmd/dashboard/controller/member_api.go @@ -43,121 +43,6 @@ func (ma *memberAPI) serve() { mr.POST("/setting", ma.updateSetting) mr.DELETE("/:model/:id", ma.delete) mr.POST("/logout", ma.logout) - mr.GET("/token", ma.getToken) - mr.POST("/token", ma.issueNewToken) - mr.DELETE("/token/:token", ma.deleteToken) -} - -type apiResult struct { - Token string `json:"token"` - Note string `json:"note"` -} - -// getToken 获取 Token -func (ma *memberAPI) getToken(c *gin.Context) { - u := c.MustGet(model.CtxKeyAuthorizedUser).(*model.User) - singleton.ApiLock.RLock() - defer singleton.ApiLock.RUnlock() - - tokenList := singleton.UserIDToApiTokenList[u.ID] - res := make([]*apiResult, len(tokenList)) - for i, token := range tokenList { - res[i] = &apiResult{ - Token: token, - Note: singleton.ApiTokenList[token].Note, - } - } - c.JSON(http.StatusOK, gin.H{ - "code": 0, - "message": "success", - "result": res, - }) -} - -type TokenForm struct { - Note string -} - -// issueNewToken 生成新的 token -func (ma *memberAPI) issueNewToken(c *gin.Context) { - u := c.MustGet(model.CtxKeyAuthorizedUser).(*model.User) - tf := &TokenForm{} - err := c.ShouldBindJSON(tf) - if err != nil { - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusBadRequest, - Message: fmt.Sprintf("请求错误:%s", err), - }) - return - } - secureToken, err := utils.GenerateRandomString(32) - if err != nil { - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusBadRequest, - Message: fmt.Sprintf("请求错误:%s", err), - }) - return - } - token := &model.ApiToken{ - UserID: u.ID, - Token: secureToken, - Note: tf.Note, - } - singleton.DB.Create(token) - - singleton.ApiLock.Lock() - singleton.ApiTokenList[token.Token] = token - singleton.UserIDToApiTokenList[u.ID] = append(singleton.UserIDToApiTokenList[u.ID], token.Token) - singleton.ApiLock.Unlock() - - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusOK, - Message: "success", - Result: map[string]string{ - "token": token.Token, - "note": token.Note, - }, - }) -} - -// deleteToken 删除 token -func (ma *memberAPI) deleteToken(c *gin.Context) { - token := c.Param("token") - if token == "" { - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusBadRequest, - Message: "token 不能为空", - }) - return - } - singleton.ApiLock.Lock() - defer singleton.ApiLock.Unlock() - if _, ok := singleton.ApiTokenList[token]; !ok { - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusBadRequest, - Message: "token 不存在", - }) - return - } - // 在数据库中删除该Token - singleton.DB.Unscoped().Delete(&model.ApiToken{}, "token = ?", token) - - // 在UserIDToApiTokenList中删除该Token - for i, t := range singleton.UserIDToApiTokenList[singleton.ApiTokenList[token].UserID] { - if t == token { - singleton.UserIDToApiTokenList[singleton.ApiTokenList[token].UserID] = append(singleton.UserIDToApiTokenList[singleton.ApiTokenList[token].UserID][:i], singleton.UserIDToApiTokenList[singleton.ApiTokenList[token].UserID][i+1:]...) - break - } - } - if len(singleton.UserIDToApiTokenList[singleton.ApiTokenList[token].UserID]) == 0 { - delete(singleton.UserIDToApiTokenList, singleton.ApiTokenList[token].UserID) - } - // 在ApiTokenList中删除该Token - delete(singleton.ApiTokenList, token) - c.JSON(http.StatusOK, model.Response{ - Code: http.StatusOK, - Message: "success", - }) } func (ma *memberAPI) delete(c *gin.Context) { diff --git a/cmd/dashboard/controller/member_page.go b/cmd/dashboard/controller/member_page.go index 8cf4fb5..c73a810 100644 --- a/cmd/dashboard/controller/member_page.go +++ b/cmd/dashboard/controller/member_page.go @@ -26,16 +26,6 @@ func (mp *memberPage) serve() { mr.GET("/ddns", mp.ddns) mr.GET("/nat", mp.nat) mr.GET("/setting", mp.setting) - mr.GET("/api", mp.api) -} - -func (mp *memberPage) api(c *gin.Context) { - singleton.ApiLock.RLock() - defer singleton.ApiLock.RUnlock() - c.HTML(http.StatusOK, "dashboard-", gin.H{ - // "title": singleton.Localizer.MustLocalize(&i18n.LocalizeConfig{MessageID: "ApiManagement"}), - "Tokens": singleton.ApiTokenList, - }) } func (mp *memberPage) cron(c *gin.Context) { diff --git a/cmd/dashboard/controller/monitor.go b/cmd/dashboard/controller/service.go similarity index 51% rename from cmd/dashboard/controller/monitor.go rename to cmd/dashboard/controller/service.go index 954262e..b17d302 100644 --- a/cmd/dashboard/controller/monitor.go +++ b/cmd/dashboard/controller/service.go @@ -6,42 +6,67 @@ import ( "strings" "github.com/gin-gonic/gin" + "github.com/jinzhu/copier" "github.com/naiba/nezha/model" "github.com/naiba/nezha/service/singleton" "gorm.io/gorm" ) -// List monitor -// @Summary List monitor +// List service +// @Summary List service // @Security BearerAuth // @Schemes -// @Description List monitor +// @Description List service // @Tags auth required // @Produce json -// @Success 200 {object} model.CommonResponse[[]model.Monitor] -// @Router /monitor [get] -func listMonitor(c *gin.Context) ([]*model.Monitor, error) { - return singleton.ServiceSentinelShared.Monitors(), nil +// @Success 200 {object} model.CommonResponse[model.ServiceResponse] +// @Router /service [get] +func listService(c *gin.Context) (*model.ServiceResponse, error) { + res, err, _ := requestGroup.Do("list-service", func() (interface{}, error) { + singleton.AlertsLock.RLock() + defer singleton.AlertsLock.RUnlock() + var stats map[uint64]model.ServiceResponseItem + var statsStore map[uint64]model.CycleTransferStats + copier.Copy(&stats, singleton.ServiceSentinelShared.LoadStats()) + copier.Copy(&statsStore, singleton.AlertsCycleTransferStatsStore) + for k, service := range stats { + if !service.Service.EnableShowInService { + delete(stats, k) + } + } + return []interface { + }{ + stats, statsStore, + }, nil + }) + if err != nil { + return nil, err + } + + return &model.ServiceResponse{ + Services: res.([]interface{})[0].(map[uint64]model.ServiceResponseItem), + CycleTransferStats: res.([]interface{})[1].(map[uint64]model.CycleTransferStats), + }, nil } -// Create monitor -// @Summary Create monitor +// Create service +// @Summary Create service // @Security BearerAuth // @Schemes -// @Description Create monitor +// @Description Create service // @Tags auth required // @Accept json -// @param request body model.MonitorForm true "Monitor Request" +// @param request body model.ServiceForm true "Service Request" // @Produce json // @Success 200 {object} model.CommonResponse[uint64] -// @Router /monitor [post] -func createMonitor(c *gin.Context) (uint64, error) { - var mf model.MonitorForm +// @Router /service [post] +func createService(c *gin.Context) (uint64, error) { + var mf model.ServiceForm if err := c.ShouldBindJSON(&mf); err != nil { return 0, err } - var m model.Monitor + var m model.Service m.Name = mf.Name m.Target = strings.TrimSpace(mf.Target) m.Type = mf.Type @@ -69,42 +94,42 @@ func createMonitor(c *gin.Context) (uint64, error) { var err error if m.Cover == 0 { - err = singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ? and server_id in (?)", m.ID, skipServers).Error + err = singleton.DB.Unscoped().Delete(&model.ServiceHistory{}, "service_id = ? and server_id in (?)", m.ID, skipServers).Error } else { - err = singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ? and server_id not in (?)", m.ID, skipServers).Error + err = singleton.DB.Unscoped().Delete(&model.ServiceHistory{}, "service_id = ? and server_id not in (?)", m.ID, skipServers).Error } if err != nil { return 0, err } - return m.ID, singleton.ServiceSentinelShared.OnMonitorUpdate(m) + return m.ID, singleton.ServiceSentinelShared.OnServiceUpdate(m) } -// Update monitor -// @Summary Update monitor +// Update service +// @Summary Update service // @Security BearerAuth // @Schemes -// @Description Update monitor +// @Description Update service // @Tags auth required // @Accept json -// @param id path uint true "Monitor ID" -// @param request body model.MonitorForm true "Monitor Request" +// @param id path uint true "Service ID" +// @param request body model.ServiceForm true "Service Request" // @Produce json // @Success 200 {object} model.CommonResponse[any] -// @Router /monitor/{id} [patch] -func updateMonitor(c *gin.Context) (any, error) { +// @Router /service/{id} [patch] +func updateService(c *gin.Context) (any, error) { strID := c.Param("id") id, err := strconv.ParseUint(strID, 10, 64) if err != nil { return nil, err } - var mf model.MonitorForm + var mf model.ServiceForm if err := c.ShouldBindJSON(&mf); err != nil { return nil, err } - var m model.Monitor + var m model.Service if err := singleton.DB.First(&m, id).Error; err != nil { - return nil, fmt.Errorf("monitor id %d does not exist", id) + return nil, fmt.Errorf("service id %d does not exist", id) } m.Name = mf.Name m.Target = strings.TrimSpace(mf.Target) @@ -132,42 +157,42 @@ func updateMonitor(c *gin.Context) (any, error) { } if m.Cover == 0 { - err = singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ? and server_id in (?)", m.ID, skipServers).Error + err = singleton.DB.Unscoped().Delete(&model.ServiceHistory{}, "service_id = ? and server_id in (?)", m.ID, skipServers).Error } else { - err = singleton.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ? and server_id not in (?)", m.ID, skipServers).Error + err = singleton.DB.Unscoped().Delete(&model.ServiceHistory{}, "service_id = ? and server_id not in (?)", m.ID, skipServers).Error } if err != nil { return nil, err } - return nil, singleton.ServiceSentinelShared.OnMonitorUpdate(m) + return nil, singleton.ServiceSentinelShared.OnServiceUpdate(m) } -// Batch delete monitor -// @Summary Batch delete monitor +// Batch delete service +// @Summary Batch delete service // @Security BearerAuth // @Schemes -// @Description Batch delete monitor +// @Description Batch delete service // @Tags auth required // @Accept json // @param request body []uint true "id list" // @Produce json // @Success 200 {object} model.CommonResponse[any] -// @Router /batch-delete/monitor [post] -func batchDeleteMonitor(c *gin.Context) (any, error) { +// @Router /batch-delete/service [post] +func batchDeleteService(c *gin.Context) (any, error) { var ids []uint64 if err := c.ShouldBindJSON(&ids); err != nil { return nil, err } err := singleton.DB.Transaction(func(tx *gorm.DB) error { - if err := tx.Unscoped().Delete(&model.Monitor{}, "id in (?)", ids).Error; err != nil { + if err := tx.Unscoped().Delete(&model.Service{}, "id in (?)", ids).Error; err != nil { return err } - return tx.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id in (?)", ids).Error + return tx.Unscoped().Delete(&model.ServiceHistory{}, "service_id in (?)", ids).Error }) if err != nil { return nil, err } - singleton.ServiceSentinelShared.OnMonitorDelete(ids) + singleton.ServiceSentinelShared.OnServiceDelete(ids) return nil, nil } diff --git a/cmd/dashboard/main.go b/cmd/dashboard/main.go index 6f2d94a..54a839c 100644 --- a/cmd/dashboard/main.go +++ b/cmd/dashboard/main.go @@ -71,7 +71,7 @@ func initSystem() { singleton.LoadSingleton() // 每天的3:30 对 监控记录 和 流量记录 进行清理 - if _, err := singleton.Cron.AddFunc("0 30 3 * * *", singleton.CleanMonitorHistory); err != nil { + if _, err := singleton.Cron.AddFunc("0 30 3 * * *", singleton.CleanServiceHistory); err != nil { panic(err) } @@ -113,8 +113,8 @@ func main() { log.Fatal(err) } - singleton.CleanMonitorHistory() - serviceSentinelDispatchBus := make(chan model.Monitor) // 用于传递服务监控任务信息的channel + singleton.CleanServiceHistory() + serviceSentinelDispatchBus := make(chan model.Service) // 用于传递服务监控任务信息的channel go rpc.DispatchTask(serviceSentinelDispatchBus) go rpc.DispatchKeepalive() go singleton.AlertSentinelStart() diff --git a/cmd/dashboard/rpc/rpc.go b/cmd/dashboard/rpc/rpc.go index 55689ec..8aecbfc 100644 --- a/cmd/dashboard/rpc/rpc.go +++ b/cmd/dashboard/rpc/rpc.go @@ -22,7 +22,7 @@ func ServeRPC() *grpc.Server { return server } -func DispatchTask(serviceSentinelDispatchBus <-chan model.Monitor) { +func DispatchTask(serviceSentinelDispatchBus <-chan model.Service) { workedServerIndex := 0 for task := range serviceSentinelDispatchBus { round := 0 @@ -42,17 +42,17 @@ func DispatchTask(serviceSentinelDispatchBus <-chan model.Monitor) { continue } // 如果此任务不可使用此服务器请求,跳过这个服务器(有些 IPv6 only 开了 NAT64 的机器请求 IPv4 总会出问题) - if (task.Cover == model.MonitorCoverAll && task.SkipServers[singleton.SortedServerList[workedServerIndex].ID]) || - (task.Cover == model.MonitorCoverIgnoreAll && !task.SkipServers[singleton.SortedServerList[workedServerIndex].ID]) { + if (task.Cover == model.ServiceCoverAll && task.SkipServers[singleton.SortedServerList[workedServerIndex].ID]) || + (task.Cover == model.ServiceCoverIgnoreAll && !task.SkipServers[singleton.SortedServerList[workedServerIndex].ID]) { workedServerIndex++ continue } - if task.Cover == model.MonitorCoverIgnoreAll && task.SkipServers[singleton.SortedServerList[workedServerIndex].ID] { + if task.Cover == model.ServiceCoverIgnoreAll && task.SkipServers[singleton.SortedServerList[workedServerIndex].ID] { singleton.SortedServerList[workedServerIndex].TaskStream.Send(task.PB()) workedServerIndex++ continue } - if task.Cover == model.MonitorCoverAll && !task.SkipServers[singleton.SortedServerList[workedServerIndex].ID] { + if task.Cover == model.ServiceCoverAll && !task.SkipServers[singleton.SortedServerList[workedServerIndex].ID] { singleton.SortedServerList[workedServerIndex].TaskStream.Send(task.PB()) workedServerIndex++ continue diff --git a/model/alertrule.go b/model/alertrule.go index 23e651f..8c68353 100644 --- a/model/alertrule.go +++ b/model/alertrule.go @@ -1,8 +1,6 @@ package model import ( - "time" - "github.com/naiba/nezha/pkg/utils" "gorm.io/gorm" ) @@ -12,17 +10,6 @@ const ( ModeOnetimeTrigger = 1 ) -type CycleTransferStats struct { - Name string - From time.Time - To time.Time - Max uint64 - Min uint64 - ServerName map[uint64]string - Transfer map[uint64]uint64 - NextUpdate map[uint64]time.Time -} - type AlertRule struct { Common Name string diff --git a/model/api.go b/model/api.go index 489e6f9..f69d157 100644 --- a/model/api.go +++ b/model/api.go @@ -4,24 +4,6 @@ const ( ApiErrorUnauthorized = 10001 ) -type ServiceItemResponse struct { - Monitor *Monitor - CurrentUp uint64 - CurrentDown uint64 - TotalUp uint64 - TotalDown uint64 - Delay *[30]float32 - Up *[30]int - Down *[30]int -} - -func (r ServiceItemResponse) TotalUptime() float32 { - if r.TotalUp+r.TotalDown == 0 { - return 0 - } - return float32(r.TotalUp) / (float32(r.TotalUp + r.TotalDown)) * 100 -} - type LoginRequest struct { Username string `json:"username,omitempty"` Password string `json:"password,omitempty"` diff --git a/model/monitor_history.go b/model/monitor_history.go deleted file mode 100644 index 5d2d1bb..0000000 --- a/model/monitor_history.go +++ /dev/null @@ -1,21 +0,0 @@ -package model - -import ( - "time" - - "gorm.io/gorm" -) - -// MonitorHistory 历史监控记录 -type MonitorHistory struct { - ID uint64 `gorm:"primaryKey"` - CreatedAt time.Time `gorm:"index;<-:create;index:idx_server_id_created_at_monitor_id_avg_delay"` - UpdatedAt time.Time `gorm:"autoUpdateTime"` - DeletedAt gorm.DeletedAt `gorm:"index"` - MonitorID uint64 `gorm:"index:idx_server_id_created_at_monitor_id_avg_delay"` - ServerID uint64 `gorm:"index:idx_server_id_created_at_monitor_id_avg_delay"` - AvgDelay float32 `gorm:"index:idx_server_id_created_at_monitor_id_avg_delay"` // 平均延迟,毫秒 - Up uint64 // 检查状态良好计数 - Down uint64 // 检查状态异常计数 - Data string -} diff --git a/model/monitor.go b/model/service.go similarity index 91% rename from model/monitor.go rename to model/service.go index bbaed1e..6262c92 100644 --- a/model/monitor.go +++ b/model/service.go @@ -40,11 +40,11 @@ type TaskFM struct { } const ( - MonitorCoverAll = iota - MonitorCoverIgnoreAll + ServiceCoverAll = iota + ServiceCoverIgnoreAll ) -type Monitor struct { +type Service struct { Common Name string `json:"name,omitempty"` Type uint8 `json:"type,omitempty"` @@ -71,7 +71,7 @@ type Monitor struct { CronJobID cron.EntryID `gorm:"-" json:"-"` } -func (m *Monitor) PB() *pb.Task { +func (m *Service) PB() *pb.Task { return &pb.Task{ Id: m.ID, Type: uint64(m.Type), @@ -80,7 +80,7 @@ func (m *Monitor) PB() *pb.Task { } // CronSpec 返回服务监控请求间隔对应的 cron 表达式 -func (m *Monitor) CronSpec() string { +func (m *Service) CronSpec() string { if m.Duration == 0 { // 默认间隔 30 秒 m.Duration = 30 @@ -88,7 +88,7 @@ func (m *Monitor) CronSpec() string { return fmt.Sprintf("@every %ds", m.Duration) } -func (m *Monitor) BeforeSave(tx *gorm.DB) error { +func (m *Service) BeforeSave(tx *gorm.DB) error { if data, err := utils.Json.Marshal(m.SkipServers); err != nil { return err } else { @@ -107,10 +107,10 @@ func (m *Monitor) BeforeSave(tx *gorm.DB) error { return nil } -func (m *Monitor) AfterFind(tx *gorm.DB) error { +func (m *Service) AfterFind(tx *gorm.DB) error { m.SkipServers = make(map[uint64]bool) if err := utils.Json.Unmarshal([]byte(m.SkipServersRaw), &m.SkipServers); err != nil { - log.Println("NEZHA>> Monitor.AfterFind:", err) + log.Println("NEZHA>> Service.AfterFind:", err) return nil } diff --git a/model/monitor_api.go b/model/service_api.go similarity index 57% rename from model/monitor_api.go rename to model/service_api.go index 205b4f1..7fbb092 100644 --- a/model/monitor_api.go +++ b/model/service_api.go @@ -1,6 +1,8 @@ package model -type MonitorForm struct { +import "time" + +type ServiceForm struct { Name string `json:"name,omitempty"` Target string `json:"target,omitempty"` Type uint8 `json:"type,omitempty"` @@ -17,3 +19,37 @@ type MonitorForm struct { SkipServers map[uint64]bool `json:"skip_servers,omitempty"` NotificationGroupID uint64 `json:"notification_group_id,omitempty"` } + +type ServiceResponseItem struct { + Service *Service + CurrentUp uint64 + CurrentDown uint64 + TotalUp uint64 + TotalDown uint64 + Delay *[30]float32 + Up *[30]int + Down *[30]int +} + +func (r ServiceResponseItem) TotalUptime() float32 { + if r.TotalUp+r.TotalDown == 0 { + return 0 + } + return float32(r.TotalUp) / (float32(r.TotalUp + r.TotalDown)) * 100 +} + +type CycleTransferStats struct { + Name string + From time.Time + To time.Time + Max uint64 + Min uint64 + ServerName map[uint64]string + Transfer map[uint64]uint64 + NextUpdate map[uint64]time.Time +} + +type ServiceResponse struct { + Services map[uint64]ServiceResponseItem + CycleTransferStats map[uint64]CycleTransferStats +} diff --git a/model/service_history.go b/model/service_history.go new file mode 100644 index 0000000..1ebc155 --- /dev/null +++ b/model/service_history.go @@ -0,0 +1,20 @@ +package model + +import ( + "time" + + "gorm.io/gorm" +) + +type ServiceHistory struct { + ID uint64 `gorm:"primaryKey" json:"id,omitempty"` + CreatedAt time.Time `gorm:"index;<-:create;index:idx_server_id_created_at_service_id_avg_delay" json:"created_at,omitempty"` + UpdatedAt time.Time `gorm:"autoUpdateTime" json:"updated_at,omitempty"` + DeletedAt gorm.DeletedAt `gorm:"index" json:"deleted_at,omitempty"` + ServiceID uint64 `gorm:"index:idx_server_id_created_at_service_id_avg_delay" json:"service_id,omitempty"` + ServerID uint64 `gorm:"index:idx_server_id_created_at_service_id_avg_delay" json:"server_id,omitempty"` + AvgDelay float32 `gorm:"index:idx_server_id_created_at_service_id_avg_delay" json:"avg_delay,omitempty"` // 平均延迟,毫秒 + Up uint64 `json:"up,omitempty"` // 检查状态良好计数 + Down uint64 `json:"down,omitempty"` // 检查状态异常计数 + Data string `json:"data,omitempty"` +} diff --git a/service/singleton/api.go b/service/singleton/api.go deleted file mode 100644 index 0f813ac..0000000 --- a/service/singleton/api.go +++ /dev/null @@ -1,271 +0,0 @@ -package singleton - -import ( - "sync" - "time" - - "github.com/naiba/nezha/model" - "github.com/naiba/nezha/pkg/utils" -) - -var ( - ApiTokenList = make(map[string]*model.ApiToken) - UserIDToApiTokenList = make(map[uint64][]string) - ApiLock sync.RWMutex - - ServerAPI = &ServerAPIService{} - MonitorAPI = &MonitorAPIService{} -) - -type ServerAPIService struct{} - -// CommonResponse 常规返回结构 包含状态码 和 状态信息 -type CommonResponse struct { - Code int `json:"code"` - Message string `json:"message"` -} - -type CommonServerInfo struct { - ID uint64 `json:"id"` - Name string `json:"name"` - Tag string `json:"tag"` - LastActive int64 `json:"last_active"` - IPV4 string `json:"ipv4"` - IPV6 string `json:"ipv6"` - ValidIP string `json:"valid_ip"` - DisplayIndex int `json:"display_index"` - HideForGuest bool `json:"hide_for_guest"` -} - -// StatusResponse 服务器状态子结构 包含服务器信息与状态信息 -type StatusResponse struct { - CommonServerInfo - Host *model.Host `json:"host"` - Status *model.HostState `json:"status"` -} - -// ServerStatusResponse 服务器状态返回结构 包含常规返回结构 和 服务器状态子结构 -type ServerStatusResponse struct { - CommonResponse - Result []*StatusResponse `json:"result"` -} - -// ServerInfoResponse 服务器信息返回结构 包含常规返回结构 和 服务器信息子结构 -type ServerInfoResponse struct { - CommonResponse - Result []*CommonServerInfo `json:"result"` -} - -type MonitorAPIService struct { -} - -type MonitorInfoResponse struct { - CommonResponse - Result []*MonitorInfo `json:"result"` -} - -type MonitorInfo struct { - MonitorID uint64 `json:"monitor_id"` - ServerID uint64 `json:"server_id"` - MonitorName string `json:"monitor_name"` - ServerName string `json:"server_name"` - CreatedAt []int64 `json:"created_at"` - AvgDelay []float32 `json:"avg_delay"` -} - -func InitAPI() { - ApiTokenList = make(map[string]*model.ApiToken) - UserIDToApiTokenList = make(map[uint64][]string) -} - -func loadAPI() { - InitAPI() - var tokenList []*model.ApiToken - DB.Find(&tokenList) - for _, token := range tokenList { - ApiTokenList[token.Token] = token - UserIDToApiTokenList[token.UserID] = append(UserIDToApiTokenList[token.UserID], token.Token) - } -} - -// GetStatusByIDList 获取传入IDList的服务器状态信息 -func (s *ServerAPIService) GetStatusByIDList(idList []uint64) *ServerStatusResponse { - res := &ServerStatusResponse{} - res.Result = make([]*StatusResponse, 0) - - ServerLock.RLock() - defer ServerLock.RUnlock() - - for _, v := range idList { - server := ServerList[v] - if server == nil { - continue - } - ipv4, ipv6, validIP := utils.SplitIPAddr(server.Host.IP) - info := CommonServerInfo{ - ID: server.ID, - Name: server.Name, - // Tag: server.Tag, - LastActive: server.LastActive.Unix(), - IPV4: ipv4, - IPV6: ipv6, - ValidIP: validIP, - } - res.Result = append(res.Result, &StatusResponse{ - CommonServerInfo: info, - Host: server.Host, - Status: server.State, - }) - } - res.CommonResponse = CommonResponse{ - Code: 0, - Message: "success", - } - return res -} - -// GetStatusByTag 获取传入分组的所有服务器状态信息 -// func (s *ServerAPIService) GetStatusByTag(tag string) *ServerStatusResponse { -// return s.GetStatusByIDList(ServerTagToIDList[tag]) -// } - -// GetAllStatus 获取所有服务器状态信息 -func (s *ServerAPIService) GetAllStatus() *ServerStatusResponse { - res := &ServerStatusResponse{} - res.Result = make([]*StatusResponse, 0) - ServerLock.RLock() - defer ServerLock.RUnlock() - for _, v := range ServerList { - host := v.Host - state := v.State - if host == nil || state == nil { - continue - } - ipv4, ipv6, validIP := utils.SplitIPAddr(host.IP) - info := CommonServerInfo{ - ID: v.ID, - Name: v.Name, - // Tag: v.Tag, - LastActive: v.LastActive.Unix(), - IPV4: ipv4, - IPV6: ipv6, - ValidIP: validIP, - DisplayIndex: v.DisplayIndex, - HideForGuest: v.HideForGuest, - } - res.Result = append(res.Result, &StatusResponse{ - CommonServerInfo: info, - Host: v.Host, - Status: v.State, - }) - } - res.CommonResponse = CommonResponse{ - Code: 0, - Message: "success", - } - return res -} - -// GetListByTag 获取传入分组的所有服务器信息 -func (s *ServerAPIService) GetListByTag(tag string) *ServerInfoResponse { - res := &ServerInfoResponse{} - res.Result = make([]*CommonServerInfo, 0) - - ServerLock.RLock() - defer ServerLock.RUnlock() - // for _, v := range ServerTagToIDList[tag] { - // host := ServerList[v].Host - // if host == nil { - // continue - // } - // ipv4, ipv6, validIP := utils.SplitIPAddr(host.IP) - // info := &CommonServerInfo{ - // ID: v, - // Name: ServerList[v].Name, - // Tag: ServerList[v].Tag, - // LastActive: ServerList[v].LastActive.Unix(), - // IPV4: ipv4, - // IPV6: ipv6, - // ValidIP: validIP, - // } - // res.Result = append(res.Result, info) - // } - res.CommonResponse = CommonResponse{ - Code: 0, - Message: "success", - } - return res -} - -// GetAllList 获取所有服务器信息 -func (s *ServerAPIService) GetAllList() *ServerInfoResponse { - res := &ServerInfoResponse{} - res.Result = make([]*CommonServerInfo, 0) - - ServerLock.RLock() - defer ServerLock.RUnlock() - for _, v := range ServerList { - host := v.Host - if host == nil { - continue - } - ipv4, ipv6, validIP := utils.SplitIPAddr(host.IP) - info := &CommonServerInfo{ - ID: v.ID, - Name: v.Name, - // Tag: v.Tag, - LastActive: v.LastActive.Unix(), - IPV4: ipv4, - IPV6: ipv6, - ValidIP: validIP, - } - res.Result = append(res.Result, info) - } - res.CommonResponse = CommonResponse{ - Code: 0, - Message: "success", - } - return res -} - -func (m *MonitorAPIService) GetMonitorHistories(query map[string]any) *MonitorInfoResponse { - var ( - resultMap = make(map[uint64]*MonitorInfo) - monitorHistories []*model.MonitorHistory - sortedMonitorIDs []uint64 - ) - res := &MonitorInfoResponse{ - CommonResponse: CommonResponse{ - Code: 0, - Message: "success", - }, - } - if err := DB.Model(&model.MonitorHistory{}).Select("monitor_id, created_at, server_id, avg_delay"). - Where(query).Where("created_at >= ?", time.Now().Add(-24*time.Hour)).Order("monitor_id, created_at"). - Scan(&monitorHistories).Error; err != nil { - res.CommonResponse = CommonResponse{ - Code: 500, - Message: err.Error(), - } - } else { - for _, history := range monitorHistories { - infos, ok := resultMap[history.MonitorID] - if !ok { - infos = &MonitorInfo{ - MonitorID: history.MonitorID, - ServerID: history.ServerID, - MonitorName: ServiceSentinelShared.monitors[history.MonitorID].Name, - ServerName: ServerList[history.ServerID].Name, - } - resultMap[history.MonitorID] = infos - sortedMonitorIDs = append(sortedMonitorIDs, history.MonitorID) - } - infos.CreatedAt = append(infos.CreatedAt, history.CreatedAt.Truncate(time.Minute).Unix()*1000) - infos.AvgDelay = append(infos.AvgDelay, history.AvgDelay) - } - for _, monitorID := range sortedMonitorIDs { - res.Result = append(res.Result, resultMap[monitorID]) - } - } - return res -} diff --git a/service/singleton/servicesentinel.go b/service/singleton/servicesentinel.go index bfb109e..db194d7 100644 --- a/service/singleton/servicesentinel.go +++ b/service/singleton/servicesentinel.go @@ -23,18 +23,18 @@ type ReportData struct { Reporter uint64 } -// _TodayStatsOfMonitor 今日监控记录 -type _TodayStatsOfMonitor struct { +// _TodayStatsOfService 今日监控记录 +type _TodayStatsOfService struct { Up int // 今日在线计数 Down int // 今日离线计数 Delay float32 // 今日平均延迟 } // NewServiceSentinel 创建服务监控器 -func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) { +func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Service) { ServiceSentinelShared = &ServiceSentinel{ serviceReportChannel: make(chan ReportData, 200), - serviceStatusToday: make(map[uint64]*_TodayStatsOfMonitor), + serviceStatusToday: make(map[uint64]*_TodayStatsOfService), serviceCurrentStatusIndex: make(map[uint64]*indexStore), serviceCurrentStatusData: make(map[uint64][]*pb.TaskResult), lastStatus: make(map[uint64]int), @@ -42,30 +42,30 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) { serviceResponseDataStoreCurrentDown: make(map[uint64]uint64), serviceResponseDataStoreCurrentAvgDelay: make(map[uint64]float32), serviceResponsePing: make(map[uint64]map[uint64]*pingStore), - monitors: make(map[uint64]*model.Monitor), + services: make(map[uint64]*model.Service), sslCertCache: make(map[uint64]string), // 30天数据缓存 - monthlyStatus: make(map[uint64]*model.ServiceItemResponse), + monthlyStatus: make(map[uint64]*model.ServiceResponseItem), dispatchBus: serviceSentinelDispatchBus, } // 加载历史记录 - ServiceSentinelShared.loadMonitorHistory() + ServiceSentinelShared.loadServiceHistory() year, month, day := time.Now().Date() today := time.Date(year, month, day, 0, 0, 0, 0, Loc) - var mhs []model.MonitorHistory + var mhs []model.ServiceHistory // 加载当日记录 DB.Where("created_at >= ?", today).Find(&mhs) totalDelay := make(map[uint64]float32) totalDelayCount := make(map[uint64]float32) for i := 0; i < len(mhs); i++ { - totalDelay[mhs[i].MonitorID] += mhs[i].AvgDelay - totalDelayCount[mhs[i].MonitorID]++ - ServiceSentinelShared.serviceStatusToday[mhs[i].MonitorID].Up += int(mhs[i].Up) - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].TotalUp += mhs[i].Up - ServiceSentinelShared.serviceStatusToday[mhs[i].MonitorID].Down += int(mhs[i].Down) - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].TotalDown += mhs[i].Down + totalDelay[mhs[i].ServiceID] += mhs[i].AvgDelay + totalDelayCount[mhs[i].ServiceID]++ + ServiceSentinelShared.serviceStatusToday[mhs[i].ServiceID].Up += int(mhs[i].Up) + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].TotalUp += mhs[i].Up + ServiceSentinelShared.serviceStatusToday[mhs[i].ServiceID].Down += int(mhs[i].Down) + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].TotalDown += mhs[i].Down } for id, delay := range totalDelay { ServiceSentinelShared.serviceStatusToday[id].Delay = delay / float32(totalDelayCount[id]) @@ -85,31 +85,31 @@ func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) { 使用缓存 channel,处理上报的 Service 请求结果,然后判断是否需要报警 需要记录上一次的状态信息 -加锁顺序:serviceResponseDataStoreLock > monthlyStatusLock > monitorsLock +加锁顺序:serviceResponseDataStoreLock > monthlyStatusLock > servicesLock */ type ServiceSentinel struct { // 服务监控任务上报通道 serviceReportChannel chan ReportData // 服务状态汇报管道 // 服务监控任务调度通道 - dispatchBus chan<- model.Monitor + dispatchBus chan<- model.Service serviceResponseDataStoreLock sync.RWMutex - serviceStatusToday map[uint64]*_TodayStatsOfMonitor // [monitor_id] -> _TodayStatsOfMonitor - serviceCurrentStatusIndex map[uint64]*indexStore // [monitor_id] -> 该监控ID对应的 serviceCurrentStatusData 的最新索引下标 - serviceCurrentStatusData map[uint64][]*pb.TaskResult // [monitor_id] -> []model.MonitorHistory - serviceResponseDataStoreCurrentUp map[uint64]uint64 // [monitor_id] -> 当前服务在线计数 - serviceResponseDataStoreCurrentDown map[uint64]uint64 // [monitor_id] -> 当前服务离线计数 - serviceResponseDataStoreCurrentAvgDelay map[uint64]float32 // [monitor_id] -> 当前服务离线计数 - serviceResponsePing map[uint64]map[uint64]*pingStore // [monitor_id] -> ClientID -> delay + serviceStatusToday map[uint64]*_TodayStatsOfService // [service_id] -> _TodayStatsOfService + serviceCurrentStatusIndex map[uint64]*indexStore // [service_id] -> 该监控ID对应的 serviceCurrentStatusData 的最新索引下标 + serviceCurrentStatusData map[uint64][]*pb.TaskResult // [service_id] -> []model.ServiceHistory + serviceResponseDataStoreCurrentUp map[uint64]uint64 // [service_id] -> 当前服务在线计数 + serviceResponseDataStoreCurrentDown map[uint64]uint64 // [service_id] -> 当前服务离线计数 + serviceResponseDataStoreCurrentAvgDelay map[uint64]float32 // [service_id] -> 当前服务离线计数 + serviceResponsePing map[uint64]map[uint64]*pingStore // [service_id] -> ClientID -> delay lastStatus map[uint64]int sslCertCache map[uint64]string - monitorsLock sync.RWMutex - monitors map[uint64]*model.Monitor // [monitor_id] -> model.Monitor + servicesLock sync.RWMutex + services map[uint64]*model.Service // 30天数据缓存 monthlyStatusLock sync.Mutex - monthlyStatus map[uint64]*model.ServiceItemResponse // [monitor_id] -> model.ServiceItemResponse + monthlyStatus map[uint64]*model.ServiceResponseItem } type indexStore struct { @@ -157,23 +157,23 @@ func (ss *ServiceSentinel) Dispatch(r ReportData) { ss.serviceReportChannel <- r } -func (ss *ServiceSentinel) Monitors() []*model.Monitor { - ss.monitorsLock.RLock() - defer ss.monitorsLock.RUnlock() - var monitors []*model.Monitor - for _, v := range ss.monitors { - monitors = append(monitors, v) +func (ss *ServiceSentinel) Services() []*model.Service { + ss.servicesLock.RLock() + defer ss.servicesLock.RUnlock() + var services []*model.Service + for _, v := range ss.services { + services = append(services, v) } - sort.SliceStable(monitors, func(i, j int) bool { - return monitors[i].ID < monitors[j].ID + sort.SliceStable(services, func(i, j int) bool { + return services[i].ID < services[j].ID }) - return monitors + return services } -// loadMonitorHistory 加载服务监控器的历史状态信息 -func (ss *ServiceSentinel) loadMonitorHistory() { - var monitors []*model.Monitor - err := DB.Find(&monitors).Error +// loadServiceHistory 加载服务监控器的历史状态信息 +func (ss *ServiceSentinel) loadServiceHistory() { + var services []*model.Service + err := DB.Find(&services).Error if err != nil { panic(err) } @@ -182,29 +182,29 @@ func (ss *ServiceSentinel) loadMonitorHistory() { defer ss.serviceResponseDataStoreLock.Unlock() ss.monthlyStatusLock.Lock() defer ss.monthlyStatusLock.Unlock() - ss.monitorsLock.Lock() - defer ss.monitorsLock.Unlock() + ss.servicesLock.Lock() + defer ss.servicesLock.Unlock() - for i := 0; i < len(monitors); i++ { - task := *monitors[i] + for i := 0; i < len(services); i++ { + task := *services[i] // 通过cron定时将服务监控任务传递给任务调度管道 - monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() { + services[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() { ss.dispatchBus <- task }) if err != nil { panic(err) } - ss.monitors[monitors[i].ID] = monitors[i] - ss.serviceCurrentStatusData[monitors[i].ID] = make([]*pb.TaskResult, _CurrentStatusSize) - ss.serviceStatusToday[monitors[i].ID] = &_TodayStatsOfMonitor{} + ss.services[services[i].ID] = services[i] + ss.serviceCurrentStatusData[services[i].ID] = make([]*pb.TaskResult, _CurrentStatusSize) + ss.serviceStatusToday[services[i].ID] = &_TodayStatsOfService{} } year, month, day := time.Now().Date() today := time.Date(year, month, day, 0, 0, 0, 0, Loc) - for i := 0; i < len(monitors); i++ { - ServiceSentinelShared.monthlyStatus[monitors[i].ID] = &model.ServiceItemResponse{ - Monitor: monitors[i], + for i := 0; i < len(services); i++ { + ServiceSentinelShared.monthlyStatus[services[i].ID] = &model.ServiceResponseItem{ + Service: services[i], Delay: &[30]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, Up: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, Down: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -212,7 +212,7 @@ func (ss *ServiceSentinel) loadMonitorHistory() { } // 加载服务监控历史记录 - var mhs []model.MonitorHistory + var mhs []model.ServiceHistory DB.Where("created_at > ? AND created_at < ?", today.AddDate(0, 0, -29), today).Find(&mhs) var delayCount = make(map[int]int) for i := 0; i < len(mhs); i++ { @@ -220,22 +220,22 @@ func (ss *ServiceSentinel) loadMonitorHistory() { if dayIndex < 0 { continue } - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].Delay[dayIndex] = (ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].Delay[dayIndex]*float32(delayCount[dayIndex]) + mhs[i].AvgDelay) / float32(delayCount[dayIndex]+1) + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].Delay[dayIndex] = (ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].Delay[dayIndex]*float32(delayCount[dayIndex]) + mhs[i].AvgDelay) / float32(delayCount[dayIndex]+1) delayCount[dayIndex]++ - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].Up[dayIndex] += int(mhs[i].Up) - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].TotalUp += mhs[i].Up - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].Down[dayIndex] += int(mhs[i].Down) - ServiceSentinelShared.monthlyStatus[mhs[i].MonitorID].TotalDown += mhs[i].Down + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].Up[dayIndex] += int(mhs[i].Up) + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].TotalUp += mhs[i].Up + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].Down[dayIndex] += int(mhs[i].Down) + ServiceSentinelShared.monthlyStatus[mhs[i].ServiceID].TotalDown += mhs[i].Down } } -func (ss *ServiceSentinel) OnMonitorUpdate(m model.Monitor) error { +func (ss *ServiceSentinel) OnServiceUpdate(m model.Service) error { ss.serviceResponseDataStoreLock.Lock() defer ss.serviceResponseDataStoreLock.Unlock() ss.monthlyStatusLock.Lock() defer ss.monthlyStatusLock.Unlock() - ss.monitorsLock.Lock() - defer ss.monitorsLock.Unlock() + ss.servicesLock.Lock() + defer ss.servicesLock.Unlock() var err error // 写入新任务 @@ -245,32 +245,32 @@ func (ss *ServiceSentinel) OnMonitorUpdate(m model.Monitor) error { if err != nil { return err } - if ss.monitors[m.ID] != nil { + if ss.services[m.ID] != nil { // 停掉旧任务 - Cron.Remove(ss.monitors[m.ID].CronJobID) + Cron.Remove(ss.services[m.ID].CronJobID) } else { // 新任务初始化数据 - ss.monthlyStatus[m.ID] = &model.ServiceItemResponse{ - Monitor: &m, + ss.monthlyStatus[m.ID] = &model.ServiceResponseItem{ + Service: &m, Delay: &[30]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, Up: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, Down: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, } ss.serviceCurrentStatusData[m.ID] = make([]*pb.TaskResult, _CurrentStatusSize) - ss.serviceStatusToday[m.ID] = &_TodayStatsOfMonitor{} + ss.serviceStatusToday[m.ID] = &_TodayStatsOfService{} } // 更新这个任务 - ss.monitors[m.ID] = &m + ss.services[m.ID] = &m return nil } -func (ss *ServiceSentinel) OnMonitorDelete(ids []uint64) { +func (ss *ServiceSentinel) OnServiceDelete(ids []uint64) { ss.serviceResponseDataStoreLock.Lock() defer ss.serviceResponseDataStoreLock.Unlock() ss.monthlyStatusLock.Lock() defer ss.monthlyStatusLock.Unlock() - ss.monitorsLock.Lock() - defer ss.monitorsLock.Unlock() + ss.servicesLock.Lock() + defer ss.servicesLock.Unlock() for _, id := range ids { delete(ss.serviceCurrentStatusIndex, id) @@ -283,22 +283,22 @@ func (ss *ServiceSentinel) OnMonitorDelete(ids []uint64) { delete(ss.serviceStatusToday, id) // 停掉定时任务 - Cron.Remove(ss.monitors[id].CronJobID) - delete(ss.monitors, id) + Cron.Remove(ss.services[id].CronJobID) + delete(ss.services, id) delete(ss.monthlyStatus, id) } } -func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse { +func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceResponseItem { ss.serviceResponseDataStoreLock.RLock() defer ss.serviceResponseDataStoreLock.RUnlock() ss.monthlyStatusLock.Lock() defer ss.monthlyStatusLock.Unlock() // 刷新最新一天的数据 - for k := range ss.monitors { - ss.monthlyStatus[k].Monitor = ss.monitors[k] + for k := range ss.services { + ss.monthlyStatus[k].Service = ss.services[k] v := ss.serviceStatusToday[k] // 30 天在线率, @@ -314,7 +314,7 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse { ss.monthlyStatus[k].Delay[29] = v.Delay } - // 最后 5 分钟的状态 与 monitor 对象填充 + // 最后 5 分钟的状态 与 service 对象填充 for k, v := range ss.serviceResponseDataStoreCurrentDown { ss.monthlyStatus[k].CurrentDown = v } @@ -329,18 +329,18 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse { func (ss *ServiceSentinel) worker() { // 从服务状态汇报管道获取汇报的服务数据 for r := range ss.serviceReportChannel { - if ss.monitors[r.Data.GetId()] == nil || ss.monitors[r.Data.GetId()].ID == 0 { + if ss.services[r.Data.GetId()] == nil || ss.services[r.Data.GetId()].ID == 0 { log.Printf("NEZHA>> 错误的服务监控上报 %+v", r) continue } mh := r.Data if mh.Type == model.TaskTypeTCPPing || mh.Type == model.TaskTypeICMPPing { - monitorTcpMap, ok := ss.serviceResponsePing[mh.GetId()] + serviceTcpMap, ok := ss.serviceResponsePing[mh.GetId()] if !ok { - monitorTcpMap = make(map[uint64]*pingStore) - ss.serviceResponsePing[mh.GetId()] = monitorTcpMap + serviceTcpMap = make(map[uint64]*pingStore) + ss.serviceResponsePing[mh.GetId()] = serviceTcpMap } - ts, ok := monitorTcpMap[r.Reporter] + ts, ok := serviceTcpMap[r.Reporter] if !ok { ts = &pingStore{} } @@ -348,8 +348,8 @@ func (ss *ServiceSentinel) worker() { ts.ping = (ts.ping*float32(ts.count-1) + mh.Delay) / float32(ts.count) if ts.count == Conf.AvgPingCount { ts.count = 0 - if err := DB.Create(&model.MonitorHistory{ - MonitorID: mh.GetId(), + if err := DB.Create(&model.ServiceHistory{ + ServiceID: mh.GetId(), AvgDelay: ts.ping, Data: mh.Data, ServerID: r.Reporter, @@ -357,7 +357,7 @@ func (ss *ServiceSentinel) worker() { log.Println("NEZHA>> 服务监控数据持久化失败:", err) } } - monitorTcpMap[r.Reporter] = ts + serviceTcpMap[r.Reporter] = ts } ss.serviceResponseDataStoreLock.Lock() // 写入当天状态 @@ -414,8 +414,8 @@ func (ss *ServiceSentinel) worker() { index: 0, t: currentTime, } - if err := DB.Create(&model.MonitorHistory{ - MonitorID: mh.GetId(), + if err := DB.Create(&model.ServiceHistory{ + ServiceID: mh.GetId(), AvgDelay: ss.serviceResponseDataStoreCurrentAvgDelay[mh.GetId()], Data: mh.Data, Up: ss.serviceResponseDataStoreCurrentUp[mh.GetId()], @@ -427,23 +427,23 @@ func (ss *ServiceSentinel) worker() { // 延迟报警 if mh.Delay > 0 { - ss.monitorsLock.RLock() - if ss.monitors[mh.GetId()].LatencyNotify { - notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID + ss.servicesLock.RLock() + if ss.services[mh.GetId()].LatencyNotify { + notificationGroupID := ss.services[mh.GetId()].NotificationGroupID minMuteLabel := NotificationMuteLabel.ServiceLatencyMin(mh.GetId()) maxMuteLabel := NotificationMuteLabel.ServiceLatencyMax(mh.GetId()) - if mh.Delay > ss.monitors[mh.GetId()].MaxLatency { + if mh.Delay > ss.services[mh.GetId()].MaxLatency { // 延迟超过最大值 ServerLock.RLock() reporterServer := ServerList[r.Reporter] - msg := fmt.Sprintf("[Latency] %s %2f > %2f, Reporter: %s", ss.monitors[mh.GetId()].Name, mh.Delay, ss.monitors[mh.GetId()].MaxLatency, reporterServer.Name) + msg := fmt.Sprintf("[Latency] %s %2f > %2f, Reporter: %s", ss.services[mh.GetId()].Name, mh.Delay, ss.services[mh.GetId()].MaxLatency, reporterServer.Name) go SendNotification(notificationGroupID, msg, minMuteLabel) ServerLock.RUnlock() - } else if mh.Delay < ss.monitors[mh.GetId()].MinLatency { + } else if mh.Delay < ss.services[mh.GetId()].MinLatency { // 延迟低于最小值 ServerLock.RLock() reporterServer := ServerList[r.Reporter] - msg := fmt.Sprintf("[Latency] %s %2f < %2f, Reporter: %s", ss.monitors[mh.GetId()].Name, mh.Delay, ss.monitors[mh.GetId()].MinLatency, reporterServer.Name) + msg := fmt.Sprintf("[Latency] %s %2f < %2f, Reporter: %s", ss.services[mh.GetId()].Name, mh.Delay, ss.services[mh.GetId()].MinLatency, reporterServer.Name) go SendNotification(notificationGroupID, msg, maxMuteLabel) ServerLock.RUnlock() } else { @@ -452,24 +452,24 @@ func (ss *ServiceSentinel) worker() { UnMuteNotification(notificationGroupID, maxMuteLabel) } } - ss.monitorsLock.RUnlock() + ss.servicesLock.RUnlock() } // 状态变更报警+触发任务执行 if stateCode == StatusDown || stateCode != ss.lastStatus[mh.GetId()] { - ss.monitorsLock.Lock() + ss.servicesLock.Lock() lastStatus := ss.lastStatus[mh.GetId()] // 存储新的状态值 ss.lastStatus[mh.GetId()] = stateCode // 判断是否需要发送通知 - isNeedSendNotification := ss.monitors[mh.GetId()].Notify && (lastStatus != 0 || stateCode == StatusDown) + isNeedSendNotification := ss.services[mh.GetId()].Notify && (lastStatus != 0 || stateCode == StatusDown) if isNeedSendNotification { ServerLock.RLock() reporterServer := ServerList[r.Reporter] - notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID - notificationMsg := fmt.Sprintf("[%s] %s Reporter: %s, Error: %s", StatusCodeToString(stateCode), ss.monitors[mh.GetId()].Name, reporterServer.Name, mh.Data) + notificationGroupID := ss.services[mh.GetId()].NotificationGroupID + notificationMsg := fmt.Sprintf("[%s] %s Reporter: %s, Error: %s", StatusCodeToString(stateCode), ss.services[mh.GetId()].Name, reporterServer.Name, mh.Data) muteLabel := NotificationMuteLabel.ServiceStateChanged(mh.GetId()) // 状态变更时,清除静音缓存 @@ -482,7 +482,7 @@ func (ss *ServiceSentinel) worker() { } // 判断是否需要触发任务 - isNeedTriggerTask := ss.monitors[mh.GetId()].EnableTriggerTask && lastStatus != 0 + isNeedTriggerTask := ss.services[mh.GetId()].EnableTriggerTask && lastStatus != 0 if isNeedTriggerTask { ServerLock.RLock() reporterServer := ServerList[r.Reporter] @@ -490,14 +490,14 @@ func (ss *ServiceSentinel) worker() { if stateCode == StatusGood && lastStatus != stateCode { // 当前状态正常 前序状态非正常时 触发恢复任务 - go SendTriggerTasks(ss.monitors[mh.GetId()].RecoverTriggerTasks, reporterServer.ID) + go SendTriggerTasks(ss.services[mh.GetId()].RecoverTriggerTasks, reporterServer.ID) } else if lastStatus == StatusGood && lastStatus != stateCode { // 前序状态正常 当前状态非正常时 触发失败任务 - go SendTriggerTasks(ss.monitors[mh.GetId()].FailTriggerTasks, reporterServer.ID) + go SendTriggerTasks(ss.services[mh.GetId()].FailTriggerTasks, reporterServer.ID) } } - ss.monitorsLock.Unlock() + ss.servicesLock.Unlock() } ss.serviceResponseDataStoreLock.Unlock() @@ -509,22 +509,22 @@ func (ss *ServiceSentinel) worker() { !strings.HasSuffix(mh.Data, "EOF") && !strings.HasSuffix(mh.Data, "timed out") { errMsg = mh.Data - ss.monitorsLock.RLock() - if ss.monitors[mh.GetId()].Notify { + ss.servicesLock.RLock() + if ss.services[mh.GetId()].Notify { muteLabel := NotificationMuteLabel.ServiceSSL(mh.GetId(), "network") - go SendNotification(ss.monitors[mh.GetId()].NotificationGroupID, fmt.Sprintf("[SSL] Fetch cert info failed, %s %s", ss.monitors[mh.GetId()].Name, errMsg), muteLabel) + go SendNotification(ss.services[mh.GetId()].NotificationGroupID, fmt.Sprintf("[SSL] Fetch cert info failed, %s %s", ss.services[mh.GetId()].Name, errMsg), muteLabel) } - ss.monitorsLock.RUnlock() + ss.servicesLock.RUnlock() } } else { // 清除网络错误静音缓存 - UnMuteNotification(ss.monitors[mh.GetId()].NotificationGroupID, NotificationMuteLabel.ServiceSSL(mh.GetId(), "network")) + UnMuteNotification(ss.services[mh.GetId()].NotificationGroupID, NotificationMuteLabel.ServiceSSL(mh.GetId(), "network")) var newCert = strings.Split(mh.Data, "|") if len(newCert) > 1 { - ss.monitorsLock.Lock() - enableNotify := ss.monitors[mh.GetId()].Notify + ss.servicesLock.Lock() + enableNotify := ss.services[mh.GetId()].Notify // 首次获取证书信息时,缓存证书信息 if ss.sslCertCache[mh.GetId()] == "" { @@ -542,9 +542,9 @@ func (ss *ServiceSentinel) worker() { ss.sslCertCache[mh.GetId()] = mh.Data } - notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID - serviceName := ss.monitors[mh.GetId()].Name - ss.monitorsLock.Unlock() + notificationGroupID := ss.services[mh.GetId()].NotificationGroupID + serviceName := ss.services[mh.GetId()].Name + ss.servicesLock.Unlock() // 需要发送提醒 if enableNotify { diff --git a/service/singleton/singleton.go b/service/singleton/singleton.go index 46c2717..19cd3e5 100644 --- a/service/singleton/singleton.go +++ b/service/singleton/singleton.go @@ -36,7 +36,6 @@ func LoadSingleton() { loadNotifications() // 加载通知服务 loadServers() // 加载服务器列表 loadCronTasks() // 加载定时任务 - loadAPI() initNAT() initDDNS() } @@ -63,8 +62,8 @@ func InitDBFromPath(path string) { DB = DB.Debug() } err = DB.AutoMigrate(model.Server{}, model.User{}, - model.Notification{}, model.AlertRule{}, model.Monitor{}, - model.MonitorHistory{}, model.Cron{}, model.Transfer{}, + model.Notification{}, model.AlertRule{}, model.Service{}, + model.ServiceHistory{}, model.Cron{}, model.Transfer{}, model.ApiToken{}, model.NAT{}, model.DDNSProfile{}, model.NotificationGroupNotification{}) if err != nil { panic(err) @@ -98,14 +97,14 @@ func RecordTransferHourlyUsage() { log.Println("NEZHA>> Cron 流量统计入库", len(txs), DB.Create(txs).Error) } -// CleanMonitorHistory 清理无效或过时的 监控记录 和 流量记录 -func CleanMonitorHistory() { +// CleanServiceHistory 清理无效或过时的 监控记录 和 流量记录 +func CleanServiceHistory() { // 清理已被删除的服务器的监控记录与流量记录 - DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ? OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -30)) + DB.Unscoped().Delete(&model.ServiceHistory{}, "created_at < ? OR service_id NOT IN (SELECT `id` FROM services)", time.Now().AddDate(0, 0, -30)) // 由于网络监控记录的数据较多,并且前端仅使用了 1 天的数据 // 考虑到 sqlite 数据量问题,仅保留一天数据, // server_id = 0 的数据会用于/service页面的可用性展示 - DB.Unscoped().Delete(&model.MonitorHistory{}, "(created_at < ? AND server_id != 0) OR monitor_id NOT IN (SELECT `id` FROM monitors)", time.Now().AddDate(0, 0, -1)) + DB.Unscoped().Delete(&model.ServiceHistory{}, "(created_at < ? AND server_id != 0) OR service_id NOT IN (SELECT `id` FROM services)", time.Now().AddDate(0, 0, -1)) DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)") // 计算可清理流量记录的时长 var allServerKeep time.Time