diff --git a/README.md b/README.md index f4d90d2..a0d34b6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@


-    +   

:trollface: 哪吒监控 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。

@@ -43,7 +43,7 @@ _\* 使用 WatchTower 可以自动更新面板,Windows 终端可以使用 nssm
- 报警通知:CPU、内存、硬盘、带宽、流量实时监控。 + 报警通知:CPU、内存、硬盘、带宽、流量、 **月流量** 实时监控。 #### 灵活通知方式 @@ -96,15 +96,34 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。 #### 报警规则说明 -- Type - - cpu、memory、swap、disk:Min/Max 数值为占用百分比 - - net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024\*1024) - - offline:不支持 Min/Max 参数 -- Duration:持续秒数,监控比较简陋,取持续时间内的 70% 采样结果 -- Cover `[{"Type":"offline","Duration":10, "Cover":0, "Ignore":{"5": true}}]` - - `0` 监控所有,通过 `Ignore` 忽略特定服务器 - - `1` 忽略所有,通过 `Ignore` 监控特定服务器 -- Ignore: `{"1": true, "2":false}` 特定服务器,搭配 `Cover` 使用 +##### 基本规则 + +- type + - cpu、memory、swap、disk + - net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量) + - offline +- duration:持续秒数,秒数内采样记录 30% 以上触发阈值才会报警(防数据插针) +- min/max + - 流量、网速类数值 为字节(1kb=1024,1mb = 1024\*1024) + - 内存、硬盘、CPU 为占用百分比 + - 离线监控无需设置 +- cover `[{"type":"offline","duration":10, "cover":0, "ignore":{"5": true}}]` + - `0` 监控所有,通过 `ignore` 忽略特定服务器 + - `1` 忽略所有,通过 `ignore` 监控特定服务器 +- ignore: `{"1": true, "2":false}` 特定服务器,搭配 `cover` 使用 + +##### 特殊:任意周期流量报警 + +可以用作月流量报警 + +- type + - transfer_in_cycle 周期内的入站流量 + - transfer_out_cycle 周期内的出站流量 + - transfer_all_cycle 周期内双向流量和 +- cycle_start 周期开始日期(可以是你机器计费周期的开始日期) +- cycle_interval 小时(可以设为1月,30*24) +- min/max、cover、ignore 参考基本规则配置 +- 示例: 每月15号计费的月流量1T报警 `[{"type":"transfer_all_cycle","max":1000000000,"cycle_start":"2021-07-15T08:00:00Z","cycle_interval":720}]`
diff --git a/cmd/dashboard/controller/controller.go b/cmd/dashboard/controller/controller.go index 3d2a811..9ffda7f 100644 --- a/cmd/dashboard/controller/controller.go +++ b/cmd/dashboard/controller/controller.go @@ -3,6 +3,7 @@ package controller import ( "fmt" "html/template" + "net/http" "strings" "time" @@ -14,7 +15,7 @@ import ( "github.com/naiba/nezha/service/dao" ) -func ServeWeb(port uint) { +func ServeWeb(port uint) *http.Server { gin.SetMode(gin.ReleaseMode) r := gin.Default() if dao.Conf.Debug { @@ -112,7 +113,11 @@ func ServeWeb(port uint) { r.Static("/static", "resource/static") r.LoadHTMLGlob("resource/template/**/*") routers(r) - r.Run(fmt.Sprintf(":%d", port)) + srv := &http.Server{ + Addr: fmt.Sprintf(":%d", port), + Handler: r, + } + return srv } func routers(r *gin.Engine) { diff --git a/cmd/dashboard/controller/member_api.go b/cmd/dashboard/controller/member_api.go index a2492e1..e0be4f9 100644 --- a/cmd/dashboard/controller/member_api.go +++ b/cmd/dashboard/controller/member_api.go @@ -57,7 +57,7 @@ func (ma *memberAPI) delete(c *gin.Context) { var err error switch c.Param("model") { case "server": - err = dao.DB.Delete(&model.Server{}, "id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.Server{}, "id = ?", id).Error if err == nil { dao.ServerLock.Lock() delete(dao.SecretToID, dao.ServerList[id].Secret) @@ -66,18 +66,18 @@ func (ma *memberAPI) delete(c *gin.Context) { dao.ReSortServer() } case "notification": - err = dao.DB.Delete(&model.Notification{}, "id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.Notification{}, "id = ?", id).Error if err == nil { dao.OnDeleteNotification(id) } case "monitor": - err = dao.DB.Delete(&model.Monitor{}, "id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.Monitor{}, "id = ?", id).Error if err == nil { dao.ServiceSentinelShared.OnMonitorDelete(id) - err = dao.DB.Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error } case "cron": - err = dao.DB.Delete(&model.Cron{}, "id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.Cron{}, "id = ?", id).Error if err == nil { dao.CronLock.RLock() defer dao.CronLock.RUnlock() @@ -88,7 +88,7 @@ func (ma *memberAPI) delete(c *gin.Context) { delete(dao.Crons, id) } case "alert-rule": - err = dao.DB.Delete(&model.AlertRule{}, "id = ?", id).Error + err = dao.DB.Unscoped().Delete(&model.AlertRule{}, "id = ?", id).Error if err == nil { dao.OnDeleteAlert(id) } @@ -378,8 +378,8 @@ func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) { err = errors.New("至少定义一条规则") } else { for i := 0; i < len(r.Rules); i++ { - if r.Rules[i].Duration < 3 { - err = errors.New("Duration 至少为 3") + if !r.Rules[i].IsTransferDurationRule() && r.Rules[i].Duration < 3 { + err = errors.New("错误:Duration 至少为 3") break } } diff --git a/cmd/dashboard/controller/oauth2.go b/cmd/dashboard/controller/oauth2.go index a57cb20..a318415 100644 --- a/cmd/dashboard/controller/oauth2.go +++ b/cmd/dashboard/controller/oauth2.go @@ -29,22 +29,24 @@ func (oa *oauth2controller) serve() { } func (oa *oauth2controller) getCommonOauth2Config(c *gin.Context) *oauth2.Config { - var endPoint oauth2.Endpoint if dao.Conf.Oauth2.Type == model.ConfigTypeGitee { - endPoint = oauth2.Endpoint{ - AuthURL: "https://gitee.com/oauth/authorize", - TokenURL: "https://gitee.com/oauth/token", + return &oauth2.Config{ + ClientID: dao.Conf.Oauth2.ClientID, + ClientSecret: dao.Conf.Oauth2.ClientSecret, + Scopes: []string{}, + Endpoint: oauth2.Endpoint{ + AuthURL: "https://gitee.com/oauth/authorize", + TokenURL: "https://gitee.com/oauth/token", + }, + RedirectURL: oa.getRedirectURL(c), } } else { - endPoint = GitHubOauth2.Endpoint - } - - return &oauth2.Config{ - ClientID: dao.Conf.Oauth2.ClientID, - ClientSecret: dao.Conf.Oauth2.ClientSecret, - Scopes: []string{}, - Endpoint: endPoint, - RedirectURL: oa.getRedirectURL(c), + return &oauth2.Config{ + ClientID: dao.Conf.Oauth2.ClientID, + ClientSecret: dao.Conf.Oauth2.ClientSecret, + Scopes: []string{}, + Endpoint: GitHubOauth2.Endpoint, + } } } diff --git a/cmd/dashboard/main.go b/cmd/dashboard/main.go index ce3ae2a..5f76afc 100644 --- a/cmd/dashboard/main.go +++ b/cmd/dashboard/main.go @@ -1,8 +1,11 @@ package main import ( + "context" + "log" "time" + "github.com/ory/graceful" "github.com/patrickmn/go-cache" "github.com/robfig/cron/v3" "gorm.io/driver/sqlite" @@ -31,7 +34,9 @@ func init() { if err != nil { panic(err) } - dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{}) + dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{ + CreateBatchSize: 200, + }) if err != nil { panic(err) } @@ -49,18 +54,73 @@ func init() { func initSystem() { dao.DB.AutoMigrate(model.Server{}, model.User{}, model.Notification{}, model.AlertRule{}, model.Monitor{}, - model.MonitorHistory{}, model.Cron{}) + model.MonitorHistory{}, model.Cron{}, model.Transfer{}) dao.NewServiceSentinel() loadServers() //加载服务器列表 loadCrons() //加载计划任务 - // 清理旧数据 - dao.Cron.AddFunc("* 3 * * *", cleanMonitorHistory) + // 清理 服务请求记录 和 流量记录 的旧数据 + dao.Cron.AddFunc("0 20 3 * * *", cleanMonitorHistory) + // 流量记录打点 + dao.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage) +} + +func recordTransferHourlyUsage() { + dao.ServerLock.Lock() + defer dao.ServerLock.Unlock() + now := time.Now() + nowTrimSeconds := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, time.Local) + var txs []model.Transfer + for id, server := range dao.ServerList { + tx := model.Transfer{ + ServerID: id, + In: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn), + Out: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn), + } + server.PrevHourlyTransferIn = int64(server.State.NetInTransfer) + server.PrevHourlyTransferOut = int64(server.State.NetOutTransfer) + tx.CreatedAt = nowTrimSeconds + txs = append(txs, tx) + } + dao.DB.Create(txs) } func cleanMonitorHistory() { - dao.DB.Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30)) + dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30)) + var allServerKeep time.Time + specialServerKeep := make(map[uint64]time.Time) + var specialServerIDs []uint64 + var alerts []model.AlertRule + dao.DB.Find(&alerts) + for i := 0; i < len(alerts); i++ { + for j := 0; j < len(alerts[i].Rules); j++ { + // 是不是流量记录规则 + if !alerts[i].Rules[j].IsTransferDurationRule() { + continue + } + dataCouldRemoveBefore := alerts[i].Rules[j].GetTransferDurationStart() + // 判断规则影响的机器范围 + if alerts[i].Rules[j].Cover == model.RuleCoverAll { + // 更新全局可以清理的数据点 + if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) { + allServerKeep = dataCouldRemoveBefore + } + } else { + // 更新特定机器可以清理数据点 + for id := range alerts[i].Rules[j].Ignore { + if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) { + specialServerKeep[id] = dataCouldRemoveBefore + specialServerIDs = append(specialServerIDs, id) + } + } + } + } + } + for id, couldRemove := range specialServerKeep { + dao.DB.Unscoped().Delete(&model.Transfer{}, "id = ? AND created_at < ?", id, couldRemove) + } + dao.DB.Unscoped().Delete(&model.Transfer{}, "id NOT IN (?) AND created_at < ?", specialServerIDs, allServerKeep) } func loadServers() { @@ -98,8 +158,33 @@ func loadCrons() { } func main() { - go controller.ServeWeb(dao.Conf.HTTPPort) go rpc.ServeRPC(dao.Conf.GRPCPort) go rpc.DispatchTask(time.Second * 30) - dao.AlertSentinelStart() + go dao.AlertSentinelStart() + srv := controller.ServeWeb(dao.Conf.HTTPPort) + graceful.Graceful(func() error { + return srv.ListenAndServe() + }, func(c context.Context) error { + dao.ServerLock.Lock() + defer dao.ServerLock.Unlock() + var txs []model.Transfer + for _, s := range dao.ServerList { + in := s.State.NetInTransfer - uint64(s.PrevHourlyTransferIn) + out := s.State.NetOutTransfer - uint64(s.PrevHourlyTransferOut) + if in > 0 && out > 0 { + tx := model.Transfer{ + ServerID: s.ID, + In: in, + Out: out, + } + tx.CreatedAt = time.Now() + txs = append(txs, tx) + } + } + if err := dao.DB.Create(txs).Error; err != nil { + log.Println("流量统计入库", err) + } + srv.Shutdown(c) + return nil + }) } diff --git a/go.mod b/go.mod index 07d9896..cc7848b 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/gorilla/websocket v1.4.2 github.com/onsi/ginkgo v1.7.0 // indirect github.com/onsi/gomega v1.4.3 // indirect + github.com/ory/graceful v0.1.1 github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293 github.com/patrickmn/go-cache v2.1.0+incompatible github.com/robfig/cron/v3 v3.0.1 diff --git a/go.sum b/go.sum index a785187..b23d3d6 100644 --- a/go.sum +++ b/go.sum @@ -251,6 +251,8 @@ github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/ory/graceful v0.1.1 h1:zx+8tDObLPrG+7Tc8jKYlXsqWnLtOQA1IZ/FAAKHMXU= +github.com/ory/graceful v0.1.1/go.mod h1:zqu70l95WrKHF4AZ6tXHvAqAvpY6M7g6ttaAVcMm7KU= github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293 h1:a+cgfu5wXK/NIhR2FWqpaUYl0XloncvajO8SDvBu4UQ= github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293/go.mod h1:unC1eanNdTd2fUViujtg9cDR/jNnYNvGmFYjIXz1hK4= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= @@ -259,6 +261,7 @@ github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTK github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/model/alertrule.go b/model/alertrule.go index b752df6..053ad0c 100644 --- a/model/alertrule.go +++ b/model/alertrule.go @@ -27,10 +27,10 @@ func (r *AlertRule) AfterFind(tx *gorm.DB) error { return json.Unmarshal([]byte(r.RulesRaw), &r.Rules) } -func (r *AlertRule) Snapshot(server *Server) []interface{} { +func (r *AlertRule) Snapshot(server *Server, db *gorm.DB) []interface{} { var point []interface{} for i := 0; i < len(r.Rules); i++ { - point = append(point, r.Rules[i].Snapshot(server)) + point = append(point, r.Rules[i].Snapshot(server, db)) } return point } @@ -39,28 +39,39 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) { var max int var count int for i := 0; i < len(r.Rules); i++ { - total := 0.0 - fail := 0.0 - num := int(r.Rules[i].Duration) - if num > max { - max = num - } - if len(points) < num { - continue - } - for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- { - total++ - if points[j][i] != nil { - fail++ + if r.Rules[i].IsTransferDurationRule() { + if max < 1 { + max = 1 + } + // 循环区间流量报警 + for j := len(points[i]) - 1; j >= 0; j-- { + if points[i][j] != nil { + count++ + break + } + } + } else { + // 常规报警 + total := 0.0 + fail := 0.0 + num := int(r.Rules[i].Duration) + if num > max { + max = num + } + if len(points) < num { + continue + } + for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- { + total++ + if points[j][i] != nil { + fail++ + } + } + if fail/total > 0.7 { + count++ + break } } - if fail/total > 0.7 { - count++ - break - } } - if count == len(r.Rules) { - return max, false - } - return max, true + return max, count != len(r.Rules) } diff --git a/model/common.go b/model/common.go index ef4736b..a585862 100644 --- a/model/common.go +++ b/model/common.go @@ -8,8 +8,8 @@ const CacheKeyOauth2State = "p:a:state" const CacheKeyServicePage = "p:c:service" type Common struct { - ID uint64 `gorm:"primary_key"` - CreatedAt time.Time + ID uint64 `gorm:"primary_key"` + CreatedAt time.Time `sql:"index"` UpdatedAt time.Time DeletedAt *time.Time `sql:"index"` } diff --git a/model/rule.go b/model/rule.go index 9731fae..4e332b3 100644 --- a/model/rule.go +++ b/model/rule.go @@ -1,21 +1,36 @@ package model -import "time" +import ( + "strings" + "time" + + "gorm.io/gorm" +) const ( RuleCoverAll = iota RuleCoverIgnoreAll ) +type NResult struct { + N uint64 +} + type Rule struct { // 指标类型,cpu、memory、swap、disk、net_in_speed、net_out_speed // net_all_speed、transfer_in、transfer_out、transfer_all、offline - Type string `json:"type,omitempty"` - Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024) - Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024) - Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒) - Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll - Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除 + // transfer_in_cycle、transfer_out_cycle、transfer_all_cycle + Type string `json:"type,omitempty"` + Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024) + Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024) + CycleStart time.Time `json:"cycle_start,omitempty"` // 流量统计的开始时间 + CycleInterval uint64 `json:"cycle_interval,omitempty"` // 流量统计周期 + Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒) + Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll + Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除 + + // 只作为缓存使用,记录下次该检测的时间 + NextTransferAt map[uint64]time.Time `json:"-"` } func percentage(used, total uint64) uint64 { @@ -26,7 +41,7 @@ func percentage(used, total uint64) uint64 { } // Snapshot 未通过规则返回 struct{}{}, 通过返回 nil -func (u *Rule) Snapshot(server *Server) interface{} { +func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} { // 监控全部但是排除了此服务器 if u.Cover == RuleCoverAll && u.Ignore[server.ID] { return nil @@ -36,6 +51,11 @@ func (u *Rule) Snapshot(server *Server) interface{} { return nil } + // 循环区间流量检测 · 短期无需重复检测 + if u.IsTransferDurationRule() && u.NextTransferAt[server.ID].After(time.Now()) { + return nil + } + var src uint64 switch u.Type { @@ -65,6 +85,39 @@ func (u *Rule) Snapshot(server *Server) interface{} { } else { src = uint64(server.LastActive.Unix()) } + case "transfer_in_cycle": + src = server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn) + if u.CycleInterval != 1 { + var res NResult + db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res) + src += res.N + } + case "transfer_out_cycle": + src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + if u.CycleInterval != 1 { + var res NResult + db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res) + src += res.N + } + case "transfer_all_cycle": + src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn) + if u.CycleInterval != 1 { + var res NResult + db.Model(&Transfer{}).Select("SUM('in'+'out') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res) + src += res.N + } + } + + // 循环区间流量检测 · 更新下次需要检测时间 + if u.IsTransferDurationRule() { + seconds := 1800 * time.Duration(((u.Max - src) / u.Max)) + if seconds < 180 { + seconds = 180 + } + if u.NextTransferAt == nil { + u.NextTransferAt = make(map[uint64]time.Time) + } + u.NextTransferAt[server.ID] = time.Now().Add(time.Duration(time.Second * seconds)) } if u.Type == "offline" && uint64(time.Now().Unix())-src > 6 { @@ -75,3 +128,12 @@ func (u *Rule) Snapshot(server *Server) interface{} { return nil } + +func (rule Rule) IsTransferDurationRule() bool { + return strings.HasSuffix(rule.Type, "_cycle") +} + +func (rule Rule) GetTransferDurationStart() time.Time { + interval := 3600 * int64(rule.CycleInterval) + return time.Unix(rule.CycleStart.Unix()+(time.Now().Unix()-rule.CycleStart.Unix())/interval*interval, 0) +} diff --git a/model/server.go b/model/server.go index 9dc5bf9..ac2a2ab 100644 --- a/model/server.go +++ b/model/server.go @@ -23,6 +23,9 @@ type Server struct { TaskClose chan error `gorm:"-" json:"-"` TaskStream pb.NezhaService_RequestTaskServer `gorm:"-" json:"-"` + + PrevHourlyTransferIn int64 `gorm:"-" json:"-"` // 上次数据点时的入站使用量 + PrevHourlyTransferOut int64 `gorm:"-" json:"-"` // 上次数据点时的出站使用量 } func (s Server) Marshal() template.JS { diff --git a/model/transfer.go b/model/transfer.go new file mode 100644 index 0000000..0cbde6c --- /dev/null +++ b/model/transfer.go @@ -0,0 +1,8 @@ +package model + +type Transfer struct { + Common + ServerID uint64 + In uint64 + Out uint64 +} diff --git a/service/dao/alertsentinel.go b/service/dao/alertsentinel.go index d788166..2468931 100644 --- a/service/dao/alertsentinel.go +++ b/service/dao/alertsentinel.go @@ -109,7 +109,7 @@ func checkStatus() { for _, server := range ServerList { // 监测点 alertsStore[alert.ID][server.ID] = append(alertsStore[alert. - ID][server.ID], alert.Snapshot(server)) + ID][server.ID], alert.Snapshot(server, DB)) // 发送通知,分为触发报警和恢复通知 max, passed := alert.Check(alertsStore[alert.ID][server.ID]) if !passed { diff --git a/service/dao/dao.go b/service/dao/dao.go index 95cd279..d0dbada 100644 --- a/service/dao/dao.go +++ b/service/dao/dao.go @@ -13,7 +13,7 @@ import ( pb "github.com/naiba/nezha/proto" ) -var Version = "v0.8.6" // !!记得修改 README 中的 badge 版本!! +var Version = "v0.9.0" // !!记得修改 README 中的 badge 版本!! var ( Conf *model.Config diff --git a/service/dao/servicesentinel.go b/service/dao/servicesentinel.go index 7d4a39b..671b700 100644 --- a/service/dao/servicesentinel.go +++ b/service/dao/servicesentinel.go @@ -12,7 +12,7 @@ import ( pb "github.com/naiba/nezha/proto" ) -const _CurrentStatusSize = 30 // 统计 5 分钟内的数据为当前状态 +const _CurrentStatusSize = 30 // 统计 15 分钟内的数据为当前状态 var ServiceSentinelShared *ServiceSentinel diff --git a/service/rpc/nezha.go b/service/rpc/nezha.go index ce4eef4..7308b30 100644 --- a/service/rpc/nezha.go +++ b/service/rpc/nezha.go @@ -73,7 +73,21 @@ func (s *NezhaHandler) ReportSystemState(c context.Context, r *pb.State) (*pb.Re dao.ServerLock.RLock() defer dao.ServerLock.RUnlock() dao.ServerList[clientID].LastActive = time.Now() + + // 判断是否是机器重启,如果是机器重启要录入最后记录的流量里面 + if state.Uptime < dao.ServerList[clientID].State.Uptime { + dao.ServerList[clientID].PrevHourlyTransferIn = dao.ServerList[clientID].PrevHourlyTransferIn - int64(dao.ServerList[clientID].State.NetInTransfer) + dao.ServerList[clientID].PrevHourlyTransferOut = dao.ServerList[clientID].PrevHourlyTransferOut - int64(dao.ServerList[clientID].State.NetOutTransfer) + } + dao.ServerList[clientID].State = &state + + // 如果从未记录过,先打点,等到小时时间点时入库 + if dao.ServerList[clientID].PrevHourlyTransferIn == 0 || dao.ServerList[clientID].PrevHourlyTransferOut == 0 { + dao.ServerList[clientID].PrevHourlyTransferIn = int64(state.NetInTransfer) + dao.ServerList[clientID].PrevHourlyTransferOut = int64(state.NetOutTransfer) + } + return &pb.Receipt{Proced: true}, nil }