diff --git a/README.md b/README.md
index f4d90d2..a0d34b6 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-
+
:trollface: 哪吒监控 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。
@@ -43,7 +43,7 @@ _\* 使用 WatchTower 可以自动更新面板,Windows 终端可以使用 nssm
- 报警通知:CPU、内存、硬盘、带宽、流量实时监控。
+ 报警通知:CPU、内存、硬盘、带宽、流量、 **月流量** 实时监控。
#### 灵活通知方式
@@ -96,15 +96,34 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
#### 报警规则说明
-- Type
- - cpu、memory、swap、disk:Min/Max 数值为占用百分比
- - net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024\*1024)
- - offline:不支持 Min/Max 参数
-- Duration:持续秒数,监控比较简陋,取持续时间内的 70% 采样结果
-- Cover `[{"Type":"offline","Duration":10, "Cover":0, "Ignore":{"5": true}}]`
- - `0` 监控所有,通过 `Ignore` 忽略特定服务器
- - `1` 忽略所有,通过 `Ignore` 监控特定服务器
-- Ignore: `{"1": true, "2":false}` 特定服务器,搭配 `Cover` 使用
+##### 基本规则
+
+- type
+ - cpu、memory、swap、disk
+ - net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量)
+ - offline
+- duration:持续秒数,秒数内采样记录 30% 以上触发阈值才会报警(防数据插针)
+- min/max
+ - 流量、网速类数值 为字节(1kb=1024,1mb = 1024\*1024)
+ - 内存、硬盘、CPU 为占用百分比
+ - 离线监控无需设置
+- cover `[{"type":"offline","duration":10, "cover":0, "ignore":{"5": true}}]`
+ - `0` 监控所有,通过 `ignore` 忽略特定服务器
+ - `1` 忽略所有,通过 `ignore` 监控特定服务器
+- ignore: `{"1": true, "2":false}` 特定服务器,搭配 `cover` 使用
+
+##### 特殊:任意周期流量报警
+
+可以用作月流量报警
+
+- type
+ - transfer_in_cycle 周期内的入站流量
+ - transfer_out_cycle 周期内的出站流量
+ - transfer_all_cycle 周期内双向流量和
+- cycle_start 周期开始日期(可以是你机器计费周期的开始日期)
+- cycle_interval 小时(可以设为1月,30*24)
+- min/max、cover、ignore 参考基本规则配置
+- 示例: 每月15号计费的月流量1T报警 `[{"type":"transfer_all_cycle","max":1000000000,"cycle_start":"2021-07-15T08:00:00Z","cycle_interval":720}]`
diff --git a/cmd/dashboard/controller/controller.go b/cmd/dashboard/controller/controller.go
index 3d2a811..9ffda7f 100644
--- a/cmd/dashboard/controller/controller.go
+++ b/cmd/dashboard/controller/controller.go
@@ -3,6 +3,7 @@ package controller
import (
"fmt"
"html/template"
+ "net/http"
"strings"
"time"
@@ -14,7 +15,7 @@ import (
"github.com/naiba/nezha/service/dao"
)
-func ServeWeb(port uint) {
+func ServeWeb(port uint) *http.Server {
gin.SetMode(gin.ReleaseMode)
r := gin.Default()
if dao.Conf.Debug {
@@ -112,7 +113,11 @@ func ServeWeb(port uint) {
r.Static("/static", "resource/static")
r.LoadHTMLGlob("resource/template/**/*")
routers(r)
- r.Run(fmt.Sprintf(":%d", port))
+ srv := &http.Server{
+ Addr: fmt.Sprintf(":%d", port),
+ Handler: r,
+ }
+ return srv
}
func routers(r *gin.Engine) {
diff --git a/cmd/dashboard/controller/member_api.go b/cmd/dashboard/controller/member_api.go
index a2492e1..e0be4f9 100644
--- a/cmd/dashboard/controller/member_api.go
+++ b/cmd/dashboard/controller/member_api.go
@@ -57,7 +57,7 @@ func (ma *memberAPI) delete(c *gin.Context) {
var err error
switch c.Param("model") {
case "server":
- err = dao.DB.Delete(&model.Server{}, "id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.Server{}, "id = ?", id).Error
if err == nil {
dao.ServerLock.Lock()
delete(dao.SecretToID, dao.ServerList[id].Secret)
@@ -66,18 +66,18 @@ func (ma *memberAPI) delete(c *gin.Context) {
dao.ReSortServer()
}
case "notification":
- err = dao.DB.Delete(&model.Notification{}, "id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.Notification{}, "id = ?", id).Error
if err == nil {
dao.OnDeleteNotification(id)
}
case "monitor":
- err = dao.DB.Delete(&model.Monitor{}, "id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.Monitor{}, "id = ?", id).Error
if err == nil {
dao.ServiceSentinelShared.OnMonitorDelete(id)
- err = dao.DB.Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error
}
case "cron":
- err = dao.DB.Delete(&model.Cron{}, "id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.Cron{}, "id = ?", id).Error
if err == nil {
dao.CronLock.RLock()
defer dao.CronLock.RUnlock()
@@ -88,7 +88,7 @@ func (ma *memberAPI) delete(c *gin.Context) {
delete(dao.Crons, id)
}
case "alert-rule":
- err = dao.DB.Delete(&model.AlertRule{}, "id = ?", id).Error
+ err = dao.DB.Unscoped().Delete(&model.AlertRule{}, "id = ?", id).Error
if err == nil {
dao.OnDeleteAlert(id)
}
@@ -378,8 +378,8 @@ func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) {
err = errors.New("至少定义一条规则")
} else {
for i := 0; i < len(r.Rules); i++ {
- if r.Rules[i].Duration < 3 {
- err = errors.New("Duration 至少为 3")
+ if !r.Rules[i].IsTransferDurationRule() && r.Rules[i].Duration < 3 {
+ err = errors.New("错误:Duration 至少为 3")
break
}
}
diff --git a/cmd/dashboard/controller/oauth2.go b/cmd/dashboard/controller/oauth2.go
index a57cb20..a318415 100644
--- a/cmd/dashboard/controller/oauth2.go
+++ b/cmd/dashboard/controller/oauth2.go
@@ -29,22 +29,24 @@ func (oa *oauth2controller) serve() {
}
func (oa *oauth2controller) getCommonOauth2Config(c *gin.Context) *oauth2.Config {
- var endPoint oauth2.Endpoint
if dao.Conf.Oauth2.Type == model.ConfigTypeGitee {
- endPoint = oauth2.Endpoint{
- AuthURL: "https://gitee.com/oauth/authorize",
- TokenURL: "https://gitee.com/oauth/token",
+ return &oauth2.Config{
+ ClientID: dao.Conf.Oauth2.ClientID,
+ ClientSecret: dao.Conf.Oauth2.ClientSecret,
+ Scopes: []string{},
+ Endpoint: oauth2.Endpoint{
+ AuthURL: "https://gitee.com/oauth/authorize",
+ TokenURL: "https://gitee.com/oauth/token",
+ },
+ RedirectURL: oa.getRedirectURL(c),
}
} else {
- endPoint = GitHubOauth2.Endpoint
- }
-
- return &oauth2.Config{
- ClientID: dao.Conf.Oauth2.ClientID,
- ClientSecret: dao.Conf.Oauth2.ClientSecret,
- Scopes: []string{},
- Endpoint: endPoint,
- RedirectURL: oa.getRedirectURL(c),
+ return &oauth2.Config{
+ ClientID: dao.Conf.Oauth2.ClientID,
+ ClientSecret: dao.Conf.Oauth2.ClientSecret,
+ Scopes: []string{},
+ Endpoint: GitHubOauth2.Endpoint,
+ }
}
}
diff --git a/cmd/dashboard/main.go b/cmd/dashboard/main.go
index ce3ae2a..5f76afc 100644
--- a/cmd/dashboard/main.go
+++ b/cmd/dashboard/main.go
@@ -1,8 +1,11 @@
package main
import (
+ "context"
+ "log"
"time"
+ "github.com/ory/graceful"
"github.com/patrickmn/go-cache"
"github.com/robfig/cron/v3"
"gorm.io/driver/sqlite"
@@ -31,7 +34,9 @@ func init() {
if err != nil {
panic(err)
}
- dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{})
+ dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{
+ CreateBatchSize: 200,
+ })
if err != nil {
panic(err)
}
@@ -49,18 +54,73 @@ func init() {
func initSystem() {
dao.DB.AutoMigrate(model.Server{}, model.User{},
model.Notification{}, model.AlertRule{}, model.Monitor{},
- model.MonitorHistory{}, model.Cron{})
+ model.MonitorHistory{}, model.Cron{}, model.Transfer{})
dao.NewServiceSentinel()
loadServers() //加载服务器列表
loadCrons() //加载计划任务
- // 清理旧数据
- dao.Cron.AddFunc("* 3 * * *", cleanMonitorHistory)
+ // 清理 服务请求记录 和 流量记录 的旧数据
+ dao.Cron.AddFunc("0 20 3 * * *", cleanMonitorHistory)
+ // 流量记录打点
+ dao.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
+}
+
+func recordTransferHourlyUsage() {
+ dao.ServerLock.Lock()
+ defer dao.ServerLock.Unlock()
+ now := time.Now()
+ nowTrimSeconds := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, time.Local)
+ var txs []model.Transfer
+ for id, server := range dao.ServerList {
+ tx := model.Transfer{
+ ServerID: id,
+ In: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn),
+ Out: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn),
+ }
+ server.PrevHourlyTransferIn = int64(server.State.NetInTransfer)
+ server.PrevHourlyTransferOut = int64(server.State.NetOutTransfer)
+ tx.CreatedAt = nowTrimSeconds
+ txs = append(txs, tx)
+ }
+ dao.DB.Create(txs)
}
func cleanMonitorHistory() {
- dao.DB.Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30))
+ dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30))
+ var allServerKeep time.Time
+ specialServerKeep := make(map[uint64]time.Time)
+ var specialServerIDs []uint64
+ var alerts []model.AlertRule
+ dao.DB.Find(&alerts)
+ for i := 0; i < len(alerts); i++ {
+ for j := 0; j < len(alerts[i].Rules); j++ {
+ // 是不是流量记录规则
+ if !alerts[i].Rules[j].IsTransferDurationRule() {
+ continue
+ }
+ dataCouldRemoveBefore := alerts[i].Rules[j].GetTransferDurationStart()
+ // 判断规则影响的机器范围
+ if alerts[i].Rules[j].Cover == model.RuleCoverAll {
+ // 更新全局可以清理的数据点
+ if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) {
+ allServerKeep = dataCouldRemoveBefore
+ }
+ } else {
+ // 更新特定机器可以清理数据点
+ for id := range alerts[i].Rules[j].Ignore {
+ if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) {
+ specialServerKeep[id] = dataCouldRemoveBefore
+ specialServerIDs = append(specialServerIDs, id)
+ }
+ }
+ }
+ }
+ }
+ for id, couldRemove := range specialServerKeep {
+ dao.DB.Unscoped().Delete(&model.Transfer{}, "id = ? AND created_at < ?", id, couldRemove)
+ }
+ dao.DB.Unscoped().Delete(&model.Transfer{}, "id NOT IN (?) AND created_at < ?", specialServerIDs, allServerKeep)
}
func loadServers() {
@@ -98,8 +158,33 @@ func loadCrons() {
}
func main() {
- go controller.ServeWeb(dao.Conf.HTTPPort)
go rpc.ServeRPC(dao.Conf.GRPCPort)
go rpc.DispatchTask(time.Second * 30)
- dao.AlertSentinelStart()
+ go dao.AlertSentinelStart()
+ srv := controller.ServeWeb(dao.Conf.HTTPPort)
+ graceful.Graceful(func() error {
+ return srv.ListenAndServe()
+ }, func(c context.Context) error {
+ dao.ServerLock.Lock()
+ defer dao.ServerLock.Unlock()
+ var txs []model.Transfer
+ for _, s := range dao.ServerList {
+ in := s.State.NetInTransfer - uint64(s.PrevHourlyTransferIn)
+ out := s.State.NetOutTransfer - uint64(s.PrevHourlyTransferOut)
+ if in > 0 && out > 0 {
+ tx := model.Transfer{
+ ServerID: s.ID,
+ In: in,
+ Out: out,
+ }
+ tx.CreatedAt = time.Now()
+ txs = append(txs, tx)
+ }
+ }
+ if err := dao.DB.Create(txs).Error; err != nil {
+ log.Println("流量统计入库", err)
+ }
+ srv.Shutdown(c)
+ return nil
+ })
}
diff --git a/go.mod b/go.mod
index 07d9896..cc7848b 100644
--- a/go.mod
+++ b/go.mod
@@ -15,6 +15,7 @@ require (
github.com/gorilla/websocket v1.4.2
github.com/onsi/ginkgo v1.7.0 // indirect
github.com/onsi/gomega v1.4.3 // indirect
+ github.com/ory/graceful v0.1.1
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/robfig/cron/v3 v3.0.1
diff --git a/go.sum b/go.sum
index a785187..b23d3d6 100644
--- a/go.sum
+++ b/go.sum
@@ -251,6 +251,8 @@ github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
+github.com/ory/graceful v0.1.1 h1:zx+8tDObLPrG+7Tc8jKYlXsqWnLtOQA1IZ/FAAKHMXU=
+github.com/ory/graceful v0.1.1/go.mod h1:zqu70l95WrKHF4AZ6tXHvAqAvpY6M7g6ttaAVcMm7KU=
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293 h1:a+cgfu5wXK/NIhR2FWqpaUYl0XloncvajO8SDvBu4UQ=
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293/go.mod h1:unC1eanNdTd2fUViujtg9cDR/jNnYNvGmFYjIXz1hK4=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
@@ -259,6 +261,7 @@ github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTK
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
diff --git a/model/alertrule.go b/model/alertrule.go
index b752df6..053ad0c 100644
--- a/model/alertrule.go
+++ b/model/alertrule.go
@@ -27,10 +27,10 @@ func (r *AlertRule) AfterFind(tx *gorm.DB) error {
return json.Unmarshal([]byte(r.RulesRaw), &r.Rules)
}
-func (r *AlertRule) Snapshot(server *Server) []interface{} {
+func (r *AlertRule) Snapshot(server *Server, db *gorm.DB) []interface{} {
var point []interface{}
for i := 0; i < len(r.Rules); i++ {
- point = append(point, r.Rules[i].Snapshot(server))
+ point = append(point, r.Rules[i].Snapshot(server, db))
}
return point
}
@@ -39,28 +39,39 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
var max int
var count int
for i := 0; i < len(r.Rules); i++ {
- total := 0.0
- fail := 0.0
- num := int(r.Rules[i].Duration)
- if num > max {
- max = num
- }
- if len(points) < num {
- continue
- }
- for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
- total++
- if points[j][i] != nil {
- fail++
+ if r.Rules[i].IsTransferDurationRule() {
+ if max < 1 {
+ max = 1
+ }
+ // 循环区间流量报警
+ for j := len(points[i]) - 1; j >= 0; j-- {
+ if points[i][j] != nil {
+ count++
+ break
+ }
+ }
+ } else {
+ // 常规报警
+ total := 0.0
+ fail := 0.0
+ num := int(r.Rules[i].Duration)
+ if num > max {
+ max = num
+ }
+ if len(points) < num {
+ continue
+ }
+ for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
+ total++
+ if points[j][i] != nil {
+ fail++
+ }
+ }
+ if fail/total > 0.7 {
+ count++
+ break
}
}
- if fail/total > 0.7 {
- count++
- break
- }
}
- if count == len(r.Rules) {
- return max, false
- }
- return max, true
+ return max, count != len(r.Rules)
}
diff --git a/model/common.go b/model/common.go
index ef4736b..a585862 100644
--- a/model/common.go
+++ b/model/common.go
@@ -8,8 +8,8 @@ const CacheKeyOauth2State = "p:a:state"
const CacheKeyServicePage = "p:c:service"
type Common struct {
- ID uint64 `gorm:"primary_key"`
- CreatedAt time.Time
+ ID uint64 `gorm:"primary_key"`
+ CreatedAt time.Time `sql:"index"`
UpdatedAt time.Time
DeletedAt *time.Time `sql:"index"`
}
diff --git a/model/rule.go b/model/rule.go
index 9731fae..4e332b3 100644
--- a/model/rule.go
+++ b/model/rule.go
@@ -1,21 +1,36 @@
package model
-import "time"
+import (
+ "strings"
+ "time"
+
+ "gorm.io/gorm"
+)
const (
RuleCoverAll = iota
RuleCoverIgnoreAll
)
+type NResult struct {
+ N uint64
+}
+
type Rule struct {
// 指标类型,cpu、memory、swap、disk、net_in_speed、net_out_speed
// net_all_speed、transfer_in、transfer_out、transfer_all、offline
- Type string `json:"type,omitempty"`
- Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
- Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
- Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒)
- Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll
- Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除
+ // transfer_in_cycle、transfer_out_cycle、transfer_all_cycle
+ Type string `json:"type,omitempty"`
+ Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
+ Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
+ CycleStart time.Time `json:"cycle_start,omitempty"` // 流量统计的开始时间
+ CycleInterval uint64 `json:"cycle_interval,omitempty"` // 流量统计周期
+ Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒)
+ Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll
+ Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除
+
+ // 只作为缓存使用,记录下次该检测的时间
+ NextTransferAt map[uint64]time.Time `json:"-"`
}
func percentage(used, total uint64) uint64 {
@@ -26,7 +41,7 @@ func percentage(used, total uint64) uint64 {
}
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
-func (u *Rule) Snapshot(server *Server) interface{} {
+func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
// 监控全部但是排除了此服务器
if u.Cover == RuleCoverAll && u.Ignore[server.ID] {
return nil
@@ -36,6 +51,11 @@ func (u *Rule) Snapshot(server *Server) interface{} {
return nil
}
+ // 循环区间流量检测 · 短期无需重复检测
+ if u.IsTransferDurationRule() && u.NextTransferAt[server.ID].After(time.Now()) {
+ return nil
+ }
+
var src uint64
switch u.Type {
@@ -65,6 +85,39 @@ func (u *Rule) Snapshot(server *Server) interface{} {
} else {
src = uint64(server.LastActive.Unix())
}
+ case "transfer_in_cycle":
+ src = server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
+ if u.CycleInterval != 1 {
+ var res NResult
+ db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
+ src += res.N
+ }
+ case "transfer_out_cycle":
+ src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut)
+ if u.CycleInterval != 1 {
+ var res NResult
+ db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
+ src += res.N
+ }
+ case "transfer_all_cycle":
+ src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
+ if u.CycleInterval != 1 {
+ var res NResult
+ db.Model(&Transfer{}).Select("SUM('in'+'out') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
+ src += res.N
+ }
+ }
+
+ // 循环区间流量检测 · 更新下次需要检测时间
+ if u.IsTransferDurationRule() {
+ seconds := 1800 * time.Duration(((u.Max - src) / u.Max))
+ if seconds < 180 {
+ seconds = 180
+ }
+ if u.NextTransferAt == nil {
+ u.NextTransferAt = make(map[uint64]time.Time)
+ }
+ u.NextTransferAt[server.ID] = time.Now().Add(time.Duration(time.Second * seconds))
}
if u.Type == "offline" && uint64(time.Now().Unix())-src > 6 {
@@ -75,3 +128,12 @@ func (u *Rule) Snapshot(server *Server) interface{} {
return nil
}
+
+func (rule Rule) IsTransferDurationRule() bool {
+ return strings.HasSuffix(rule.Type, "_cycle")
+}
+
+func (rule Rule) GetTransferDurationStart() time.Time {
+ interval := 3600 * int64(rule.CycleInterval)
+ return time.Unix(rule.CycleStart.Unix()+(time.Now().Unix()-rule.CycleStart.Unix())/interval*interval, 0)
+}
diff --git a/model/server.go b/model/server.go
index 9dc5bf9..ac2a2ab 100644
--- a/model/server.go
+++ b/model/server.go
@@ -23,6 +23,9 @@ type Server struct {
TaskClose chan error `gorm:"-" json:"-"`
TaskStream pb.NezhaService_RequestTaskServer `gorm:"-" json:"-"`
+
+ PrevHourlyTransferIn int64 `gorm:"-" json:"-"` // 上次数据点时的入站使用量
+ PrevHourlyTransferOut int64 `gorm:"-" json:"-"` // 上次数据点时的出站使用量
}
func (s Server) Marshal() template.JS {
diff --git a/model/transfer.go b/model/transfer.go
new file mode 100644
index 0000000..0cbde6c
--- /dev/null
+++ b/model/transfer.go
@@ -0,0 +1,8 @@
+package model
+
+type Transfer struct {
+ Common
+ ServerID uint64
+ In uint64
+ Out uint64
+}
diff --git a/service/dao/alertsentinel.go b/service/dao/alertsentinel.go
index d788166..2468931 100644
--- a/service/dao/alertsentinel.go
+++ b/service/dao/alertsentinel.go
@@ -109,7 +109,7 @@ func checkStatus() {
for _, server := range ServerList {
// 监测点
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
- ID][server.ID], alert.Snapshot(server))
+ ID][server.ID], alert.Snapshot(server, DB))
// 发送通知,分为触发报警和恢复通知
max, passed := alert.Check(alertsStore[alert.ID][server.ID])
if !passed {
diff --git a/service/dao/dao.go b/service/dao/dao.go
index 95cd279..d0dbada 100644
--- a/service/dao/dao.go
+++ b/service/dao/dao.go
@@ -13,7 +13,7 @@ import (
pb "github.com/naiba/nezha/proto"
)
-var Version = "v0.8.6" // !!记得修改 README 中的 badge 版本!!
+var Version = "v0.9.0" // !!记得修改 README 中的 badge 版本!!
var (
Conf *model.Config
diff --git a/service/dao/servicesentinel.go b/service/dao/servicesentinel.go
index 7d4a39b..671b700 100644
--- a/service/dao/servicesentinel.go
+++ b/service/dao/servicesentinel.go
@@ -12,7 +12,7 @@ import (
pb "github.com/naiba/nezha/proto"
)
-const _CurrentStatusSize = 30 // 统计 5 分钟内的数据为当前状态
+const _CurrentStatusSize = 30 // 统计 15 分钟内的数据为当前状态
var ServiceSentinelShared *ServiceSentinel
diff --git a/service/rpc/nezha.go b/service/rpc/nezha.go
index ce4eef4..7308b30 100644
--- a/service/rpc/nezha.go
+++ b/service/rpc/nezha.go
@@ -73,7 +73,21 @@ func (s *NezhaHandler) ReportSystemState(c context.Context, r *pb.State) (*pb.Re
dao.ServerLock.RLock()
defer dao.ServerLock.RUnlock()
dao.ServerList[clientID].LastActive = time.Now()
+
+ // 判断是否是机器重启,如果是机器重启要录入最后记录的流量里面
+ if state.Uptime < dao.ServerList[clientID].State.Uptime {
+ dao.ServerList[clientID].PrevHourlyTransferIn = dao.ServerList[clientID].PrevHourlyTransferIn - int64(dao.ServerList[clientID].State.NetInTransfer)
+ dao.ServerList[clientID].PrevHourlyTransferOut = dao.ServerList[clientID].PrevHourlyTransferOut - int64(dao.ServerList[clientID].State.NetOutTransfer)
+ }
+
dao.ServerList[clientID].State = &state
+
+ // 如果从未记录过,先打点,等到小时时间点时入库
+ if dao.ServerList[clientID].PrevHourlyTransferIn == 0 || dao.ServerList[clientID].PrevHourlyTransferOut == 0 {
+ dao.ServerList[clientID].PrevHourlyTransferIn = int64(state.NetInTransfer)
+ dao.ServerList[clientID].PrevHourlyTransferOut = int64(state.NetOutTransfer)
+ }
+
return &pb.Receipt{Proced: true}, nil
}