mirror of
https://github.com/nezhahq/nezha.git
synced 2025-01-22 12:48:14 -05:00
✨ 任意时间区间(月流量)统计,无视重启~
This commit is contained in:
parent
dff1e29c40
commit
63bb1570d2
41
README.md
41
README.md
@ -1,7 +1,7 @@
|
||||
<div align="center">
|
||||
<img width="500" style="max-width:100%" src="resource/static/brand.png" title="哪吒监控">
|
||||
<br><br>
|
||||
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.8.6&logo=github&style=for-the-badge"> <img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge"> <img src="https://img.shields.io/badge/Installer-v0.6.4-brightgreen?style=for-the-badge&logo=linux">
|
||||
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.9.0&logo=github&style=for-the-badge"> <img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge"> <img src="https://img.shields.io/badge/Installer-v0.6.4-brightgreen?style=for-the-badge&logo=linux">
|
||||
<br>
|
||||
<br>
|
||||
<p>:trollface: <b>哪吒监控</b> 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p>
|
||||
@ -43,7 +43,7 @@ _\* 使用 WatchTower 可以自动更新面板,Windows 终端可以使用 nssm
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>报警通知:CPU、内存、硬盘、带宽、流量实时监控。</summary>
|
||||
<summary>报警通知:CPU、内存、硬盘、带宽、流量、 **月流量** 实时监控。</summary>
|
||||
|
||||
#### 灵活通知方式
|
||||
|
||||
@ -96,15 +96,34 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
|
||||
|
||||
#### 报警规则说明
|
||||
|
||||
- Type
|
||||
- cpu、memory、swap、disk:Min/Max 数值为占用百分比
|
||||
- net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024\*1024)
|
||||
- offline:不支持 Min/Max 参数
|
||||
- Duration:持续秒数,监控比较简陋,取持续时间内的 70% 采样结果
|
||||
- Cover `[{"Type":"offline","Duration":10, "Cover":0, "Ignore":{"5": true}}]`
|
||||
- `0` 监控所有,通过 `Ignore` 忽略特定服务器
|
||||
- `1` 忽略所有,通过 `Ignore` 监控特定服务器
|
||||
- Ignore: `{"1": true, "2":false}` 特定服务器,搭配 `Cover` 使用
|
||||
##### 基本规则
|
||||
|
||||
- type
|
||||
- cpu、memory、swap、disk
|
||||
- net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量)
|
||||
- offline
|
||||
- duration:持续秒数,秒数内采样记录 30% 以上触发阈值才会报警(防数据插针)
|
||||
- min/max
|
||||
- 流量、网速类数值 为字节(1kb=1024,1mb = 1024\*1024)
|
||||
- 内存、硬盘、CPU 为占用百分比
|
||||
- 离线监控无需设置
|
||||
- cover `[{"type":"offline","duration":10, "cover":0, "ignore":{"5": true}}]`
|
||||
- `0` 监控所有,通过 `ignore` 忽略特定服务器
|
||||
- `1` 忽略所有,通过 `ignore` 监控特定服务器
|
||||
- ignore: `{"1": true, "2":false}` 特定服务器,搭配 `cover` 使用
|
||||
|
||||
##### 特殊:任意周期流量报警
|
||||
|
||||
可以用作月流量报警
|
||||
|
||||
- type
|
||||
- transfer_in_cycle 周期内的入站流量
|
||||
- transfer_out_cycle 周期内的出站流量
|
||||
- transfer_all_cycle 周期内双向流量和
|
||||
- cycle_start 周期开始日期(可以是你机器计费周期的开始日期)
|
||||
- cycle_interval 小时(可以设为1月,30*24)
|
||||
- min/max、cover、ignore 参考基本规则配置
|
||||
- 示例: 每月15号计费的月流量1T报警 `[{"type":"transfer_all_cycle","max":1000000000,"cycle_start":"2021-07-15T08:00:00Z","cycle_interval":720}]`
|
||||
</details>
|
||||
|
||||
<details>
|
||||
|
@ -3,6 +3,7 @@ package controller
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -14,7 +15,7 @@ import (
|
||||
"github.com/naiba/nezha/service/dao"
|
||||
)
|
||||
|
||||
func ServeWeb(port uint) {
|
||||
func ServeWeb(port uint) *http.Server {
|
||||
gin.SetMode(gin.ReleaseMode)
|
||||
r := gin.Default()
|
||||
if dao.Conf.Debug {
|
||||
@ -112,7 +113,11 @@ func ServeWeb(port uint) {
|
||||
r.Static("/static", "resource/static")
|
||||
r.LoadHTMLGlob("resource/template/**/*")
|
||||
routers(r)
|
||||
r.Run(fmt.Sprintf(":%d", port))
|
||||
srv := &http.Server{
|
||||
Addr: fmt.Sprintf(":%d", port),
|
||||
Handler: r,
|
||||
}
|
||||
return srv
|
||||
}
|
||||
|
||||
func routers(r *gin.Engine) {
|
||||
|
@ -57,7 +57,7 @@ func (ma *memberAPI) delete(c *gin.Context) {
|
||||
var err error
|
||||
switch c.Param("model") {
|
||||
case "server":
|
||||
err = dao.DB.Delete(&model.Server{}, "id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.Server{}, "id = ?", id).Error
|
||||
if err == nil {
|
||||
dao.ServerLock.Lock()
|
||||
delete(dao.SecretToID, dao.ServerList[id].Secret)
|
||||
@ -66,18 +66,18 @@ func (ma *memberAPI) delete(c *gin.Context) {
|
||||
dao.ReSortServer()
|
||||
}
|
||||
case "notification":
|
||||
err = dao.DB.Delete(&model.Notification{}, "id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.Notification{}, "id = ?", id).Error
|
||||
if err == nil {
|
||||
dao.OnDeleteNotification(id)
|
||||
}
|
||||
case "monitor":
|
||||
err = dao.DB.Delete(&model.Monitor{}, "id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.Monitor{}, "id = ?", id).Error
|
||||
if err == nil {
|
||||
dao.ServiceSentinelShared.OnMonitorDelete(id)
|
||||
err = dao.DB.Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error
|
||||
}
|
||||
case "cron":
|
||||
err = dao.DB.Delete(&model.Cron{}, "id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.Cron{}, "id = ?", id).Error
|
||||
if err == nil {
|
||||
dao.CronLock.RLock()
|
||||
defer dao.CronLock.RUnlock()
|
||||
@ -88,7 +88,7 @@ func (ma *memberAPI) delete(c *gin.Context) {
|
||||
delete(dao.Crons, id)
|
||||
}
|
||||
case "alert-rule":
|
||||
err = dao.DB.Delete(&model.AlertRule{}, "id = ?", id).Error
|
||||
err = dao.DB.Unscoped().Delete(&model.AlertRule{}, "id = ?", id).Error
|
||||
if err == nil {
|
||||
dao.OnDeleteAlert(id)
|
||||
}
|
||||
@ -378,8 +378,8 @@ func (ma *memberAPI) addOrEditAlertRule(c *gin.Context) {
|
||||
err = errors.New("至少定义一条规则")
|
||||
} else {
|
||||
for i := 0; i < len(r.Rules); i++ {
|
||||
if r.Rules[i].Duration < 3 {
|
||||
err = errors.New("Duration 至少为 3")
|
||||
if !r.Rules[i].IsTransferDurationRule() && r.Rules[i].Duration < 3 {
|
||||
err = errors.New("错误:Duration 至少为 3")
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -29,22 +29,24 @@ func (oa *oauth2controller) serve() {
|
||||
}
|
||||
|
||||
func (oa *oauth2controller) getCommonOauth2Config(c *gin.Context) *oauth2.Config {
|
||||
var endPoint oauth2.Endpoint
|
||||
if dao.Conf.Oauth2.Type == model.ConfigTypeGitee {
|
||||
endPoint = oauth2.Endpoint{
|
||||
AuthURL: "https://gitee.com/oauth/authorize",
|
||||
TokenURL: "https://gitee.com/oauth/token",
|
||||
return &oauth2.Config{
|
||||
ClientID: dao.Conf.Oauth2.ClientID,
|
||||
ClientSecret: dao.Conf.Oauth2.ClientSecret,
|
||||
Scopes: []string{},
|
||||
Endpoint: oauth2.Endpoint{
|
||||
AuthURL: "https://gitee.com/oauth/authorize",
|
||||
TokenURL: "https://gitee.com/oauth/token",
|
||||
},
|
||||
RedirectURL: oa.getRedirectURL(c),
|
||||
}
|
||||
} else {
|
||||
endPoint = GitHubOauth2.Endpoint
|
||||
}
|
||||
|
||||
return &oauth2.Config{
|
||||
ClientID: dao.Conf.Oauth2.ClientID,
|
||||
ClientSecret: dao.Conf.Oauth2.ClientSecret,
|
||||
Scopes: []string{},
|
||||
Endpoint: endPoint,
|
||||
RedirectURL: oa.getRedirectURL(c),
|
||||
return &oauth2.Config{
|
||||
ClientID: dao.Conf.Oauth2.ClientID,
|
||||
ClientSecret: dao.Conf.Oauth2.ClientSecret,
|
||||
Scopes: []string{},
|
||||
Endpoint: GitHubOauth2.Endpoint,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/ory/graceful"
|
||||
"github.com/patrickmn/go-cache"
|
||||
"github.com/robfig/cron/v3"
|
||||
"gorm.io/driver/sqlite"
|
||||
@ -31,7 +34,9 @@ func init() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{})
|
||||
dao.DB, err = gorm.Open(sqlite.Open("data/sqlite.db"), &gorm.Config{
|
||||
CreateBatchSize: 200,
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@ -49,18 +54,73 @@ func init() {
|
||||
func initSystem() {
|
||||
dao.DB.AutoMigrate(model.Server{}, model.User{},
|
||||
model.Notification{}, model.AlertRule{}, model.Monitor{},
|
||||
model.MonitorHistory{}, model.Cron{})
|
||||
model.MonitorHistory{}, model.Cron{}, model.Transfer{})
|
||||
dao.NewServiceSentinel()
|
||||
|
||||
loadServers() //加载服务器列表
|
||||
loadCrons() //加载计划任务
|
||||
|
||||
// 清理旧数据
|
||||
dao.Cron.AddFunc("* 3 * * *", cleanMonitorHistory)
|
||||
// 清理 服务请求记录 和 流量记录 的旧数据
|
||||
dao.Cron.AddFunc("0 20 3 * * *", cleanMonitorHistory)
|
||||
// 流量记录打点
|
||||
dao.Cron.AddFunc("0 0 * * * *", recordTransferHourlyUsage)
|
||||
}
|
||||
|
||||
func recordTransferHourlyUsage() {
|
||||
dao.ServerLock.Lock()
|
||||
defer dao.ServerLock.Unlock()
|
||||
now := time.Now()
|
||||
nowTrimSeconds := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, time.Local)
|
||||
var txs []model.Transfer
|
||||
for id, server := range dao.ServerList {
|
||||
tx := model.Transfer{
|
||||
ServerID: id,
|
||||
In: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn),
|
||||
Out: server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn),
|
||||
}
|
||||
server.PrevHourlyTransferIn = int64(server.State.NetInTransfer)
|
||||
server.PrevHourlyTransferOut = int64(server.State.NetOutTransfer)
|
||||
tx.CreatedAt = nowTrimSeconds
|
||||
txs = append(txs, tx)
|
||||
}
|
||||
dao.DB.Create(txs)
|
||||
}
|
||||
|
||||
func cleanMonitorHistory() {
|
||||
dao.DB.Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30))
|
||||
dao.DB.Unscoped().Delete(&model.MonitorHistory{}, "created_at < ?", time.Now().AddDate(0, 0, -30))
|
||||
var allServerKeep time.Time
|
||||
specialServerKeep := make(map[uint64]time.Time)
|
||||
var specialServerIDs []uint64
|
||||
var alerts []model.AlertRule
|
||||
dao.DB.Find(&alerts)
|
||||
for i := 0; i < len(alerts); i++ {
|
||||
for j := 0; j < len(alerts[i].Rules); j++ {
|
||||
// 是不是流量记录规则
|
||||
if !alerts[i].Rules[j].IsTransferDurationRule() {
|
||||
continue
|
||||
}
|
||||
dataCouldRemoveBefore := alerts[i].Rules[j].GetTransferDurationStart()
|
||||
// 判断规则影响的机器范围
|
||||
if alerts[i].Rules[j].Cover == model.RuleCoverAll {
|
||||
// 更新全局可以清理的数据点
|
||||
if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) {
|
||||
allServerKeep = dataCouldRemoveBefore
|
||||
}
|
||||
} else {
|
||||
// 更新特定机器可以清理数据点
|
||||
for id := range alerts[i].Rules[j].Ignore {
|
||||
if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) {
|
||||
specialServerKeep[id] = dataCouldRemoveBefore
|
||||
specialServerIDs = append(specialServerIDs, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for id, couldRemove := range specialServerKeep {
|
||||
dao.DB.Unscoped().Delete(&model.Transfer{}, "id = ? AND created_at < ?", id, couldRemove)
|
||||
}
|
||||
dao.DB.Unscoped().Delete(&model.Transfer{}, "id NOT IN (?) AND created_at < ?", specialServerIDs, allServerKeep)
|
||||
}
|
||||
|
||||
func loadServers() {
|
||||
@ -98,8 +158,33 @@ func loadCrons() {
|
||||
}
|
||||
|
||||
func main() {
|
||||
go controller.ServeWeb(dao.Conf.HTTPPort)
|
||||
go rpc.ServeRPC(dao.Conf.GRPCPort)
|
||||
go rpc.DispatchTask(time.Second * 30)
|
||||
dao.AlertSentinelStart()
|
||||
go dao.AlertSentinelStart()
|
||||
srv := controller.ServeWeb(dao.Conf.HTTPPort)
|
||||
graceful.Graceful(func() error {
|
||||
return srv.ListenAndServe()
|
||||
}, func(c context.Context) error {
|
||||
dao.ServerLock.Lock()
|
||||
defer dao.ServerLock.Unlock()
|
||||
var txs []model.Transfer
|
||||
for _, s := range dao.ServerList {
|
||||
in := s.State.NetInTransfer - uint64(s.PrevHourlyTransferIn)
|
||||
out := s.State.NetOutTransfer - uint64(s.PrevHourlyTransferOut)
|
||||
if in > 0 && out > 0 {
|
||||
tx := model.Transfer{
|
||||
ServerID: s.ID,
|
||||
In: in,
|
||||
Out: out,
|
||||
}
|
||||
tx.CreatedAt = time.Now()
|
||||
txs = append(txs, tx)
|
||||
}
|
||||
}
|
||||
if err := dao.DB.Create(txs).Error; err != nil {
|
||||
log.Println("流量统计入库", err)
|
||||
}
|
||||
srv.Shutdown(c)
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
1
go.mod
1
go.mod
@ -15,6 +15,7 @@ require (
|
||||
github.com/gorilla/websocket v1.4.2
|
||||
github.com/onsi/ginkgo v1.7.0 // indirect
|
||||
github.com/onsi/gomega v1.4.3 // indirect
|
||||
github.com/ory/graceful v0.1.1
|
||||
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
|
3
go.sum
3
go.sum
@ -251,6 +251,8 @@ github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
|
||||
github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/onsi/gomega v1.4.3 h1:RE1xgDvH7imwFD45h+u2SgIfERHlS2yNG4DObb5BSKU=
|
||||
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||
github.com/ory/graceful v0.1.1 h1:zx+8tDObLPrG+7Tc8jKYlXsqWnLtOQA1IZ/FAAKHMXU=
|
||||
github.com/ory/graceful v0.1.1/go.mod h1:zqu70l95WrKHF4AZ6tXHvAqAvpY6M7g6ttaAVcMm7KU=
|
||||
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293 h1:a+cgfu5wXK/NIhR2FWqpaUYl0XloncvajO8SDvBu4UQ=
|
||||
github.com/p14yground/go-github-selfupdate v0.0.0-20210520015421-eddf14461293/go.mod h1:unC1eanNdTd2fUViujtg9cDR/jNnYNvGmFYjIXz1hK4=
|
||||
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
|
||||
@ -259,6 +261,7 @@ github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTK
|
||||
github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc=
|
||||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
|
@ -27,10 +27,10 @@ func (r *AlertRule) AfterFind(tx *gorm.DB) error {
|
||||
return json.Unmarshal([]byte(r.RulesRaw), &r.Rules)
|
||||
}
|
||||
|
||||
func (r *AlertRule) Snapshot(server *Server) []interface{} {
|
||||
func (r *AlertRule) Snapshot(server *Server, db *gorm.DB) []interface{} {
|
||||
var point []interface{}
|
||||
for i := 0; i < len(r.Rules); i++ {
|
||||
point = append(point, r.Rules[i].Snapshot(server))
|
||||
point = append(point, r.Rules[i].Snapshot(server, db))
|
||||
}
|
||||
return point
|
||||
}
|
||||
@ -39,28 +39,39 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
|
||||
var max int
|
||||
var count int
|
||||
for i := 0; i < len(r.Rules); i++ {
|
||||
total := 0.0
|
||||
fail := 0.0
|
||||
num := int(r.Rules[i].Duration)
|
||||
if num > max {
|
||||
max = num
|
||||
}
|
||||
if len(points) < num {
|
||||
continue
|
||||
}
|
||||
for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
|
||||
total++
|
||||
if points[j][i] != nil {
|
||||
fail++
|
||||
if r.Rules[i].IsTransferDurationRule() {
|
||||
if max < 1 {
|
||||
max = 1
|
||||
}
|
||||
// 循环区间流量报警
|
||||
for j := len(points[i]) - 1; j >= 0; j-- {
|
||||
if points[i][j] != nil {
|
||||
count++
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// 常规报警
|
||||
total := 0.0
|
||||
fail := 0.0
|
||||
num := int(r.Rules[i].Duration)
|
||||
if num > max {
|
||||
max = num
|
||||
}
|
||||
if len(points) < num {
|
||||
continue
|
||||
}
|
||||
for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
|
||||
total++
|
||||
if points[j][i] != nil {
|
||||
fail++
|
||||
}
|
||||
}
|
||||
if fail/total > 0.7 {
|
||||
count++
|
||||
break
|
||||
}
|
||||
}
|
||||
if fail/total > 0.7 {
|
||||
count++
|
||||
break
|
||||
}
|
||||
}
|
||||
if count == len(r.Rules) {
|
||||
return max, false
|
||||
}
|
||||
return max, true
|
||||
return max, count != len(r.Rules)
|
||||
}
|
||||
|
@ -8,8 +8,8 @@ const CacheKeyOauth2State = "p:a:state"
|
||||
const CacheKeyServicePage = "p:c:service"
|
||||
|
||||
type Common struct {
|
||||
ID uint64 `gorm:"primary_key"`
|
||||
CreatedAt time.Time
|
||||
ID uint64 `gorm:"primary_key"`
|
||||
CreatedAt time.Time `sql:"index"`
|
||||
UpdatedAt time.Time
|
||||
DeletedAt *time.Time `sql:"index"`
|
||||
}
|
||||
|
@ -1,21 +1,36 @@
|
||||
package model
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
const (
|
||||
RuleCoverAll = iota
|
||||
RuleCoverIgnoreAll
|
||||
)
|
||||
|
||||
type NResult struct {
|
||||
N uint64
|
||||
}
|
||||
|
||||
type Rule struct {
|
||||
// 指标类型,cpu、memory、swap、disk、net_in_speed、net_out_speed
|
||||
// net_all_speed、transfer_in、transfer_out、transfer_all、offline
|
||||
Type string `json:"type,omitempty"`
|
||||
Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
|
||||
Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
|
||||
Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒)
|
||||
Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll
|
||||
Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除
|
||||
// transfer_in_cycle、transfer_out_cycle、transfer_all_cycle
|
||||
Type string `json:"type,omitempty"`
|
||||
Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
|
||||
Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
|
||||
CycleStart time.Time `json:"cycle_start,omitempty"` // 流量统计的开始时间
|
||||
CycleInterval uint64 `json:"cycle_interval,omitempty"` // 流量统计周期
|
||||
Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒)
|
||||
Cover uint64 `json:"cover,omitempty"` // 覆盖范围 RuleCoverAll/IgnoreAll
|
||||
Ignore map[uint64]bool `json:"ignore,omitempty"` // 覆盖范围的排除
|
||||
|
||||
// 只作为缓存使用,记录下次该检测的时间
|
||||
NextTransferAt map[uint64]time.Time `json:"-"`
|
||||
}
|
||||
|
||||
func percentage(used, total uint64) uint64 {
|
||||
@ -26,7 +41,7 @@ func percentage(used, total uint64) uint64 {
|
||||
}
|
||||
|
||||
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
|
||||
func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
|
||||
// 监控全部但是排除了此服务器
|
||||
if u.Cover == RuleCoverAll && u.Ignore[server.ID] {
|
||||
return nil
|
||||
@ -36,6 +51,11 @@ func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 循环区间流量检测 · 短期无需重复检测
|
||||
if u.IsTransferDurationRule() && u.NextTransferAt[server.ID].After(time.Now()) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var src uint64
|
||||
|
||||
switch u.Type {
|
||||
@ -65,6 +85,39 @@ func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
} else {
|
||||
src = uint64(server.LastActive.Unix())
|
||||
}
|
||||
case "transfer_in_cycle":
|
||||
src = server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
|
||||
if u.CycleInterval != 1 {
|
||||
var res NResult
|
||||
db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
|
||||
src += res.N
|
||||
}
|
||||
case "transfer_out_cycle":
|
||||
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut)
|
||||
if u.CycleInterval != 1 {
|
||||
var res NResult
|
||||
db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
|
||||
src += res.N
|
||||
}
|
||||
case "transfer_all_cycle":
|
||||
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
|
||||
if u.CycleInterval != 1 {
|
||||
var res NResult
|
||||
db.Model(&Transfer{}).Select("SUM('in'+'out') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
|
||||
src += res.N
|
||||
}
|
||||
}
|
||||
|
||||
// 循环区间流量检测 · 更新下次需要检测时间
|
||||
if u.IsTransferDurationRule() {
|
||||
seconds := 1800 * time.Duration(((u.Max - src) / u.Max))
|
||||
if seconds < 180 {
|
||||
seconds = 180
|
||||
}
|
||||
if u.NextTransferAt == nil {
|
||||
u.NextTransferAt = make(map[uint64]time.Time)
|
||||
}
|
||||
u.NextTransferAt[server.ID] = time.Now().Add(time.Duration(time.Second * seconds))
|
||||
}
|
||||
|
||||
if u.Type == "offline" && uint64(time.Now().Unix())-src > 6 {
|
||||
@ -75,3 +128,12 @@ func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (rule Rule) IsTransferDurationRule() bool {
|
||||
return strings.HasSuffix(rule.Type, "_cycle")
|
||||
}
|
||||
|
||||
func (rule Rule) GetTransferDurationStart() time.Time {
|
||||
interval := 3600 * int64(rule.CycleInterval)
|
||||
return time.Unix(rule.CycleStart.Unix()+(time.Now().Unix()-rule.CycleStart.Unix())/interval*interval, 0)
|
||||
}
|
||||
|
@ -23,6 +23,9 @@ type Server struct {
|
||||
|
||||
TaskClose chan error `gorm:"-" json:"-"`
|
||||
TaskStream pb.NezhaService_RequestTaskServer `gorm:"-" json:"-"`
|
||||
|
||||
PrevHourlyTransferIn int64 `gorm:"-" json:"-"` // 上次数据点时的入站使用量
|
||||
PrevHourlyTransferOut int64 `gorm:"-" json:"-"` // 上次数据点时的出站使用量
|
||||
}
|
||||
|
||||
func (s Server) Marshal() template.JS {
|
||||
|
8
model/transfer.go
Normal file
8
model/transfer.go
Normal file
@ -0,0 +1,8 @@
|
||||
package model
|
||||
|
||||
type Transfer struct {
|
||||
Common
|
||||
ServerID uint64
|
||||
In uint64
|
||||
Out uint64
|
||||
}
|
@ -109,7 +109,7 @@ func checkStatus() {
|
||||
for _, server := range ServerList {
|
||||
// 监测点
|
||||
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
|
||||
ID][server.ID], alert.Snapshot(server))
|
||||
ID][server.ID], alert.Snapshot(server, DB))
|
||||
// 发送通知,分为触发报警和恢复通知
|
||||
max, passed := alert.Check(alertsStore[alert.ID][server.ID])
|
||||
if !passed {
|
||||
|
@ -13,7 +13,7 @@ import (
|
||||
pb "github.com/naiba/nezha/proto"
|
||||
)
|
||||
|
||||
var Version = "v0.8.6" // !!记得修改 README 中的 badge 版本!!
|
||||
var Version = "v0.9.0" // !!记得修改 README 中的 badge 版本!!
|
||||
|
||||
var (
|
||||
Conf *model.Config
|
||||
|
@ -12,7 +12,7 @@ import (
|
||||
pb "github.com/naiba/nezha/proto"
|
||||
)
|
||||
|
||||
const _CurrentStatusSize = 30 // 统计 5 分钟内的数据为当前状态
|
||||
const _CurrentStatusSize = 30 // 统计 15 分钟内的数据为当前状态
|
||||
|
||||
var ServiceSentinelShared *ServiceSentinel
|
||||
|
||||
|
@ -73,7 +73,21 @@ func (s *NezhaHandler) ReportSystemState(c context.Context, r *pb.State) (*pb.Re
|
||||
dao.ServerLock.RLock()
|
||||
defer dao.ServerLock.RUnlock()
|
||||
dao.ServerList[clientID].LastActive = time.Now()
|
||||
|
||||
// 判断是否是机器重启,如果是机器重启要录入最后记录的流量里面
|
||||
if state.Uptime < dao.ServerList[clientID].State.Uptime {
|
||||
dao.ServerList[clientID].PrevHourlyTransferIn = dao.ServerList[clientID].PrevHourlyTransferIn - int64(dao.ServerList[clientID].State.NetInTransfer)
|
||||
dao.ServerList[clientID].PrevHourlyTransferOut = dao.ServerList[clientID].PrevHourlyTransferOut - int64(dao.ServerList[clientID].State.NetOutTransfer)
|
||||
}
|
||||
|
||||
dao.ServerList[clientID].State = &state
|
||||
|
||||
// 如果从未记录过,先打点,等到小时时间点时入库
|
||||
if dao.ServerList[clientID].PrevHourlyTransferIn == 0 || dao.ServerList[clientID].PrevHourlyTransferOut == 0 {
|
||||
dao.ServerList[clientID].PrevHourlyTransferIn = int64(state.NetInTransfer)
|
||||
dao.ServerList[clientID].PrevHourlyTransferOut = int64(state.NetOutTransfer)
|
||||
}
|
||||
|
||||
return &pb.Receipt{Proced: true}, nil
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user