v0.7.0 优化 Dashboard 内存占用

This commit is contained in:
naiba 2021-05-10 17:30:18 +08:00
parent fb686b2f57
commit 842233976b
5 changed files with 93 additions and 81 deletions

View File

@ -1,7 +1,7 @@
<div align="center" style="background-color: white"> <div align="center" style="background-color: white">
<img width="500" style="max-width:100%" src="https://raw.githubusercontent.com/naiba/nezha/master/resource/static/brand.png" title="哪吒监控"> <img width="500" style="max-width:100%" src="https://raw.githubusercontent.com/naiba/nezha/master/resource/static/brand.png" title="哪吒监控">
<br><br> <br><br>
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.6.7&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github">&nbsp;<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/badge/Installer-v0.5.0-brightgreen?style=for-the-badge&logo=linux"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.7.0&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github">&nbsp;<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/badge/Installer-v0.5.0-brightgreen?style=for-the-badge&logo=linux">
<br> <br>
<p>:trollface: 哪吒监控 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p> <p>:trollface: 哪吒监控 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p>
</div> </div>
@ -18,7 +18,7 @@
## 安装脚本 ## 安装脚本
**推荐配置:** 安装前解析 _两个域名_ 到面板服务器,一个作为 _公开访问_ ,可以 **接入 CDN**,比如 (status.nai.ba);另外一个作为安装 Agent 时连接 Dashboard 使用,**不能接入 CDN** 直接暴露面板主机 IP,比如 (randomdashboard.nai.ba) **推荐配置:** 安装前准备 _两个域名_,一个可以 **接入 CDN** 作为 _公开访问_,比如 (status.nai.ba);另外一个解析到面板服务器作为 Agent 连接 Dashboard 使用,**不能接入 CDN** 直接暴露面板主机 IP,比如 (randomdashboard.nai.ba)
```shell ```shell
curl -L https://raw.githubusercontent.com/naiba/nezha/master/script/install.sh -o nezha.sh && chmod +x nezha.sh curl -L https://raw.githubusercontent.com/naiba/nezha/master/script/install.sh -o nezha.sh && chmod +x nezha.sh
@ -59,11 +59,13 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
1. 添加通知方式 1. 添加通知方式
- server 酱示例 - server 酱示例
- 名称: server 酱 - 名称: server 酱
- URL: https://sc.ftqq.com/SCUrandomkeys.send?text=#NEZHA# - URL: https://sc.ftqq.com/SCUrandomkeys.send?text=#NEZHA#
- 请求方式: GET - 请求方式: GET
- 请求类型: 默认 - 请求类型: 默认
- Body: 空 - Body: 空
- wxpusher 示例,需要关注你的应用 - wxpusher 示例,需要关注你的应用
- 名称: wxpusher - 名称: wxpusher
@ -73,6 +75,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
- Body: `{"appToken":"你的appToken","topicIds":[],"content":"#NEZHA#","contentType":"1","uids":["你的uid"]}` - Body: `{"appToken":"你的appToken","topicIds":[],"content":"#NEZHA#","contentType":"1","uids":["你的uid"]}`
- telegram 示例 [@haitau](https://github.com/haitau) 贡献 - telegram 示例 [@haitau](https://github.com/haitau) 贡献
- 名称: telegram 机器人消息通知 - 名称: telegram 机器人消息通知
- URL: https://api.telegram.org/botXXXXXX/sendMessage?chat_id=YYYYYY&text=#NEZHA# - URL: https://api.telegram.org/botXXXXXX/sendMessage?chat_id=YYYYYY&text=#NEZHA#
- 请求方式: GET - 请求方式: GET
@ -122,7 +125,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
</style> </style>
``` ```
- DayNight 主题更改进度条颜色示例(来自 [@hyt-allen-xu](https://github.com/hyt-allen-xu),欢迎 PR) - DayNight 主题更改进度条颜色示例(来自 [@hyt-allen-xu](https://github.com/hyt-allen-xu))
``` ```
<style> <style>
@ -132,7 +135,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
</style> </style>
``` ```
- 默认主题修改 LOGO、移除版权示例(来自 [@iLay1678](https://github.com/iLay1678),欢迎 PR) - 默认主题修改 LOGO、移除版权示例(来自 [@iLay1678](https://github.com/iLay1678))
``` ```
<style> <style>
@ -157,7 +160,8 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
} }
</script> </script>
``` ```
- DayNight 移除版权示例(来自 [@hyt-allen-xu](https://github.com/hyt-allen-xu),欢迎 PR)
- DayNight 移除版权示例(来自 [@hyt-allen-xu](https://github.com/hyt-allen-xu))
``` ```
<script> <script>

View File

@ -80,7 +80,7 @@ func main() {
var debug bool var debug bool
flag.String("i", "", "unused 旧Agent配置兼容") flag.String("i", "", "unused 旧Agent配置兼容")
flag.BoolVar(&debug, "d", false, "允许不安全连接") flag.BoolVar(&debug, "d", false, "开启调试信息")
flag.StringVar(&server, "s", "localhost:5555", "管理面板RPC端口") flag.StringVar(&server, "s", "localhost:5555", "管理面板RPC端口")
flag.StringVar(&clientSecret, "p", "", "Agent连接Secret") flag.StringVar(&clientSecret, "p", "", "Agent连接Secret")
flag.Parse() flag.Parse()

View File

@ -15,7 +15,7 @@ Type=simple
User=root User=root
Group=root Group=root
WorkingDirectory=/opt/nezha/agent/ WorkingDirectory=/opt/nezha/agent/
ExecStart=/opt/nezha/agent/nezha-agent -d -s nz_grpc_host:nz_grpc_port -p nz_client_secret ExecStart=/opt/nezha/agent/nezha-agent -s nz_grpc_host:nz_grpc_port -p nz_client_secret
Restart=always Restart=always
#Environment=DEBUG=true #Environment=DEBUG=true

View File

@ -13,7 +13,7 @@ import (
pb "github.com/naiba/nezha/proto" pb "github.com/naiba/nezha/proto"
) )
var Version = "v0.6.7" // !!记得修改 README 中的 badge 版本!! var Version = "v0.7.0" // !!记得修改 README 中的 badge 版本!!
const ( const (
SnapshotDelay = 3 SnapshotDelay = 3

View File

@ -3,6 +3,7 @@ package dao
import ( import (
"fmt" "fmt"
"log" "log"
"sort"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -15,20 +16,29 @@ const _CurrentStatusSize = 30 // 统计 5 分钟内的数据为当前状态
var ServiceSentinelShared *ServiceSentinel var ServiceSentinelShared *ServiceSentinel
type ReportData struct {
Data *pb.TaskResult
Reporter uint64
}
type _TodayStatsOfMonitor struct {
Up int
Down int
Delay float32
}
func NewServiceSentinel() { func NewServiceSentinel() {
ServiceSentinelShared = &ServiceSentinel{ ServiceSentinelShared = &ServiceSentinel{
serviceResponseChannel: make(chan ReportData, 200), serviceReportChannel: make(chan ReportData, 200),
serviceResponseDataStoreTodaySavedIndex: make(map[uint64]int), serviceStatusToday: make(map[uint64]*_TodayStatsOfMonitor),
serviceCurrentStatusIndex: make(map[uint64]int), serviceCurrentStatusIndex: make(map[uint64]int),
serviceCurrentStatusData: make(map[uint64][]model.MonitorHistory), serviceCurrentStatusData: make(map[uint64][]model.MonitorHistory),
serviceResponseDataStoreTodayLastSave: make(map[uint64]time.Time), latestDate: make(map[uint64]string),
latestDate: make(map[uint64]string), lastStatus: make(map[uint64]string),
lastStatus: make(map[uint64]string), serviceResponseDataStoreCurrentUp: make(map[uint64]uint64),
serviceResponseDataStoreCurrentUp: make(map[uint64]uint64), serviceResponseDataStoreCurrentDown: make(map[uint64]uint64),
serviceResponseDataStoreCurrentDown: make(map[uint64]uint64), monitors: make(map[uint64]model.Monitor),
monitors: make(map[uint64]model.Monitor), sslCertCache: make(map[uint64]string),
serviceResponseDataStoreToday: make(map[uint64][]model.MonitorHistory),
sslCertCache: make(map[uint64]string),
} }
ServiceSentinelShared.OnMonitorUpdate() ServiceSentinelShared.OnMonitorUpdate()
@ -38,48 +48,48 @@ func NewServiceSentinel() {
DB.Where("created_at >= ?", today).Find(&mhs) DB.Where("created_at >= ?", today).Find(&mhs)
// 加载当日记录 // 加载当日记录
totalDelay := make(map[uint64]float32)
for i := 0; i < len(mhs); i++ { for i := 0; i < len(mhs); i++ {
ServiceSentinelShared.serviceResponseDataStoreToday[mhs[i].MonitorID] = if mhs[i].Successful {
append(ServiceSentinelShared.serviceResponseDataStoreToday[mhs[i].MonitorID], mhs[i]) ServiceSentinelShared.serviceStatusToday[mhs[i].MonitorID].Up++
totalDelay[mhs[i].MonitorID] += mhs[i].Delay
} else {
ServiceSentinelShared.serviceStatusToday[mhs[i].MonitorID].Down++
}
}
for id, delay := range totalDelay {
ServiceSentinelShared.serviceStatusToday[id].Delay = delay / float32(ServiceSentinelShared.serviceStatusToday[id].Up)
} }
// 更新入库时间及当日数据入库游标 // 更新入库时间及当日数据入库游标
for k := range ServiceSentinelShared.monitors { for k := range ServiceSentinelShared.monitors {
ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06") ServiceSentinelShared.latestDate[k] = time.Now().Format("02-Jan-06")
ServiceSentinelShared.serviceResponseDataStoreTodaySavedIndex[k] = len(ServiceSentinelShared.serviceResponseDataStoreToday[k])
} }
go ServiceSentinelShared.worker() go ServiceSentinelShared.worker()
} }
type ReportData struct {
Data *pb.TaskResult
Reporter uint64
}
/* /*
使用缓存 channel,处理上报的 Service 请求结果,然后判断是否需要报警 使用缓存 channel,处理上报的 Service 请求结果,然后判断是否需要报警
需要记录上一次的状态信息 需要记录上一次的状态信息
*/ */
type ServiceSentinel struct { type ServiceSentinel struct {
serviceResponseDataStoreLock sync.RWMutex serviceResponseDataStoreLock sync.RWMutex
monitorsLock sync.RWMutex monitorsLock sync.RWMutex
serviceResponseChannel chan ReportData serviceReportChannel chan ReportData
serviceResponseDataStoreTodaySavedIndex map[uint64]int serviceStatusToday map[uint64]*_TodayStatsOfMonitor
serviceCurrentStatusIndex map[uint64]int serviceCurrentStatusIndex map[uint64]int
serviceCurrentStatusData map[uint64][]model.MonitorHistory serviceCurrentStatusData map[uint64][]model.MonitorHistory
serviceResponseDataStoreTodayLastSave map[uint64]time.Time latestDate map[uint64]string
latestDate map[uint64]string lastStatus map[uint64]string
lastStatus map[uint64]string serviceResponseDataStoreCurrentUp map[uint64]uint64
serviceResponseDataStoreCurrentUp map[uint64]uint64 serviceResponseDataStoreCurrentDown map[uint64]uint64
serviceResponseDataStoreCurrentDown map[uint64]uint64 monitors map[uint64]model.Monitor
monitors map[uint64]model.Monitor sslCertCache map[uint64]string
serviceResponseDataStoreToday map[uint64][]model.MonitorHistory
sslCertCache map[uint64]string
} }
func (ss *ServiceSentinel) Dispatch(r ReportData) { func (ss *ServiceSentinel) Dispatch(r ReportData) {
ss.serviceResponseChannel <- r ss.serviceReportChannel <- r
} }
func (ss *ServiceSentinel) Monitors() []model.Monitor { func (ss *ServiceSentinel) Monitors() []model.Monitor {
@ -89,6 +99,9 @@ func (ss *ServiceSentinel) Monitors() []model.Monitor {
for _, v := range ss.monitors { for _, v := range ss.monitors {
monitors = append(monitors, v) monitors = append(monitors, v)
} }
sort.SliceStable(monitors, func(i, j int) bool {
return monitors[i].ID < monitors[j].ID
})
return monitors return monitors
} }
@ -103,21 +116,21 @@ func (ss *ServiceSentinel) OnMonitorUpdate() {
if len(ss.serviceCurrentStatusData[monitors[i].ID]) == 0 { if len(ss.serviceCurrentStatusData[monitors[i].ID]) == 0 {
ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize) ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize)
} }
if ss.serviceStatusToday[monitors[i].ID] == nil {
ss.serviceStatusToday[monitors[i].ID] = &_TodayStatsOfMonitor{}
}
} }
} }
func (ss *ServiceSentinel) OnMonitorDelete(id uint64) { func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
ss.serviceResponseDataStoreLock.Lock() ss.serviceResponseDataStoreLock.Lock()
defer ss.serviceResponseDataStoreLock.Unlock() defer ss.serviceResponseDataStoreLock.Unlock()
delete(ss.serviceResponseDataStoreTodaySavedIndex, id)
delete(ss.serviceCurrentStatusIndex, id) delete(ss.serviceCurrentStatusIndex, id)
delete(ss.serviceCurrentStatusData, id) delete(ss.serviceCurrentStatusData, id)
delete(ss.serviceResponseDataStoreTodayLastSave, id)
delete(ss.latestDate, id) delete(ss.latestDate, id)
delete(ss.lastStatus, id) delete(ss.lastStatus, id)
delete(ss.serviceResponseDataStoreCurrentUp, id) delete(ss.serviceResponseDataStoreCurrentUp, id)
delete(ss.serviceResponseDataStoreCurrentDown, id) delete(ss.serviceResponseDataStoreCurrentDown, id)
delete(ss.serviceResponseDataStoreToday, id)
delete(ss.sslCertCache, id) delete(ss.sslCertCache, id)
ss.monitorsLock.Lock() ss.monitorsLock.Lock()
defer ss.monitorsLock.Unlock() defer ss.monitorsLock.Unlock()
@ -176,17 +189,12 @@ func (ss *ServiceSentinel) LoadStats() map[uint64]*model.ServiceItemResponse {
} }
} }
msm[k].Monitor = ss.monitors[k] msm[k].Monitor = ss.monitors[k]
v := ss.serviceResponseDataStoreToday[k] v := ss.serviceStatusToday[k]
for i := 0; i < len(v); i++ { msm[k].Up[29] = v.Up
if v[i].Successful { msm[k].Down[29] = v.Down
msm[k].Up[29]++ msm[k].TotalUp += uint64(v.Up)
msm[k].TotalUp++ msm[k].TotalDown += uint64(v.Down)
} else { msm[k].Delay[29] = v.Delay
msm[k].Down[29]++
msm[k].TotalDown++
}
msm[k].Delay[29] = (msm[k].Delay[29]*float32(msm[k].Up[29]) + v[i].Delay) / float32(msm[k].Up[29]+1)
}
} }
// 最后 5 分钟的状态 与 monitor 对象填充 // 最后 5 分钟的状态 与 monitor 对象填充
for k, v := range ss.serviceResponseDataStoreCurrentDown { for k, v := range ss.serviceResponseDataStoreCurrentDown {
@ -212,7 +220,7 @@ func getStateStr(percent uint64) string {
} }
func (ss *ServiceSentinel) worker() { func (ss *ServiceSentinel) worker() {
for r := range ss.serviceResponseChannel { for r := range ss.serviceReportChannel {
if ss.monitors[r.Data.GetId()].ID == 0 { if ss.monitors[r.Data.GetId()].ID == 0 {
continue continue
} }
@ -221,33 +229,33 @@ func (ss *ServiceSentinel) worker() {
// 先查看是否到下一天 // 先查看是否到下一天
nowDate := time.Now().Format("02-Jan-06") nowDate := time.Now().Format("02-Jan-06")
if nowDate != ss.latestDate[mh.MonitorID] { if nowDate != ss.latestDate[mh.MonitorID] {
// 清理前一天数据
ss.latestDate[mh.MonitorID] = nowDate ss.latestDate[mh.MonitorID] = nowDate
dataToSave := ss.serviceResponseDataStoreToday[mh.MonitorID][ss.serviceResponseDataStoreTodaySavedIndex[mh.MonitorID]:]
if err := DB.Create(&dataToSave).Error; err != nil {
log.Println(err)
}
ss.serviceResponseDataStoreTodaySavedIndex[mh.MonitorID] = 0
ss.serviceResponseDataStoreToday[mh.MonitorID] = []model.MonitorHistory{}
ss.serviceResponseDataStoreCurrentDown[mh.MonitorID] = 0
ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] = 0 ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] = 0
ss.serviceResponseDataStoreTodayLastSave[mh.MonitorID] = time.Now() ss.serviceResponseDataStoreCurrentDown[mh.MonitorID] = 0
ss.serviceStatusToday[mh.MonitorID].Delay = 0
ss.serviceStatusToday[mh.MonitorID].Up = 0
ss.serviceStatusToday[mh.MonitorID].Down = 0
} }
// 储存至当日数据 // 写入当天状态
ss.serviceResponseDataStoreToday[mh.MonitorID] = append(ss.serviceResponseDataStoreToday[mh.MonitorID], mh) if mh.Successful {
// 每20分钟入库一次 ss.serviceStatusToday[mh.MonitorID].Delay = (ss.serviceStatusToday[mh.
if time.Now().After(ss.serviceResponseDataStoreTodayLastSave[mh.MonitorID].Add(time.Minute * 20)) { MonitorID].Delay*float32(ss.serviceStatusToday[mh.MonitorID].Up) +
ss.serviceResponseDataStoreTodayLastSave[mh.MonitorID] = time.Now() mh.Delay) / float32(ss.serviceStatusToday[mh.MonitorID].Up+1)
dataToSave := ss.serviceResponseDataStoreToday[mh.MonitorID][ss.serviceResponseDataStoreTodaySavedIndex[mh.MonitorID]:] ss.serviceStatusToday[mh.MonitorID].Up++
if err := DB.Create(&dataToSave).Error; err != nil { } else {
log.Println(err) ss.serviceStatusToday[mh.MonitorID].Down++
}
ss.serviceResponseDataStoreTodaySavedIndex[mh.MonitorID] = len(ss.serviceResponseDataStoreToday[mh.MonitorID])
} }
// 写入当前数据 // 写入当前数据
ss.serviceCurrentStatusData[mh.MonitorID][ss.serviceCurrentStatusIndex[mh.MonitorID]] = mh ss.serviceCurrentStatusData[mh.MonitorID][ss.serviceCurrentStatusIndex[mh.MonitorID]] = mh
ss.serviceCurrentStatusIndex[mh.MonitorID]++ ss.serviceCurrentStatusIndex[mh.MonitorID]++
// 数据持久化
if ss.serviceCurrentStatusIndex[mh.MonitorID] == _CurrentStatusSize { if ss.serviceCurrentStatusIndex[mh.MonitorID] == _CurrentStatusSize {
ss.serviceCurrentStatusIndex[mh.MonitorID] = 0 ss.serviceCurrentStatusIndex[mh.MonitorID] = 0
dataToSave := ss.serviceCurrentStatusData[mh.MonitorID]
if err := DB.Create(&dataToSave).Error; err != nil {
log.Println(err)
}
} }
// 更新当前状态 // 更新当前状态
ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] = 0 ss.serviceResponseDataStoreCurrentUp[mh.MonitorID] = 0
@ -267,13 +275,13 @@ func (ss *ServiceSentinel) worker() {
} }
stateStr := getStateStr(upPercent) stateStr := getStateStr(upPercent)
if Conf.Debug { if Conf.Debug {
log.Println(ss.monitors[mh.MonitorID].Target, stateStr, "Reporter:", r.Reporter, "Successful:", mh.Successful, "Data:", mh.Data) log.Println(ss.monitors[mh.MonitorID].Target, stateStr, "Agent:", r.Reporter, "Successful:", mh.Successful, "Response:", mh.Data)
} }
if stateStr == "故障" || stateStr != ss.lastStatus[mh.MonitorID] { if stateStr == "故障" || stateStr != ss.lastStatus[mh.MonitorID] {
ss.monitorsLock.RLock() ss.monitorsLock.RLock()
isSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify isNeedSendNotification := (ss.lastStatus[mh.MonitorID] != "" || stateStr == "故障") && ss.monitors[mh.MonitorID].Notify
ss.lastStatus[mh.MonitorID] = stateStr ss.lastStatus[mh.MonitorID] = stateStr
if isSendNotification { if isNeedSendNotification {
go SendNotification(fmt.Sprintf("服务监控:%s 服务状态:%s", ss.monitors[mh.MonitorID].Name, stateStr), true) go SendNotification(fmt.Sprintf("服务监控:%s 服务状态:%s", ss.monitors[mh.MonitorID].Name, stateStr), true)
} }
ss.monitorsLock.RUnlock() ss.monitorsLock.RUnlock()