mirror of
https://github.com/nezhahq/nezha.git
synced 2025-01-22 12:48:14 -05:00
✨ dashboard: 服务监控请求时间间隔
This commit is contained in:
parent
0ea21598e8
commit
446ab3b1b8
@ -4,7 +4,7 @@
|
||||
<br>
|
||||
<small><i>LOGO designed by <a href="https://xio.ng" target="_blank">熊大</a> .</i></small>
|
||||
<br><br>
|
||||
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.9.32&logo=github&style=for-the-badge"> <img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge"> <img src="https://img.shields.io/badge/Installer-v0.7.0-brightgreen?style=for-the-badge&logo=linux">
|
||||
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.9.33&logo=github&style=for-the-badge"> <img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge"> <img src="https://img.shields.io/badge/Installer-v0.7.0-brightgreen?style=for-the-badge&logo=linux">
|
||||
<br>
|
||||
<br>
|
||||
<p>:trollface: <b>哪吒监控</b> 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p>
|
||||
|
@ -204,6 +204,7 @@ type monitorForm struct {
|
||||
Cover uint8
|
||||
Notify string
|
||||
SkipServersRaw string
|
||||
Duration uint64
|
||||
}
|
||||
|
||||
func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
|
||||
@ -218,6 +219,7 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
|
||||
m.SkipServersRaw = mf.SkipServersRaw
|
||||
m.Cover = mf.Cover
|
||||
m.Notify = mf.Notify == "on"
|
||||
m.Duration = mf.Duration
|
||||
}
|
||||
if err == nil {
|
||||
if m.ID == 0 {
|
||||
@ -226,14 +228,15 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
|
||||
err = dao.DB.Save(&m).Error
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
err = dao.ServiceSentinelShared.OnMonitorUpdate(m)
|
||||
}
|
||||
if err != nil {
|
||||
c.JSON(http.StatusOK, model.Response{
|
||||
Code: http.StatusBadRequest,
|
||||
Message: fmt.Sprintf("请求错误:%s", err),
|
||||
})
|
||||
return
|
||||
} else {
|
||||
dao.ServiceSentinelShared.OnMonitorUpdate()
|
||||
}
|
||||
c.JSON(http.StatusOK, model.Response{
|
||||
Code: http.StatusOK,
|
||||
|
@ -17,7 +17,11 @@ import (
|
||||
"github.com/naiba/nezha/service/dao"
|
||||
)
|
||||
|
||||
var serviceSentinelDispatchBus chan model.Monitor
|
||||
|
||||
func init() {
|
||||
serviceSentinelDispatchBus = make(chan model.Monitor)
|
||||
|
||||
shanghai, err := time.LoadLocation("Asia/Shanghai")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -55,7 +59,7 @@ func initSystem() {
|
||||
dao.DB.AutoMigrate(model.Server{}, model.User{},
|
||||
model.Notification{}, model.AlertRule{}, model.Monitor{},
|
||||
model.MonitorHistory{}, model.Cron{}, model.Transfer{})
|
||||
dao.NewServiceSentinel()
|
||||
dao.NewServiceSentinel(serviceSentinelDispatchBus)
|
||||
|
||||
loadServers() //加载服务器列表
|
||||
loadCrons() //加载计划任务
|
||||
@ -65,6 +69,7 @@ func initSystem() {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// 流量记录打点
|
||||
_, err = dao.Cron.AddFunc("0 * * * *", recordTransferHourlyUsage)
|
||||
if err != nil {
|
||||
@ -173,7 +178,7 @@ func loadCrons() {
|
||||
func main() {
|
||||
cleanMonitorHistory()
|
||||
go rpc.ServeRPC(dao.Conf.GRPCPort)
|
||||
go rpc.DispatchTask(time.Second * 30)
|
||||
go rpc.DispatchTask(serviceSentinelDispatchBus)
|
||||
go dao.AlertSentinelStart()
|
||||
srv := controller.ServeWeb(dao.Conf.HTTPPort)
|
||||
graceful.Graceful(func() error {
|
||||
|
@ -3,7 +3,6 @@ package rpc
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
@ -25,41 +24,36 @@ func ServeRPC(port uint) {
|
||||
server.Serve(listen)
|
||||
}
|
||||
|
||||
func DispatchTask(duration time.Duration) {
|
||||
var index uint64 = 0
|
||||
for {
|
||||
var hasAliveAgent bool
|
||||
tasks := dao.ServiceSentinelShared.Monitors()
|
||||
func DispatchTask(serviceSentinelDispatchBus <-chan model.Monitor) {
|
||||
workedServerIndex := 0
|
||||
for task := range serviceSentinelDispatchBus {
|
||||
round := 0
|
||||
prevIndex := workedServerIndex
|
||||
dao.SortedServerLock.RLock()
|
||||
startedAt := time.Now()
|
||||
for i := 0; i < len(tasks); i++ {
|
||||
if index >= uint64(len(dao.SortedServerList)) {
|
||||
index = 0
|
||||
if !hasAliveAgent {
|
||||
break
|
||||
}
|
||||
hasAliveAgent = false
|
||||
}
|
||||
|
||||
// 1. 如果服务器不在线,跳过这个服务器
|
||||
if dao.SortedServerList[index].TaskStream == nil {
|
||||
i--
|
||||
index++
|
||||
// 如果已经轮了一整圈没有合适机器去请求,跳出循环
|
||||
for round == 0 && prevIndex != workedServerIndex {
|
||||
// 如果到了圈尾,再回到圈头,圈数加一,游标重置
|
||||
if workedServerIndex == len(dao.SortedServerList) {
|
||||
workedServerIndex = 0
|
||||
round++
|
||||
continue
|
||||
}
|
||||
// 2. 如果此任务不可使用此服务器请求,跳过这个服务器(有些 IPv6 only 开了 NAT64 的机器请求 IPv4 总会出问题)
|
||||
if (tasks[i].Cover == model.MonitorCoverAll && tasks[i].SkipServers[dao.SortedServerList[index].ID]) ||
|
||||
(tasks[i].Cover == model.MonitorCoverIgnoreAll && !tasks[i].SkipServers[dao.SortedServerList[index].ID]) {
|
||||
i--
|
||||
index++
|
||||
// 如果服务器不在线,跳过这个服务器
|
||||
if dao.SortedServerList[workedServerIndex].TaskStream == nil {
|
||||
workedServerIndex++
|
||||
continue
|
||||
}
|
||||
|
||||
hasAliveAgent = true
|
||||
dao.SortedServerList[index].TaskStream.Send(tasks[i].PB())
|
||||
index++
|
||||
// 如果此任务不可使用此服务器请求,跳过这个服务器(有些 IPv6 only 开了 NAT64 的机器请求 IPv4 总会出问题)
|
||||
if (task.Cover == model.MonitorCoverAll && task.SkipServers[dao.SortedServerList[workedServerIndex].ID]) ||
|
||||
(task.Cover == model.MonitorCoverIgnoreAll && !task.SkipServers[dao.SortedServerList[workedServerIndex].ID]) {
|
||||
workedServerIndex++
|
||||
continue
|
||||
}
|
||||
// 找到合适机器执行任务,跳出循环
|
||||
dao.SortedServerList[workedServerIndex].TaskStream.Send(task.PB())
|
||||
workedServerIndex++
|
||||
break
|
||||
}
|
||||
dao.SortedServerLock.RUnlock()
|
||||
time.Sleep(time.Until(startedAt.Add(duration)))
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
package model
|
||||
|
||||
type ServiceItemResponse struct {
|
||||
Monitor Monitor
|
||||
Monitor *Monitor
|
||||
TotalUp uint64
|
||||
TotalDown uint64
|
||||
CurrentUp uint64
|
||||
|
@ -2,8 +2,10 @@ package model
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
pb "github.com/naiba/nezha/proto"
|
||||
"github.com/robfig/cron/v3"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
@ -36,9 +38,12 @@ type Monitor struct {
|
||||
Type uint8
|
||||
Target string
|
||||
SkipServersRaw string
|
||||
Duration uint64
|
||||
Notify bool
|
||||
Cover uint8
|
||||
SkipServers map[uint64]bool `gorm:"-" json:"-"`
|
||||
|
||||
SkipServers map[uint64]bool `gorm:"-" json:"-"`
|
||||
CronJobID cron.EntryID `gorm:"-" json:"-"`
|
||||
}
|
||||
|
||||
func (m *Monitor) PB() *pb.Task {
|
||||
@ -49,6 +54,14 @@ func (m *Monitor) PB() *pb.Task {
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Monitor) CronSpec() string {
|
||||
if m.Duration == 0 {
|
||||
// 默认间隔 30 秒
|
||||
m.Duration = 30
|
||||
}
|
||||
return fmt.Sprintf("@every %ds", m.Duration)
|
||||
}
|
||||
|
||||
func (m *Monitor) AfterFind(tx *gorm.DB) error {
|
||||
var skipServers []uint64
|
||||
if err := json.Unmarshal([]byte(m.SkipServersRaw), &skipServers); err != nil {
|
||||
|
@ -55,7 +55,8 @@ function showFormModal(modelSelector, formID, URL, getData) {
|
||||
item.name === "RequestMethod" ||
|
||||
item.name === "DisplayIndex" ||
|
||||
item.name === "Type" ||
|
||||
item.name === "Cover"
|
||||
item.name === "Cover" ||
|
||||
item.name === "Duration"
|
||||
) {
|
||||
obj[item.name] = parseInt(item.value);
|
||||
} else {
|
||||
@ -218,6 +219,7 @@ function addOrEditMonitor(monitor) {
|
||||
modal.find("input[name=ID]").val(monitor ? monitor.ID : null);
|
||||
modal.find("input[name=Name]").val(monitor ? monitor.Name : null);
|
||||
modal.find("input[name=Target]").val(monitor ? monitor.Target : null);
|
||||
modal.find("input[name=Duration]").val(monitor && monitor.Duration ? monitor.Duration : 30);
|
||||
modal.find("select[name=Type]").val(monitor ? monitor.Type : 1);
|
||||
modal.find("select[name=Cover]").val(monitor ? monitor.Cover : 0);
|
||||
if (monitor && monitor.Notify) {
|
||||
|
2
resource/template/common/footer.html
vendored
2
resource/template/common/footer.html
vendored
@ -9,7 +9,7 @@
|
||||
<script src="https://cdn.jsdelivr.net/npm/semantic-ui@2.4.1/dist/semantic.min.js"></script>
|
||||
<script src="/static/semantic-ui-alerts.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/vue@2.6.12/dist/vue.min.js"></script>
|
||||
<script src="/static/main.js?v20210819"></script>
|
||||
<script src="/static/main.js?v20210902"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
4
resource/template/component/monitor.html
vendored
4
resource/template/component/monitor.html
vendored
@ -24,6 +24,10 @@
|
||||
<option value="3">TCP-Ping</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>请求间隔</label>
|
||||
<input type="number" name="Duration" placeholder="秒" />
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>覆盖范围</label>
|
||||
<select name="Cover" class="ui fluid dropdown">
|
||||
|
@ -18,6 +18,7 @@
|
||||
<th>覆盖范围</th>
|
||||
<th>特定服务器</th>
|
||||
<th>类型</th>
|
||||
<th>请求间隔</th>
|
||||
<th>通知</th>
|
||||
<th>管理</th>
|
||||
</tr>
|
||||
@ -34,6 +35,7 @@
|
||||
{{if eq $monitor.Type 1}}HTTP(S)/SSL证书 {{else if eq $monitor.Type
|
||||
2}} ICMP Ping {{else}} TCP 端口 {{end}}
|
||||
</td>
|
||||
<td>{{$monitor.Duration}}秒</td>
|
||||
<td>{{$monitor.Notify}}</td>
|
||||
<td>
|
||||
<div class="ui mini icon buttons">
|
||||
|
@ -13,7 +13,7 @@ import (
|
||||
pb "github.com/naiba/nezha/proto"
|
||||
)
|
||||
|
||||
var Version = "v0.9.32" // !!记得修改 README 中的 badge 版本!!
|
||||
var Version = "v0.9.33" // !!记得修改 README 中的 badge 版本!!
|
||||
|
||||
var (
|
||||
Conf *model.Config
|
||||
|
@ -10,6 +10,7 @@ import (
|
||||
|
||||
"github.com/naiba/nezha/model"
|
||||
pb "github.com/naiba/nezha/proto"
|
||||
"github.com/robfig/cron/v3"
|
||||
)
|
||||
|
||||
// _CurrentStatusSize is the number of MonitorHistory slots kept per monitor as
// its "current status" window. The original note describes this as 15 minutes
// of data.
// NOTE(review): presumably that only holds at the default 30-second request
// interval now that the interval is configurable per monitor — confirm.
const _CurrentStatusSize = 30
|
||||
@ -27,7 +28,7 @@ type _TodayStatsOfMonitor struct {
|
||||
Delay float32
|
||||
}
|
||||
|
||||
func NewServiceSentinel() {
|
||||
func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
|
||||
ServiceSentinelShared = &ServiceSentinel{
|
||||
serviceReportChannel: make(chan ReportData, 200),
|
||||
serviceStatusToday: make(map[uint64]*_TodayStatsOfMonitor),
|
||||
@ -37,12 +38,15 @@ func NewServiceSentinel() {
|
||||
lastStatus: make(map[uint64]string),
|
||||
serviceResponseDataStoreCurrentUp: make(map[uint64]uint64),
|
||||
serviceResponseDataStoreCurrentDown: make(map[uint64]uint64),
|
||||
monitors: make(map[uint64]model.Monitor),
|
||||
monitors: make(map[uint64]*model.Monitor),
|
||||
sslCertCache: make(map[uint64]string),
|
||||
// 30天数据缓存
|
||||
monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
|
||||
dispatchCron: cron.New(cron.WithSeconds()),
|
||||
dispatchBus: serviceSentinelDispatchBus,
|
||||
}
|
||||
ServiceSentinelShared.OnMonitorUpdate()
|
||||
ServiceSentinelShared.loadMonitorHistory()
|
||||
ServiceSentinelShared.dispatchCron.Start()
|
||||
|
||||
year, month, day := time.Now().Date()
|
||||
today := time.Date(year, month, day, 0, 0, 0, 0, time.Local)
|
||||
@ -92,11 +96,14 @@ type ServiceSentinel struct {
|
||||
lastStatus map[uint64]string
|
||||
serviceResponseDataStoreCurrentUp map[uint64]uint64
|
||||
serviceResponseDataStoreCurrentDown map[uint64]uint64
|
||||
monitors map[uint64]model.Monitor
|
||||
monitors map[uint64]*model.Monitor
|
||||
sslCertCache map[uint64]string
|
||||
// 30天数据缓存
|
||||
monthlyStatusLock sync.Mutex
|
||||
monthlyStatus map[uint64]*model.ServiceItemResponse
|
||||
// 服务监控调度计划任务
|
||||
dispatchCron *cron.Cron
|
||||
dispatchBus chan<- model.Monitor
|
||||
}
|
||||
|
||||
func (ss *ServiceSentinel) refreshMonthlyServiceStatus() {
|
||||
@ -118,10 +125,10 @@ func (ss *ServiceSentinel) Dispatch(r ReportData) {
|
||||
ss.serviceReportChannel <- r
|
||||
}
|
||||
|
||||
func (ss *ServiceSentinel) Monitors() []model.Monitor {
|
||||
func (ss *ServiceSentinel) Monitors() []*model.Monitor {
|
||||
ss.monitorsLock.RLock()
|
||||
defer ss.monitorsLock.RUnlock()
|
||||
var monitors []model.Monitor
|
||||
var monitors []*model.Monitor
|
||||
for _, v := range ss.monitors {
|
||||
monitors = append(monitors, v)
|
||||
}
|
||||
@ -131,14 +138,21 @@ func (ss *ServiceSentinel) Monitors() []model.Monitor {
|
||||
return monitors
|
||||
}
|
||||
|
||||
func (ss *ServiceSentinel) OnMonitorUpdate() {
|
||||
var monitors []model.Monitor
|
||||
func (ss *ServiceSentinel) loadMonitorHistory() {
|
||||
var monitors []*model.Monitor
|
||||
DB.Find(&monitors)
|
||||
|
||||
var err error
|
||||
ss.monitorsLock.Lock()
|
||||
defer ss.monitorsLock.Unlock()
|
||||
ss.monitors = make(map[uint64]model.Monitor)
|
||||
ss.monitors = make(map[uint64]*model.Monitor)
|
||||
for i := 0; i < len(monitors); i++ {
|
||||
task := *monitors[i]
|
||||
monitors[i].CronJobID, err = ss.dispatchCron.AddFunc(task.CronSpec(), func() {
|
||||
ss.dispatchBus <- task
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
ss.monitors[monitors[i].ID] = monitors[i]
|
||||
if len(ss.serviceCurrentStatusData[monitors[i].ID]) == 0 {
|
||||
ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize)
|
||||
@ -178,6 +192,36 @@ func (ss *ServiceSentinel) OnMonitorUpdate() {
|
||||
}
|
||||
}
|
||||
|
||||
func (ss *ServiceSentinel) OnMonitorUpdate(m model.Monitor) error {
|
||||
ss.monitorsLock.Lock()
|
||||
defer ss.monitorsLock.Unlock()
|
||||
var err error
|
||||
// 写入新任务
|
||||
m.CronJobID, err = ss.dispatchCron.AddFunc(m.CronSpec(), func() {
|
||||
ss.dispatchBus <- m
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if ss.monitors[m.ID] != nil {
|
||||
// 停掉旧任务
|
||||
ss.dispatchCron.Remove(ss.monitors[m.ID].CronJobID)
|
||||
} else {
|
||||
// 新任务初始化数据
|
||||
ss.monthlyStatusLock.Lock()
|
||||
defer ss.monthlyStatusLock.Unlock()
|
||||
ss.monthlyStatus[m.ID] = &model.ServiceItemResponse{
|
||||
Monitor: &m,
|
||||
Delay: &[30]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
Up: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
Down: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
}
|
||||
}
|
||||
// 更新这个任务
|
||||
ss.monitors[m.ID] = &m
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
|
||||
ss.serviceResponseDataStoreLock.Lock()
|
||||
defer ss.serviceResponseDataStoreLock.Unlock()
|
||||
@ -190,6 +234,8 @@ func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
|
||||
delete(ss.sslCertCache, id)
|
||||
ss.monitorsLock.Lock()
|
||||
defer ss.monitorsLock.Unlock()
|
||||
// 停掉定时任务
|
||||
ss.dispatchCron.Remove(ss.monitors[id].CronJobID)
|
||||
delete(ss.monitors, id)
|
||||
ss.monthlyStatusLock.Lock()
|
||||
defer ss.monthlyStatusLock.Unlock()
|
||||
|
Loading…
Reference in New Issue
Block a user