dashboard: 服务监控请求时间间隔

This commit is contained in:
naiba 2021-09-02 23:45:21 +08:00
parent 0ea21598e8
commit 446ab3b1b8
12 changed files with 119 additions and 50 deletions

View File

@ -4,7 +4,7 @@
<br> <br>
<small><i>LOGO designed by <a href="https://xio.ng" target="_blank">熊大</a> .</i></small> <small><i>LOGO designed by <a href="https://xio.ng" target="_blank">熊大</a> .</i></small>
<br><br> <br><br>
<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.9.32&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github">&nbsp;<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/badge/Installer-v0.7.0-brightgreen?style=for-the-badge&logo=linux"> <img src="https://img.shields.io/github/workflow/status/naiba/nezha/Dashboard%20image?label=Dash%20v0.9.33&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/github/v/release/naiba/nezha?color=brightgreen&label=Agent&style=for-the-badge&logo=github">&nbsp;<img src="https://img.shields.io/github/workflow/status/naiba/nezha/Agent%20release?label=Agent%20CI&logo=github&style=for-the-badge">&nbsp;<img src="https://img.shields.io/badge/Installer-v0.7.0-brightgreen?style=for-the-badge&logo=linux">
<br> <br>
<br> <br>
<p>:trollface: <b>哪吒监控</b> 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p> <p>:trollface: <b>哪吒监控</b> 一站式轻监控轻运维系统。支持系统状态、HTTP(SSL 证书变更、即将到期、到期)、TCP、Ping 监控报警,命令批量执行和计划任务。</p>

View File

@ -204,6 +204,7 @@ type monitorForm struct {
Cover uint8 Cover uint8
Notify string Notify string
SkipServersRaw string SkipServersRaw string
Duration uint64
} }
func (ma *memberAPI) addOrEditMonitor(c *gin.Context) { func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
@ -218,6 +219,7 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
m.SkipServersRaw = mf.SkipServersRaw m.SkipServersRaw = mf.SkipServersRaw
m.Cover = mf.Cover m.Cover = mf.Cover
m.Notify = mf.Notify == "on" m.Notify = mf.Notify == "on"
m.Duration = mf.Duration
} }
if err == nil { if err == nil {
if m.ID == 0 { if m.ID == 0 {
@ -226,14 +228,15 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
err = dao.DB.Save(&m).Error err = dao.DB.Save(&m).Error
} }
} }
if err == nil {
err = dao.ServiceSentinelShared.OnMonitorUpdate(m)
}
if err != nil { if err != nil {
c.JSON(http.StatusOK, model.Response{ c.JSON(http.StatusOK, model.Response{
Code: http.StatusBadRequest, Code: http.StatusBadRequest,
Message: fmt.Sprintf("请求错误:%s", err), Message: fmt.Sprintf("请求错误:%s", err),
}) })
return return
} else {
dao.ServiceSentinelShared.OnMonitorUpdate()
} }
c.JSON(http.StatusOK, model.Response{ c.JSON(http.StatusOK, model.Response{
Code: http.StatusOK, Code: http.StatusOK,

View File

@ -17,7 +17,11 @@ import (
"github.com/naiba/nezha/service/dao" "github.com/naiba/nezha/service/dao"
) )
var serviceSentinelDispatchBus chan model.Monitor
func init() { func init() {
serviceSentinelDispatchBus = make(chan model.Monitor)
shanghai, err := time.LoadLocation("Asia/Shanghai") shanghai, err := time.LoadLocation("Asia/Shanghai")
if err != nil { if err != nil {
panic(err) panic(err)
@ -55,7 +59,7 @@ func initSystem() {
dao.DB.AutoMigrate(model.Server{}, model.User{}, dao.DB.AutoMigrate(model.Server{}, model.User{},
model.Notification{}, model.AlertRule{}, model.Monitor{}, model.Notification{}, model.AlertRule{}, model.Monitor{},
model.MonitorHistory{}, model.Cron{}, model.Transfer{}) model.MonitorHistory{}, model.Cron{}, model.Transfer{})
dao.NewServiceSentinel() dao.NewServiceSentinel(serviceSentinelDispatchBus)
loadServers() //加载服务器列表 loadServers() //加载服务器列表
loadCrons() //加载计划任务 loadCrons() //加载计划任务
@ -65,6 +69,7 @@ func initSystem() {
if err != nil { if err != nil {
panic(err) panic(err)
} }
// 流量记录打点 // 流量记录打点
_, err = dao.Cron.AddFunc("0 * * * *", recordTransferHourlyUsage) _, err = dao.Cron.AddFunc("0 * * * *", recordTransferHourlyUsage)
if err != nil { if err != nil {
@ -173,7 +178,7 @@ func loadCrons() {
func main() { func main() {
cleanMonitorHistory() cleanMonitorHistory()
go rpc.ServeRPC(dao.Conf.GRPCPort) go rpc.ServeRPC(dao.Conf.GRPCPort)
go rpc.DispatchTask(time.Second * 30) go rpc.DispatchTask(serviceSentinelDispatchBus)
go dao.AlertSentinelStart() go dao.AlertSentinelStart()
srv := controller.ServeWeb(dao.Conf.HTTPPort) srv := controller.ServeWeb(dao.Conf.HTTPPort)
graceful.Graceful(func() error { graceful.Graceful(func() error {

View File

@ -3,7 +3,6 @@ package rpc
import ( import (
"fmt" "fmt"
"net" "net"
"time"
"google.golang.org/grpc" "google.golang.org/grpc"
@ -25,41 +24,36 @@ func ServeRPC(port uint) {
server.Serve(listen) server.Serve(listen)
} }
func DispatchTask(duration time.Duration) { func DispatchTask(serviceSentinelDispatchBus <-chan model.Monitor) {
var index uint64 = 0 workedServerIndex := 0
for { for task := range serviceSentinelDispatchBus {
var hasAliveAgent bool round := 0
tasks := dao.ServiceSentinelShared.Monitors() prevIndex := workedServerIndex
dao.SortedServerLock.RLock() dao.SortedServerLock.RLock()
startedAt := time.Now() // 如果已经轮了一整圈没有合适机器去请求,跳出循环
for i := 0; i < len(tasks); i++ { for round == 0 && prevIndex != workedServerIndex {
if index >= uint64(len(dao.SortedServerList)) { // 如果到了圈尾,再回到圈头,圈数加一,游标重置
index = 0 if workedServerIndex == len(dao.SortedServerList) {
if !hasAliveAgent { workedServerIndex = 0
break round++
}
hasAliveAgent = false
}
// 1. 如果服务器不在线,跳过这个服务器
if dao.SortedServerList[index].TaskStream == nil {
i--
index++
continue continue
} }
// 2. 如果此任务不可使用此服务器请求,跳过这个服务器(有些 IPv6 only 开了 NAT64 的机器请求 IPv4 总会出问题) // 如果服务器不在线,跳过这个服务器
if (tasks[i].Cover == model.MonitorCoverAll && tasks[i].SkipServers[dao.SortedServerList[index].ID]) || if dao.SortedServerList[workedServerIndex].TaskStream == nil {
(tasks[i].Cover == model.MonitorCoverIgnoreAll && !tasks[i].SkipServers[dao.SortedServerList[index].ID]) { workedServerIndex++
i--
index++
continue continue
} }
// 如果此任务不可使用此服务器请求,跳过这个服务器(有些 IPv6 only 开了 NAT64 的机器请求 IPv4 总会出问题)
hasAliveAgent = true if (task.Cover == model.MonitorCoverAll && task.SkipServers[dao.SortedServerList[workedServerIndex].ID]) ||
dao.SortedServerList[index].TaskStream.Send(tasks[i].PB()) (task.Cover == model.MonitorCoverIgnoreAll && !task.SkipServers[dao.SortedServerList[workedServerIndex].ID]) {
index++ workedServerIndex++
continue
}
// 找到合适机器执行任务,跳出循环
dao.SortedServerList[workedServerIndex].TaskStream.Send(task.PB())
workedServerIndex++
break
} }
dao.SortedServerLock.RUnlock() dao.SortedServerLock.RUnlock()
time.Sleep(time.Until(startedAt.Add(duration)))
} }
} }

View File

@ -1,7 +1,7 @@
package model package model
type ServiceItemResponse struct { type ServiceItemResponse struct {
Monitor Monitor Monitor *Monitor
TotalUp uint64 TotalUp uint64
TotalDown uint64 TotalDown uint64
CurrentUp uint64 CurrentUp uint64

View File

@ -2,8 +2,10 @@ package model
import ( import (
"encoding/json" "encoding/json"
"fmt"
pb "github.com/naiba/nezha/proto" pb "github.com/naiba/nezha/proto"
"github.com/robfig/cron/v3"
"gorm.io/gorm" "gorm.io/gorm"
) )
@ -36,9 +38,12 @@ type Monitor struct {
Type uint8 Type uint8
Target string Target string
SkipServersRaw string SkipServersRaw string
Duration uint64
Notify bool Notify bool
Cover uint8 Cover uint8
SkipServers map[uint64]bool `gorm:"-" json:"-"`
SkipServers map[uint64]bool `gorm:"-" json:"-"`
CronJobID cron.EntryID `gorm:"-" json:"-"`
} }
func (m *Monitor) PB() *pb.Task { func (m *Monitor) PB() *pb.Task {
@ -49,6 +54,14 @@ func (m *Monitor) PB() *pb.Task {
} }
} }
// CronSpec returns the cron descriptor for this monitor's check
// interval. A zero Duration is treated as unset and is replaced by the
// 30-second default; note this writes the default back onto the
// monitor so subsequent reads (e.g. the admin UI) see the effective value.
func (m *Monitor) CronSpec() string {
	const defaultIntervalSeconds = 30
	if m.Duration == 0 {
		m.Duration = defaultIntervalSeconds
	}
	return fmt.Sprintf("@every %ds", m.Duration)
}
func (m *Monitor) AfterFind(tx *gorm.DB) error { func (m *Monitor) AfterFind(tx *gorm.DB) error {
var skipServers []uint64 var skipServers []uint64
if err := json.Unmarshal([]byte(m.SkipServersRaw), &skipServers); err != nil { if err := json.Unmarshal([]byte(m.SkipServersRaw), &skipServers); err != nil {

View File

@ -55,7 +55,8 @@ function showFormModal(modelSelector, formID, URL, getData) {
item.name === "RequestMethod" || item.name === "RequestMethod" ||
item.name === "DisplayIndex" || item.name === "DisplayIndex" ||
item.name === "Type" || item.name === "Type" ||
item.name === "Cover" item.name === "Cover" ||
item.name === "Duration"
) { ) {
obj[item.name] = parseInt(item.value); obj[item.name] = parseInt(item.value);
} else { } else {
@ -218,6 +219,7 @@ function addOrEditMonitor(monitor) {
modal.find("input[name=ID]").val(monitor ? monitor.ID : null); modal.find("input[name=ID]").val(monitor ? monitor.ID : null);
modal.find("input[name=Name]").val(monitor ? monitor.Name : null); modal.find("input[name=Name]").val(monitor ? monitor.Name : null);
modal.find("input[name=Target]").val(monitor ? monitor.Target : null); modal.find("input[name=Target]").val(monitor ? monitor.Target : null);
modal.find("input[name=Duration]").val(monitor && monitor.Duration ? monitor.Duration : 30);
modal.find("select[name=Type]").val(monitor ? monitor.Type : 1); modal.find("select[name=Type]").val(monitor ? monitor.Type : 1);
modal.find("select[name=Cover]").val(monitor ? monitor.Cover : 0); modal.find("select[name=Cover]").val(monitor ? monitor.Cover : 0);
if (monitor && monitor.Notify) { if (monitor && monitor.Notify) {

View File

@ -9,7 +9,7 @@
<script src="https://cdn.jsdelivr.net/npm/semantic-ui@2.4.1/dist/semantic.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/semantic-ui@2.4.1/dist/semantic.min.js"></script>
<script src="/static/semantic-ui-alerts.min.js"></script> <script src="/static/semantic-ui-alerts.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/vue@2.6.12/dist/vue.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/vue@2.6.12/dist/vue.min.js"></script>
<script src="/static/main.js?v20210819"></script> <script src="/static/main.js?v20210902"></script>
</body> </body>
</html> </html>

View File

@ -24,6 +24,10 @@
<option value="3">TCP-Ping</option> <option value="3">TCP-Ping</option>
</select> </select>
</div> </div>
<div class="field">
<label>请求间隔</label>
<input type="number" name="Duration" placeholder="秒" />
</div>
<div class="field"> <div class="field">
<label>覆盖范围</label> <label>覆盖范围</label>
<select name="Cover" class="ui fluid dropdown"> <select name="Cover" class="ui fluid dropdown">

View File

@ -18,6 +18,7 @@
<th>覆盖范围</th> <th>覆盖范围</th>
<th>特定服务器</th> <th>特定服务器</th>
<th>类型</th> <th>类型</th>
<th>请求间隔</th>
<th>通知</th> <th>通知</th>
<th>管理</th> <th>管理</th>
</tr> </tr>
@ -34,6 +35,7 @@
{{if eq $monitor.Type 1}}HTTP(S)/SSL证书 {{else if eq $monitor.Type {{if eq $monitor.Type 1}}HTTP(S)/SSL证书 {{else if eq $monitor.Type
2}} ICMP Ping {{else}} TCP 端口 {{end}} 2}} ICMP Ping {{else}} TCP 端口 {{end}}
</td> </td>
<td>{{$monitor.Duration}}秒</td>
<td>{{$monitor.Notify}}</td> <td>{{$monitor.Notify}}</td>
<td> <td>
<div class="ui mini icon buttons"> <div class="ui mini icon buttons">

View File

@ -13,7 +13,7 @@ import (
pb "github.com/naiba/nezha/proto" pb "github.com/naiba/nezha/proto"
) )
var Version = "v0.9.32" // !!记得修改 README 中的 badge 版本!! var Version = "v0.9.33" // !!记得修改 README 中的 badge 版本!!
var ( var (
Conf *model.Config Conf *model.Config

View File

@ -10,6 +10,7 @@ import (
"github.com/naiba/nezha/model" "github.com/naiba/nezha/model"
pb "github.com/naiba/nezha/proto" pb "github.com/naiba/nezha/proto"
"github.com/robfig/cron/v3"
) )
const _CurrentStatusSize = 30 // 统计 15 分钟内的数据为当前状态 const _CurrentStatusSize = 30 // 统计 15 分钟内的数据为当前状态
@ -27,7 +28,7 @@ type _TodayStatsOfMonitor struct {
Delay float32 Delay float32
} }
func NewServiceSentinel() { func NewServiceSentinel(serviceSentinelDispatchBus chan<- model.Monitor) {
ServiceSentinelShared = &ServiceSentinel{ ServiceSentinelShared = &ServiceSentinel{
serviceReportChannel: make(chan ReportData, 200), serviceReportChannel: make(chan ReportData, 200),
serviceStatusToday: make(map[uint64]*_TodayStatsOfMonitor), serviceStatusToday: make(map[uint64]*_TodayStatsOfMonitor),
@ -37,12 +38,15 @@ func NewServiceSentinel() {
lastStatus: make(map[uint64]string), lastStatus: make(map[uint64]string),
serviceResponseDataStoreCurrentUp: make(map[uint64]uint64), serviceResponseDataStoreCurrentUp: make(map[uint64]uint64),
serviceResponseDataStoreCurrentDown: make(map[uint64]uint64), serviceResponseDataStoreCurrentDown: make(map[uint64]uint64),
monitors: make(map[uint64]model.Monitor), monitors: make(map[uint64]*model.Monitor),
sslCertCache: make(map[uint64]string), sslCertCache: make(map[uint64]string),
// 30天数据缓存 // 30天数据缓存
monthlyStatus: make(map[uint64]*model.ServiceItemResponse), monthlyStatus: make(map[uint64]*model.ServiceItemResponse),
dispatchCron: cron.New(cron.WithSeconds()),
dispatchBus: serviceSentinelDispatchBus,
} }
ServiceSentinelShared.OnMonitorUpdate() ServiceSentinelShared.loadMonitorHistory()
ServiceSentinelShared.dispatchCron.Start()
year, month, day := time.Now().Date() year, month, day := time.Now().Date()
today := time.Date(year, month, day, 0, 0, 0, 0, time.Local) today := time.Date(year, month, day, 0, 0, 0, 0, time.Local)
@ -92,11 +96,14 @@ type ServiceSentinel struct {
lastStatus map[uint64]string lastStatus map[uint64]string
serviceResponseDataStoreCurrentUp map[uint64]uint64 serviceResponseDataStoreCurrentUp map[uint64]uint64
serviceResponseDataStoreCurrentDown map[uint64]uint64 serviceResponseDataStoreCurrentDown map[uint64]uint64
monitors map[uint64]model.Monitor monitors map[uint64]*model.Monitor
sslCertCache map[uint64]string sslCertCache map[uint64]string
// 30天数据缓存 // 30天数据缓存
monthlyStatusLock sync.Mutex monthlyStatusLock sync.Mutex
monthlyStatus map[uint64]*model.ServiceItemResponse monthlyStatus map[uint64]*model.ServiceItemResponse
// 服务监控调度计划任务
dispatchCron *cron.Cron
dispatchBus chan<- model.Monitor
} }
func (ss *ServiceSentinel) refreshMonthlyServiceStatus() { func (ss *ServiceSentinel) refreshMonthlyServiceStatus() {
@ -118,10 +125,10 @@ func (ss *ServiceSentinel) Dispatch(r ReportData) {
ss.serviceReportChannel <- r ss.serviceReportChannel <- r
} }
func (ss *ServiceSentinel) Monitors() []model.Monitor { func (ss *ServiceSentinel) Monitors() []*model.Monitor {
ss.monitorsLock.RLock() ss.monitorsLock.RLock()
defer ss.monitorsLock.RUnlock() defer ss.monitorsLock.RUnlock()
var monitors []model.Monitor var monitors []*model.Monitor
for _, v := range ss.monitors { for _, v := range ss.monitors {
monitors = append(monitors, v) monitors = append(monitors, v)
} }
@ -131,14 +138,21 @@ func (ss *ServiceSentinel) Monitors() []model.Monitor {
return monitors return monitors
} }
func (ss *ServiceSentinel) OnMonitorUpdate() { func (ss *ServiceSentinel) loadMonitorHistory() {
var monitors []model.Monitor var monitors []*model.Monitor
DB.Find(&monitors) DB.Find(&monitors)
var err error
ss.monitorsLock.Lock() ss.monitorsLock.Lock()
defer ss.monitorsLock.Unlock() defer ss.monitorsLock.Unlock()
ss.monitors = make(map[uint64]model.Monitor) ss.monitors = make(map[uint64]*model.Monitor)
for i := 0; i < len(monitors); i++ { for i := 0; i < len(monitors); i++ {
task := *monitors[i]
monitors[i].CronJobID, err = ss.dispatchCron.AddFunc(task.CronSpec(), func() {
ss.dispatchBus <- task
})
if err != nil {
panic(err)
}
ss.monitors[monitors[i].ID] = monitors[i] ss.monitors[monitors[i].ID] = monitors[i]
if len(ss.serviceCurrentStatusData[monitors[i].ID]) == 0 { if len(ss.serviceCurrentStatusData[monitors[i].ID]) == 0 {
ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize) ss.serviceCurrentStatusData[monitors[i].ID] = make([]model.MonitorHistory, _CurrentStatusSize)
@ -178,6 +192,36 @@ func (ss *ServiceSentinel) OnMonitorUpdate() {
} }
} }
// OnMonitorUpdate registers (or re-registers) the cron job that
// periodically pushes monitor m onto the dispatch bus. For a brand-new
// monitor it also seeds the 30-day monthly status cache. Returns an
// error only if the cron spec fails to parse/register; in that case no
// existing state is touched.
func (ss *ServiceSentinel) OnMonitorUpdate(m model.Monitor) error {
	ss.monitorsLock.Lock()
	defer ss.monitorsLock.Unlock()
	var err error
	// Register the new cron job first; the closure captures m by value,
	// so each tick dispatches this snapshot of the monitor.
	m.CronJobID, err = ss.dispatchCron.AddFunc(m.CronSpec(), func() {
		ss.dispatchBus <- m
	})
	if err != nil {
		return err
	}
	if ss.monitors[m.ID] != nil {
		// Existing monitor: stop its previous cron job (the new one is
		// already registered, so there is no gap in scheduling).
		ss.dispatchCron.Remove(ss.monitors[m.ID].CronJobID)
	} else {
		// New monitor: initialize its 30-day stats with all-zero slots.
		// NOTE(review): monthlyStatusLock is taken while monitorsLock is
		// held; other call sites must keep the same order to avoid deadlock.
		ss.monthlyStatusLock.Lock()
		defer ss.monthlyStatusLock.Unlock()
		ss.monthlyStatus[m.ID] = &model.ServiceItemResponse{
			Monitor: &m,
			Delay: &[30]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			Up:    &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			Down:  &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		}
	}
	// Store (or replace) the cached monitor, including its new CronJobID.
	ss.monitors[m.ID] = &m
	return nil
}
func (ss *ServiceSentinel) OnMonitorDelete(id uint64) { func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
ss.serviceResponseDataStoreLock.Lock() ss.serviceResponseDataStoreLock.Lock()
defer ss.serviceResponseDataStoreLock.Unlock() defer ss.serviceResponseDataStoreLock.Unlock()
@ -190,6 +234,8 @@ func (ss *ServiceSentinel) OnMonitorDelete(id uint64) {
delete(ss.sslCertCache, id) delete(ss.sslCertCache, id)
ss.monitorsLock.Lock() ss.monitorsLock.Lock()
defer ss.monitorsLock.Unlock() defer ss.monitorsLock.Unlock()
// 停掉定时任务
ss.dispatchCron.Remove(ss.monitors[id].CronJobID)
delete(ss.monitors, id) delete(ss.monitors, id)
ss.monthlyStatusLock.Lock() ss.monthlyStatusLock.Lock()
defer ss.monthlyStatusLock.Unlock() defer ss.monthlyStatusLock.Unlock()