mirror of
https://github.com/nezhahq/nezha.git
synced 2025-01-22 12:48:14 -05:00
🚸 release: v0.2.1
This commit is contained in:
parent
303dc73e16
commit
d792fc8499
18
README.md
18
README.md
@ -1,6 +1,6 @@
|
||||
# 哪吒面板
|
||||
|
||||
服务期状态监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。
|
||||
系统状态、API(SSL证书变更、即将到期、到期)/TCP端口存活/PING 监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。
|
||||
|
||||
| 哪吒面板 | 首页截图1 | 首页截图2 |
|
||||
| ---- | ---- | ---- |
|
||||
@ -131,7 +131,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
|
||||
- net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024*1024)
|
||||
- offline:不支持 Min/Max 参数
|
||||
- Duration:持续秒数,监控比较简陋,取持续时间内 70% 的采样结果
|
||||
|
||||
- Ignore: `{"1": true, "2":false}` 忽略此规则的服务器ID列表
|
||||
## 常见问题
|
||||
|
||||
### 数据备份恢复
|
||||
@ -182,7 +182,19 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
|
||||
|
||||
## 变更日志
|
||||
|
||||
- `0.2.0` **重大更新**
|
||||
- `dashboard 0.2.1` `agent 0.2.1`
|
||||
|
||||
- dashboard
|
||||
- 修复了默认开启 IP 变更通知的问题
|
||||
- hotaru 主题的服务状态页面
|
||||
- **新增可以指定服务器忽略监控规则**
|
||||
- 修复info透明 @ilay1678
|
||||
|
||||
- agent
|
||||
- 优化了 IPv6/IPv4 双栈问题
|
||||
- 增加 SSL 证书过期、即将过期提醒
|
||||
|
||||
- `dashboard 0.2.0` `agent 0.2.0` **重大更新**
|
||||
|
||||
增加了服务监控(TCP端口延迟、Ping、HTTP-SSL 证书)功能,此版本 Agent 与旧面板不兼容,而 Agent 是通过 GitHub Release 自动更新的,所以务必更新面板以开启最新功能。
|
||||
|
||||
|
@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
@ -50,6 +51,9 @@ var (
|
||||
ctx = context.Background()
|
||||
delayWhenError = time.Second * 10
|
||||
updateCh = make(chan struct{}, 0)
|
||||
httpClient = &http.Client{Transport: &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
}}
|
||||
)
|
||||
|
||||
func doSelfUpdate() {
|
||||
@ -151,6 +155,7 @@ func run(cmd *cobra.Command, args []string) {
|
||||
func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
|
||||
var err error
|
||||
var task *pb.Task
|
||||
|
||||
defer log.Printf("receiveTasks exit %v %v => %v", time.Now(), task, err)
|
||||
for {
|
||||
task, err = tasks.Recv()
|
||||
@ -159,30 +164,31 @@ func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
|
||||
}
|
||||
var result pb.TaskResult
|
||||
result.Id = task.GetId()
|
||||
result.Type = task.GetType()
|
||||
switch task.GetType() {
|
||||
case model.MonitorTypeHTTPGET:
|
||||
start := time.Now()
|
||||
resp, err := http.Get(task.GetData())
|
||||
resp, err := httpClient.Get(task.GetData())
|
||||
if err == nil {
|
||||
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
|
||||
if resp.StatusCode > 299 || resp.StatusCode < 200 {
|
||||
err = errors.New("\n应用错误:" + resp.Status)
|
||||
}
|
||||
}
|
||||
var certs cert.Certs
|
||||
if err == nil {
|
||||
if strings.HasPrefix(task.GetData(), "https://") {
|
||||
certs, err = cert.NewCerts([]string{task.GetData()})
|
||||
c := cert.NewCert(task.GetData()[8:])
|
||||
if c.Error != "" {
|
||||
if strings.Contains(c.Error, "expired") {
|
||||
result.Data = "SSL证书错误:证书已过期"
|
||||
} else {
|
||||
result.Data = "SSL证书错误:" + c.Error
|
||||
}
|
||||
} else {
|
||||
result.Data = c.Issuer + "|" + c.NotAfter
|
||||
result.Successful = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
if len(certs) == 0 {
|
||||
err = errors.New("\n获取SSL证书错误:未获取到证书")
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
result.Data = certs[0].Issuer
|
||||
result.Successful = true
|
||||
} else {
|
||||
result.Data = err.Error()
|
||||
}
|
||||
|
@ -42,6 +42,9 @@ func ServeWeb(port uint) {
|
||||
"ts": func(s string) string {
|
||||
return strings.TrimSpace(s)
|
||||
},
|
||||
"float32f": func(f float32) string {
|
||||
return fmt.Sprintf("%.2f", f)
|
||||
},
|
||||
"divU64": func(a, b uint64) float32 {
|
||||
if b == 0 {
|
||||
if a > 0 {
|
||||
@ -49,6 +52,10 @@ func ServeWeb(port uint) {
|
||||
}
|
||||
return 0
|
||||
}
|
||||
if a == 0 {
|
||||
// 这是从未在线的情况
|
||||
return 1 / float32(b) * 100
|
||||
}
|
||||
return float32(a) / float32(b) * 100
|
||||
},
|
||||
"div": func(a, b int) float32 {
|
||||
@ -58,6 +65,10 @@ func ServeWeb(port uint) {
|
||||
}
|
||||
return 0
|
||||
}
|
||||
if a == 0 {
|
||||
// 这是从未在线的情况
|
||||
return 1 / float32(b) * 100
|
||||
}
|
||||
return float32(a) / float32(b) * 100
|
||||
},
|
||||
"addU64": func(a, b uint64) uint64 {
|
||||
|
@ -2,7 +2,6 @@ package rpc
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"time"
|
||||
|
||||
@ -47,9 +46,7 @@ func DispatchTask(duration time.Duration) {
|
||||
continue
|
||||
}
|
||||
hasAliveAgent = true
|
||||
log.Println("DispatchTask 确认派发 >>>>>", i, index)
|
||||
dao.SortedServerList[index].TaskStream.Send(tasks[i].PB())
|
||||
log.Println("DispatchTask 确认派发 <<<<<", i, index)
|
||||
index++
|
||||
}
|
||||
dao.ServerLock.RUnlock()
|
||||
|
@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
@ -13,11 +15,23 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// 跳过 SSL 检查
|
||||
transCfg := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
}
|
||||
httpClient := &http.Client{Transport: transCfg}
|
||||
_, err := httpClient.Get("https://expired-ecc-dv.ssl.com")
|
||||
fmt.Println(err)
|
||||
// SSL 证书信息获取
|
||||
c := cert.NewCert("expired-ecc-dv.ssl.com")
|
||||
fmt.Println(c.Error)
|
||||
// TCP
|
||||
conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
println(conn)
|
||||
// ICMP Ping
|
||||
pinger, err := ping.NewPinger("example.com")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -28,11 +42,7 @@ func main() {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Printf("%+v", pinger.Statistics())
|
||||
certs, err := cert.NewCerts([]string{"example.com"})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
fmt.Println(certs)
|
||||
// 硬盘信息
|
||||
dparts, _ := disk.Partitions(false)
|
||||
for _, part := range dparts {
|
||||
u, _ := disk.Usage(part.Mountpoint)
|
||||
|
@ -18,9 +18,10 @@ type Rule struct {
|
||||
// 指标类型,cpu、memory、swap、disk、net_in_speed、net_out_speed
|
||||
// net_all_speed、transfer_in、transfer_out、transfer_all、offline
|
||||
Type string
|
||||
Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024)
|
||||
Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024)
|
||||
Duration uint64 // 持续时间 (秒)
|
||||
Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024)
|
||||
Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024)
|
||||
Duration uint64 // 持续时间 (秒)
|
||||
Ignore map[uint64]bool //忽略此规则的ID列表
|
||||
}
|
||||
|
||||
func percentage(used, total uint64) uint64 {
|
||||
@ -30,7 +31,11 @@ func percentage(used, total uint64) uint64 {
|
||||
return used * 100 / total
|
||||
}
|
||||
|
||||
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
|
||||
func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
if u.Ignore[server.ID] {
|
||||
return nil
|
||||
}
|
||||
var src uint64
|
||||
switch u.Type {
|
||||
case "cpu":
|
||||
@ -72,9 +77,9 @@ func (u *Rule) Snapshot(server *Server) interface{} {
|
||||
type AlertRule struct {
|
||||
Common
|
||||
Name string
|
||||
Rules []Rule `gorm:"-" json:"-"`
|
||||
RulesRaw string
|
||||
Enable *bool
|
||||
Rules []Rule `gorm:"-" json:"-"`
|
||||
}
|
||||
|
||||
func (r *AlertRule) BeforeSave(tx *gorm.DB) error {
|
||||
|
@ -15,7 +15,7 @@
|
||||
<div class="field">
|
||||
<label>类型</label>
|
||||
<select name="Type" class="ui fluid dropdown">
|
||||
<option value="1">HTTP-GET</option>
|
||||
<option value="1">HTTP-GET(SSL到期、变更)</option>
|
||||
<option value="2">ICMP-Ping</option>
|
||||
<option value="3">TCP-Ping</option>
|
||||
</select>
|
||||
|
@ -8,7 +8,7 @@
|
||||
<div class="ui grid">
|
||||
<div class="three wide column">
|
||||
<p>{{$service.Monitor.Name}}</p>
|
||||
<p>30天在线率{{divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown)}}%</p>
|
||||
<p>30天在线率{{float32f (divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown))}}%</p>
|
||||
</div>
|
||||
<div class="eleven wide column">
|
||||
{{range $i,$d := $service.Delay}}
|
||||
|
@ -133,33 +133,8 @@ func checkStatus() {
|
||||
// 发送通知
|
||||
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
|
||||
if desc != "" {
|
||||
nID := getNotificationHash(server, desc)
|
||||
var flag bool
|
||||
if cacheN, has := dao.Cache.Get(nID); has {
|
||||
nHistory := cacheN.(NotificationHistory)
|
||||
// 每次提醒都增加一倍等待时间,最后每天最多提醒一次
|
||||
if time.Now().After(nHistory.Until) {
|
||||
flag = true
|
||||
nHistory.Duration *= 2
|
||||
if nHistory.Duration > time.Hour*24 {
|
||||
nHistory.Duration = time.Hour * 24
|
||||
}
|
||||
nHistory.Until = time.Now().Add(nHistory.Duration)
|
||||
// 缓存有效期加 10 分钟
|
||||
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
|
||||
}
|
||||
} else {
|
||||
// 新提醒直接通知
|
||||
flag = true
|
||||
dao.Cache.Set(nID, NotificationHistory{
|
||||
Duration: firstNotificationDelay,
|
||||
Until: time.Now().Add(firstNotificationDelay),
|
||||
}, firstNotificationDelay+time.Minute*10)
|
||||
}
|
||||
if flag {
|
||||
message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),%s,逮到咯,快去看看!", alert.Name, server.Name, server.Host.IP, desc)
|
||||
go SendNotification(message)
|
||||
}
|
||||
message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),%s,逮到咯,快去看看!", alert.Name, server.Name, server.Host.IP, desc)
|
||||
go SendNotification(message)
|
||||
}
|
||||
// 清理旧数据
|
||||
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {
|
||||
@ -170,13 +145,39 @@ func checkStatus() {
|
||||
}
|
||||
|
||||
func SendNotification(desc string) {
|
||||
// 通知防骚扰策略
|
||||
nID := hex.EncodeToString(md5.New().Sum([]byte(desc)))
|
||||
var flag bool
|
||||
if cacheN, has := dao.Cache.Get(nID); has {
|
||||
nHistory := cacheN.(NotificationHistory)
|
||||
// 每次提醒都增加一倍等待时间,最后每天最多提醒一次
|
||||
if time.Now().After(nHistory.Until) {
|
||||
flag = true
|
||||
nHistory.Duration *= 2
|
||||
if nHistory.Duration > time.Hour*24 {
|
||||
nHistory.Duration = time.Hour * 24
|
||||
}
|
||||
nHistory.Until = time.Now().Add(nHistory.Duration)
|
||||
// 缓存有效期加 10 分钟
|
||||
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
|
||||
}
|
||||
} else {
|
||||
// 新提醒直接通知
|
||||
flag = true
|
||||
dao.Cache.Set(nID, NotificationHistory{
|
||||
Duration: firstNotificationDelay,
|
||||
Until: time.Now().Add(firstNotificationDelay),
|
||||
}, firstNotificationDelay+time.Minute*10)
|
||||
}
|
||||
|
||||
if !flag {
|
||||
return
|
||||
}
|
||||
|
||||
// 发出通知
|
||||
notificationsLock.RLock()
|
||||
defer notificationsLock.RUnlock()
|
||||
for i := 0; i < len(notifications); i++ {
|
||||
notifications[i].Send(desc)
|
||||
}
|
||||
}
|
||||
|
||||
func getNotificationHash(server *model.Server, desc string) string {
|
||||
return hex.EncodeToString(md5.New().Sum([]byte(fmt.Sprintf("%d::%s", server.ID, desc))))
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package rpc
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/naiba/nezha/model"
|
||||
@ -21,15 +22,33 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
|
||||
return nil, err
|
||||
}
|
||||
if r.GetType() == model.MonitorTypeHTTPGET {
|
||||
// SSL 证书变更报警
|
||||
// SSL 证书报警
|
||||
var last model.MonitorHistory
|
||||
if err := dao.DB.Where("monitor_id = ?", r.GetId()).Order("id DESC").First(&last).Error; err == nil {
|
||||
if last.Data != "" && last.Data != r.GetData() {
|
||||
var errMsg string
|
||||
if strings.HasPrefix(r.GetData(), "SSL证书错误:") {
|
||||
// 证书错误提醒
|
||||
errMsg = r.GetData()
|
||||
} else {
|
||||
var splits = strings.Split(r.GetData(), "|")
|
||||
// 证书变更提醒
|
||||
if last.Data != "" && last.Data != splits[0] {
|
||||
errMsg = fmt.Sprintf(
|
||||
"SSL证书变更,旧:%s,新:%s。",
|
||||
last.Data, splits[0])
|
||||
}
|
||||
expires, err := time.Parse("2006-01-02 15:04:05 -0700 MST", splits[1])
|
||||
// 证书过期提醒
|
||||
if err == nil && expires.Before(time.Now().AddDate(0, 0, 7)) {
|
||||
errMsg = fmt.Sprintf(
|
||||
"SSL证书将在七天内过期,过期时间:%s。",
|
||||
expires.Format("2006-01-02 15:04:05"))
|
||||
}
|
||||
}
|
||||
if errMsg != "" {
|
||||
var monitor model.Monitor
|
||||
dao.DB.First(&monitor, "id = ?", last.MonitorID)
|
||||
alertmanager.SendNotification(fmt.Sprintf(
|
||||
"监控:%s SSL证书变更,旧:%s,新:%s。",
|
||||
monitor.Name, last.Data, r.GetData()))
|
||||
alertmanager.SendNotification(fmt.Sprintf("服务监控:%s %s", monitor.Name, errMsg))
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -38,12 +57,6 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
|
||||
if err := dao.DB.Create(&mh).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// 更新最后检测时间
|
||||
var m model.Monitor
|
||||
m.ID = r.GetId()
|
||||
if err := dao.DB.Model(&m).Update("last_check", time.Now()).Error; err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pb.Receipt{Proced: true}, nil
|
||||
}
|
||||
|
||||
@ -93,7 +106,7 @@ func (s *NezhaHandler) ReportSystemInfo(c context.Context, r *pb.Host) (*pb.Rece
|
||||
host.IP != "" &&
|
||||
dao.ServerList[clientID].Host.IP != host.IP {
|
||||
alertmanager.SendNotification(fmt.Sprintf(
|
||||
"服务器:%s IP变更提醒,旧IP:%s,新IP:%s。",
|
||||
"IP变更提醒 服务器:%s ,旧IP:%s,新IP:%s。",
|
||||
dao.ServerList[clientID].Name, dao.ServerList[clientID].Host.IP, host.IP))
|
||||
}
|
||||
dao.ServerList[clientID].Host = &host
|
||||
|
Loading…
Reference in New Issue
Block a user