diff --git a/README.md b/README.md index 3798edd..155c212 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # 哪吒面板 -服务期状态监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。 +系统状态、API(SSL证书变更、即将到期、到期)/TCP端口存活/PING 监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。 | 哪吒面板 | 首页截图1 | 首页截图2 | | ---- | ---- | ---- | @@ -131,7 +131,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。 - net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024*1024) - offline:不支持 Min/Max 参数 - Duration:持续秒数,监控比较简陋,取持续时间内的 70 采样结果 - +- Ignore: `{"1": true, "2":false}` 忽略此规则的服务器ID列表 ## 常见问题 ### 数据备份恢复 @@ -182,7 +182,19 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。 ## 变更日志 -- `0.2.0` **重大更新** +- `dashboard 0.2.1` `agent 0.2.1` + + - dashboard + - 修复了默认开启IP变更通知 + - hotaru 主题的服务状态页面 + - **新增可以指定服务器忽略监控规则** + - 修复info透明 @ilay1678 + + - agent + - 优化了 IPv6/IPv4 双栈问题 + - 增加 SSL 证书过期、即将过期提醒 + +- `dashboard 0.2.0` `agent 0.2.0` **重大更新** 增加了服务监控(TCP端口延迟、Ping、HTTP-SSL 证书)功能,此版本 Agent 与旧面板不兼容,而 Agent 是通过 GitHub Release 自动更新的 所以务必更新面板开启最新功能。 diff --git a/cmd/agent/main.go b/cmd/agent/main.go index 4564339..6871779 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "crypto/tls" "errors" "fmt" "log" @@ -50,6 +51,9 @@ var ( ctx = context.Background() delayWhenError = time.Second * 10 updateCh = make(chan struct{}, 0) + httpClient = &http.Client{Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }} ) func doSelfUpdate() { @@ -151,6 +155,7 @@ func run(cmd *cobra.Command, args []string) { func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error { var err error var task *pb.Task + defer log.Printf("receiveTasks exit %v %v => %v", time.Now(), task, err) for { task, err = tasks.Recv() @@ -159,30 +164,31 @@ func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error { } var result pb.TaskResult result.Id = task.GetId() + result.Type = task.GetType() switch task.GetType() { case model.MonitorTypeHTTPGET: start := time.Now() - resp, err := http.Get(task.GetData()) + resp, err := httpClient.Get(task.GetData()) if err == nil { result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0 if resp.StatusCode > 299 || resp.StatusCode < 200 { err = errors.New("\n应用错误:" + resp.Status) } } - var certs cert.Certs if err == nil { if strings.HasPrefix(task.GetData(), "https://") { - certs, err = cert.NewCerts([]string{task.GetData()}) + c := cert.NewCert(task.GetData()[8:]) + if c.Error != "" { + if strings.Contains(c.Error, "expired") { + result.Data = "SSL证书错误:证书已过期" + } else { + result.Data = "SSL证书错误:" + c.Error + } + } else { + result.Data = c.Issuer + "|" + c.NotAfter + result.Successful = true + } } - } - if err == nil { - if len(certs) == 0 { - err = errors.New("\n获取SSL证书错误:未获取到证书") - } - } - if err == nil { - result.Data = certs[0].Issuer - result.Successful = true } else { result.Data = err.Error() } diff --git a/cmd/dashboard/controller/controller.go b/cmd/dashboard/controller/controller.go index 99a1831..d25ef7b 100644 --- a/cmd/dashboard/controller/controller.go +++ b/cmd/dashboard/controller/controller.go @@ -42,6 +42,9 @@ func ServeWeb(port uint) { "ts": func(s string) string { return strings.TrimSpace(s) }, + "float32f": func(f float32) string { + return fmt.Sprintf("%.2f", f) + }, "divU64": func(a, b uint64) float32 { if b == 0 { if a > 0 { @@ -49,6 +52,10 @@ func ServeWeb(port uint) { } return 0 } + if a == 0 { + // 这是从未在线的情况 + return 1 / float32(b) * 100 + } return float32(a) / float32(b) * 100 }, "div": func(a, b int) float32 { @@ -58,6 +65,10 @@ func ServeWeb(port uint) { } return 0 } + if a == 0 { + // 这是从未在线的情况 + return 1 / float32(b) * 100 + } return float32(a) / float32(b) * 100 }, "addU64": func(a, b uint64) uint64 { diff --git a/cmd/dashboard/rpc/rpc.go b/cmd/dashboard/rpc/rpc.go index 1c6b45b..e8072d5 100644 --- a/cmd/dashboard/rpc/rpc.go +++ b/cmd/dashboard/rpc/rpc.go @@ -2,7 +2,6 @@ package rpc import ( "fmt" - "log" "net" "time" @@ -47,9 +46,7 @@ func DispatchTask(duration time.Duration) { continue } hasAliveAgent = true - log.Println("DispatchTask 确认派发 >>>>>", i, index) dao.SortedServerList[index].TaskStream.Send(tasks[i].PB()) - log.Println("DispatchTask 确认派发 <<<<<", i, index) index++ } dao.ServerLock.RUnlock() diff --git a/cmd/playground/main.go b/cmd/playground/main.go index 627d7ee..068d2b6 100644 --- a/cmd/playground/main.go +++ b/cmd/playground/main.go @@ -1,9 +1,11 @@ package main import ( + "crypto/tls" "fmt" "log" "net" + "net/http" "os/exec" "time" @@ -13,11 +15,23 @@ import ( ) func main() { + // 跳过 SSL 检查 + transCfg := &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + } + httpClient := &http.Client{Transport: transCfg} + _, err := httpClient.Get("https://expired-ecc-dv.ssl.com") + fmt.Println(err) + // SSL 证书信息获取 + c := cert.NewCert("expired-ecc-dv.ssl.com") + fmt.Println(c.Error) + // TCP conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10) if err != nil { panic(err) } println(conn) + // ICMP Ping pinger, err := ping.NewPinger("example.com") if err != nil { panic(err) @@ -28,11 +42,7 @@ func main() { panic(err) } fmt.Printf("%+v", pinger.Statistics()) - certs, err := cert.NewCerts([]string{"example.com"}) - if err != nil { - panic(err) - } - fmt.Println(certs) + // 硬盘信息 dparts, _ := disk.Partitions(false) for _, part := range dparts { u, _ := disk.Usage(part.Mountpoint) diff --git a/model/alertrule.go b/model/alertrule.go index 96dafd1..59d98a3 100644 --- a/model/alertrule.go +++ b/model/alertrule.go @@ -18,9 +18,10 @@ type Rule struct { // 指标类型,cpu、memory、swap、disk、net_in_speed、net_out_speed // net_all_speed、transfer_in、transfer_out、transfer_all、offline Type string - Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024) - Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024) - Duration uint64 // 持续时间 (秒) + Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024) + Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024) + Duration uint64 // 持续时间 (秒) + Ignore map[uint64]bool //忽略此规则的ID列表 } func percentage(used, total uint64) uint64 { @@ -30,7 +31,11 @@ func percentage(used, total uint64) uint64 { return used * 100 / total } +// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil func (u *Rule) Snapshot(server *Server) interface{} { + if u.Ignore[server.ID] { + return nil + } var src uint64 switch u.Type { case "cpu": @@ -72,9 +77,9 @@ func (u *Rule) Snapshot(server *Server) interface{} { type AlertRule struct { Common Name string - Rules []Rule `gorm:"-" json:"-"` RulesRaw string Enable *bool + Rules []Rule `gorm:"-" json:"-"` } func (r *AlertRule) BeforeSave(tx *gorm.DB) error { diff --git a/resource/template/component/monitor.html b/resource/template/component/monitor.html index 0a80f26..6910c56 100644 --- a/resource/template/component/monitor.html +++ b/resource/template/component/monitor.html @@ -15,7 +15,7 @@
diff --git a/resource/template/theme-default/service.html b/resource/template/theme-default/service.html index 0f43f2b..8ac7a21 100644 --- a/resource/template/theme-default/service.html +++ b/resource/template/theme-default/service.html @@ -8,7 +8,7 @@

{{$service.Monitor.Name}}

-

30天在线率{{divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown)}}%

+

30天在线率{{float32f (divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown))}}%

{{range $i,$d := $service.Delay}} diff --git a/service/alertmanager/alertmanager.go b/service/alertmanager/alertmanager.go index e56725a..dac2822 100644 --- a/service/alertmanager/alertmanager.go +++ b/service/alertmanager/alertmanager.go @@ -133,33 +133,8 @@ func checkStatus() { // 发送通知 max, desc := alert.Check(alertsStore[alert.ID][server.ID]) if desc != "" { - nID := getNotificationHash(server, desc) - var flag bool - if cacheN, has := dao.Cache.Get(nID); has { - nHistory := cacheN.(NotificationHistory) - // 每次提醒都增加一倍等待时间,最后每天最多提醒一次 - if time.Now().After(nHistory.Until) { - flag = true - nHistory.Duration *= 2 - if nHistory.Duration > time.Hour*24 { - nHistory.Duration = time.Hour * 24 - } - nHistory.Until = time.Now().Add(nHistory.Duration) - // 缓存有效期加 10 分钟 - dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10) - } - } else { - // 新提醒直接通知 - flag = true - dao.Cache.Set(nID, NotificationHistory{ - Duration: firstNotificationDelay, - Until: time.Now().Add(firstNotificationDelay), - }, firstNotificationDelay+time.Minute*10) - } - if flag { - message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),%s,逮到咯,快去看看!", alert.Name, server.Name, server.Host.IP, desc) - go SendNotification(message) - } + message := fmt.Sprintf("报警规则:%s,服务器:%s(%s),%s,逮到咯,快去看看!", alert.Name, server.Name, server.Host.IP, desc) + go SendNotification(message) } // 清理旧数据 if max > 0 && max < len(alertsStore[alert.ID][server.ID]) { @@ -170,13 +145,39 @@ func checkStatus() { } func SendNotification(desc string) { + // 通知防骚扰策略 + nID := hex.EncodeToString(md5.New().Sum([]byte(desc))) + var flag bool + if cacheN, has := dao.Cache.Get(nID); has { + nHistory := cacheN.(NotificationHistory) + // 每次提醒都增加一倍等待时间,最后每天最多提醒一次 + if time.Now().After(nHistory.Until) { + flag = true + nHistory.Duration *= 2 + if nHistory.Duration > time.Hour*24 { + nHistory.Duration = time.Hour * 24 + } + nHistory.Until = time.Now().Add(nHistory.Duration) + // 缓存有效期加 10 分钟 + dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10) + } + } else { + // 新提醒直接通知 + flag = true + dao.Cache.Set(nID, NotificationHistory{ + Duration: firstNotificationDelay, + Until: time.Now().Add(firstNotificationDelay), + }, firstNotificationDelay+time.Minute*10) + } + + if !flag { + return + } + + // 发出通知 notificationsLock.RLock() defer notificationsLock.RUnlock() for i := 0; i < len(notifications); i++ { notifications[i].Send(desc) } } - -func getNotificationHash(server *model.Server, desc string) string { - return hex.EncodeToString(md5.New().Sum([]byte(fmt.Sprintf("%d::%s", server.ID, desc)))) -} diff --git a/service/rpc/nezha.go b/service/rpc/nezha.go index e3c1351..da1147e 100644 --- a/service/rpc/nezha.go +++ b/service/rpc/nezha.go @@ -3,6 +3,7 @@ package rpc import ( "context" "fmt" + "strings" "time" "github.com/naiba/nezha/model" @@ -21,15 +22,33 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece return nil, err } if r.GetType() == model.MonitorTypeHTTPGET { - // SSL 证书变更报警 + // SSL 证书报警 var last model.MonitorHistory if err := dao.DB.Where("monitor_id = ?", r.GetId()).Order("id DESC").First(&last).Error; err == nil { - if last.Data != "" && last.Data != r.GetData() { + var errMsg string + if strings.HasPrefix(r.GetData(), "SSL证书错误:") { + // 证书错误提醒 + errMsg = r.GetData() + } else { + var splits = strings.Split(r.GetData(), "|") + // 证书变更提醒 + if last.Data != "" && last.Data != splits[0] { + errMsg = fmt.Sprintf( + "SSL证书变更,旧:%s,新:%s。", + last.Data, splits[0]) + } + expires, err := time.Parse("2006-01-02 15:04:05 -0700 MST", splits[1]) + // 证书过期提醒 + if err == nil && expires.Before(time.Now().AddDate(0, 0, 7)) { + errMsg = fmt.Sprintf( + "SSL证书将在七天内过期,过期时间:%s。", + expires.Format("2006-01-02 15:04:05")) + } + } + if errMsg != "" { var monitor model.Monitor dao.DB.First(&monitor, "id = ?", last.MonitorID) - alertmanager.SendNotification(fmt.Sprintf( - "监控:%s SSL证书变更,旧:%s,新:%s。", - monitor.Name, last.Data, r.GetData())) + alertmanager.SendNotification(fmt.Sprintf("服务监控:%s %s", monitor.Name, errMsg)) } } } @@ -38,12 +57,6 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece if err := dao.DB.Create(&mh).Error; err != nil { return nil, err } - // 更新最后检测时间 - var m model.Monitor - m.ID = r.GetId() - if err := dao.DB.Model(&m).Update("last_check", time.Now()).Error; err != nil { - return nil, err - } return &pb.Receipt{Proced: true}, nil } @@ -93,7 +106,7 @@ func (s *NezhaHandler) ReportSystemInfo(c context.Context, r *pb.Host) (*pb.Rece host.IP != "" && dao.ServerList[clientID].Host.IP != host.IP { alertmanager.SendNotification(fmt.Sprintf( - "服务器:%s IP变更提醒,旧IP:%s,新IP:%s。", + "IP变更提醒 服务器:%s ,旧IP:%s,新IP:%s。", dao.ServerList[clientID].Name, dao.ServerList[clientID].Host.IP, host.IP)) } dao.ServerList[clientID].Host = &host