🚸 release: v0.2.1

This commit is contained in:
naiba 2021-01-16 14:11:51 +08:00
parent 303dc73e16
commit d792fc8499
10 changed files with 127 additions and 72 deletions

View File

@ -1,6 +1,6 @@
# 哪吒面板
服务器状态监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。
系统状态、API(SSL证书变更、即将到期、到期)/TCP端口存活/PING 监控,报警通知,被动接收,极省资源 64M 小鸡也能装 Agent。
| 哪吒面板 | 首页截图1 | 首页截图2 |
| ---- | ---- | ---- |
@ -131,7 +131,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
- net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1kb=1024,1mb = 1024*1024)
- offline:不支持 Min/Max 参数
- Duration:持续秒数,监控比较简陋,取持续时间内的 70% 采样结果
- Ignore: `{"1": true, "2":false}` 忽略此规则的服务器ID列表
## 常见问题
### 数据备份恢复
@ -182,7 +182,19 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
## 变更日志
- `0.2.0` **重大更新**
- `dashboard 0.2.1` `agent 0.2.1`
- dashboard
- 修复了默认开启IP变更通知
- hotaru 主题的服务状态页面
- **新增可以指定服务器忽略监控规则**
- 修复info透明 @ilay1678
- agent
- 优化了 IPv6/IPv4 双栈问题
- 增加 SSL 证书过期、即将过期提醒
- `dashboard 0.2.0` `agent 0.2.0` **重大更新**
增加了服务监控(TCP端口延迟、Ping、HTTP-SSL 证书)功能,此版本 Agent 与旧面板不兼容,而 Agent 是通过 GitHub Release 自动更新的,所以务必更新面板开启最新功能。

View File

@ -2,6 +2,7 @@ package main
import (
"context"
"crypto/tls"
"errors"
"fmt"
"log"
@ -50,6 +51,9 @@ var (
ctx = context.Background()
delayWhenError = time.Second * 10
updateCh = make(chan struct{}, 0)
httpClient = &http.Client{Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}}
)
func doSelfUpdate() {
@ -151,6 +155,7 @@ func run(cmd *cobra.Command, args []string) {
func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
var err error
var task *pb.Task
defer log.Printf("receiveTasks exit %v %v => %v", time.Now(), task, err)
for {
task, err = tasks.Recv()
@ -159,30 +164,31 @@ func receiveTasks(tasks pb.NezhaService_RequestTaskClient) error {
}
var result pb.TaskResult
result.Id = task.GetId()
result.Type = task.GetType()
switch task.GetType() {
case model.MonitorTypeHTTPGET:
start := time.Now()
resp, err := http.Get(task.GetData())
resp, err := httpClient.Get(task.GetData())
if err == nil {
result.Delay = float32(time.Now().Sub(start).Microseconds()) / 1000.0
if resp.StatusCode > 299 || resp.StatusCode < 200 {
err = errors.New("\n应用错误" + resp.Status)
}
}
var certs cert.Certs
if err == nil {
if strings.HasPrefix(task.GetData(), "https://") {
certs, err = cert.NewCerts([]string{task.GetData()})
c := cert.NewCert(task.GetData()[8:])
if c.Error != "" {
if strings.Contains(c.Error, "expired") {
result.Data = "SSL证书错误证书已过期"
} else {
result.Data = "SSL证书错误" + c.Error
}
} else {
result.Data = c.Issuer + "|" + c.NotAfter
result.Successful = true
}
}
}
if err == nil {
if len(certs) == 0 {
err = errors.New("\n获取SSL证书错误未获取到证书")
}
}
if err == nil {
result.Data = certs[0].Issuer
result.Successful = true
} else {
result.Data = err.Error()
}

View File

@ -42,6 +42,9 @@ func ServeWeb(port uint) {
"ts": func(s string) string {
return strings.TrimSpace(s)
},
"float32f": func(f float32) string {
return fmt.Sprintf("%.2f", f)
},
"divU64": func(a, b uint64) float32 {
if b == 0 {
if a > 0 {
@ -49,6 +52,10 @@ func ServeWeb(port uint) {
}
return 0
}
if a == 0 {
// 这是从未在线的情况
return 1 / float32(b) * 100
}
return float32(a) / float32(b) * 100
},
"div": func(a, b int) float32 {
@ -58,6 +65,10 @@ func ServeWeb(port uint) {
}
return 0
}
if a == 0 {
// 这是从未在线的情况
return 1 / float32(b) * 100
}
return float32(a) / float32(b) * 100
},
"addU64": func(a, b uint64) uint64 {

View File

@ -2,7 +2,6 @@ package rpc
import (
"fmt"
"log"
"net"
"time"
@ -47,9 +46,7 @@ func DispatchTask(duration time.Duration) {
continue
}
hasAliveAgent = true
log.Println("DispatchTask 确认派发 >>>>>", i, index)
dao.SortedServerList[index].TaskStream.Send(tasks[i].PB())
log.Println("DispatchTask 确认派发 <<<<<", i, index)
index++
}
dao.ServerLock.RUnlock()

View File

@ -1,9 +1,11 @@
package main
import (
"crypto/tls"
"fmt"
"log"
"net"
"net/http"
"os/exec"
"time"
@ -13,11 +15,23 @@ import (
)
func main() {
// 跳过 SSL 检查
transCfg := &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
}
httpClient := &http.Client{Transport: transCfg}
_, err := httpClient.Get("https://expired-ecc-dv.ssl.com")
fmt.Println(err)
// SSL 证书信息获取
c := cert.NewCert("expired-ecc-dv.ssl.com")
fmt.Println(c.Error)
// TCP
conn, err := net.DialTimeout("tcp", "example.com:80", time.Second*10)
if err != nil {
panic(err)
}
println(conn)
// ICMP Ping
pinger, err := ping.NewPinger("example.com")
if err != nil {
panic(err)
@ -28,11 +42,7 @@ func main() {
panic(err)
}
fmt.Printf("%+v", pinger.Statistics())
certs, err := cert.NewCerts([]string{"example.com"})
if err != nil {
panic(err)
}
fmt.Println(certs)
// 硬盘信息
dparts, _ := disk.Partitions(false)
for _, part := range dparts {
u, _ := disk.Usage(part.Mountpoint)

View File

@ -18,9 +18,10 @@ type Rule struct {
// 指标类型cpu、memory、swap、disk、net_in_speed、net_out_speed
// net_all_speed、transfer_in、transfer_out、transfer_all、offline
Type string
Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024)
Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024)
Duration uint64 // 持续时间 (秒)
Min uint64 // 最小阈值 (百分比、字节 kb ÷ 1024)
Max uint64 // 最大阈值 (百分比、字节 kb ÷ 1024)
Duration uint64 // 持续时间 (秒)
Ignore map[uint64]bool //忽略此规则的ID列表
}
func percentage(used, total uint64) uint64 {
@ -30,7 +31,11 @@ func percentage(used, total uint64) uint64 {
return used * 100 / total
}
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
func (u *Rule) Snapshot(server *Server) interface{} {
if u.Ignore[server.ID] {
return nil
}
var src uint64
switch u.Type {
case "cpu":
@ -72,9 +77,9 @@ func (u *Rule) Snapshot(server *Server) interface{} {
type AlertRule struct {
Common
Name string
Rules []Rule `gorm:"-" json:"-"`
RulesRaw string
Enable *bool
Rules []Rule `gorm:"-" json:"-"`
}
func (r *AlertRule) BeforeSave(tx *gorm.DB) error {

View File

@ -15,7 +15,7 @@
<div class="field">
<label>类型</label>
<select name="Type" class="ui fluid dropdown">
<option value="1">HTTP-GET</option>
<option value="1">HTTP-GET(SSL到期、变更)</option>
<option value="2">ICMP-Ping</option>
<option value="3">TCP-Ping</option>
</select>

View File

@ -8,7 +8,7 @@
<div class="ui grid">
<div class="three wide column">
<p>{{$service.Monitor.Name}}</p>
<p>30天在线率{{divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown)}}%</p>
<p>30天在线率{{float32f (divU64 $service.TotalUp (addU64 $service.TotalUp $service.TotalDown))}}%</p>
</div>
<div class="eleven wide column">
{{range $i,$d := $service.Delay}}

View File

@ -133,33 +133,8 @@ func checkStatus() {
// 发送通知
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
if desc != "" {
nID := getNotificationHash(server, desc)
var flag bool
if cacheN, has := dao.Cache.Get(nID); has {
nHistory := cacheN.(NotificationHistory)
// 每次提醒都增加一倍等待时间,最后每天最多提醒一次
if time.Now().After(nHistory.Until) {
flag = true
nHistory.Duration *= 2
if nHistory.Duration > time.Hour*24 {
nHistory.Duration = time.Hour * 24
}
nHistory.Until = time.Now().Add(nHistory.Duration)
// 缓存有效期加 10 分钟
dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
}
} else {
// 新提醒直接通知
flag = true
dao.Cache.Set(nID, NotificationHistory{
Duration: firstNotificationDelay,
Until: time.Now().Add(firstNotificationDelay),
}, firstNotificationDelay+time.Minute*10)
}
if flag {
message := fmt.Sprintf("报警规则:%s服务器%s(%s)%s逮到咯快去看看", alert.Name, server.Name, server.Host.IP, desc)
go SendNotification(message)
}
message := fmt.Sprintf("报警规则:%s服务器%s(%s)%s逮到咯快去看看", alert.Name, server.Name, server.Host.IP, desc)
go SendNotification(message)
}
// 清理旧数据
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {
@ -170,13 +145,39 @@ func checkStatus() {
}
// SendNotification sends desc through every entry in notifications, unless an
// identical message is still inside its back-off window.
//
// Back-off state is kept in dao.Cache under a key derived from the MD5 digest
// of desc. The first occurrence of a message is sent immediately and opens a
// window of firstNotificationDelay. Each later send doubles the window, capped
// at 24 hours. A message that arrives before its window has ended is dropped.
func SendNotification(desc string) {
	// Anti-spam policy: key the history on a digest of the message text.
	// md5.Sum digests desc itself. The former md5.New().Sum([]byte(desc)) only
	// appended the digest of empty input to the raw message bytes, so the key
	// was the hex-encoded message plus a constant suffix rather than a hash.
	digest := md5.Sum([]byte(desc))
	nID := hex.EncodeToString(digest[:])

	// NOTE(review): the Get/Set pair below is not atomic; callers that invoke
	// this from several goroutines with the same desc may each pass the check.
	// Confirm whether that matters before adding locking.
	var flag bool
	if cacheN, has := dao.Cache.Get(nID); has {
		nHistory := cacheN.(NotificationHistory)
		// Double the waiting time on every reminder, ending up with at most
		// one reminder per day.
		if time.Now().After(nHistory.Until) {
			flag = true
			nHistory.Duration *= 2
			if nHistory.Duration > time.Hour*24 {
				nHistory.Duration = time.Hour * 24
			}
			nHistory.Until = time.Now().Add(nHistory.Duration)
			// Keep the cache entry 10 minutes longer than the window itself.
			dao.Cache.Set(nID, nHistory, nHistory.Duration+time.Minute*10)
		}
	} else {
		// A message seen for the first time is sent right away.
		flag = true
		dao.Cache.Set(nID, NotificationHistory{
			Duration: firstNotificationDelay,
			Until:    time.Now().Add(firstNotificationDelay),
		}, firstNotificationDelay+time.Minute*10)
	}

	if !flag {
		return
	}

	// Deliver the message to every configured notification target.
	notificationsLock.RLock()
	defer notificationsLock.RUnlock()
	for i := 0; i < len(notifications); i++ {
		notifications[i].Send(desc)
	}
}
// getNotificationHash returns the hex-encoded MD5 digest of the string
// "<server.ID>::<desc>".
func getNotificationHash(server *model.Server, desc string) string {
	// md5.Sum digests its argument. md5.New().Sum(b) would instead append the
	// digest of empty input to b, leaving the input unhashed.
	sum := md5.Sum([]byte(fmt.Sprintf("%d::%s", server.ID, desc)))
	return hex.EncodeToString(sum[:])
}

View File

@ -3,6 +3,7 @@ package rpc
import (
"context"
"fmt"
"strings"
"time"
"github.com/naiba/nezha/model"
@ -21,15 +22,33 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
return nil, err
}
if r.GetType() == model.MonitorTypeHTTPGET {
// SSL 证书变更报警
// SSL 证书报警
var last model.MonitorHistory
if err := dao.DB.Where("monitor_id = ?", r.GetId()).Order("id DESC").First(&last).Error; err == nil {
if last.Data != "" && last.Data != r.GetData() {
var errMsg string
if strings.HasPrefix(r.GetData(), "SSL证书错误") {
// 证书错误提醒
errMsg = r.GetData()
} else {
var splits = strings.Split(r.GetData(), "|")
// 证书变更提醒
if last.Data != "" && last.Data != splits[0] {
errMsg = fmt.Sprintf(
"SSL证书变更%s%s。",
last.Data, splits[0])
}
expires, err := time.Parse("2006-01-02 15:04:05 -0700 MST", splits[1])
// 证书过期提醒
if err == nil && expires.Before(time.Now().AddDate(0, 0, 7)) {
errMsg = fmt.Sprintf(
"SSL证书将在七天内过期过期时间%s。",
expires.Format("2006-01-02 15:04:05"))
}
}
if errMsg != "" {
var monitor model.Monitor
dao.DB.First(&monitor, "id = ?", last.MonitorID)
alertmanager.SendNotification(fmt.Sprintf(
"监控:%s SSL证书变更%s%s。",
monitor.Name, last.Data, r.GetData()))
alertmanager.SendNotification(fmt.Sprintf("服务监控:%s %s", monitor.Name, errMsg))
}
}
}
@ -38,12 +57,6 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
if err := dao.DB.Create(&mh).Error; err != nil {
return nil, err
}
// 更新最后检测时间
var m model.Monitor
m.ID = r.GetId()
if err := dao.DB.Model(&m).Update("last_check", time.Now()).Error; err != nil {
return nil, err
}
return &pb.Receipt{Proced: true}, nil
}
@ -93,7 +106,7 @@ func (s *NezhaHandler) ReportSystemInfo(c context.Context, r *pb.Host) (*pb.Rece
host.IP != "" &&
dao.ServerList[clientID].Host.IP != host.IP {
alertmanager.SendNotification(fmt.Sprintf(
"服务器:%s IP变更提醒旧IP%s新IP%s。",
"IP变更提醒 服务器:%s 旧IP%s新IP%s。",
dao.ServerList[clientID].Name, dao.ServerList[clientID].Host.IP, host.IP))
}
dao.ServerList[clientID].Host = &host