Commit f6a7c526 authored by 李科

ci: 同步prometheus告警规则

parent b4a22d18
...@@ -36,8 +36,8 @@ type RulesAlertRange struct { ...@@ -36,8 +36,8 @@ type RulesAlertRange struct {
} }
type AlertCondition struct { type AlertCondition struct {
ThresholdsMin int `json:"thresholds_min" form:"thresholds_min" binding:"required,lt=ThresholdsMax"`
ThresholdsMax int `json:"thresholds_max" form:"thresholds_max" binding:"required"` ThresholdsMax int `json:"thresholds_max" form:"thresholds_max" binding:"required"`
ThresholdsMin int `json:"thresholds_min" form:"thresholds_min" binding:"required"`
RiskLevel int `json:"risk_level" form:"risk_level" binding:"required,oneof=1 2 3 4"` RiskLevel int `json:"risk_level" form:"risk_level" binding:"required,oneof=1 2 3 4"`
} }
......
...@@ -6,6 +6,7 @@ import ( ...@@ -6,6 +6,7 @@ import (
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/resp" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/resp"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/router/middleware/header" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/router/middleware/header"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/service" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/service"
"sort"
) )
// AddAlertRules 新增任务 // AddAlertRules 新增任务
...@@ -16,6 +17,25 @@ func AddAlertRules(c *gin.Context) { ...@@ -16,6 +17,25 @@ func AddAlertRules(c *gin.Context) {
return return
} }
/*
[
{
"thresholds_min": 12,
"thresholds_max": 66,
"risk_level": 1
},
{
"thresholds_min": 66,
"thresholds_max": 80,
"risk_level": 2
}
]
*/
// TODO 阈值范围判断,阈值上下限是否存在交叉
sort.SliceStable(req.AlertCondition, func(i, j int) bool {
return req.AlertCondition[i].ThresholdsMin < req.AlertCondition[j].ThresholdsMin
})
svc := service.AlertRulesSvc{User: header.GetUser(c)} svc := service.AlertRulesSvc{User: header.GetUser(c)}
err := svc.Add(req) err := svc.Add(req)
if err != nil { if err != nil {
......
...@@ -124,11 +124,24 @@ var OpTypeIntMap = map[OpType]int{ ...@@ -124,11 +124,24 @@ var OpTypeIntMap = map[OpType]int{
} }
// RiskLevel 风险等级 // RiskLevel 风险等级
type RiskLevel int
const ( const (
RiskLevelLow RiskLevel = iota + 1 // 1:低风险 RiskLevelLow = iota + 1 // 1:低风险
RiskLevelModerate // 2:一般风险 RiskLevelModerate // 2:一般风险
RiskLevelHigh // 3:较大风险 RiskLevelHigh // 3:较大风险
RiskLevelCritical // 4:重大风险 RiskLevelCritical // 4:重大风险
) )
// RiskLeveText maps a risk-level code to its Chinese display name.
//
// Codes that match none of the RiskLevel* constants yield an empty string.
func RiskLeveText(code int) string {
	var text string
	switch {
	case code == RiskLevelLow:
		text = "低风险"
	case code == RiskLevelModerate:
		text = "一般风险"
	case code == RiskLevelHigh:
		text = "较大风险"
	case code == RiskLevelCritical:
		text = "重大风险"
	}
	// text is still the zero value "" when no case matched.
	return text
}
...@@ -39,10 +39,20 @@ func (a *AlertRulesSvc) Add(req request.AddAlertRules) error { ...@@ -39,10 +39,20 @@ func (a *AlertRulesSvc) Add(req request.AddAlertRules) error {
data.NotifyRecipients = util.ConvertToString(req.NotifyRecipients) data.NotifyRecipients = util.ConvertToString(req.NotifyRecipients)
switch req.DetectionType { switch req.DetectionType {
case 1: case 1:
// TODO 暂时不能做事务,需要先插入数据,再进行查询
_, err = db.Insert(&data) _, err = db.Insert(&data)
if err != nil { if err != nil {
return err return err
} }
var item response.AlertRulesItem
item, err = a.GetDataById(request.DetailAlertRules{Id: data.Id})
// TODO 1.插入数据到 prometheus.yml --> rule_files
prSvc := PrometheusRuleSvc{User: a.User}
prSvc.Create(item)
_ = item
case 2: // 自定义 case 2: // 自定义
_, err = db.Transaction(func(session *xorm.Session) (interface{}, error) { _, err = db.Transaction(func(session *xorm.Session) (interface{}, error) {
// 添加自定义分类 // 添加自定义分类
......
...@@ -14,10 +14,29 @@ var ( ...@@ -14,10 +14,29 @@ var (
PrometheusRuleGroup = "monitoring.beagle.io" // kubectl api-resources | grep -i prome PrometheusRuleGroup = "monitoring.beagle.io" // kubectl api-resources | grep -i prome
PrometheusRuleVersion = "v1" PrometheusRuleVersion = "v1"
PrometheusRuleKind = "PrometheusRule" PrometheusRuleKind = "PrometheusRule"
Namespace = "beagle-monitoring"
PrometheusRuleApiVersion = PrometheusRuleGroup + "/" + PrometheusRuleVersion PrometheusRuleApiVersion = PrometheusRuleGroup + "/" + PrometheusRuleVersion
PrometheusRuleName = strings.ToLower(PrometheusRuleKind) + "s." + PrometheusRuleGroup PrometheusRuleName = strings.ToLower(PrometheusRuleKind) + "s." + PrometheusRuleGroup
PrometheusRuleNamePrefix = "beagle-prometheus-so-operation-api-rules" // beagle-monitoring beagle-prometheus-prometheus-operator 43d
) )
// AlertDefLabels is the default label set assigned to the Labels field of the
// PrometheusRule objects built by this project.
// NOTE(review): presumably these are the labels the Prometheus operator uses
// to select rule objects — confirm against the cluster's ruleSelector.
var AlertDefLabels = map[string]string{
"app": "prometheus",
"app.bd-apaas.com/cluster-component": "monitoring",
"prometheus-operator": "monitoring",
"release": "beagle-prometheus",
}
// GetPrometheusRuleName returns the name of the PrometheusRule object that
// belongs to an alert rule: PrometheusRuleNamePrefix and alertRulesId joined
// by a hyphen.
func GetPrometheusRuleName(alertRulesId string) string {
	// All operands are strings, so fmt.Sprint concatenates them without
	// inserting any spaces.
	name := fmt.Sprint(PrometheusRuleNamePrefix, "-", alertRulesId)
	return name
}
// GetPrometheusRuleGroupName returns the rule-group name in the form
// "<alertName>-group-<groupInterval>".
func GetPrometheusRuleGroupName(alertName string, groupInterval string) string {
	const infix = "-group-"
	// All operands are strings, so fmt.Sprint concatenates them without
	// inserting any spaces.
	return fmt.Sprint(alertName, infix, groupInterval)
}
type PrometheusRule struct { type PrometheusRule struct {
Header map[string]string Header map[string]string
} }
......
...@@ -2,7 +2,7 @@ package k8s ...@@ -2,7 +2,7 @@ package k8s
import ( import (
"fmt" "fmt"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/spf13/cast" "github.com/spf13/cast"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant"
...@@ -13,14 +13,7 @@ import ( ...@@ -13,14 +13,7 @@ import (
) )
var svc PrometheusRule var svc PrometheusRule
var obj v1.PrometheusRule var pr monitoringv1.PrometheusRule
var AlertDefLabels = map[string]string{
"app": "prometheus",
"app.bd-apaas.com/cluster-component": "monitoring",
"prometheus-operator": "monitoring",
"release": "beagle-prometheus",
}
func init() { func init() {
conf.Options = &conf.Config{} conf.Options = &conf.Config{}
...@@ -29,43 +22,66 @@ func init() { ...@@ -29,43 +22,66 @@ func init() {
header := make(map[string]string) header := make(map[string]string)
header["Authorization"] = "Bearer " + conf.Options.KubernetesToken header["Authorization"] = "Bearer " + conf.Options.KubernetesToken
svc = PrometheusRule{Header: header} svc = PrometheusRule{Header: header}
obj = v1.PrometheusRule{} pr = monitoringv1.PrometheusRule{}
obj.Name = "beagle-prometheus-operation-api-rules-101"
obj.Namespace = "beagle-monitoring" alertRulesId := "83343ef6-4a99-47bd-abb4-bcff52feb2ec" // 预警规则id
obj.Labels = AlertDefLabels prometheusRuleName := GetPrometheusRuleName(alertRulesId)
pr.Name = prometheusRuleName
pr.Namespace = "beagle-monitoring"
pr.Labels = AlertDefLabels
} }
func TestRule(t *testing.T) { func TestRule(t *testing.T) {
group := v1.RuleGroup{} group := monitoringv1.RuleGroup{}
group.Name = "demo-group-1m" groupInterval := monitoringv1.Duration("5m")
rule := v1.Rule{} group.Interval = &groupInterval
rule.Alert = "kubernetes启动状态" rule1 := monitoringv1.Rule{}
rule.Annotations = make(map[string]string) rule1.Alert = "kubernetes启动状态"
rule.Annotations["value"] = "{{ $value }}" group.Name = GetPrometheusRuleGroupName(rule1.Alert, string(*group.Interval))
rule.Annotations["message"] = "启动了" rule1.Annotations = make(map[string]string)
rule.Expr = intstr.FromString(`up{endpoint="https", instance="192.168.1.244:6443", job="apiserver", namespace="default", service="kubernetes"} == 1`) rule1.Annotations["value"] = "{{ $value }}"
ruleFor := v1.Duration("1m") rule1.Annotations["message"] = "启动了"
rule.For = &ruleFor rule1.Expr = intstr.FromString(`up{endpoint="https", instance="192.168.1.244:6443", job="apiserver", namespace="default", service="kubernetes"} == 1`)
rule.Labels = make(map[string]string) ruleFor := monitoringv1.Duration("1m")
rule.Labels["severity"] = "warning" rule1.For = &ruleFor
rule.Labels["severity_name"] = "一般风险" rule1.Labels = map[string]string{
rule.Labels["risk_level"] = cast.ToString(constant.RiskLevelLow) "severity": "warning",
rule.Labels["source"] = "so-operation-api" "risk_level": cast.ToString(constant.RiskLevelLow),
rule.Labels["alert_id"] = "101" "risk_level_name": constant.RiskLeveText(constant.RiskLevelLow),
group.Rules = append(group.Rules, rule) "source": "so-operation-api",
obj.Spec.Groups = append(obj.Spec.Groups, group) "alert_rules_id": "83343ef6-4a99-47bd-abb4-bcff52feb2ec",
err := svc.Create(&obj) }
rule2 := monitoringv1.Rule{}
rule2.Alert = "kubernetes启动状态"
rule2.Annotations = make(map[string]string)
rule2.Annotations["value"] = "{{ $value }}"
rule2.Annotations["message"] = "启动了"
rule2.Expr = intstr.FromString(`up{endpoint="https", instance="192.168.1.244:6443", job="apiserver", namespace="default", service="kubernetes"} == 0`)
ruleFor2 := monitoringv1.Duration("1m")
rule2.For = &ruleFor2
rule2.Labels = map[string]string{
"severity": "warning",
"risk_level": cast.ToString(constant.RiskLevelModerate),
"risk_level_name": constant.RiskLeveText(constant.RiskLevelModerate),
"source": "so-operation-api",
"alert_rules_id": "83343ef6-4a99-47bd-abb4-bcff52feb2ec",
}
group.Rules = append(group.Rules, rule1, rule2)
pr.Spec.Groups = append(pr.Spec.Groups, group)
err := svc.Create(&pr)
if err != nil { if err != nil {
fmt.Println("添加失败" + err.Error()) log.Println("添加失败" + err.Error())
} else { } else {
fmt.Println("添加成功") log.Println("添加成功")
} }
} }
func TestGet(t *testing.T) { func TestGet(t *testing.T) {
// 获取对象 // 获取对象
ruleObj, err := svc.Get(obj.Namespace, obj.Name) ruleObj, err := svc.Get(pr.Namespace, pr.Name)
if err != nil { if err != nil {
fmt.Println("获取失败" + err.Error()) fmt.Println("获取失败" + err.Error())
} else { } else {
...@@ -75,7 +91,7 @@ func TestGet(t *testing.T) { ...@@ -75,7 +91,7 @@ func TestGet(t *testing.T) {
} }
func TestUpdate(t *testing.T) { func TestUpdate(t *testing.T) {
ruleObj, err := svc.Get(obj.Namespace, obj.Name) ruleObj, err := svc.Get(pr.Namespace, pr.Name)
if err != nil { if err != nil {
fmt.Println("获取失败" + err.Error()) fmt.Println("获取失败" + err.Error())
} else { } else {
...@@ -85,7 +101,7 @@ func TestUpdate(t *testing.T) { ...@@ -85,7 +101,7 @@ func TestUpdate(t *testing.T) {
ruleObj.Name = "galileo-101" ruleObj.Name = "galileo-101"
ruleObj.Spec.Groups[0].Rules[0].Alert = "请求次数大于10" ruleObj.Spec.Groups[0].Rules[0].Alert = "请求次数大于10"
ruleObj.Spec.Groups[0].Name = "2分钟组" ruleObj.Spec.Groups[0].Name = "2分钟组"
ruleFor := v1.Duration("2m") ruleFor := monitoringv1.Duration("2m")
ruleObj.Spec.Groups[0].Rules[0].For = &ruleFor ruleObj.Spec.Groups[0].Rules[0].For = &ruleFor
ruleObj.Spec.Groups[0].Rules[0].Expr = intstr.FromString(`up{endpoint="https", instance="192.168.1.244:6443", job="apiserver", namespace="default", service="kubernetes"} == 0`) ruleObj.Spec.Groups[0].Rules[0].Expr = intstr.FromString(`up{endpoint="https", instance="192.168.1.244:6443", job="apiserver", namespace="default", service="kubernetes"} == 0`)
err = svc.Update(ruleObj) err = svc.Update(ruleObj)
...@@ -99,7 +115,7 @@ func TestUpdate(t *testing.T) { ...@@ -99,7 +115,7 @@ func TestUpdate(t *testing.T) {
func TestDelete(t *testing.T) { func TestDelete(t *testing.T) {
// 删除 // 删除
time.Sleep(time.Second * 10) time.Sleep(time.Second * 10)
err := svc.Delete(obj.Namespace, obj.Name) err := svc.Delete(pr.Namespace, pr.Name)
if err != nil { if err != nil {
fmt.Println("删除失败" + err.Error()) fmt.Println("删除失败" + err.Error())
} else { } else {
......
package service
import (
"fmt"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"github.com/spf13/cast"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/entity"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/response"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant"
"gitlab.wodcloud.com/smart-operation/so-operation-api/src/service/k8s"
"k8s.io/apimachinery/pkg/util/intstr"
"strings"
)
// PrometheusRuleSvc turns alert-rule records into Prometheus rule objects.
//
// User is supplied by the caller when the service is constructed; the Create
// method visible in this file does not read it.
type PrometheusRuleSvc struct {
User entity.SystemUserInfo
}
// Create assembles the PrometheusRule object for the given alert rule.
//
// The object is named after data.Id and holds a single rule group whose
// interval is data.CheckPeriod minutes. Each entry of data.AlertCondition
// becomes one alerting rule with the expression
// "<ThresholdsMin> <= (<expr>) <= <ThresholdsMax>", where <expr> is data.Expr
// with every data.AlertRange variable replaced by its label matcher.
//
// NOTE(review): the assembled object is not submitted to the cluster yet. The
// k8s client is constructed but left unused, and this method has no way to
// report a failure to its caller; wire both up together.
func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) {
	header := map[string]string{
		"Authorization": "Bearer " + conf.Options.KubernetesToken,
	}
	svc := k8s.PrometheusRule{Header: header}

	pr := monitoringv1.PrometheusRule{}
	pr.Name = k8s.GetPrometheusRuleName(data.Id)
	pr.Namespace = k8s.Namespace
	// Copy the defaults so that later changes to this object's labels cannot
	// leak into the package-level map shared by every rule object.
	pr.Labels = make(map[string]string, len(k8s.AlertDefLabels))
	for key, val := range k8s.AlertDefLabels {
		pr.Labels[key] = val
	}

	groupInterval := monitoringv1.Duration(fmt.Sprintf("%dm", data.CheckPeriod))
	ruleFor := monitoringv1.Duration(fmt.Sprintf("%d%s", data.Duration, data.DurationUnit))

	group := monitoringv1.RuleGroup{}
	group.Interval = &groupInterval
	group.Name = k8s.GetPrometheusRuleGroupName(data.MetricConfigId, string(groupInterval))

	// Resolve the template variables of the expression. Sample AlertRange entry:
	// [{"variable_name":"$pod$","metric_name":"http_requests_total","metric_label":"pod","chinese_name":"demoString","is_required":true,"is_linked":true,"value":"LeaseGrant","compare":"="}]
	// which turns "$pod$" into "pod=LeaseGrant".
	// NOTE(review): PromQL label matchers normally need a quoted value
	// (pod="LeaseGrant"); confirm whether the value or the template already
	// supplies the quotes.
	expr := data.Expr
	for _, v := range data.AlertRange {
		if v.VariableName == "" {
			// strings.ReplaceAll with an empty pattern would insert the
			// matcher between every rune of the expression.
			continue
		}
		matcher := fmt.Sprintf("%s%s%s", v.MetricLabel, v.Compare, v.Value)
		expr = strings.ReplaceAll(expr, v.VariableName, matcher)
	}

	// One alerting rule per threshold band. Sample AlertCondition entry:
	// [{"thresholds_max":100,"thresholds_min":0,"risk_level":4}]
	for _, cond := range data.AlertCondition {
		// The expression is parenthesised so that a lower-precedence operator
		// inside it (and / or / unless) cannot regroup the comparison.
		bounded := fmt.Sprintf("%d <= (%s) <= %d", cond.ThresholdsMin, expr, cond.ThresholdsMax)
		rule := monitoringv1.Rule{
			Alert: data.MetricConfigName,
			For:   &ruleFor,
			Labels: map[string]string{
				"severity":        "warning",
				"risk_level":      cast.ToString(cond.RiskLevel),
				"risk_level_name": constant.RiskLeveText(cond.RiskLevel),
				"source":          "so-operation-api",
				// The alert rule's own id, the same value the object name is
				// derived from (previously the metric config id was used).
				"alert_rules_id": data.Id,
			},
			Annotations: map[string]string{
				"value":       "{{ $value }}",
				"summary":     "概述",
				"description": "描述",
			},
			Expr: intstr.FromString(bounded),
		}
		group.Rules = append(group.Rules, rule)
	}

	// Attach the group so the object is complete once it gets submitted.
	pr.Spec.Groups = append(pr.Spec.Groups, group)

	// TODO: submit pr through svc once error reporting is in place.
	_ = svc
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment