From f90898aa10625b54bdaa603c9d55fbf0e2bc3e3e Mon Sep 17 00:00:00 2001 From: like Date: Wed, 18 Oct 2023 18:12:56 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BB=A3=E7=A0=81=E8=BF=81=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 2 +- src/bean/vo/request/alert_class.go | 1 + src/bean/vo/request/alert_rules.go | 2 +- src/bean/vo/request/alert_webhook.go | 2 +- src/common/conf/options.go | 13 ++- src/controller/alert_webhook.go | 10 ++- src/main.go | 43 ++++++---- src/router/alertwebhookrouter.go.go | 3 +- src/service/alert.go | 122 ++++++++++++++++++++++++--- src/service/alert_class.go | 64 +++++++++++++- src/service/alert_overview.go | 11 +-- src/service/alert_rules.go | 44 ++++++++-- src/service/alert_webhook.go | 32 +++++-- src/service/k8s/prometheusrule.go | 54 +++++++----- src/service/prometheus.go | 12 +-- src/service/prometheusrule.go | 103 +++++++++++++++++----- src/util/http.go | 7 +- 17 files changed, 414 insertions(+), 111 deletions(-) diff --git a/go.mod b/go.mod index ee2fc14..7ef71f9 100644 --- a/go.mod +++ b/go.mod @@ -32,6 +32,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/tealeg/xlsx v1.0.5 github.com/thoas/go-funk v0.9.3 + github.com/tidwall/gjson v1.16.0 github.com/valyala/fasthttp v1.47.0 github.com/wanghuiyt/ding v0.0.2 go.uber.org/zap v1.24.0 @@ -108,7 +109,6 @@ require ( github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect github.com/sirupsen/logrus v1.9.2 // indirect github.com/syndtr/goleveldb v1.0.0 // indirect - github.com/tidwall/gjson v1.16.0 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect diff --git a/src/bean/vo/request/alert_class.go b/src/bean/vo/request/alert_class.go index 6c889f0..cb65403 100644 --- a/src/bean/vo/request/alert_class.go +++ b/src/bean/vo/request/alert_class.go @@ -9,6 +9,7 @@ type AddAlertClass struct { type UpdateAlertClass struct { ClassId int `json:"class_id" form:"class_id" binding:"required"` + ParentId int `json:"parent_id" form:"parent_id"` ClassName string `json:"class_name" form:"class_name" binding:"required"` } diff --git a/src/bean/vo/request/alert_rules.go b/src/bean/vo/request/alert_rules.go index 5ae8742..5045195 100644 --- a/src/bean/vo/request/alert_rules.go +++ b/src/bean/vo/request/alert_rules.go @@ -33,7 +33,7 @@ type UpdateAlertRules struct { ClassId int `json:"class_id" form:"class_id" binding:"required_if=DetectionType 1"` // 预警对象id(级联:预警分类/预警对象) ClassParentName string `json:"class_parent_name" form:"class_parent_name" binding:"required_if=DetectionType 2"` // 预警分类名称 ClassName string `json:"class_name" form:"class_name" binding:"required_if=DetectionType 2"` // 预警对象名称 - MetricConfigId string `json:"metric_config_id" form:"'metric_config_id'" binding:"required_if=DetectionType 1"` // 预警指标id + MetricConfigId string `json:"metric_config_id" form:"metric_config_id" binding:"required_if=DetectionType 1"` // 预警指标id MetricConfigName string `json:"metric_config_name" form:"metric_config_name" binding:"required_if=DetectionType 2"` // 预警指标名称(映射entity.MetricConfig.MetricName) Expr string `json:"expr" form:"expr" binding:"required_if=DetectionType 2"` // 指标表达式(PromQL语句) AlertCondition []entity.AlertCondition `json:"alert_condition" form:"alert_condition" binding:"required"` // 预警规则 字典值 diff --git a/src/bean/vo/request/alert_webhook.go b/src/bean/vo/request/alert_webhook.go index 644ef17..f01b4c2 100644 --- a/src/bean/vo/request/alert_webhook.go +++ b/src/bean/vo/request/alert_webhook.go @@ -10,7 +10,7 @@ type AddAlertWebhook struct { ClassId int `json:"class_id" form:"class_id" binding:"required_if=DetectionType 1"` // 预警对象id(级联:预警分类/预警对象) ClassParentName string `json:"class_parent_name" form:"class_parent_name" binding:"required_if=DetectionType 2"` // 预警分类名称 ClassName string `json:"class_name" form:"class_name" binding:"required_if=DetectionType 2"` // 预警对象名称 - MetricConfigId string `json:"metric_config_id" form:"'metric_config_id'" binding:"required_if=DetectionType 1"` // 预警指标id + MetricConfigId string `json:"metric_config_id" form:"metric_config_id" binding:"required_if=DetectionType 1"` // 预警指标id MetricConfigName string `json:"metric_config_name" form:"metric_config_name" binding:"required_if=DetectionType 2"` // 预警指标名称(映射entity.MetricConfig.MetricName) Expr string `json:"expr" form:"expr" binding:"required_if=DetectionType 2"` // 指标表达式(PromQL语句) AlertCondition []entity.AlertCondition `json:"alert_condition" form:"alert_condition" binding:"required,dive"` // 预警规则 字典值 diff --git a/src/common/conf/options.go b/src/common/conf/options.go index 78cf905..3bf9e7f 100644 --- a/src/common/conf/options.go +++ b/src/common/conf/options.go @@ -31,7 +31,7 @@ type Config struct { MinioSecretKey string MinioBucket string //TempDirPrefix string - PrometheusHost string + AccessRuleModeKey string LocationUrl string LocationKey string @@ -49,9 +49,20 @@ type Config struct { AweRestURL string KubernetesToken string + OpenSearchIndex string OpenSearchAddresses string OpenSearchUserName string OpenSearchPassword string + + MonitorApiVersion string + MonitorMatchNs string + MonitorMatchLabelsStr string + MonitorMatchLabels map[string]interface{} + + Namespace string + PrometheusHost string + PrometheusRuleLabel string + PrometheusRuleNamePrefix string } const ( diff --git a/src/controller/alert_webhook.go b/src/controller/alert_webhook.go index 6b7de1c..2f3ae2e 100644 --- a/src/controller/alert_webhook.go +++ b/src/controller/alert_webhook.go @@ -3,10 +3,10 @@ package controller import ( "github.com/gin-gonic/gin" "github.com/prometheus/alertmanager/notify/webhook" + "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/entity" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/client" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/resp" - "gitlab.wodcloud.com/smart-operation/so-operation-api/src/router/middleware/header" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/service" "go.uber.org/zap" ) @@ -14,12 +14,18 @@ import ( // AlertWebhook 回调 func AlertWebhook(c *gin.Context) { var req webhook.Message + conf.Logger.Info("------>webhook.start------>") if err := c.ShouldBind(&req); err != nil { SendJsonResponse(c, resp.InvalidParam.TranslateError(err), nil) return } conf.Logger.Info("------>webhook.Message------>", zap.Any("message", req)) - svc := service.AlertWebhookSvc{User: header.GetUser(c)} + svc := service.AlertWebhookSvc{User: entity.SystemUserInfo{ + Name: "prometheus", + SystemAccount: "prometheus", + OrganizationId: "", + State: 1, + }} db, err := client.GetDbClient() if err != nil { SendJsonResponse(c, resp.DbConnectError.WithError(err), nil) diff --git a/src/main.go b/src/main.go index 0ce4306..0462289 100644 --- a/src/main.go +++ b/src/main.go @@ -48,7 +48,7 @@ func main() { // 初始化OpenSearch的索引 err := service.CheckAndCreateIndex() if err != nil { - conf.Logger.Fatal("failed to init OpenSearch index.", zap.Error(err)) + conf.Logger.Error("failed to init OpenSearch index", zap.Error(err)) } //启动定时任务 @@ -70,17 +70,16 @@ func initConfig() { RedisURL: util.SetEnvStr("REDIS_URL", "localhost:7001"), RedisDB: 0, RedisTag: "bg", - LogDirPrefix: util.SetEnvStr("LOG_DIR_PREFIX", "/app/log"), // 日志目录 - LogDirName: util.SetEnvStr("LOG_NAME", "syslog"), // 日志名称 - LogSaveDays: util.SetEnvInt("LOG_SAVE_DAYS", 7), // 日志最大存储天数 - LogMode: util.SetEnvInt("LOG_MODE", 1), // 1.标准打印 2.输出文件 - ArgBool: util.SetEnvBool("ARG_BOOL", false), // 示例参数 - ArgInt: util.SetEnvInt("ARG_INT", 10), // 示例参数 - MinioServer: util.SetEnvStr("MINIO_SERVER", "https://cache.wodcloud.com"), // Minio 服务地址 - MinioAccessKey: util.SetEnvStr("MINIO_ACCESS_KEY", "beagleadmin"), // Minio Access Key - MinioSecretKey: util.SetEnvStr("MINIO_SECRET_KEY", "H76cPmwvH7vJ"), // Minio Secret - MinioBucket: util.SetEnvStr("MINIO_BUCKET", "so-operation"), // Minio Bucket - PrometheusHost: util.SetEnvStr("PROMETHEUS_HOST", "https://prometheus.wodcloud.com"), // Prometheus Host + LogDirPrefix: util.SetEnvStr("LOG_DIR_PREFIX", "/app/log"), // 日志目录 + LogDirName: util.SetEnvStr("LOG_NAME", "syslog"), // 日志名称 + LogSaveDays: util.SetEnvInt("LOG_SAVE_DAYS", 7), // 日志最大存储天数 + LogMode: util.SetEnvInt("LOG_MODE", 1), // 1.标准打印 2.输出文件 + ArgBool: util.SetEnvBool("ARG_BOOL", false), // 示例参数 + ArgInt: util.SetEnvInt("ARG_INT", 10), // 示例参数 + MinioServer: util.SetEnvStr("MINIO_SERVER", "https://cache.wodcloud.com"), // Minio 服务地址 + MinioAccessKey: util.SetEnvStr("MINIO_ACCESS_KEY", "beagleadmin"), // Minio Access Key + MinioSecretKey: util.SetEnvStr("MINIO_SECRET_KEY", "H76cPmwvH7vJ"), // Minio Secret + MinioBucket: util.SetEnvStr("MINIO_BUCKET", "so-operation"), // Minio Bucket AccessRuleModeKey: "accessRuleMode", LocationUrl: util.SetEnvStr("LOCATION_URL", "https://apis.map.qq.com/ws/location/v1/ip"), LocationKey: util.SetEnvStr("LOCATION_KEY", "QKFBZ-PGGWJ-VZQFF-FHPA7-QWT5H-YHF4T"), @@ -91,14 +90,26 @@ func initConfig() { SmsAccessKeyId: util.SetEnvStr("SMS_ACCESS_KEY", "LTAI4GBcVubRjzX7ABPcHnhB"), // 短信key SmsAccessSecret: util.SetEnvStr("SMS_ACCESS_SECRET", "dYE2dtABFOqYtK1ijcrits0yedHkw7"), // 短信secret SmsTemplateLogin: util.SetEnvStr("SMS_TEMPLATE_LOGIN", "SMS_212925130"), // 短信验证码模板 - SmsTemplateAlert: util.SetEnvStr("Sms_Template_Alert", "SMS_461975765"), // 预警短信模板 // 短信工单下发模板 + SmsTemplateAlert: util.SetEnvStr("SMS_TEMPLATE_ALERT", "SMS_461975765"), // 预警短信模板 // 短信工单下发模板 SmsWorkOrderTemplate: util.SetEnvStr("SMS_TEMPLATE_LOGIN", "SMS_462020767"), // 短信工单下发模板 SmsSignName: util.SetEnvStr("SMS_SIGN_NAME", "比格数据"), // 签名 AweRestURL: util.SetEnvStr("AWE_REST_URL", "http://awecloud-rest.beagle-system/awecloud/rest"), // awecloud-rest KubernetesToken: util.SetEnvStr("AWE_REST_K8S_TOKEN", "eyJhbGciOiJSUzI1NiIsImtpZCI6InJ1alJzNEVGamN5UC0wRU1rS1BKQ0JZVUtNNWpzR0t2bmlrSlJhY2Q3R00ifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJiZWFnbGUtc3lzdGVtIiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9zZWNyZXQubmFtZSI6InJvb3QiLCJrdWJlcm5ldGVzLmlvL3NlcnZpY2VhY2NvdW50L3NlcnZpY2UtYWNjb3VudC5uYW1lIjoicm9vdCIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50LnVpZCI6IjRlMDM0OTI3LTc0ZTMtNDQ5Yy1hN2RlLWExMGE3MjU1NGYyMCIsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDpiZWFnbGUtc3lzdGVtOnJvb3QifQ.YPLE_E2kIeo-YFQtKScBt5p4KhnniJF9n3iWN2i9UMYS06lIsq2-2wBrgON-YsJihWJupYyDQRiZ9h8bUWTrQzhnpsnuJ_aUclKyAw3QOT9rjvZhJp7qP--27dmdspSHncKtvIiprWE7UTUKzvF33WsMB0fSYFqYXOggNFMoT-fXmWwUXjgar3op0iOl3c3deJ_GeBzFyLSHEuGM7OVdjU8032aUmTen0Kji_P1yB4-O3Iqd0OdVs33BQy_tycjbxhQ8TDEpqrqhLnXjAwJCprLDEpFMx7ODZbjB9Wmuns8yJhaRDxTO47rTME7ZIAxjZ-zLR_QybtW97rlwnUTaNw"), - OpenSearchAddresses: util.SetEnvStr("Open_Search_Addresses", "https://so-opensearch.wodcloud.com"), // OpenSearch连接地址 - OpenSearchUserName: util.SetEnvStr("Open_Search_User_Name", ""), // OpenSearch用户名 - OpenSearchPassword: util.SetEnvStr("Open_Search_Password", ""), // OpenSearch密码 + + OpenSearchIndex: util.SetEnvStr("OPEN_SEARCH_INDEX", "so_alert"), + OpenSearchAddresses: util.SetEnvStr("OPEN_SEARCH_ADDRESSES", "https://so-opensearch.wodcloud.com"), // OpenSearch连接地址 + OpenSearchUserName: util.SetEnvStr("OPEN_SEARCH_USER_NAME", ""), // OpenSearch用户名 + OpenSearchPassword: util.SetEnvStr("OPEN_SEARCH_PASSWORD", ""), // OpenSearch密码 + + Namespace: util.SetEnvStr("NAMESPACE", "smart-manage"), //采集器部署命名空间 + PrometheusHost: util.SetEnvStr("PROMETHEUS_HOST", "https://prometheus.wodcloud.com"), // Prometheus Host + PrometheusRuleNamePrefix: util.SetEnvStr("PROMETHEUS_RULE_NAME_PREFIX", "beagle-prometheus-so-operation-api-rules"), // prometheusrules资源名前缀 + PrometheusRuleLabel: util.SetEnvStr("PROMETHEUS_RULE_LABEL", `{"source":"so-operation-api","severity":"warning"}`), // prometheusrules标签,用于区分项目来源 + + MonitorApiVersion: util.SetEnvStr("MONITOR_API_VERSION", "monitoring.beagle.io/v1"), //Prometheus Operator 资源版本 + MonitorMatchNs: util.SetEnvStr("MONITOR_MATCH_NS", "beagle-monitoring"), //Monitor匹配 命名空间 + MonitorMatchLabelsStr: util.SetEnvStr("MONITOR_MATCH_LABELS", `{"prometheus-operator":"monitoring"}`), //Monitor匹配 标签JSON + } } diff --git a/src/router/alertwebhookrouter.go.go b/src/router/alertwebhookrouter.go.go index 09e380a..bb884f7 100644 --- a/src/router/alertwebhookrouter.go.go +++ b/src/router/alertwebhookrouter.go.go @@ -5,12 +5,11 @@ import ( "github.com/gin-gonic/gin" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/controller" - "gitlab.wodcloud.com/smart-operation/so-operation-api/src/router/middleware/header" ) // InitAlertWebhookRouter 初始化预警回调路由 func InitAlertWebhookRouter(e *gin.Engine) { - group := e.Group(fmt.Sprintf("%s/alert_webhook", conf.Options.Prefix), header.SetContext) + group := e.Group(fmt.Sprintf("%s/alert_webhook", conf.Options.Prefix)) { group.POST("", controller.AlertWebhook) } diff --git a/src/service/alert.go b/src/service/alert.go index 9531e80..f41cb41 100644 --- a/src/service/alert.go +++ b/src/service/alert.go @@ -32,8 +32,7 @@ type AlertSvc struct { } var ( - OpenSearchIndex = "so_alert" - Mapping = strings.NewReader(`{ + Mapping = strings.NewReader(`{ "settings": { "number_of_shards": 1, "number_of_replicas": 0, @@ -284,7 +283,7 @@ var ( ) func CheckAndCreateIndex() (err error) { - exist, err := checkIndexExists(OpenSearchIndex) + exist, err := checkIndexExists(conf.Options.OpenSearchIndex) if err != nil { return } @@ -293,7 +292,7 @@ func CheckAndCreateIndex() (err error) { return nil } - err = CreateIndex(OpenSearchIndex) + err = CreateIndex(conf.Options.OpenSearchIndex) if err != nil { return err } @@ -346,7 +345,7 @@ func (a *AlertSvc) DeleteIndex() error { return err } res := opensearchapi.IndicesDeleteRequest{ - Index: []string{OpenSearchIndex}, + Index: []string{conf.Options.OpenSearchIndex}, } do, err := res.Do(context.Background(), cli) if err != nil { @@ -427,8 +426,11 @@ func (a *AlertSvc) DocSearch(req request.ListAlert) (resp response.AlertList, er // 请输入预警点/分类/指标 if req.Keyword != "" { subBoolQuery := elastic.NewBoolQuery() - subBoolQuery.Should(elastic.NewMultiMatchQuery(req.Keyword, "alert_point", "class_parent_name", "class_name", "metric_config_name")) - //subBoolQuery.Should(elastic.NewMatchQuery("class_name", req.Keyword)) + //subBoolQuery.Should(elastic.NewMultiMatchQuery(req.Keyword, "alert_point", "class_parent_name", "class_name", "metric_config_name")) + subBoolQuery.Should(elastic.NewPrefixQuery("alert_point.keyword", req.Keyword)) + subBoolQuery.Should(elastic.NewPrefixQuery("class_parent_name.keyword", req.Keyword)) + subBoolQuery.Should(elastic.NewPrefixQuery("class_name.keyword", req.Keyword)) + subBoolQuery.Should(elastic.NewPrefixQuery("metric_config_name.keyword", req.Keyword)) boolQuery.Must(subBoolQuery) } @@ -467,7 +469,7 @@ func (a *AlertSvc) DocSearch(req request.ListAlert) (resp response.AlertList, er "size": %d}`, string(b), req.GetPageSize()*(req.GetPage()-1), req.GetPageSize())) res := opensearchapi.SearchRequest{ - Index: []string{OpenSearchIndex}, + Index: []string{conf.Options.OpenSearchIndex}, Body: content, Sort: []string{"id"}, } @@ -493,6 +495,38 @@ func (a *AlertSvc) DocSearch(req request.ListAlert) (resp response.AlertList, er for _, hit := range sources.Hits.Hits { resp.List = append(resp.List, hit.Source) } + + // 推送人数:推送记录中去重的人数 + // 推送次数:发起推送的总次数(钉钉、短信、工单算单次) + for i := 0; i < len(resp.List); i++ { + var userSet []string + var pushCountSet []string + + mergedData := make(map[string]entity.PushRecord) + for _, record := range resp.List[i].PushRecords { + userSet = append(userSet, record.SystemAccount) + pushCountSet = append(pushCountSet, record.PushTime.String()) + key := record.AlertRulesId + record.PushTime.String() + // 映射中已存在相同键的数据,则合并user_id + if existingRecord, found := mergedData[key]; found { + existingRecord.SystemAccount += ", " + record.SystemAccount + existingRecord.UserName += ", " + record.UserName + mergedData[key] = existingRecord + } else { + mergedData[key] = record + } + } + + var mergedRecords []entity.PushRecord + for _, v := range mergedData { + mergedRecords = append(mergedRecords, v) + } + + resp.List[i].PushRecords = mergedRecords + resp.List[i].NotificationCount = len(funk.UniqString(userSet)) + resp.List[i].PushCount = len(funk.UniqString(pushCountSet)) + } + resp.TotalCount = int64(sources.Hits.Total.Value) return } @@ -524,7 +558,7 @@ func (a *AlertSvc) IndexDocExist(req request.ExistAlert) (exist bool, err error) "size": %d}`, string(b), 0, 1)) res := opensearchapi.SearchRequest{ - Index: []string{OpenSearchIndex}, + Index: []string{conf.Options.OpenSearchIndex}, Body: content, Sort: []string{"id"}, } @@ -567,7 +601,7 @@ func (a *AlertSvc) CatCount(indexName ...string) (count int) { if len(indexName) > 0 && indexName[0] != "" { index = indexName[0] + "*" } else { - index = OpenSearchIndex + "*" + index = conf.Options.OpenSearchIndex + "*" } res := opensearchapi.CatCountRequest{ @@ -597,6 +631,66 @@ func (a *AlertSvc) CatCount(indexName ...string) (count int) { return } +func (a *AlertSvc) GetIndexMaxID(indexName ...string) (maxId int, err error) { + var ( + index string + ) + + cli, err := client.GetOpenSearch() + if err != nil { + return 0, err + } + + if len(indexName) > 0 && indexName[0] != "" { + index = indexName[0] + } else { + index = conf.Options.OpenSearchIndex + } + + // 构建 aggregation 查询 + aggregationQuery := ` + { + "size": 0, + "aggs": { + "max_id": { + "max": { + "field": "id" + } + } + } + } + ` + + res := opensearchapi.SearchRequest{ + Index: []string{index}, + Body: strings.NewReader(aggregationQuery), + } + + do, err := res.Do(context.Background(), cli) + if err != nil { + return 0, err + } + defer do.Body.Close() + if do.StatusCode < http.StatusOK && do.StatusCode > http.StatusIMUsed { + return 0, errors.New(do.String()) + } + + // 解析聚合结果 + var responseMap map[string]interface{} + err = json.NewDecoder(do.Body).Decode(&responseMap) + if err != nil { + return 0, err + } + + // 提取最大值 + aggregations := responseMap["aggregations"].(map[string]interface{}) + maxIDAgg := aggregations["max_id"].(map[string]interface{}) + maxIDValue := maxIDAgg["value"] + maxId = int(maxIDValue.(float64)) + + return maxId, nil +} + func (a *AlertSvc) DocCreate(req request.CreateAlert) (err error) { var ( sources response.OpenSearchSource @@ -616,7 +710,7 @@ func (a *AlertSvc) DocCreate(req request.CreateAlert) (err error) { content := strings.NewReader(fmt.Sprintf(`%s`, docStr)) res := opensearchapi.CreateRequest{ - Index: OpenSearchIndex, + Index: conf.Options.OpenSearchIndex, DocumentID: cast.ToString(req.Id), Body: content, } @@ -696,7 +790,7 @@ func (a *AlertSvc) DocUpdate(req request.UpdateAlert) (err error) { }`, docStr)) res := opensearchapi.UpdateRequest{ - Index: OpenSearchIndex, + Index: conf.Options.OpenSearchIndex, DocumentID: cast.ToString(req.Id), Body: content, Source: []string{"true"}, @@ -730,7 +824,7 @@ func (a *AlertSvc) Create() error { return err } res := opensearchapi.IndicesCreateRequest{ - Index: OpenSearchIndex, + Index: conf.Options.OpenSearchIndex, Body: Mapping, } do, err := res.Do(context.Background(), cli) @@ -926,7 +1020,7 @@ func (a *AlertSvc) DisposeAlert(req request.DisposeAlert) (err error) { }`, docStr)) res := opensearchapi.UpdateRequest{ - Index: OpenSearchIndex, + Index: conf.Options.OpenSearchIndex, DocumentID: cast.ToString(req.Id), Body: content, Source: []string{"true"}, diff --git a/src/service/alert_class.go b/src/service/alert_class.go index 0e587fb..b1b6b18 100644 --- a/src/service/alert_class.go +++ b/src/service/alert_class.go @@ -52,7 +52,7 @@ func (a *AlertClassSvc) Update(session *xorm.Session, req request.UpdateAlertCla UpdatedAt: now, } _ = copier.Copy(&data, &req) - _, err := session.Cols("class_name", "updated_by", "updated_at").ID(data.ClassId).Update(&data) + _, err := session.Cols("parent_id", "class_name", "updated_by", "updated_at").ID(data.ClassId).Update(&data) if err != nil { return err } @@ -150,6 +150,9 @@ func (a *AlertClassSvc) List(req request.ListAlertClass) (resp response.AlertCla if req.ClassName != "" { session.Where("class_name LIKE ?", "%"+req.ClassName+"%") } + if req.Page == -1 { + req.PageSize = 100000 + } resp.TotalCount, err = session.Limit(req.GetPageSize(), (req.GetPage()-1)*req.GetPageSize()). OrderBy("sort_order").FindAndCount(&resp.List) return @@ -234,8 +237,61 @@ func (a *AlertClassSvc) SortOrderMax(parentId int) (max int, err error) { func (a *AlertClassSvc) Delete(ids []int) (err error) { db, err := client.GetDbClient() if err != nil { - return + return err } - _, err = db.NewSession().In("class_id", ids).Delete(&entity.AlertClass{}) - return + + var classes []entity.AlertClass + err = db.In("class_id", ids).Find(&classes) + + // 检查是否所有指定的 ids 都存在于数据库中 + if len(ids) > len(classes) { + return errors.New("部分数据不存在") + } + + var notExist []int + idSet := make(map[int]bool) + for _, v := range classes { + idSet[v.ClassId] = true + } + for _, id := range ids { + if !idSet[id] { + notExist = append(notExist, id) + } + } + + if len(notExist) > 0 { + return errors.New(fmt.Sprintf("指标分类或对象id为%v的数据未查询到", notExist)) + } + + for _, v := range classes { + if v.ParentId == 0 { + // 如果是父级数据,判断是否存在子集 + var num int + has, err := db.Table(new(entity.AlertClass)).Select("count(*)").Where("parent_id = ?", v.ClassId).Get(&num) + if err != nil { + return err + } + if has && num > 0 { + return errors.New("当前分类存在指标对象子集数据,不允许删除") + } + } else { + // 如果为子集数据,判断是否存在指标配置关联 + var configCount int + has, err := db.Table(new(entity.MetricConfig)).Select("count(*)").Where("class_id = ?", v.ClassId).Get(&configCount) + if err != nil { + return err + } + if has && configCount > 0 { + return errors.New("指标对象存在指标配置关联,不允许删除") + } + } + + // 删除数据 + _, err = db.ID(v.ClassId).Delete(v) + if err != nil { + return err + } + } + + return nil } diff --git a/src/service/alert_overview.go b/src/service/alert_overview.go index ad2b879..f8e1af8 100644 --- a/src/service/alert_overview.go +++ b/src/service/alert_overview.go @@ -14,6 +14,7 @@ import ( "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/request" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/response" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/client" + "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/jsontime" "io" @@ -132,7 +133,7 @@ func (a *AlertOverviewSvc) AlertOverview(req request.DetailAlertOverview) (resp } }`, req.StartTime, req.EndTime) - body, err := executeQuery(cli, OpenSearchIndex, content) + body, err := executeQuery(cli, conf.Options.OpenSearchIndex, content) if err != nil { return } @@ -190,7 +191,7 @@ func (a *AlertOverviewSvc) RiskLevelDistribution(req request.DetailAlertOverview req.EndTime = time.Now().Add(time.Hour * 24).Format("2006-01-02") } - body, err := executeQuery(cli, OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "risk_level")) + body, err := executeQuery(cli, conf.Options.OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "risk_level")) if err != nil { return } @@ -235,7 +236,7 @@ func (a *AlertOverviewSvc) AlertStatusDistribution(req request.DetailAlertOvervi req.EndTime = time.Now().Add(time.Hour * 24).Format("2006-01-02") } - body, err := executeQuery(cli, OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "status")) + body, err := executeQuery(cli, conf.Options.OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "status")) if err != nil { return } @@ -279,7 +280,7 @@ func (a *AlertOverviewSvc) AlertClassDistribution(req request.DetailAlertOvervie req.EndTime = time.Now().Add(time.Hour * 24).Format("2006-01-02") } - body, err := executeQuery(cli, OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "class_id")) + body, err := executeQuery(cli, conf.Options.OpenSearchIndex, buildAggQueryContent(req.StartTime, req.EndTime, "class_id")) if err != nil { return } @@ -355,7 +356,7 @@ func (a *AlertOverviewSvc) AlertFrequencyDistribution(req request.DetailAlertOve } }` - body, err := executeQuery(cli, OpenSearchIndex, content) + body, err := executeQuery(cli, conf.Options.OpenSearchIndex, content) if err != nil { return } diff --git a/src/service/alert_rules.go b/src/service/alert_rules.go index ba6d3f7..a122a73 100644 --- a/src/service/alert_rules.go +++ b/src/service/alert_rules.go @@ -74,12 +74,14 @@ func (a *AlertRulesSvc) Add(req request.AddAlertRules) (err error) { return nil, err } data.ClassId = classId + req.ClassId = classId // 添加指标配置 metricConfigSvc := MetricConfigSvc{User: a.User} _ = copier.Copy(&addMetricConfig, &req) addMetricConfig.Source = constant.SourceCustom addMetricConfig.MetricName = req.MetricConfigName + addMetricConfig.ClassId = classId metricConfigId, err = metricConfigSvc.Add(session, addMetricConfig) if err != nil { return nil, err @@ -88,14 +90,22 @@ func (a *AlertRulesSvc) Add(req request.AddAlertRules) (err error) { // 添加预警规则配置 _, err = session.Insert(&data) + if err != nil { + return nil, err + } return nil, err }) + + err = a.CreatePrometheusRule(req.IsEnabled, data.Id, db, "") if err != nil { return err } } err = a.CreatePrometheusRule(req.IsEnabled, data.Id, db, "") + if err != nil { + return err + } return nil } @@ -125,7 +135,11 @@ func (a *AlertRulesSvc) Update(req request.UpdateAlertRules) (err error) { } switch req.DetectionType { case 1: - _, err = db.ID(data.Id).Update(&data) + _, err = db.ID(data.Id).MustCols("duration").Update(&data) + if err != nil { + return err + } + err = a.CreatePrometheusRule(req.IsEnabled, data.Id, db, "update") if err != nil { return err } @@ -134,24 +148,37 @@ func (a *AlertRulesSvc) Update(req request.UpdateAlertRules) (err error) { // 更新自定义分类 var ( updateMetricConfig request.UpdateMetricConfig - alertClassItem response.AlertClassItem + alertClass response.AlertClassItem + alertParentClass response.AlertClassItem ) alertClassSvc := AlertClassSvc{User: a.User} - alertClassItem, err = alertClassSvc.GetDataById(request.DetailAlertClass{ClassId: dbAlertRules.ClassId}) + alertClass, err = alertClassSvc.GetDataById(request.DetailAlertClass{ClassId: dbAlertRules.ClassId}) if err != nil { return nil, err } err = alertClassSvc.Update(session, request.UpdateAlertClass{ ClassId: dbAlertRules.ClassId, ClassName: req.ClassName, + ParentId: alertClass.ParentId, }) if err != nil { return nil, err } + + if alertClass.ParentId == 0 { + return nil, errors.New("预警分类为空") + } + + alertParentClass, err = alertClassSvc.GetDataById(request.DetailAlertClass{ClassId: alertClass.ParentId}) + if err != nil { + return nil, err + } + err = alertClassSvc.Update(session, request.UpdateAlertClass{ - ClassId: alertClassItem.ParentId, + ClassId: alertClass.ParentId, ClassName: req.ClassParentName, + ParentId: alertParentClass.ParentId, }) if err != nil { return nil, err @@ -166,11 +193,12 @@ func (a *AlertRulesSvc) Update(req request.UpdateAlertRules) (err error) { if err != nil { return nil, err } - - // 更新预警规则配置 - _, err = session.ID(data.Id).Update(&data) return nil, err }) + + // 更新预警策略配置 + _, err = session.ID(data.Id).MustCols("duration").Update(&data) + err = a.CreatePrometheusRule(req.IsEnabled, data.Id, db, "update") if err != nil { return err } @@ -312,7 +340,7 @@ func (a *AlertRulesSvc) List(req request.ListAlertRules) (resp response.AlertRul Or("acp.class_name LIKE ?", "%"+req.Keyword+"%") } resp.TotalCount, err = session.Limit(req.GetPageSize(), (req.GetPage()-1)*req.GetPageSize()). - OrderBy("r.created_at desc"). + OrderBy("r.is_enabled asc,r.created_at desc"). FindAndCount(&resp.List) for i := 0; i < len(resp.List); i++ { _ = json.Unmarshal([]byte(resp.List[i].AlertRules.AlertCondition), &resp.List[i].AlertCondition) diff --git a/src/service/alert_webhook.go b/src/service/alert_webhook.go index a6a889a..d5890b3 100644 --- a/src/service/alert_webhook.go +++ b/src/service/alert_webhook.go @@ -7,8 +7,11 @@ import ( "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/entity" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/request" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/response" + "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/jsontime" + "go.uber.org/zap" + "time" "xorm.io/xorm" ) @@ -37,16 +40,20 @@ func (a *AlertWebhookSvc) AlertWebhook(session *xorm.Session, req webhook.Messag ) if alertRulesId, ok = alert.Labels["alert_rules_id"]; !ok { - return errors.New("alert_rules_id not found in the map") + err = errors.New("alert_rules_id not found in the map") + conf.Logger.Error("err", zap.Error(err)) } if riskLevelStr, ok = alert.Labels["risk_level"]; !ok { - return errors.New("risk_level not found in the map") + err = errors.New("risk_level not found in the map") + conf.Logger.Error("err", zap.Error(err)) } riskLevel = cast.ToInt(riskLevelStr) if currentValueStr, ok = alert.Annotations["value"]; !ok { - return errors.New("value not found in the map") + err = errors.New("value not found in the map") + conf.Logger.Error("err", zap.Error(err)) + return } currentValue = cast.ToFloat64(currentValueStr) @@ -55,6 +62,11 @@ func (a *AlertWebhookSvc) AlertWebhook(session *xorm.Session, req webhook.Messag return } + if alertRulesItem.Id == "" { + conf.Logger.Error("err", zap.Error(errors.New("告警规则查询为空"))) + return + } + alertItem, err = alertSvc.GetDataByAlertRulesIdAndRiskLevel(alertRulesId, riskLevel, 2) if err != nil { return @@ -68,12 +80,18 @@ func (a *AlertWebhookSvc) AlertWebhook(session *xorm.Session, req webhook.Messag switch isNewAlert { case true: // 新增数据到OpenSearch - max := alertSvc.CatCount(OpenSearchIndex) + var max int + max, err = alertSvc.GetIndexMaxID(conf.Options.OpenSearchIndex) + if err != nil { + // 获取id最大值 + max = alertSvc.CatCount(conf.Options.OpenSearchIndex) + } if max == 0 { err = errors.New("failed to get doc count for index") + conf.Logger.Error("err", zap.Error(err)) return } - alertId := max + 1 + id := max + 1 for _, v := range alertRulesItem.AlertCondition { if v.RiskLevel == riskLevel { alertCondition = v @@ -81,12 +99,12 @@ func (a *AlertWebhookSvc) AlertWebhook(session *xorm.Session, req webhook.Messag } } createAlert := request.CreateAlert{Alert: entity.Alert{ - Id: alertId, + Id: id, AlertPoint: alertRulesItem.ClassParentName + "/" + alertRulesItem.MetricName, AlertRulesId: alertRulesItem.Id, AlertRulesName: alertRulesItem.MetricName, RiskLevel: riskLevel, - AlertTime: jsontime.Time(alert.StartsAt), + AlertTime: jsontime.Time(alert.StartsAt.Add(time.Hour * 8)), ClassId: alertRulesItem.ClassId, ClassParentName: alertRulesItem.ClassParentName, ClassName: alertRulesItem.ClassName, diff --git a/src/service/k8s/prometheusrule.go b/src/service/k8s/prometheusrule.go index 1697cf4..72f45ba 100644 --- a/src/service/k8s/prometheusrule.go +++ b/src/service/k8s/prometheusrule.go @@ -6,30 +6,46 @@ import ( "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/util" "strings" + "sync" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" ) var ( - PrometheusRuleGroup = "monitoring.beagle.io" // kubectl api-resources | grep -i prome - PrometheusRuleVersion = "v1" - PrometheusRuleKind = "PrometheusRule" - Namespace = "beagle-monitoring" - PrometheusRuleApiVersion = PrometheusRuleGroup + "/" + PrometheusRuleVersion - PrometheusRuleName = strings.ToLower(PrometheusRuleKind) + "s." + PrometheusRuleGroup - PrometheusRuleNamePrefix = "beagle-prometheus-so-operation-api-rules" // beagle-monitoring beagle-prometheus-prometheus-operator 43d + promOnce sync.Once + prometheusRuleName string + + alertOnce sync.Once + alertDefLabels map[string]string ) -var AlertDefLabels = map[string]string{ - "app": "prometheus", - "app.bd-apaas.com/cluster-component": "monitoring", - "prometheus-operator": "monitoring", - "release": "beagle-prometheus", +func GetPrometheusRuleCRDName() string { + promOnce.Do(func() { + url := conf.Options.MonitorApiVersion // 请确保 conf 和其他相关配置可用 + parts := strings.Split(url, "/") + if len(parts) == 0 || parts[0] == "" { + prometheusRuleName = "prometheusrules.monitoring.beagle.io" + } else { + prometheusRuleName = fmt.Sprintf("prometheusrules.%s", parts[0]) + } + }) + return prometheusRuleName +} + +func GetAlertDefLabels() map[string]string { + alertOnce.Do(func() { + alertDefLabels = make(map[string]string) + err := json.Unmarshal([]byte(conf.Options.MonitorMatchLabelsStr), &alertDefLabels) + if err != nil { + fmt.Println("Error parsing JSON:", err) + } + }) + return alertDefLabels } -// GetPrometheusRuleName 获取规则CRD名称 -func GetPrometheusRuleName(alertRulesId string) string { - return fmt.Sprintf("%s-%s", PrometheusRuleNamePrefix, alertRulesId) +// GetPrometheusRuleId 获取规则CRD名称 +func GetPrometheusRuleId(alertPolicyId string) string { + return fmt.Sprintf("%s-%s", conf.Options.PrometheusRuleNamePrefix, alertPolicyId) } // GetPrometheusRuleGroupName 获取规则组名称 @@ -43,19 +59,19 @@ type PrometheusRule struct { func (p PrometheusRule) Create(pRule *monitoringv1.PrometheusRule) error { k8sSvc := K8sSvc{Header: p.Header} - c := &Content{Kind: PrometheusRuleKind, ApiVersion: PrometheusRuleApiVersion, Metadata: pRule.ObjectMeta, Spec: pRule.Spec} + c := &Content{Kind: "PrometheusRule", ApiVersion: conf.Options.MonitorApiVersion, Metadata: pRule.ObjectMeta, Spec: pRule.Spec} _, err := k8sSvc.SendFile(c) return err } func (p PrometheusRule) Delete(namespace string, name string) error { - delUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, PrometheusRuleName, namespace, name) + delUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, GetPrometheusRuleCRDName(), namespace, name) _, err := util.ProxySendRes("DELETE", delUrl, "", p.Header) return err } func (p PrometheusRule) Update(pRule *monitoringv1.PrometheusRule) error { - updateUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, PrometheusRuleName, pRule.Namespace, pRule.Name) + updateUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, GetPrometheusRuleCRDName(), pRule.Namespace, pRule.Name) body, _ := json.Marshal(pRule) p.Header["Content-Type"] = "application/json" _, err := util.ProxySendRes("PUT", updateUrl, string(body), p.Header) @@ -63,7 +79,7 @@ func (p PrometheusRule) Update(pRule *monitoringv1.PrometheusRule) error { } func (p PrometheusRule) Get(namespace string, name string) (obj *monitoringv1.PrometheusRule, err error) { - getUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, PrometheusRuleName, namespace, name) + getUrl := fmt.Sprintf("%s/kubernetes/api/v1/_raw/%s/namespace/%s/name/%s", conf.Options.AweRestURL, GetPrometheusRuleCRDName(), namespace, name) res, err := util.ProxySendRes("GET", getUrl, "", p.Header) if err != nil { return diff --git a/src/service/prometheus.go b/src/service/prometheus.go index 7e42291..879ec93 100644 --- a/src/service/prometheus.go +++ b/src/service/prometheus.go @@ -26,10 +26,10 @@ func (p *PrometheusSvc) Label(req request.PrometheusLabel) (resp response.Promet if req.LabelName != "" { url := fmt.Sprintf("%s%s", conf.Options.PrometheusHost, "/api/v1/series") - bytes, _ := util.Request(url, http.MethodPost, + response, _ := util.Request(url, http.MethodPost, []byte(fmt.Sprintf("match[]=%s", req.LabelName)), map[string]string{"Content-Type": util.MediaTypeForm}) - _ = json.Unmarshal(bytes, &prometheusSeries) + _ = json.Unmarshal(response.Body(), &prometheusSeries) for _, v := range prometheusSeries.Data { for k, _ := range v { resp.List = append(resp.List, k) @@ -41,8 +41,8 @@ func (p *PrometheusSvc) Label(req request.PrometheusLabel) (resp response.Promet } else { url := fmt.Sprintf("%s%s", conf.Options.PrometheusHost, "/api/v1/label/__name__/values") - bytes, _ := util.Request(url, http.MethodGet, nil, nil) - _ = json.Unmarshal(bytes, &prometheusLabel) + response, _ := util.Request(url, http.MethodGet, nil, nil) + _ = json.Unmarshal(response.Body(), &prometheusLabel) resp.TotalCount = int64(len(prometheusLabel.Data)) resp.List = prometheusLabel.Data } @@ -57,10 +57,10 @@ func (p *PrometheusSvc) LabelValue(req request.PrometheusLabelValue) (resp respo ) url := fmt.Sprintf("%s%s", conf.Options.PrometheusHost, "/api/v1/series") - bytes, _ := util.Request(url, http.MethodPost, + response, _ := util.Request(url, http.MethodPost, []byte(fmt.Sprintf("match[]=%s", req.MetricName)), map[string]string{"Content-Type": util.MediaTypeForm}) - _ = json.Unmarshal(bytes, &prometheusSeries) + _ = json.Unmarshal(response.Body(), &prometheusSeries) for _, v := range prometheusSeries.Data { for key, value := range v { metricLabelMap[key] = append(metricLabelMap[key], value) diff --git a/src/service/prometheusrule.go b/src/service/prometheusrule.go index 1f06dd9..913362b 100644 --- a/src/service/prometheusrule.go +++ b/src/service/prometheusrule.go @@ -1,31 +1,59 @@ package service import ( + "errors" "fmt" + json "github.com/json-iterator/go" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/spf13/cast" + "github.com/tidwall/gjson" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/entity" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/bean/vo/response" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/common/conf" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/pkg/beagle/constant" "gitlab.wodcloud.com/smart-operation/so-operation-api/src/service/k8s" + "gitlab.wodcloud.com/smart-operation/so-operation-api/src/util" "go.uber.org/zap" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + "net/http" + "net/url" "strings" + "sync" ) +var prometheusRuleLabel map[string]string +var once sync.Once + +func initPrometheusRuleLabel() { + once.Do(func() { + str := conf.Options.PrometheusRuleLabel + err := json.Unmarshal([]byte(str), &prometheusRuleLabel) + if err != nil { + prometheusRuleLabel = map[string]string{ // 返回默认标签 + "source": "aiops-systemmonitor-api", + } + } + }) +} + +// GetPrometheusRuleLabel 返回 prometheusRuleLabel 单例 +func GetPrometheusRuleLabel() map[string]string { + initPrometheusRuleLabel() + return prometheusRuleLabel +} + type PrometheusRuleSvc struct { User entity.SystemUserInfo } func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) (err error) { - prometheusRuleName := k8s.GetPrometheusRuleName(data.Id) + prometheusRuleObjName := k8s.GetPrometheusRuleId(data.Id) pr := monitoringv1.PrometheusRule{ ObjectMeta: v1.ObjectMeta{ - Name: prometheusRuleName, - Namespace: k8s.Namespace, - Labels: k8s.AlertDefLabels, + Name: prometheusRuleObjName, + Namespace: conf.Options.MonitorMatchNs, + Labels: k8s.GetAlertDefLabels(), }, } @@ -38,17 +66,24 @@ func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) (err error) { item := fmt.Sprintf(`%s%s"%s"`, v.MetricLabel, v.Compare, v.Value) // http_requests_total{method="GET",pod="LeaseGrant"} data.Expr = strings.ReplaceAll(data.Expr, v.VariableName, item) } - for _, v := range data.AlertCondition { + for k, v := range data.AlertCondition { + labels := map[string]string{ + "severity": "warning", + "risk_level": cast.ToString(v.RiskLevel), + "risk_level_name": constant.RiskLeveText(v.RiskLevel), + "namespace": conf.Options.MonitorMatchNs, + "alert_rules_id": data.Id, + "metric_config_id": data.MetricConfigId, + } + + for key, value := range GetPrometheusRuleLabel() { + labels[key] = value + } rule := monitoringv1.Rule{ - Alert: data.MetricConfigName, - For: &ruleFor, - Labels: map[string]string{ - "severity": "warning", - "risk_level": cast.ToString(v.RiskLevel), - "risk_level_name": constant.RiskLeveText(v.RiskLevel), - "source": "so-operation-api", - "alert_rules_id": data.MetricConfigId, - }, + // promhttp超过5万次告警-prom指标控制器请求数-较大风险-3 + Alert: fmt.Sprintf("%s-%s-%s-%d", data.MetricName, data.MetricConfigName, constant.RiskLeveText(v.RiskLevel), k+1), + For: &ruleFor, + Labels: labels, Annotations: map[string]string{ "value": "{{ $value }}", "summary": fmt.Sprintf("分组名:%s, 检查周期:%s, 持续时间:%s", group.Name, string(groupInterval), string(ruleFor)), @@ -68,6 +103,11 @@ func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) (err error) { condition += 2 } + // 为"空"状态下,默认表达式已经有比较判断,故直接使用表达式即可 + if data.AlertRuleTypeName == "空" { + condition = 0 + } + switch condition { default: expr = data.Expr @@ -79,6 +119,12 @@ func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) (err error) { expr = fmt.Sprintf("%s <= %s <=%s", cast.ToString(v.ThresholdsMin), data.Expr, cast.ToString(v.ThresholdsMax)) } + // 校验表达式正确性 + err = CheckPrometheusQuerySyntax(expr) + if err != nil { + return + } + rule.Expr = intstr.FromString(expr) group.Rules = append(group.Rules, rule) } @@ -91,12 +137,12 @@ func (p *PrometheusRuleSvc) Create(data response.AlertRulesItem) (err error) { } func (p *PrometheusRuleSvc) Get(data response.AlertRulesItem) (obj *monitoringv1.PrometheusRule, exist bool, err error) { - prometheusRuleName := k8s.GetPrometheusRuleName(data.Id) + prometheusRuleObjName := k8s.GetPrometheusRuleId(data.Id) pr := monitoringv1.PrometheusRule{ ObjectMeta: v1.ObjectMeta{ - Name: prometheusRuleName, - Namespace: k8s.Namespace, - Labels: k8s.AlertDefLabels, + Name: prometheusRuleObjName, + Namespace: conf.Options.MonitorMatchNs, + Labels: k8s.GetAlertDefLabels(), }, } header := map[string]string{"Authorization": "Bearer " + conf.Options.KubernetesToken} @@ -109,13 +155,26 @@ func (p *PrometheusRuleSvc) Get(data response.AlertRulesItem) (obj *monitoringv1 return } +// CheckPrometheusQuerySyntax 校验普罗米修斯语法正确性 +func CheckPrometheusQuerySyntax(expr string) error { + params := url.Values{} + params.Add("query", expr) + query := params.Encode() + webUrl := fmt.Sprintf("%s%s%s", conf.Options.PrometheusHost, "/api/v1/query?", query) + resp, _ := util.Request(webUrl, http.MethodGet, nil, nil) + if resp.StatusCode() != http.StatusOK { + return errors.New(fmt.Sprintf("%s, err: %s", "普罗米修斯语法PromQL错误", gjson.GetBytes(resp.Body(), "error").String())) + } + return nil +} + func (p *PrometheusRuleSvc) Delete(data response.AlertRulesItem) (err error) { - prometheusRuleName := k8s.GetPrometheusRuleName(data.Id) + prometheusRuleObjName := k8s.GetPrometheusRuleId(data.Id) pr := monitoringv1.PrometheusRule{ ObjectMeta: v1.ObjectMeta{ - Name: prometheusRuleName, - Namespace: k8s.Namespace, - Labels: k8s.AlertDefLabels, + Name: prometheusRuleObjName, + Namespace: conf.Options.MonitorMatchNs, + Labels: k8s.GetAlertDefLabels(), }, } diff --git a/src/util/http.go b/src/util/http.go index 4a8a456..f3ddc94 100644 --- a/src/util/http.go +++ b/src/util/http.go @@ -60,7 +60,7 @@ Request("https://httpbin.org/put", "Cookie": "aweToken=3ab9f63f-b0b3-4935-80ec-405d76ac111d", }) */ -func Request(url string, method string, body []byte, headers map[string]string) ([]byte, error) { +func Request(url string, method string, body []byte, headers map[string]string) (*fasthttp.Response, error) { req := fasthttp.AcquireRequest() defer fasthttp.ReleaseRequest(req) @@ -98,8 +98,11 @@ func Request(url string, method string, body []byte, headers map[string]string) return nil, err } + result := new(fasthttp.Response) + resp.CopyTo(result) + // 返回响应体和错误信息 - return resp.Body(), nil + return result, nil } // HttpSend , http请求 GET/DELETE/POST/PUT -- 2.26.0