Skip to content

Commit

Permalink
Feat: prometheus check steps provider (#149)
Browse files Browse the repository at this point in the history
* check-metrics

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

refactor some code

Signed-off-by: 楚岳 <[email protected]>

add tests

Signed-off-by: 楚岳 <[email protected]>

* try to fix go lint

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

* fix test

Signed-off-by: 楚岳 <[email protected]>

* fix comments

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

delete useless code

Signed-off-by: 楚岳 <[email protected]>

add nolint

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

small fix

Signed-off-by: 楚岳 <[email protected]>

fix comments

Signed-off-by: 楚岳 <[email protected]>

fix comments

Signed-off-by: 楚岳 <[email protected]>

---------

Signed-off-by: 楚岳 <[email protected]>
  • Loading branch information
wangyikewxgm authored Mar 28, 2023
1 parent 5b55dbd commit c730c05
Show file tree
Hide file tree
Showing 6 changed files with 321 additions and 0 deletions.
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUB
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/josharian/txtarfs v0.0.0-20210218200122-0702f000015a/go.mod h1:izVPOvVRsHiKkeGCT6tYBNWyDVuzj9wAaBb5R9qamfw=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
Expand Down Expand Up @@ -916,6 +917,7 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-proto-validators v0.0.0-20180403085117-0950a7990007/go.mod h1:m2XC9Qq0AlmmVksL6FktJCdTYyLk7V3fKyp0sl1yWQo=
github.com/mwitkow/go-proto-validators v0.2.0/go.mod h1:ZfA1hW+UH/2ZHOWvQ3HnQaU0DtnpXu850MZiy+YUgcc=
Expand Down
2 changes: 2 additions & 0 deletions pkg/generator/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"encoding/json"
"errors"

metrics2 "github.com/kubevela/workflow/pkg/providers/metrics"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -160,6 +161,7 @@ func installBuiltinProviders(instance *types.WorkflowInstance, client client.Cli
util.Install(providerHandlers, pCtx)
http.Install(providerHandlers, client, instance.Namespace)
provider.Install(providerHandlers, client, nil)
metrics2.Install(providerHandlers)
kube.Install(providerHandlers, client, map[string]string{
types.LabelWorkflowRunName: instance.Name,
types.LabelWorkflowRunNamespace: instance.Namespace,
Expand Down
210 changes: 210 additions & 0 deletions pkg/providers/metrics/prom_check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
Copyright 2022 The KubeVela Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"fmt"
"strconv"
"time"

monitorContext "github.com/kubevela/pkg/monitor/context"
wfContext "github.com/kubevela/workflow/pkg/context"
"github.com/kubevela/workflow/pkg/cue/model/value"
"github.com/kubevela/workflow/pkg/types"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
)

const (
// ProviderName is provider name for install.
ProviderName = "metrics"
)

type provider struct{}

// PromCheck do health check from metrics from prometheus
func (h *provider) PromCheck(ctx monitorContext.Context, wfCtx wfContext.Context, v *value.Value, act types.Action) error {
stepID, err := v.GetString("stepID")
if err != nil {
return err
}

valueStr, err := getQueryResult(ctx, v)
if err != nil {
return err
}

conditionStr, err := v.GetString("condition")
if err != nil {
return err
}

res, err := compareValueWithCondition(valueStr, conditionStr, v)

if err != nil {
return err
}

if res {
// meet the condition
return handleSuccessCompare(wfCtx, stepID, v, conditionStr, valueStr)
}
return handleFailCompare(wfCtx, stepID, v, conditionStr, valueStr)
}

func handleSuccessCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error {
// clean up fail timeStamp
setMetricsStatusTime(wfCtx, stepID, "fail", 0)
d, err := v.GetString("duration")
if err != nil {
return err
}
duration, err := time.ParseDuration(d)
if err != nil {
return err
}

st := getMetricsStatusTime(wfCtx, stepID, "success")
if st == 0 {
// first success
if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success.", conditionStr, valueStr), "message"); err != nil {
return err
}
setMetricsStatusTime(wfCtx, stepID, "success", time.Now().Unix())
return v.FillObject(false, "result")
}
successTime := time.Unix(st, 0)
if successTime.Add(duration).Before(time.Now()) {
if err = v.FillObject("The metric check has passed successfully.", "message"); err != nil {
return err
}
return v.FillObject(true, "result")
}
if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, and the query result is %s, indicating success. The success has persisted for %s, with success duration being %s.", conditionStr, valueStr, time.Since(successTime).String(), duration), "message"); err != nil {
return err
}
return v.FillObject(false, "result")
}

func handleFailCompare(wfCtx wfContext.Context, stepID string, v *value.Value, conditionStr, valueStr string) error {
// clean up success timeStamp
setMetricsStatusTime(wfCtx, stepID, "success", 0)
ft := getMetricsStatusTime(wfCtx, stepID, "")
d, err := v.GetString("failDuration")
if err != nil {
return err
}
failDuration, err := time.ParseDuration(d)
if err != nil {
return err
}

if ft == 0 {
// first failed
setMetricsStatusTime(wfCtx, stepID, "fail", time.Now().Unix())
if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure, with the failure duration being %s. This is first failed checking.", conditionStr, valueStr, failDuration), "message"); err != nil {
return err
}
return v.FillObject(false, "result")
}

failTime := time.Unix(ft, 0)
if failTime.Add(failDuration).Before(time.Now()) {
if err = v.FillObject(true, "failed"); err != nil {
return err
}
if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s. The check has terminated.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil {
return err
}
return v.FillObject(false, "result")
}
if err := v.FillObject(fmt.Sprintf("The healthy condition should be %s, but the query result is %s, indicating failure. The failure has persisted for %s, with the failure duration being %s.", conditionStr, valueStr, time.Since(failTime).String(), failDuration), "message"); err != nil {
return err
}
return v.FillObject(false, "result")
}

func getQueryResult(ctx monitorContext.Context, v *value.Value) (string, error) {
addr, err := v.GetString("metricEndpoint")
if err != nil {
return "", err
}
c, err := api.NewClient(api.Config{
Address: addr,
})
if err != nil {
return "", err
}
promCli := v1.NewAPI(c)
query, err := v.GetString("query")
if err != nil {
return "", err
}
resp, _, err := promCli.Query(ctx, query, time.Now())
if err != nil {
return "", err
}

var valueStr string
switch v := resp.(type) {
case *model.Scalar:
valueStr = v.Value.String()
case model.Vector:
if len(v) != 1 {
return "", fmt.Errorf(fmt.Sprintf("ehe query is returning %d results when it should only return one. Please review the query to identify and fix the issue", len(v)))
}
valueStr = v[0].Value.String()
default:
return "", fmt.Errorf("cannot handle the not query value")
}
return valueStr, nil
}

func compareValueWithCondition(valueStr string, conditionStr string, v *value.Value) (bool, error) {
template := fmt.Sprintf("if: %s %s", valueStr, conditionStr)
cueValue, err := value.NewValue(template, nil, "")
if err != nil {
return false, err
}
res, err := cueValue.GetBool("if")
if err != nil {
return false, err
}
return res, nil
}

func setMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string, time int64) {
wfCtx.SetMutableValue(strconv.FormatInt(time, 10), stepID, "metrics", status, "time")
}

func getMetricsStatusTime(wfCtx wfContext.Context, stepID string, status string) int64 {
str := wfCtx.GetMutableValue(stepID, "metrics", status, "time")
if len(str) == 0 {
return 0
}
t, _ := strconv.ParseInt(str, 10, 64)
return t
}

// Install register handlers to provider discover.
func Install(p types.Providers) {
prd := &provider{}
p.Register(ProviderName, map[string]types.Handler{
"promCheck": prd.PromCheck,
})
}
95 changes: 95 additions & 0 deletions pkg/providers/metrics/prom_check_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
Copyright 2022 The KubeVela Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics

import (
"context"
"fmt"
"net/http"
"testing"
"time"

"github.com/crossplane/crossplane-runtime/pkg/test"

monitorContext "github.com/kubevela/pkg/monitor/context"
context2 "github.com/kubevela/workflow/pkg/context"
"github.com/kubevela/workflow/pkg/cue/model/value"
"github.com/stretchr/testify/assert"
"sigs.k8s.io/controller-runtime/pkg/client"
)

func TestMetricCheck(t *testing.T) {
srv := runMockPrometheusServer() // no lint

v, err := value.NewValue(`
metricEndpoint: "http://127.0.0.1:18089"
query: "sum(nginx_ingress_controller_requests{host=\"canary-demo.com\",status=\"200\"})"
duration: "4s"
failDuration: "2s"
condition: ">=3"
stepID: "123456"`, nil, "")
assert.NoError(t, err)
prd := &provider{}
ctx := monitorContext.NewTraceContext(context.Background(), "")
cli := &test.MockClient{
MockCreate: func(ctx context.Context, obj client.Object, opts ...client.CreateOption) error {
return nil
},
MockPatch: func(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.PatchOption) error {
return nil
},
MockGet: func(ctx context.Context, key client.ObjectKey, obj client.Object) error {
return nil
},
}
wfCtx, err := context2.NewContext(context.Background(), cli, "default", "v1", nil)
assert.NoError(t, err)
err = prd.PromCheck(ctx, wfCtx, v, nil)
assert.NoError(t, err)
res, err := v.GetBool("result")
assert.NoError(t, err)
assert.Equal(t, res, false)
message, err := v.GetString("message")
assert.NoError(t, err)
assert.Equal(t, message, "The healthy condition should be >=3, and the query result is 10, indicating success.")
if err := srv.Close(); err != nil {
fmt.Printf("Server shutdown error: %v\n", err)
}
}

func runMockPrometheusServer() *http.Server {
srv := http.Server{Addr: ":18089", Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
w.Write([]byte(`{
"status": "success",
"data": {
"resultType": "vector",
"result": [
{
"metric": {},
"value": [
1678701380.73,
"10"
]
}
]
}
}`))
})}
go srv.ListenAndServe() // no lint
time.Sleep(3 * time.Second)
return &srv
}
2 changes: 2 additions & 0 deletions pkg/stdlib/actions/v1/op.cue
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ import (
#ReadConfig: config.#Read
#ListConfig: config.#List

#PromCheck: metrics.#PromCheck

#PatchK8sObject: util.#PatchK8sObject

#Steps: {
Expand Down
10 changes: 10 additions & 0 deletions pkg/stdlib/actions/v1/pkgs/metrics.cue
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#PromCheck: {
#do: "promCheck"
#provider: "metrics"

query: string
promAddress: string
condition: string
duration: string
...
}

0 comments on commit c730c05

Please sign in to comment.