Skip to content

Commit

Permalink
Merge pull request #84 from cybozu-go/add-backup-time-metrics
Browse files Browse the repository at this point in the history
add backup creation time metrics
  • Loading branch information
satoru-takeuchi authored Jan 14, 2025
2 parents 083ea99 + ffd571b commit c999f69
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0
github.com/onsi/ginkgo/v2 v2.20.2
github.com/onsi/gomega v1.35.1
github.com/prometheus/client_golang v1.18.0
github.com/pseudomuto/protoc-gen-doc v1.5.1
github.com/spf13/cobra v1.8.1
go.uber.org/mock v0.5.0
Expand Down Expand Up @@ -77,7 +78,6 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-proto-validators v0.0.0-20180403085117-0950a7990007 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.18.0 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.45.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
Expand Down
18 changes: 18 additions & 0 deletions internal/controller/mantlebackup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ import (
_ "embed"

mantlev1 "github.com/cybozu-go/mantle/api/v1"
"github.com/cybozu-go/mantle/cmd/backup"
"github.com/cybozu-go/mantle/internal/ceph"
"github.com/cybozu-go/mantle/internal/controller/internal/objectstorage"
"github.com/cybozu-go/mantle/internal/controller/metrics"
"github.com/cybozu-go/mantle/pkg/controller/proto"
"github.com/prometheus/client_golang/prometheus"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
aerrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -2068,6 +2071,21 @@ func (r *MantleBackupReconciler) primaryCleanup(
return ctrl.Result{}, fmt.Errorf("failed to update SyncedToRemote to True: %w", err)
}

duration := time.Since(target.GetCreationTimestamp().Time).Seconds()
source := "none"
if _, ok := target.GetLabels()[backup.MantleBackupConfigUID]; ok {
source = "mantle-backup-config"
}
metrics.BackupCreationDuration.
With(prometheus.Labels{
"cluster_namespace": r.managedCephClusterID,
// PVC is located in the same namespace as the MantleBackup.
"pvc_namespace": target.GetNamespace(),
"pvc": target.Spec.PVC,
"source": source,
}).
Observe(duration)

return ctrl.Result{}, nil
}

Expand Down
24 changes: 24 additions & 0 deletions internal/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package metrics

import (
"github.com/prometheus/client_golang/prometheus"
runtimemetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
)

const subsystem = "mantle"

// BackupCreationDuration is a histogram of backup creation durations in
// seconds, partitioned by the labels "cluster_namespace", "pvc_namespace",
// "pvc" and "source". With the package's "mantle" subsystem its series are
// named mantle_backup_creation_duration_seconds_*.
var BackupCreationDuration = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: subsystem,
		Name:      "backup_creation_duration_seconds",
		Help:      "Duration in seconds of backup creation.",
		// Bucket upper bounds in seconds, from 100 up to 250,000.
		Buckets: []float64{
			100, 250, 500, 750,
			1_000, 2_500, 5_000, 7_500,
			10_000, 25_000, 50_000, 75_000,
			100_000, 250_000,
		},
	},
	[]string{"cluster_namespace", "pvc_namespace", "pvc", "source"},
)

// init registers BackupCreationDuration with the controller-runtime metrics
// registry when the package is initialized. MustRegister panics if the
// collector cannot be registered.
func init() {
runtimemetrics.Registry.MustRegister(BackupCreationDuration)
}
22 changes: 22 additions & 0 deletions test/e2e/multik8s/replication/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"os"
"reflect"
"slices"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -325,5 +326,26 @@ func replicationTestSuite() {
EnsureCorrectRestoration(PrimaryK8sCluster, ctx, namespace, backupName0, restoreName0, writtenDataHash0)
EnsureCorrectRestoration(SecondaryK8sCluster, ctx, namespace, backupName0, restoreName0, writtenDataHash0)
})

It("should get metrics from the controller pod in the primary cluster", func(ctx SpecContext) {
metrics := []string{
`mantle_backup_creation_duration_seconds_count`,
`mantle_backup_creation_duration_seconds_sum`,
}
ensureMetricsAreExposed(metrics)
})
})
}

// ensureMetricsAreExposed asserts that every name in metrics appears in the
// output of the mantle controller's metrics endpoint in the primary cluster.
//
// It looks up the controller pod, runs curl against
// http://localhost:8080/metrics inside that pod via kubectl exec, and fails
// the current spec if the lookup or the exec fails, or if any name is
// missing. The check is a plain substring match, so a name also matches when
// it is only part of a longer metric name.
func ensureMetricsAreExposed(metrics []string) {
	GinkgoHelper()

	controllerPod, err := GetControllerPodName(PrimaryK8sCluster)
	Expect(err).NotTo(HaveOccurred())

	stdout, _, err := Kubectl(PrimaryK8sCluster, nil, "exec", "-n", CephClusterNamespace, controllerPod, "--",
		"curl", "-s", "http://localhost:8080/metrics")
	Expect(err).NotTo(HaveOccurred())

	// Convert once instead of re-copying the whole response per metric.
	body := string(stdout)
	for _, metric := range metrics {
		// The description names the missing metric; a bare BeTrue() failure
		// would only report "expected false to be true".
		Expect(strings.Contains(body, metric)).To(BeTrue(),
			"metric %q not found in the controller's /metrics output", metric)
	}
}
10 changes: 8 additions & 2 deletions test/e2e/multik8s/testutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,15 +374,21 @@ func IsJobConditionTrue(conditions []batchv1.JobCondition, conditionType batchv1
return false
}

// GetControllerPodName returns the name of the first pod labelled
// app.kubernetes.io/name=mantle in CephClusterNamespace of the cluster
// identified by clusterNo, as reported by kubectl.
//
// On failure it returns an empty name together with the error from Kubectl.
func GetControllerPodName(clusterNo int) (string, error) {
	stdout, _, err := Kubectl(clusterNo, nil, "get", "pod", "-n", CephClusterNamespace,
		"-l", "app.kubernetes.io/name=mantle", "-o", "jsonpath={.items[0].metadata.name}")
	if err != nil {
		// Do not hand back partial kubectl output alongside an error.
		return "", err
	}
	return string(stdout), nil
}

// WaitControllerToBeReady registers a spec that blocks until the
// mantle-controller Deployment in CephClusterNamespace is ready, first in the
// primary cluster and then in the secondary cluster.
func WaitControllerToBeReady() {
	GinkgoHelper()
	It("wait for mantle-controller to be ready", func() {
		Eventually(func() error {
			return CheckDeploymentReady(PrimaryK8sCluster, CephClusterNamespace, "mantle-controller")
		}).Should(Succeed())

		// The second wait used to repeat the primary-cluster check verbatim,
		// so the secondary cluster's controller was never waited for.
		// NOTE(review): assumes the secondary cluster deploys mantle-controller
		// under the same namespace and name - confirm against the e2e setup.
		Eventually(func() error {
			return CheckDeploymentReady(SecondaryK8sCluster, CephClusterNamespace, "mantle-controller")
		}).Should(Succeed())
	})
}
Expand Down

0 comments on commit c999f69

Please sign in to comment.