Skip to content

Commit

Permalink
Added prometheus support
Browse files Browse the repository at this point in the history
Signed-off-by: Kaustav Majumder <[email protected]>
  • Loading branch information
Kaustav Majumder committed Mar 22, 2024
1 parent 9d4d892 commit 2e95903
Show file tree
Hide file tree
Showing 13 changed files with 646 additions and 5 deletions.
33 changes: 33 additions & 0 deletions controllers/defaults/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,37 @@ var (
},
},
}

// MonitoringResources maps a monitoring component name to the resource
// requirements used for it. For every entry the requests and the limits
// carry the same cpu and memory quantities.
MonitoringResources = map[string]corev1.ResourceRequirements{
	// Sidecar proxy: 50m cpu / 30Mi memory.
	"kube-rbac-proxy": {
		Requests: corev1.ResourceList{
			"cpu":    resource.MustParse("50m"),
			"memory": resource.MustParse("30Mi"),
		},
		Limits: corev1.ResourceList{
			"cpu":    resource.MustParse("50m"),
			"memory": resource.MustParse("30Mi"),
		},
	},
	// Alertmanager: 100m cpu / 200Mi memory.
	"alertmanager": {
		Requests: corev1.ResourceList{
			"cpu":    resource.MustParse("100m"),
			"memory": resource.MustParse("200Mi"),
		},
		Limits: corev1.ResourceList{
			"cpu":    resource.MustParse("100m"),
			"memory": resource.MustParse("200Mi"),
		},
	},
	// Prometheus: 400m cpu / 250Mi memory.
	"prometheus": {
		Requests: corev1.ResourceList{
			"cpu":    resource.MustParse("400m"),
			"memory": resource.MustParse("250Mi"),
		},
		Limits: corev1.ResourceList{
			"cpu":    resource.MustParse("400m"),
			"memory": resource.MustParse("250Mi"),
		},
	},
}
)
242 changes: 240 additions & 2 deletions controllers/ocsinitialization/ocsinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ import (
"fmt"
"reflect"
"strconv"
"strings"

"github.com/go-logr/logr"
secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
opv1a1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
promv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
ocsv1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
"github.com/red-hat-storage/ocs-operator/v4/controllers/defaults"
"github.com/red-hat-storage/ocs-operator/v4/controllers/platform"
"github.com/red-hat-storage/ocs-operator/v4/controllers/util"
"github.com/red-hat-storage/ocs-operator/v4/templates"
rookCephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
"gopkg.in/yaml.v2"
corev1 "k8s.io/api/core/v1"
Expand All @@ -21,6 +26,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand All @@ -32,8 +38,11 @@ import (
var operatorNamespace string

const (
wrongNamespacedName = "Ignoring this resource. Only one should exist, and this one has the wrong name and/or namespace."
random30CharacterString = "KP7TThmSTZegSGmHuPKLnSaaAHSG3RSgqw6akBj0oVk"
wrongNamespacedName = "Ignoring this resource. Only one should exist, and this one has the wrong name and/or namespace."
random30CharacterString = "KP7TThmSTZegSGmHuPKLnSaaAHSG3RSgqw6akBj0oVk"
// PrometheusOperatorDeploymentName is the name of the deployment spec that
// reconcilePrometheusOperatorCSV looks up inside the Prometheus operator CSV.
PrometheusOperatorDeploymentName = "prometheus-operator"
// PrometheusOperatorCSVNamePrefix is the name prefix used to pick the
// Prometheus operator CSV out of the CSVs listed in the namespace.
PrometheusOperatorCSVNamePrefix = "odf-prometheus-operator"
// PrometheusOperatorCSVName is the CSV name set on the object passed to
// Watches in SetupWithManager.
PrometheusOperatorCSVName = "odf-prometheus-operator.v4.10.0"
)

// InitNamespacedName returns a NamespacedName for the singleton instance that
Expand All @@ -60,6 +69,9 @@ type OCSInitializationReconciler struct {
// +kubebuilder:rbac:groups=ocs.openshift.io,resources=*,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=get;create;update
// +kubebuilder:rbac:groups=security.openshift.io,resourceNames=privileged,resources=securitycontextconstraints,verbs=get;create;update
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources={alertmanagers,prometheuses},verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="monitoring.coreos.com",resources=servicemonitors,verbs=get;list;watch;update;patch;create;delete
// +kubebuilder:rbac:groups=operators.coreos.com,resources=clusterserviceversions,verbs=get;list;watch;delete;update;patch

// Reconcile reads that state of the cluster for a OCSInitialization object and makes changes based on the state read
// and what is in the OCSInitialization.Spec
Expand Down Expand Up @@ -178,6 +190,47 @@ func (r *OCSInitializationReconciler) Reconcile(ctx context.Context, request rec
r.Log.Error(err, "Failed to ensure uxbackend service")
return reconcile.Result{}, err
}
if isROSAHCP, err := platform.IsPlatformROSAHCP(); err != nil {
r.Log.Error(err, "Failed to determine if ROSA HCP cluster")
return reconcile.Result{}, err
} else if isROSAHCP {
r.Log.Info("Setting up monitoring resources for ROSA HCP platform")
err = r.reconcilePrometheusOperatorCSV(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure prometheus operator deployment")
return reconcile.Result{}, err
}

err = r.reconcilePrometheusKubeRBACConfigMap(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure kubeRBACConfig config map")
return reconcile.Result{}, err
}

err = r.reconcilePrometheusService(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure prometheus service")
return reconcile.Result{}, err
}

err = r.reconcilePrometheus(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure prometheus instance")
return reconcile.Result{}, err
}

err = r.reconcileAlertManager(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure alertmanager instance")
return reconcile.Result{}, err
}

err = r.reconcileK8sMetricsServiceMonitor(instance)
if err != nil {
r.Log.Error(err, "Failed to ensure k8sMetricsService Monitor")
return reconcile.Result{}, err
}
}

reason := ocsv1.ReconcileCompleted
message := ocsv1.ReconcileCompletedMessage
Expand All @@ -197,6 +250,10 @@ func (r *OCSInitializationReconciler) SetupWithManager(mgr ctrl.Manager) error {
For(&ocsv1.OCSInitialization{}).
Owns(&corev1.Service{}).
Owns(&corev1.Secret{}).
Owns(&promv1.Prometheus{}).
Owns(&corev1.ConfigMap{}).
Owns(&promv1.Alertmanager{}).
Owns(&promv1.ServiceMonitor{}).
// Watcher for storagecluster required to update
// ocs-operator-config configmap if storagecluster spec changes
Watches(
Expand Down Expand Up @@ -258,6 +315,21 @@ func (r *OCSInitializationReconciler) SetupWithManager(mgr ctrl.Manager) error {
},
),
).
Watches(
&opv1a1.ClusterServiceVersion{
ObjectMeta: metav1.ObjectMeta{
Name: PrometheusOperatorCSVName,
Namespace: r.OperatorNamespace,
},
},
handler.EnqueueRequestsFromMapFunc(
func(context context.Context, obj client.Object) []reconcile.Request {
return []reconcile.Request{{
NamespacedName: InitNamespacedName(),
}}
},
),
).
Complete(r)
}

Expand Down Expand Up @@ -550,3 +622,169 @@ func (r *OCSInitializationReconciler) reconcileUXBackendService(initialData *ocs

return nil
}

// reconcilePrometheusKubeRBACConfigMap creates or updates the kube-rbac-proxy
// ConfigMap in the namespace of initialData.
//
// The ConfigMap is named templates.PrometheusKubeRBACProxyConfigMapName, gets
// initialData set as its controller reference, and has its Data assigned from
// templates.KubeRBACProxyConfigMap (the template map is assigned directly, not
// copied). Any error from the create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheusKubeRBACConfigMap(initialData *ocsv1.OCSInitialization) error {
	cm := &corev1.ConfigMap{}
	cm.Name = templates.PrometheusKubeRBACProxyConfigMapName
	cm.Namespace = initialData.Namespace

	// mutate brings the fetched (or new) object to the desired state.
	mutate := func() error {
		if refErr := ctrl.SetControllerReference(initialData, cm, r.Scheme); refErr != nil {
			return refErr
		}
		cm.Data = templates.KubeRBACProxyConfigMap.Data
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update prometheus kube-rbac-proxy config map")
		return err
	}

	r.Log.Info("Prometheus kube-rbac-proxy config map creation succeeded", "Name", cm.Name)
	return nil
}

// reconcilePrometheusService creates or updates the Service named "prometheus"
// in the namespace of initialData.
//
// The Service gets initialData as its controller reference, the serving-cert
// secret-name annotation, a "prometheus" label, a selector on
// "app.kubernetes.io/name" equal to the Service name, and a single TCP port
// named "https" on templates.KubeRBACProxyPortNumber that targets the named
// port "https". Any error from the create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcilePrometheusService(initialData *ocsv1.OCSInitialization) error {
	svc := &corev1.Service{}
	svc.Name = "prometheus"
	svc.Namespace = initialData.Namespace

	// mutate brings the fetched (or new) object to the desired state.
	mutate := func() error {
		if refErr := ctrl.SetControllerReference(initialData, svc, r.Scheme); refErr != nil {
			return refErr
		}

		util.AddAnnotation(svc, "service.beta.openshift.io/serving-cert-secret-name", "prometheus-serving-cert-secret")
		util.AddLabel(svc, "prometheus", "odf-prometheus")

		svc.Spec.Selector = map[string]string{"app.kubernetes.io/name": svc.Name}

		httpsPort := corev1.ServicePort{
			Name:       "https",
			Protocol:   corev1.ProtocolTCP,
			Port:       int32(templates.KubeRBACProxyPortNumber),
			TargetPort: intstr.FromString("https"),
		}
		svc.Spec.Ports = []corev1.ServicePort{httpsPort}
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, svc, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update prometheus service")
		return err
	}

	r.Log.Info("Service creation succeeded", "Name", svc.Name)
	return nil
}

// reconcilePrometheus creates or updates the Prometheus instance named
// "odf-prometheus" in the namespace of initialData.
//
// The spec is copied from templates.PrometheusSpecTemplate, after which the
// Alertmanager endpoint named templates.AlertManagerEndpointName is pointed at
// the namespace of initialData. An error is returned (and logged) when the
// copied spec has no alerting section, when that endpoint cannot be found, or
// when the create/update call fails.
func (r *OCSInitializationReconciler) reconcilePrometheus(initialData *ocsv1.OCSInitialization) error {
	prometheus := &promv1.Prometheus{}
	prometheus.Name = "odf-prometheus"
	prometheus.Namespace = initialData.Namespace

	_, err := ctrl.CreateOrUpdate(r.ctx, r.Client, prometheus, func() error {
		if err := ctrl.SetControllerReference(initialData, prometheus, r.Scheme); err != nil {
			return err
		}
		templates.PrometheusSpecTemplate.DeepCopyInto(&prometheus.Spec)

		// Alerting is an optional pointer field; without this guard a template
		// lacking an alerting section would panic on the dereference below
		// instead of surfacing a reconcile error.
		if prometheus.Spec.Alerting == nil {
			return fmt.Errorf("unable to find AlertManagerEndpoint")
		}

		alertManagerEndpoint := util.Find(
			prometheus.Spec.Alerting.Alertmanagers,
			func(candidate *promv1.AlertmanagerEndpoints) bool {
				return candidate.Name == templates.AlertManagerEndpointName
			},
		)
		if alertManagerEndpoint == nil {
			return fmt.Errorf("unable to find AlertManagerEndpoint")
		}
		// The template cannot know the target namespace, so it is filled in here.
		alertManagerEndpoint.Namespace = initialData.Namespace
		return nil
	})
	if err != nil {
		r.Log.Error(err, "Failed to create/update prometheus instance")
		return err
	}

	r.Log.Info("Prometheus instance creation succeeded", "Name", prometheus.Name)
	return nil
}

// reconcileAlertManager creates or updates the Alertmanager instance named
// "odf-alertmanager" in the namespace of initialData.
//
// The instance gets initialData as its controller reference, a "prometheus"
// annotation, and a spec copied from templates.AlertmanagerSpecTemplate. Any
// error from the create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcileAlertManager(initialData *ocsv1.OCSInitialization) error {
	am := &promv1.Alertmanager{}
	am.Name = "odf-alertmanager"
	am.Namespace = initialData.Namespace

	// mutate brings the fetched (or new) object to the desired state.
	mutate := func() error {
		if refErr := ctrl.SetControllerReference(initialData, am, r.Scheme); refErr != nil {
			return refErr
		}
		util.AddAnnotation(am, "prometheus", "odf-prometheus")
		templates.AlertmanagerSpecTemplate.DeepCopyInto(&am.Spec)
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, am, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update alertManager instance")
		return err
	}

	r.Log.Info("AlertManager instance creation succeeded", "Name", am.Name)
	return nil
}

// reconcileK8sMetricsServiceMonitor creates or updates the ServiceMonitor
// named "k8s-metrics-service-monitor" in the namespace of initialData.
//
// The ServiceMonitor gets initialData as its controller reference, an "app"
// label, and a spec copied from templates.K8sMetricsServiceMonitorSpecTemplate.
// Any error from the create/update call is logged and returned.
func (r *OCSInitializationReconciler) reconcileK8sMetricsServiceMonitor(initialData *ocsv1.OCSInitialization) error {
	monitor := &promv1.ServiceMonitor{}
	monitor.Name = "k8s-metrics-service-monitor"
	monitor.Namespace = initialData.Namespace

	// mutate brings the fetched (or new) object to the desired state.
	mutate := func() error {
		if refErr := ctrl.SetControllerReference(initialData, monitor, r.Scheme); refErr != nil {
			return refErr
		}
		util.AddLabel(monitor, "app", "odf-prometheus")
		templates.K8sMetricsServiceMonitorSpecTemplate.DeepCopyInto(&monitor.Spec)
		return nil
	}

	if _, err := ctrl.CreateOrUpdate(r.ctx, r.Client, monitor, mutate); err != nil {
		r.Log.Error(err, "Failed to create/update K8s Metrics Service Monitor")
		return err
	}

	r.Log.Info("K8s Metrics Service Monitor creation succeeded", "Name", monitor.Name)
	return nil
}

// reconcilePrometheusOperatorCSV makes sure the Prometheus operator deployment
// described by its ClusterServiceVersion is set to one replica.
//
// It lists the CSVs in the namespace of initialData, picks the first one whose
// name starts with PrometheusOperatorCSVNamePrefix, locates the deployment spec
// named PrometheusOperatorDeploymentName in its install strategy and sets
// Replicas to 1. The CSV is only updated when that changes the deployment spec.
//
// An error is returned when the list call fails (wrapped, so the cause stays
// inspectable with errors.Is/errors.As), when no matching CSV or deployment
// spec exists, or when the update fails.
func (r *OCSInitializationReconciler) reconcilePrometheusOperatorCSV(initialData *ocsv1.OCSInitialization) error {
	csvList := &opv1a1.ClusterServiceVersionList{}
	if err := r.Client.List(r.ctx, csvList, client.InNamespace(initialData.Namespace)); err != nil {
		// %w keeps the underlying API error in the chain; the rendered text is unchanged.
		return fmt.Errorf("failed to list csvs in namespace %s,%w", initialData.Namespace, err)
	}

	csv := util.Find(
		csvList.Items,
		func(csv *opv1a1.ClusterServiceVersion) bool {
			return strings.HasPrefix(csv.Name, PrometheusOperatorCSVNamePrefix)
		},
	)
	if csv == nil {
		return fmt.Errorf("prometheus csv does not exist in namespace :%s", initialData.Namespace)
	}

	deploymentSpec := util.Find(
		csv.Spec.InstallStrategy.StrategySpec.DeploymentSpecs,
		func(deploymentSpec *opv1a1.StrategyDeploymentSpec) bool {
			return deploymentSpec.Name == PrometheusOperatorDeploymentName
		},
	)
	if deploymentSpec == nil {
		return fmt.Errorf("unable to find prometheus operator deployment spec")
	}

	// Snapshot the spec before mutating it so that a no-op reconcile does not
	// issue an Update call.
	currentDeploymentSpec := deploymentSpec.DeepCopy()
	deploymentSpec.Spec.Replicas = ptr.To(int32(1))
	if !reflect.DeepEqual(currentDeploymentSpec, deploymentSpec) {
		if err := r.Client.Update(r.ctx, csv); err != nil {
			r.Log.Error(err, "Failed to update Prometheus csv")
			return err
		}
	}
	return nil
}
10 changes: 10 additions & 0 deletions controllers/ocsinitialization/ocsinitialization_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@ import (
"fmt"
"testing"

configv1 "github.com/openshift/api/config/v1"
secv1 "github.com/openshift/api/security/v1"
fakeSecClient "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1/fake"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
opv1a1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
v1 "github.com/red-hat-storage/ocs-operator/api/v4/v1"
"github.com/red-hat-storage/ocs-operator/v4/controllers/platform"
statusutil "github.com/red-hat-storage/ocs-operator/v4/controllers/util"
"github.com/stretchr/testify/assert"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -106,6 +109,10 @@ func createFakeScheme(t *testing.T) *runtime.Scheme {
assert.Fail(t, "failed to add securityv1 scheme")
}

err = opv1a1.AddToScheme(scheme)
if err != nil {
assert.Fail(t, "failed to add v1alpha1 scheme")
}
return scheme
}

Expand Down Expand Up @@ -191,6 +198,7 @@ func TestCreateWatchedResource(t *testing.T) {
testcases := []struct {
label string
alreadyCreated bool
platform configv1.PlatformType
}{
{
label: "Case 1", // ocsInit resource not created already before reconcile
Expand All @@ -203,6 +211,7 @@ func TestCreateWatchedResource(t *testing.T) {
}

for _, tc := range testcases {
platform.SetFakePlatformInstanceForTesting(true, tc.platform)
ctx := context.TODO()
ocs, request, reconciler := getTestParams(false, t)
if !tc.alreadyCreated {
Expand All @@ -218,6 +227,7 @@ func TestCreateWatchedResource(t *testing.T) {
_ = reconciler.Client.Get(ctx, request.NamespacedName, &obj)
assert.Equalf(t, obj.Name, request.Name, "[%s]: failed to create ocsInit resource with correct name", tc.label)
assert.Equalf(t, obj.Namespace, request.Namespace, "[%s]: failed to create ocsInit resource with correct namespace", tc.label)
platform.UnsetFakePlatformInstanceForTesting()
}
}

Expand Down
Loading

0 comments on commit 2e95903

Please sign in to comment.