Skip to content

Commit

Permalink
Metrics for SyncSet and SelectorSyncSets
Browse files Browse the repository at this point in the history
merging 8659 and 9545

Metrics for SyncSet and SelectorSyncSets
  • Loading branch information
rhamitarora committed Oct 23, 2024
1 parent 3b6426c commit 99c51f8
Show file tree
Hide file tree
Showing 20 changed files with 1,237 additions and 16 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ require (
github.com/openshift/api v0.0.0-20240103200955-7ca3a4634e46
github.com/openshift/client-go v0.0.0-20221019143426-16aed247da5c
github.com/openshift/cloud-credential-operator v0.0.0-00010101000000-000000000000
github.com/openshift/hive/apis v0.0.0-20241008210644-986c5efa21e7
github.com/openshift/hive/apis v0.0.0-20240821011206-1ec27ad45d5a
github.com/openshift/library-go v0.0.0-20220525173854-9b950a41acdc
github.com/openshift/machine-config-operator v0.0.1-0.20230519222939-1abc13efbb0d
github.com/pires/go-proxyproto v0.6.2
Expand Down
72 changes: 72 additions & 0 deletions pkg/frontend/admin_hive_syncset_resources.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package frontend

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"net/http"
"path/filepath"
"strings"

"github.com/sirupsen/logrus"
"github.com/ugorji/go/codec"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/database/cosmosdb"
"github.com/Azure/ARO-RP/pkg/frontend/middleware"
)

// getAdminHiveClusterSync is the HTTP handler that returns the Hive
// ClusterSync of a cluster to an admin caller.
//
// The cluster resource ID is derived from the request URL: the last path
// element is dropped and the "/admin" prefix is removed. Cloud errors are
// written verbatim; every other outcome (success or a plain error) is
// delegated to adminReply.
func (f *frontend) getAdminHiveClusterSync(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	log := ctx.Value(middleware.ContextKeyLog).(*logrus.Entry)

	parent := filepath.Dir(r.URL.Path)
	resourceId := strings.TrimPrefix(parent, "/admin")

	payload, err := f._getAdminHiveClusterSync(ctx, resourceId)

	switch typedErr := err.(type) {
	case *api.CloudError:
		// Already carries its own status code and body.
		api.WriteCloudError(w, typedErr)
	default:
		// Covers both err == nil and non-cloud errors.
		adminReply(log, w, nil, payload, err)
	}
}

// _getAdminHiveClusterSync looks up the cluster document for resourceId and
// returns the JSON encoding of its Hive ClusterSync.
//
// It returns a *api.CloudError when:
//   - the frontend has no hive cluster manager (400 InvalidParameter);
//   - the cluster database cannot be obtained (500 InternalServerError);
//   - the cluster document is not found (404 ResourceNotFound);
//   - the document has no hive namespace (204 ResourceNotFound);
//   - the ClusterSync lookup reports not-found (404 NotFound);
//   - JSON encoding fails (500 InternalServerError).
//
// Any other database or hive error is returned unchanged.
func (f *frontend) _getAdminHiveClusterSync(ctx context.Context, resourceId string) ([]byte, error) {
	// Hive is not available in every environment, so the cluster manager may
	// legitimately be unset.
	if f.hiveClusterManager == nil {
		return nil, api.NewCloudError(http.StatusBadRequest, api.CloudErrorCodeInvalidParameter, "", "hive is not enabled")
	}

	clusters, err := f.dbGroup.OpenShiftClusters()
	if err != nil {
		return nil, api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", err.Error())
	}

	doc, err := clusters.Get(ctx, resourceId)
	if cosmosdb.IsErrorStatusCode(err, http.StatusNotFound) {
		return nil, api.NewCloudError(http.StatusNotFound, api.CloudErrorCodeResourceNotFound, "", err.Error())
	}
	if err != nil {
		return nil, err
	}

	// An empty namespace means the cluster was never registered with hive.
	if doc.OpenShiftCluster.Properties.HiveProfile.Namespace == "" {
		return nil, api.NewCloudError(http.StatusNoContent, api.CloudErrorCodeResourceNotFound, "", "cluster is not managed by hive")
	}

	clusterSync, err := f.hiveClusterManager.GetClusterSync(ctx, doc)
	// NOTE(review): this applies the cosmosdb status-code check to an error
	// produced by the hive cluster manager; confirm it can actually match a
	// not-found error from that source.
	if cosmosdb.IsErrorStatusCode(err, http.StatusNotFound) {
		return nil, api.NewCloudError(http.StatusNotFound, api.CloudErrorCodeNotFound, "", err.Error())
	}
	if err != nil {
		return nil, err
	}

	var encoded []byte
	encoder := codec.NewEncoderBytes(&encoded, &codec.JsonHandle{})
	if err := encoder.Encode(clusterSync); err != nil {
		return nil, api.NewCloudError(http.StatusInternalServerError, api.CloudErrorCodeInternalServerError, "", err.Error())
	}

	return encoded, nil
}
116 changes: 116 additions & 0 deletions pkg/frontend/admin_hive_syncset_resources_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package frontend

// Copyright (c) Microsoft Corporation.
// Licensed under the Apache License 2.0.

import (
"context"
"fmt"
"net/http"
"strings"
"testing"
"time"

hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
"go.uber.org/mock/gomock"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/metrics/noop"
mock_env "github.com/Azure/ARO-RP/pkg/util/mocks/env"
mock_hive "github.com/Azure/ARO-RP/pkg/util/mocks/hive"
)

// Test_getAdminHiveClusterSync exercises _getAdminHiveClusterSync against a
// fake cluster database and a mocked hive cluster manager.
//
// Each case states whether hive is enabled, how many times GetClusterSync is
// expected to be called, and either the expected JSON response or the
// expected cloud error (message and, optionally, status code). A case that
// expects an error fails if no error, or a non-cloud error, is returned; a
// case that expects success fails on any error.
func Test_getAdminHiveClusterSync(t *testing.T) {
	fakeUUID := "00000000-0000-0000-0000-000000000000"
	ctx := context.Background()
	clusterSync := hivev1alpha1.ClusterSync{Spec: hivev1alpha1.ClusterSyncSpec{}, Status: hivev1alpha1.ClusterSyncStatus{
		SyncSets: []hivev1alpha1.SyncStatus{{Name: "syncSet1", ObservedGeneration: 0, Result: "success", LastTransitionTime: metav1.Time{Time: time.Date(2024, 7, 1, 0, 0, 0, 0, time.UTC)}}},
	}}
	type test struct {
		name                            string
		resourceID                      string
		properties                      api.OpenShiftClusterProperties
		hiveEnabled                     bool
		expectedGetClusterSyncCallCount int
		wantStatusCode                  int
		wantResponse                    []byte
		wantError                       string
	}

	for _, tt := range []*test{
		{
			name:                            "cluster has hive profile with namespace",
			resourceID:                      fmt.Sprintf("/subscriptions/%s/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/hive", fakeUUID),
			properties:                      api.OpenShiftClusterProperties{HiveProfile: api.HiveProfile{Namespace: fmt.Sprintf("aro-%s", fakeUUID)}},
			hiveEnabled:                     true,
			expectedGetClusterSyncCallCount: 1,
			wantResponse:                    []byte(`{"status":{"syncSets":[{"name":"syncSet1","observedGeneration":0,"result":"success","lastTransitionTime":"2024-07-01T00:00:00Z"}]}}`),
		},
		{
			name:                            "cluster does not have hive profile with namespace",
			resourceID:                      fmt.Sprintf("/subscriptions/%s/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/nonHive", fakeUUID),
			hiveEnabled:                     true,
			expectedGetClusterSyncCallCount: 0,
			wantStatusCode:                  http.StatusNoContent,
			wantError:                       "204: ResourceNotFound: : cluster is not managed by hive",
		},
		{
			name:                            "hive is not enabled",
			resourceID:                      fmt.Sprintf("/subscriptions/%s/resourcegroups/resourceGroup/providers/Microsoft.RedHatOpenShift/openShiftClusters/nonHive", fakeUUID),
			hiveEnabled:                     false,
			expectedGetClusterSyncCallCount: 0,
			wantStatusCode:                  http.StatusBadRequest,
			wantError:                       "400: InvalidParameter: : hive is not enabled",
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			ti := newTestInfra(t).WithOpenShiftClusters().WithSubscriptions()
			controller := gomock.NewController(t)
			defer ti.done()
			defer controller.Finish()

			ti.fixture.AddOpenShiftClusterDocuments(&api.OpenShiftClusterDocument{
				Key: strings.ToLower(tt.resourceID),
				OpenShiftCluster: &api.OpenShiftCluster{
					ID:         tt.resourceID,
					Name:       "hive",
					Type:       "Microsoft.RedHatOpenShift/openshiftClusters",
					Properties: tt.properties,
				},
			})

			err := ti.buildFixtures(nil)
			if err != nil {
				t.Fatal(err)
			}
			_env := ti.env.(*mock_env.MockInterface)
			var f *frontend
			if tt.hiveEnabled {
				clusterManager := mock_hive.NewMockClusterManager(controller)
				clusterManager.EXPECT().GetClusterSync(gomock.Any(), gomock.Any()).Return(&clusterSync, nil).Times(tt.expectedGetClusterSyncCallCount)
				f, err = NewFrontend(ctx, ti.audit, ti.log, _env, ti.dbGroup, api.APIs, &noop.Noop{}, &noop.Noop{}, nil, clusterManager, nil, nil, nil, nil)
			} else {
				// Pass an untyped nil so the frontend sees "no cluster manager"
				// rather than a non-nil interface wrapping a nil pointer.
				f, err = NewFrontend(ctx, ti.audit, ti.log, _env, ti.dbGroup, api.APIs, &noop.Noop{}, &noop.Noop{}, nil, nil, nil, nil, nil, nil)
			}

			if err != nil {
				t.Fatal(err)
			}

			hiveClusterSync, err := f._getAdminHiveClusterSync(ctx, strings.ToLower(tt.resourceID))
			switch {
			case tt.wantError == "":
				if err != nil {
					t.Errorf("unexpected error: %v", err)
				}
			case err == nil:
				t.Errorf("got no error but wanted %q", tt.wantError)
			default:
				cloudErr, isCloudErr := err.(*api.CloudError)
				if !isCloudErr || cloudErr == nil {
					t.Fatalf("got error %v (%T) but wanted a cloud error %q", err, err, tt.wantError)
				}
				if tt.wantError != cloudErr.Error() {
					t.Errorf("got %q but wanted %q", cloudErr.Error(), tt.wantError)
				}
				if tt.wantStatusCode != 0 && tt.wantStatusCode != cloudErr.StatusCode {
					t.Errorf("got status code %d but wanted %d", cloudErr.StatusCode, tt.wantStatusCode)
				}
			}

			if !strings.EqualFold(string(hiveClusterSync), string(tt.wantResponse)) {
				t.Errorf("got %q and expected %q", hiveClusterSync, tt.wantResponse)
			}
		})
	}
}
2 changes: 2 additions & 0 deletions pkg/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,8 @@ func (f *frontend) chiAuthenticatedRoutes(router chi.Router) {

r.Get("/clusterdeployment", f.getAdminHiveClusterDeployment)

r.Get("/clustersync", f.getAdminHiveClusterSync)

r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/redeployvm", f.postAdminOpenShiftClusterRedeployVM)

r.With(f.maintenanceMiddleware.UnplannedMaintenanceSignal).Post("/stopvm", f.postAdminOpenShiftClusterStopVM)
Expand Down
18 changes: 18 additions & 0 deletions pkg/hive/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sort"

hivev1 "github.com/openshift/hive/apis/hive/v1"
hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -43,6 +44,7 @@ type ClusterManager interface {
IsClusterInstallationComplete(ctx context.Context, doc *api.OpenShiftClusterDocument) (bool, error)
GetClusterDeployment(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1.ClusterDeployment, error)
ResetCorrelationData(ctx context.Context, doc *api.OpenShiftClusterDocument) error
GetClusterSync(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1alpha1.ClusterSync, error)
}

type clusterManager struct {
Expand Down Expand Up @@ -263,3 +265,19 @@ func (hr *clusterManager) installLogsForLatestDeployment(ctx context.Context, cd

return latestProvision.Spec.InstallLog, nil
}

// GetClusterSync fetches the Hive ClusterSync object named
// ClusterDeploymentName from the hive namespace recorded in doc's
// HiveProfile. Any error from the hive client is returned unchanged together
// with a nil result.
func (hr *clusterManager) GetClusterSync(ctx context.Context, doc *api.OpenShiftClusterDocument) (*hivev1alpha1.ClusterSync, error) {
	// The ClusterSync is looked up under the same name as the ClusterDeployment.
	lookup := client.ObjectKey{
		Namespace: doc.OpenShiftCluster.Properties.HiveProfile.Namespace,
		Name:      ClusterDeploymentName,
	}

	var found hivev1alpha1.ClusterSync
	if err := hr.hiveClientset.Get(ctx, lookup, &found); err != nil {
		return nil, err
	}

	return &found, nil
}
50 changes: 50 additions & 0 deletions pkg/hive/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"testing"

hivev1 "github.com/openshift/hive/apis/hive/v1"
hivev1alpha1 "github.com/openshift/hive/apis/hiveinternal/v1alpha1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -548,3 +549,52 @@ func TestGetClusterDeployment(t *testing.T) {
})
}
}

// TestGetClusterSyncforClusterDeployment verifies clusterManager.GetClusterSync
// against a fake client: when the ClusterSync object exists it must be
// returned with the expected name and namespace, and when it does not exist
// the client's not-found error must be returned with a nil result.
func TestGetClusterSyncforClusterDeployment(t *testing.T) {
	fakeNamespace := "aro-00000000-0000-0000-0000-000000000000"
	doc := &api.OpenShiftClusterDocument{
		OpenShiftCluster: &api.OpenShiftCluster{
			Properties: api.OpenShiftClusterProperties{
				HiveProfile: api.HiveProfile{
					Namespace: fakeNamespace,
				},
			},
		},
	}

	cs := &hivev1alpha1.ClusterSync{
		ObjectMeta: metav1.ObjectMeta{
			Name:      ClusterDeploymentName,
			Namespace: fakeNamespace,
		},
	}

	for _, tt := range []struct {
		name    string
		wantErr string
	}{
		{name: "syncset exists and returned"},
		{name: "syncset does not exist err returned", wantErr: `clustersyncs.hiveinternal.openshift.io "cluster" not found`},
	} {
		t.Run(tt.name, func(t *testing.T) {
			fakeClientBuilder := fake.NewClientBuilder()
			// Only seed the object for the success case.
			if tt.wantErr == "" {
				fakeClientBuilder = fakeClientBuilder.WithRuntimeObjects(cs)
			}
			c := clusterManager{
				hiveClientset: fakeClientBuilder.Build(),
				log:           logrus.NewEntry(logrus.StandardLogger()),
			}

			result, err := c.GetClusterSync(context.Background(), doc)

			if tt.wantErr != "" {
				if err == nil {
					t.Fatalf("got no error but wanted %q", tt.wantErr)
				}
				if err.Error() != tt.wantErr {
					t.Errorf("got error %q but wanted %q", err.Error(), tt.wantErr)
				}
				if result != nil {
					t.Errorf("expected nil cluster sync on error, got %v", result)
				}
				return
			}

			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}
			if result == nil {
				t.Fatal("expected a cluster sync but got nil")
			}
			// Either field differing is a mismatch.
			if result.Name != cs.Name || result.Namespace != cs.Namespace {
				t.Errorf("unexpected cluster sync returned: got %s/%s but wanted %s/%s", result.Namespace, result.Name, cs.Namespace, cs.Name)
			}
		})
	}
}
30 changes: 18 additions & 12 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

"github.com/Azure/ARO-RP/pkg/api"
"github.com/Azure/ARO-RP/pkg/hive"
"github.com/Azure/ARO-RP/pkg/metrics"
"github.com/Azure/ARO-RP/pkg/monitor/dimension"
"github.com/Azure/ARO-RP/pkg/monitor/emitter"
Expand Down Expand Up @@ -60,10 +61,12 @@ type Monitor struct {
arodl *appsv1.DeploymentList
}

wg *sync.WaitGroup
wg *sync.WaitGroup
hiveClusterManager hive.ClusterManager
doc *api.OpenShiftClusterDocument
}

func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup) (*Monitor, error) {
func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftCluster, doc *api.OpenShiftClusterDocument, m metrics.Emitter, hiveRestConfig *rest.Config, hourlyRun bool, wg *sync.WaitGroup, hiveClusterManager hive.ClusterManager) (*Monitor, error) {
r, err := azure.ParseResourceID(oc.ID)
if err != nil {
return nil, err
Expand Down Expand Up @@ -126,16 +129,18 @@ func NewMonitor(log *logrus.Entry, restConfig *rest.Config, oc *api.OpenShiftClu
oc: oc,
dims: dims,

restconfig: restConfig,
cli: cli,
configcli: configcli,
maocli: maocli,
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
wg: wg,
restconfig: restConfig,
cli: cli,
configcli: configcli,
maocli: maocli,
mcocli: mcocli,
arocli: arocli,
m: m,
ocpclientset: ocpclientset,
hiveclientset: hiveclientset,
wg: wg,
hiveClusterManager: hiveClusterManager,
doc: doc,
}, nil
}

Expand Down Expand Up @@ -208,6 +213,7 @@ func (mon *Monitor) Monitor(ctx context.Context) (errs []error) {
mon.emitJobConditions,
mon.emitSummary,
mon.emitHiveRegistrationStatus,
mon.emitClusterSync,
mon.emitOperatorFlagsAndSupportBanner,
mon.emitMaintenanceState,
mon.emitCertificateExpirationStatuses,
Expand Down
Loading

0 comments on commit 99c51f8

Please sign in to comment.