From 6b07f524563fb3f7cbd7a93382ad2b58d01c2009 Mon Sep 17 00:00:00 2001 From: Gianluca Mardente Date: Mon, 4 May 2026 21:03:14 +0200 Subject: [PATCH] (bug) Fix shard tracking: cluster map key used empty namespace/name processCluster built clusterRef from the cluster object before calling c.Get, so the object's namespace and name were always empty. Every reconcile of a given kind therefore wrote to the same map key, e.g. `{Kind: "SveltosCluster", Namespace: "", Name: ""}`. clusterRef is now built after a successful Get; on NotFound the reference is built from the request's namespace/name and the GVK derived from the scheme. --- internal/controller/cluster_controller.go | 3 +- .../controller/sveltoscluster_controller.go | 3 +- internal/controller/utils.go | 28 +++++++++++++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/internal/controller/cluster_controller.go b/internal/controller/cluster_controller.go index ad64cf7..db0138b 100644 --- a/internal/controller/cluster_controller.go +++ b/internal/controller/cluster_controller.go @@ -43,11 +43,10 @@ type ClusterReconciler struct { func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) - logger.V(logs.LogInfo).Info("Reconciling Cluster") + logger.V(logs.LogDebug).Info("Reconciling Cluster") // Fecth the Cluster instance cluster := &clusterv1.Cluster{} - addTypeInformationToObject(r.Scheme, cluster) return reconcile.Result{}, processCluster(ctx, r.Config, r.Client, r.AgentInMgmtCluster, cluster, req, logger) } diff --git a/internal/controller/sveltoscluster_controller.go b/internal/controller/sveltoscluster_controller.go index e8072fc..c2875d6 100644 --- a/internal/controller/sveltoscluster_controller.go +++ b/internal/controller/sveltoscluster_controller.go @@ -43,11 +43,10 @@ type SveltosClusterReconciler struct { func (r *SveltosClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := ctrl.LoggerFrom(ctx) - logger.V(logs.LogInfo).Info("Reconciling SveltosCluster") + logger.V(logs.LogDebug).Info("Reconciling SveltosCluster") // Fecth the SveltosCluster instance 
sveltosCluster := &libsveltosv1beta1.SveltosCluster{} - addTypeInformationToObject(r.Scheme, sveltosCluster) return reconcile.Result{}, processCluster(ctx, r.Config, r.Client, r.AgentInMgmtCluster, sveltosCluster, req, logger) } diff --git a/internal/controller/utils.go b/internal/controller/utils.go index e295d39..f754337 100644 --- a/internal/controller/utils.go +++ b/internal/controller/utils.go @@ -101,11 +101,18 @@ func InitScheme() (*runtime.Scheme, error) { func processCluster(ctx context.Context, config *rest.Config, c client.Client, agentInMgmtCluster bool, cluster client.Object, req ctrl.Request, logger logr.Logger) error { - clusterRef := getObjectReferenceFromObject(c.Scheme(), cluster) - if err := c.Get(ctx, req.NamespacedName, cluster); err != nil { if apierrors.IsNotFound(err) { - return stopTrackingCluster(ctx, config, clusterRef, logger) + // cluster is empty here; derive the GVK from the scheme so the ref is complete. + addTypeInformationToObject(c.Scheme(), cluster) + apiVersion, kind := cluster.GetObjectKind().GroupVersionKind().ToAPIVersionAndKind() + notFoundRef := &corev1.ObjectReference{ + Namespace: req.Namespace, + Name: req.Name, + Kind: kind, + APIVersion: apiVersion, + } + return stopTrackingCluster(ctx, config, notFoundRef, logger) } logger.Error(err, "Failed to fetch cluster") return errors.Wrapf( @@ -115,6 +122,9 @@ func processCluster(ctx context.Context, config *rest.Config, c client.Client, ) } + // clusterRef is computed after Get so namespace/name are populated. 
+ clusterRef := getObjectReferenceFromObject(c.Scheme(), cluster) + // Handle deleted cluster if !cluster.GetDeletionTimestamp().IsZero() { return stopTrackingCluster(ctx, config, clusterRef, logger) @@ -145,13 +155,13 @@ func trackCluster(ctx context.Context, config *rest.Config, c client.Client, age mux.Lock() defer mux.Unlock() - oldShard, ok := clusterMap[*cluster] - if ok && oldShard == currentShardKey { + oldShard, alreadyTracked := clusterMap[*cluster] + if alreadyTracked && oldShard == currentShardKey { // Cluster is already tracked. And cluster shard has not changed. return nil } - if ok { + if alreadyTracked { if shardMap[oldShard].Has(cluster) && shardMap[oldShard].Len() == 1 { // By removing cluster, no more clusters will match oldShard. @@ -167,6 +177,8 @@ func trackCluster(ctx context.Context, config *rest.Config, c client.Client, age clusterPerShard := shardMap[oldShard] clusterPerShard.Erase(cluster) shardMap[oldShard] = clusterPerShard + logger.V(logs.LogInfo).Info(fmt.Sprintf("removed cluster from shard %q: %d cluster(s) remaining", + oldShard, clusterPerShard.Len())) } // Update Cluster shard (key: cluster; value: cluster current shard) @@ -189,6 +201,8 @@ func trackCluster(ctx context.Context, config *rest.Config, c client.Client, age } clusterPerShard.Insert(cluster) shardMap[currentShardKey] = clusterPerShard + logger.V(logs.LogInfo).Info(fmt.Sprintf("added cluster to shard %q: %d cluster(s) total", + currentShardKey, clusterPerShard.Len())) return nil } @@ -217,6 +231,8 @@ func stopTrackingCluster(ctx context.Context, config *rest.Config, cluster *core fmt.Sprintf("no more clusters matching shard %s. Removing controllers", oldShardKey)) return undeployControllers(ctx, config, oldShardKey, logger) } + logger.V(logs.LogInfo).Info(fmt.Sprintf("removed cluster from shard %q: %d cluster(s) remaining", + oldShardKey, shardMap[oldShardKey].Len())) } }