diff --git a/api/v1beta1/consts.go b/api/v1beta1/consts.go
index 769b1d02..47bf8b3e 100644
--- a/api/v1beta1/consts.go
+++ b/api/v1beta1/consts.go
@@ -43,6 +43,10 @@ const (
 
     // HostAgentJobNameAnnotation is the annotation identifying the name of HostOperationJob.
     HostAgentJobNameAnnotation = "cape.infrastructure.cluster.x-k8s.io/host-agent-job-name"
+
+    // MachineHotUpdateStatusAnnotation is the annotation recording the hot update status of machines.
+    // It is set on KCP and MD objects.
+    MachineHotUpdateStatusAnnotation = "cape.infrastructure.cluster.x-k8s.io/machine-hot-update-status"
 )
 
 // Labels.
diff --git a/api/v1beta1/elfmachine_types.go b/api/v1beta1/elfmachine_types.go
index a7214fdf..d0f42163 100644
--- a/api/v1beta1/elfmachine_types.go
+++ b/api/v1beta1/elfmachine_types.go
@@ -124,6 +124,10 @@ type ElfMachineStatus struct {
     // +optional
     GPUDevices []GPUStatus `json:"gpuDevices,omitempty"`
 
+    // Resources records the resources allocated for the machine.
+    // +optional
+    Resources ResourcesStatus `json:"resources,omitempty"`
+
     // FailureReason will be set in the event that there is a terminal problem
     // reconciling the Machine and will contain a succinct value suitable
     // for machine interpretation.
@@ -241,6 +245,11 @@ func (m *ElfMachine) IsFailed() bool {
     return m.Status.FailureReason != nil || m.Status.FailureMessage != nil
 }
 
+// IsResourcesUpToDate returns whether the machine's resources (disk) are as expected.
+func (m *ElfMachine) IsResourcesUpToDate() bool {
+    return m.Spec.DiskGiB == m.Status.Resources.Disk
+}
+
 func (m *ElfMachine) SetVMDisconnectionTimestamp(timestamp *metav1.Time) {
     if m.Annotations == nil {
         m.Annotations = make(map[string]string)
diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go
index e18cf5ab..f8e67929 100644
--- a/api/v1beta1/types.go
+++ b/api/v1beta1/types.go
@@ -196,6 +196,20 @@ type GPUStatus struct {
     Name string `json:"name,omitempty"`
 }
 
+// ResourcesStatus records the resources allocated to the virtual machine.
+type ResourcesStatus struct {
+    Disk int32 `json:"disk,omitempty"`
+}
+
+// MachineHotUpdateStatus defines the observed state of a machine hot update.
+// It is recorded on KCP and MD objects via the MachineHotUpdateStatusAnnotation.
+type MachineHotUpdateStatus struct {
+    // OutdatedReplicas is the total number of machines targeted by the KCP/MD whose resources are not yet up to date.
+    OutdatedReplicas int32 `json:"outdatedReplicas"`
+    // UpdatingReplicas is the total number of machines targeted by the KCP/MD whose resources are currently being updated.
+    UpdatingReplicas int32 `json:"updatingReplicas"`
+}
+
 //+kubebuilder:object:generate=false
 
 // PatchStringValue is for patching resources.
diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go
index d741329a..942f6431 100644
--- a/api/v1beta1/zz_generated.deepcopy.go
+++ b/api/v1beta1/zz_generated.deepcopy.go
@@ -248,6 +248,7 @@ func (in *ElfMachineStatus) DeepCopyInto(out *ElfMachineStatus) {
         *out = make([]GPUStatus, len(*in))
         copy(*out, *in)
     }
+    out.Resources = in.Resources
     if in.FailureReason != nil {
         in, out := &in.FailureReason, &out.FailureReason
         *out = new(errors.MachineStatusError)
@@ -390,6 +391,21 @@ func (in *GPUStatus) DeepCopy() *GPUStatus {
     return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *MachineHotUpdateStatus) DeepCopyInto(out *MachineHotUpdateStatus) {
+    *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MachineHotUpdateStatus.
+func (in *MachineHotUpdateStatus) DeepCopy() *MachineHotUpdateStatus {
+    if in == nil {
+        return nil
+    }
+    out := new(MachineHotUpdateStatus)
+    in.DeepCopyInto(out)
+    return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NetworkDeviceRouteSpec) DeepCopyInto(out *NetworkDeviceRouteSpec) {
     *out = *in
@@ -484,6 +500,21 @@ func (in *NetworkStatus) DeepCopy() *NetworkStatus {
     return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ResourcesStatus) DeepCopyInto(out *ResourcesStatus) {
+    *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourcesStatus.
+func (in *ResourcesStatus) DeepCopy() *ResourcesStatus {
+    if in == nil {
+        return nil
+    }
+    out := new(ResourcesStatus)
+    in.DeepCopyInto(out)
+    return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Tower) DeepCopyInto(out *Tower) {
     *out = *in
diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml
index e00060db..100f26ff 100644
--- a/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml
+++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml
@@ -403,6 +403,13 @@ spec:
             ready:
               description: Ready is true when the provider resource is ready.
               type: boolean
+            resources:
+              description: Resources records the resources allocated for the machine.
+              properties:
+                disk:
+                  format: int32
+                  type: integer
+              type: object
             taskRef:
               description: TaskRef is a managed object reference to a Task related
                to the machine. This value is set automatically at runtime and should
diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go
index bf127f4c..97ef37d8 100644
--- a/controllers/elfmachine_controller.go
+++ b/controllers/elfmachine_controller.go
@@ -980,15 +980,24 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx *context.MachineContext
         if ctx.ElfMachine.RequiresGPUDevices() {
             unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name)
         }
+    case service.IsUpdateVMDiskTask(task, ctx.ElfMachine.Name):
+        reason := conditions.GetReason(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
+        if reason == infrav1.ExpandingVMDiskReason || reason == infrav1.ExpandingVMDiskFailedReason {
+            conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityInfo, errorMessage)
+        }
     case service.IsPowerOnVMTask(task) || service.IsUpdateVMTask(task) || service.IsVMColdMigrationTask(task):
         if ctx.ElfMachine.RequiresGPUDevices() {
             unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name)
         }
+    }
+
+    switch {
+    case service.IsVMDuplicateError(errorMessage):
+        setVMDuplicate(ctx.ElfMachine.Name)
     case service.IsMemoryInsufficientError(errorMessage):
         recordElfClusterMemoryInsufficient(ctx, true)
         message := fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", ctx.ElfCluster.Spec.Cluster)
         ctx.Logger.Info(message)
 
-        return errors.New(message)
     case service.IsPlacementGroupError(errorMessage):
         if err := recordPlacementGroupPolicyNotSatisfied(ctx, true); err != nil {
@@ -996,7 +1005,6 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx *context.MachineContext
         }
 
         message := "The placement group policy can not be satisfied"
         ctx.Logger.Info(message)
 
-        return errors.New(message)
     }
diff --git a/controllers/elfmachine_controller_resources.go b/controllers/elfmachine_controller_resources.go
index c36efe1a..e6f8e584 100644
--- a/controllers/elfmachine_controller_resources.go
+++ b/controllers/elfmachine_controller_resources.go
@@ -30,14 +30,9 @@ import (
     "github.com/smartxworks/cluster-api-provider-elf/pkg/hostagent"
     "github.com/smartxworks/cluster-api-provider-elf/pkg/service"
     annotationsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/annotations"
-    machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine"
 )
 
 func (r *ElfMachineReconciler) reconcileVMResources(ctx *context.MachineContext, vm *models.VM) (bool, error) {
-    if !machineutil.IsUpdatingElfMachineResources(ctx.ElfMachine) {
-        return true, nil
-    }
-
     if ok, err := r.reconcieVMVolume(ctx, vm, infrav1.ResourcesHotUpdatedCondition); err != nil || !ok {
         return ok, err
     }
@@ -49,57 +44,18 @@ func (r *ElfMachineReconciler) reconcileVMResources(ctx *context.MachineContext,
         return false, nil
     }
 
-    kubeClient, err := capiremote.NewClusterClient(ctx, "", ctx.Client, client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name})
-    if err != nil {
-        return false, err
-    }
-
-    var agentJob *agentv1.HostOperationJob
-    agentJobName := annotationsutil.HostAgentJobName(ctx.ElfMachine)
-    if agentJobName != "" {
-        agentJob, err = hostagent.GetHostJob(ctx, kubeClient, ctx.ElfMachine.Namespace, agentJobName)
-        if err != nil && !apierrors.IsNotFound(err) {
-            return false, err
-        }
-    }
-    if agentJob == nil {
-        agentJob, err = hostagent.AddNewDiskCapacityToRoot(ctx, kubeClient, ctx.ElfMachine)
-        if err != nil {
-            conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityInfo, err.Error())
-
-            return false, err
-        }
-
-        annotationsutil.AddAnnotations(ctx.ElfMachine, map[string]string{infrav1.HostAgentJobNameAnnotation: agentJob.Name})
-
-        conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionReason, clusterv1.ConditionSeverityInfo, "")
-
-        ctx.Logger.Info("Waiting for disk to be added new disk capacity to root", "hostAgentJob", agentJob.Name)
-
-        return false, nil
+    if ok, err := r.expandVMRootPartition(ctx); err != nil || !ok {
+        return ok, err
     }
 
-    switch agentJob.Status.Phase {
-    case agentv1.PhaseSucceeded:
-        annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation)
-        conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
-        ctx.Logger.Info("Add new disk capacity to root succeeded", "hostAgentJob", agentJob.Name)
-    case agentv1.PhaseFailed:
-        annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation)
-        conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityWarning, agentJob.Status.FailureMessage)
-        ctx.Logger.Info("Add new disk capacity to root failed, will try again", "hostAgentJob", agentJob.Name)
-
-        return false, nil
-    default:
-        ctx.Logger.Info("Waiting for adding new disk capacity to root job done", "jobStatus", agentJob.Status.Phase)
-
-        return false, nil
-    }
+    conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
 
     return true, nil
 }
 
 // reconcieVMVolume ensures that the vm disk size is as expected.
+//
+// The conditionType parameter is either VMProvisionedCondition or ResourcesHotUpdatedCondition.
 func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm *models.VM, conditionType clusterv1.ConditionType) (bool, error) {
     vmDiskIDs := make([]string, len(vm.VMDisks))
     for i := 0; i < len(vm.VMDisks); i++ {
@@ -108,6 +64,8 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm
 
     vmDisks, err := ctx.VMService.GetVMDisks(vmDiskIDs)
     if err != nil {
+        return false, errors.Wrapf(err, "failed to get disks for vm %s/%s", *vm.ID, *vm.Name)
+    } else if len(vmDisks) == 0 {
         return false, errors.Errorf("no disks found for vm %s/%s", *vm.ID, *vm.Name)
     }
 
@@ -116,18 +74,13 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm
         return false, err
     }
 
-    diskSize := service.TowerDisk(ctx.ElfMachine.Spec.DiskGiB)
-    if *diskSize > *vmVolume.Size {
-        if service.IsTowerResourcePerformingAnOperation(vmVolume.EntityAsyncStatus) {
-            ctx.Logger.Info("Waiting for vm volume task done", "volume", fmt.Sprintf("%s/%s", *vmVolume.ID, *vmVolume.Name))
+    diskSize := service.ByteToGiB(*vmVolume.Size)
+    ctx.ElfMachine.Status.Resources.Disk = diskSize
 
-            return false, nil
-        }
-
-        return false, r.resizeVMVolume(ctx, vmVolume, *diskSize, conditionType)
-    } else if *diskSize < *vmVolume.Size {
-        conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
-        ctx.Logger.Info(fmt.Sprintf("Current disk capacity is larger than expected, skipping expand vm volume %s/%s", *vmVolume.ID, *vmVolume.Name), "currentSize", *vmVolume.Size, "expectedSize", *diskSize)
+    if ctx.ElfMachine.Spec.DiskGiB < diskSize {
+        ctx.Logger.V(3).Info(fmt.Sprintf("Current disk capacity is larger than expected, skipping expand vm volume %s/%s", *vmVolume.ID, *vmVolume.Name), "currentSize", diskSize, "expectedSize", ctx.ElfMachine.Spec.DiskGiB)
+    } else if ctx.ElfMachine.Spec.DiskGiB > diskSize {
+        return false, r.resizeVMVolume(ctx, vmVolume, *service.TowerDisk(ctx.ElfMachine.Spec.DiskGiB), conditionType)
     }
 
     return true, nil
@@ -135,18 +88,84 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm
 
 // resizeVMVolume sets the volume to the specified size.
 func (r *ElfMachineReconciler) resizeVMVolume(ctx *context.MachineContext, vmVolume *models.VMVolume, diskSize int64, conditionType clusterv1.ConditionType) error {
+    reason := conditions.GetReason(ctx.ElfMachine, conditionType)
+    if reason == "" ||
+        (reason != infrav1.ExpandingVMDiskReason && reason != infrav1.ExpandingVMDiskFailedReason) {
+        conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskReason, clusterv1.ConditionSeverityInfo, "")
+
+        // Save the condition first and expand the disk capacity on the next reconcile.
+        // This prevents the case where the disk expansion succeeds but saving the
+        // condition fails, leaving the ElfMachine without a record of the expansion.
+        return nil
+    }
+
+    if service.IsTowerResourcePerformingAnOperation(vmVolume.EntityAsyncStatus) {
+        ctx.Logger.Info("Waiting for vm volume task done", "volume", fmt.Sprintf("%s/%s", *vmVolume.ID, *vmVolume.Name))
+
+        return nil
+    }
+
     withTaskVMVolume, err := ctx.VMService.ResizeVMVolume(*vmVolume.ID, diskSize)
     if err != nil {
-        conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskReason, clusterv1.ConditionSeverityWarning, err.Error())
+        conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
 
         return errors.Wrapf(err, "failed to trigger expand size from %d to %d for vm volume %s/%s", *vmVolume.Size, diskSize, *vmVolume.ID, *vmVolume.Name)
     }
 
-    conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityInfo, "")
-
     ctx.ElfMachine.SetTask(*withTaskVMVolume.TaskID)
     ctx.Logger.Info(fmt.Sprintf("Waiting for the vm volume %s/%s to be expanded", *vmVolume.ID, *vmVolume.Name), "taskRef", ctx.ElfMachine.Status.TaskRef, "oldSize", *vmVolume.Size, "newSize", diskSize)
 
     return nil
 }
+
+func (r *ElfMachineReconciler) expandVMRootPartition(ctx *context.MachineContext) (bool, error) {
+    reason := conditions.GetReason(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition)
+    if reason == "" {
+        return true, nil
+    } else if reason != infrav1.ExpandingVMDiskReason &&
+        reason != infrav1.ExpandingVMDiskFailedReason &&
+        reason != infrav1.ExpandingRootPartitionReason &&
+        reason != infrav1.ExpandingRootPartitionFailedReason {
+        return true, nil
+    }
+
+    kubeClient, err := capiremote.NewClusterClient(ctx, "", ctx.Client, client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name})
+    if err != nil {
+        return false, err
+    }
+    var agentJob *agentv1.HostOperationJob
+    agentJobName := annotationsutil.HostAgentJobName(ctx.ElfMachine)
+    if agentJobName != "" {
+        agentJob, err = hostagent.GetHostJob(ctx, kubeClient, ctx.ElfMachine.Namespace, agentJobName)
+        if err != nil && !apierrors.IsNotFound(err) {
+            return false, err
+        }
+    }
+    if agentJob == nil {
+        agentJob, err = hostagent.AddNewDiskCapacityToRoot(ctx, kubeClient, ctx.ElfMachine)
+        if err != nil {
+            conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityInfo, err.Error())
+            return false, err
+        }
+        annotationsutil.AddAnnotations(ctx.ElfMachine, map[string]string{infrav1.HostAgentJobNameAnnotation: agentJob.Name})
+        conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionReason, clusterv1.ConditionSeverityInfo, "")
+        ctx.Logger.Info("Waiting for disk to be added new disk capacity to root", "hostAgentJob", agentJob.Name)
+        return false, nil
+    }
+    switch agentJob.Status.Phase {
+    case agentv1.PhaseSucceeded:
+        annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation)
+        ctx.Logger.Info("Add new disk capacity to root succeeded", "hostAgentJob", agentJob.Name)
+    case agentv1.PhaseFailed:
+        annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation)
+        conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityWarning, agentJob.Status.FailureMessage)
+        ctx.Logger.Info("Add new disk capacity to root failed, will try again", "hostAgentJob", agentJob.Name, "failureMessage", agentJob.Status.FailureMessage)
+        return false, nil
+    default:
+        ctx.Logger.Info("Waiting for adding new disk capacity to root partition job done", "jobStatus", agentJob.Status.Phase)
+        return false, nil
+    }
+
+    return true, nil
+}
diff --git a/controllers/elfmachinetemplate_controller.go b/controllers/elfmachinetemplate_controller.go
index e52139ed..7167ff7b 100644
--- a/controllers/elfmachinetemplate_controller.go
+++ b/controllers/elfmachinetemplate_controller.go
@@ -44,6 +44,7 @@ import (
     "github.com/smartxworks/cluster-api-provider-elf/pkg/config"
     "github.com/smartxworks/cluster-api-provider-elf/pkg/context"
     "github.com/smartxworks/cluster-api-provider-elf/pkg/service"
+    annotationsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/annotations"
     kcputil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/kcp"
     machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine"
     mdutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/md"
@@ -213,7 +214,27 @@ func (r *ElfMachineTemplateReconciler) reconcileCPResources(ctx *context.Machine
     updatingResourcesElfMachines, needUpdatedResourcesElfMachines, err := selectResourcesNotUpToDateElfMachines(ctx, ctx.ElfMachineTemplate, elfMachines)
     if err != nil {
         return false, err
-    } else if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 {
+    }
+
+    patchHelper, err := patch.NewHelper(&kcp, r.Client)
+    if err != nil {
+        return false, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", kcp.GroupVersionKind(), kcp.Namespace, kcp.Name)
+    }
+
+    defer func() {
+        if err := patchHelper.Patch(r, &kcp); err != nil {
+            ctx.Logger.Error(err, fmt.Sprintf("failed to patch KCP %s to set MachineHotUpdateStatus", kcp.Name))
+        }
+    }()
+
+    if err := annotationsutil.SetMachineHotUpdateStatus(&kcp, &infrav1.MachineHotUpdateStatus{
+        OutdatedReplicas: int32(len(needUpdatedResourcesElfMachines) + len(updatingResourcesElfMachines)),
+        UpdatingReplicas: int32(len(updatingResourcesElfMachines)),
+    }); err != nil {
+        return false, errors.Wrapf(err, "failed to set MachineHotUpdateStatus for kcp %s", kcp.Name)
+    }
+
+    if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 {
         return true, nil
     }
 
@@ -350,7 +371,27 @@ func (r *ElfMachineTemplateReconciler) reconcileWorkerResourcesForMD(ctx *contex
     updatingResourcesElfMachines, needUpdatedResourcesElfMachines, err := selectResourcesNotUpToDateElfMachines(ctx, ctx.ElfMachineTemplate, elfMachines)
     if err != nil {
         return false, err
-    } else if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 {
+    }
+
+    patchHelper, err := patch.NewHelper(md, r.Client)
+    if err != nil {
+        return false, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", md.GroupVersionKind(), md.Namespace, md.Name)
+    }
+
+    defer func() {
+        if err := patchHelper.Patch(r, md); err != nil {
+            ctx.Logger.Error(err, fmt.Sprintf("failed to patch md %s to set MachineHotUpdateStatus", md.Name))
+        }
+    }()
+
+    if err := annotationsutil.SetMachineHotUpdateStatus(md, &infrav1.MachineHotUpdateStatus{
+        OutdatedReplicas: int32(len(needUpdatedResourcesElfMachines) + len(updatingResourcesElfMachines)),
+        UpdatingReplicas: int32(len(updatingResourcesElfMachines)),
+    }); err != nil {
+        return false, errors.Wrapf(err, "failed to set MachineHotUpdateStatus for md %s", md.Name)
+    }
+
+    if len(updatingResourcesElfMachines) == 0 && len(needUpdatedResourcesElfMachines) == 0 {
         return true, nil
     }
 
diff --git a/pkg/service/util.go b/pkg/service/util.go
index 79ef3cb6..08f76174 100644
--- a/pkg/service/util.go
+++ b/pkg/service/util.go
@@ -150,6 +150,10 @@ func TowerCPUSockets(vCPU, cpuCores int32) *int32 {
     return &cpuSockets
 }
 
+func ByteToGiB(bytes int64) int32 {
+    return int32(bytes / 1024 / 1024 / 1024)
+}
+
 func IsVMInRecycleBin(vm *models.VM) bool {
     return vm.InRecycleBin != nil && *vm.InRecycleBin
 }
@@ -198,6 +202,10 @@ func IsUpdateVMTask(task *models.Task) bool {
     return strings.Contains(GetTowerString(task.Description), "Edit VM")
 }
 
+func IsUpdateVMDiskTask(task *models.Task, vmName string) bool {
+    return GetTowerString(task.Description) == fmt.Sprintf("Edit VM %s disk", vmName)
+}
+
 func IsVMColdMigrationTask(task *models.Task) bool {
     return strings.Contains(GetTowerString(task.Description), "performing a cold migration")
 }
diff --git a/pkg/util/annotations/helpers.go b/pkg/util/annotations/helpers.go
index 04988953..e56d3827 100644
--- a/pkg/util/annotations/helpers.go
+++ b/pkg/util/annotations/helpers.go
@@ -17,6 +17,8 @@ limitations under the License.
 package annotations
 
 import (
+    "encoding/json"
+
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
     "sigs.k8s.io/cluster-api/util/annotations"
@@ -71,6 +73,42 @@ func GetTemplateClonedFromName(o metav1.Object) string {
     return annotations[clusterv1.TemplateClonedFromNameAnnotation]
 }
 
+func GetMachineHotUpdateStatus(o metav1.Object) (*infrav1.MachineHotUpdateStatus, error) {
+    annotations := o.GetAnnotations()
+    if annotations == nil {
+        return nil, nil
+    }
+
+    hotUpdateStatusAnnotation := annotations[infrav1.MachineHotUpdateStatusAnnotation]
+    if hotUpdateStatusAnnotation == "" {
+        return nil, nil
+    }
+
+    hotUpdateStatus := &infrav1.MachineHotUpdateStatus{}
+    if err := json.Unmarshal([]byte(hotUpdateStatusAnnotation), hotUpdateStatus); err != nil {
+        return nil, err
+    }
+
+    return hotUpdateStatus, nil
+}
+
+func SetMachineHotUpdateStatus(o metav1.Object, hotUpdateStatus *infrav1.MachineHotUpdateStatus) error {
+    if hotUpdateStatus.OutdatedReplicas == 0 && hotUpdateStatus.UpdatingReplicas == 0 {
+        RemoveAnnotation(o, infrav1.MachineHotUpdateStatusAnnotation)
+
+        return nil
+    }
+
+    bs, err := json.Marshal(hotUpdateStatus)
+    if err != nil {
+        return err
+    }
+
+    annotations.AddAnnotations(o, map[string]string{infrav1.MachineHotUpdateStatusAnnotation: string(bs)})
+
+    return nil
+}
+
 // AddAnnotations sets the desired annotations on the object and returns true if the annotations have changed.
 func AddAnnotations(o metav1.Object, desired map[string]string) bool {
     return annotations.AddAnnotations(o, desired)
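
Reviewer note (not part of the patch): a minimal, self-contained sketch of how the machine-hot-update-status annotation introduced above round-trips as JSON. The standalone package and the sample replica counts are illustrative assumptions; the local struct simply mirrors MachineHotUpdateStatus from api/v1beta1/types.go so the snippet runs without importing the repository.

package main

import (
    "encoding/json"
    "fmt"
)

// machineHotUpdateStatus mirrors infrav1.MachineHotUpdateStatus so this sketch is self-contained.
type machineHotUpdateStatus struct {
    OutdatedReplicas int32 `json:"outdatedReplicas"`
    UpdatingReplicas int32 `json:"updatingReplicas"`
}

func main() {
    // SetMachineHotUpdateStatus marshals the struct and stores the result under the
    // cape.infrastructure.cluster.x-k8s.io/machine-hot-update-status annotation on the KCP/MD.
    in := machineHotUpdateStatus{OutdatedReplicas: 3, UpdatingReplicas: 1}
    bs, err := json.Marshal(in)
    if err != nil {
        panic(err)
    }
    fmt.Println(string(bs)) // {"outdatedReplicas":3,"updatingReplicas":1}

    // GetMachineHotUpdateStatus reverses the operation when a consumer reads the annotation back.
    var out machineHotUpdateStatus
    if err := json.Unmarshal(bs, &out); err != nil {
        panic(err)
    }
    fmt.Printf("outdated=%d updating=%d\n", out.OutdatedReplicas, out.UpdatingReplicas)
}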