diff --git a/api/v1beta1/consts.go b/api/v1beta1/consts.go index 769b1d02..9848dfcc 100644 --- a/api/v1beta1/consts.go +++ b/api/v1beta1/consts.go @@ -40,9 +40,6 @@ const ( // 1. ${Tower username}@${Tower auth_config_id}, e.g. caas.smartx@7e98ecbb-779e-43f6-8330-1bc1d29fffc7. // 2. ${Tower username}, e.g. root. If auth_config_id is not set, it means it is a LOCAL user. CreatedByAnnotation = "cape.infrastructure.cluster.x-k8s.io/created-by" - - // HostAgentJobNameAnnotation is the annotation identifying the name of HostOperationJob. - HostAgentJobNameAnnotation = "cape.infrastructure.cluster.x-k8s.io/host-agent-job-name" ) // Labels. diff --git a/api/v1beta1/elfmachine_types.go b/api/v1beta1/elfmachine_types.go index a7214fdf..85a106a2 100644 --- a/api/v1beta1/elfmachine_types.go +++ b/api/v1beta1/elfmachine_types.go @@ -124,6 +124,10 @@ type ElfMachineStatus struct { // +optional GPUDevices []GPUStatus `json:"gpuDevices,omitempty"` + // Resources records the resources allocated for the machine. + // +optional + Resources ResourcesStatus `json:"resources,omitempty"` + // FailureReason will be set in the event that there is a terminal problem // reconciling the Machine and will contain a succinct value suitable // for machine interpretation. @@ -241,6 +245,11 @@ func (m *ElfMachine) IsFailed() bool { return m.Status.FailureReason != nil || m.Status.FailureMessage != nil } +// IsResourcesUpToDate returns whether the machine's resources are as expected. 
+func (m *ElfMachine) IsResourcesUpToDate() bool { + return m.Spec.DiskGiB == m.Status.Resources.Disk +} + func (m *ElfMachine) SetVMDisconnectionTimestamp(timestamp *metav1.Time) { if m.Annotations == nil { m.Annotations = make(map[string]string) diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index e18cf5ab..6c5154cb 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -196,6 +196,11 @@ type GPUStatus struct { Name string `json:"name,omitempty"` } +// ResourcesStatus records the resources allocated to the virtual machine. +type ResourcesStatus struct { + Disk int32 `json:"disk,omitempty"` +} + //+kubebuilder:object:generate=false // PatchStringValue is for patching resources. diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index d741329a..92ee1f3c 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -248,6 +248,7 @@ func (in *ElfMachineStatus) DeepCopyInto(out *ElfMachineStatus) { *out = make([]GPUStatus, len(*in)) copy(*out, *in) } + out.Resources = in.Resources if in.FailureReason != nil { in, out := &in.FailureReason, &out.FailureReason *out = new(errors.MachineStatusError) @@ -484,6 +485,21 @@ func (in *NetworkStatus) DeepCopy() *NetworkStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ResourcesStatus) DeepCopyInto(out *ResourcesStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourcesStatus. +func (in *ResourcesStatus) DeepCopy() *ResourcesStatus { + if in == nil { + return nil + } + out := new(ResourcesStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Tower) DeepCopyInto(out *Tower) { *out = *in diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml index e00060db..100f26ff 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_elfmachines.yaml @@ -403,6 +403,13 @@ spec: ready: description: Ready is true when the provider resource is ready. type: boolean + resources: + description: Resources records the resources allocated for the machine. + properties: + disk: + format: int32 + type: integer + type: object taskRef: description: TaskRef is a managed object reference to a Task related to the machine. This value is set automatically at runtime and should diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index c1cc6caf..8baf195d 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -49,6 +49,26 @@ kind: ValidatingWebhookConfiguration metadata: name: validating-webhook-configuration webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate-infrastructure-cluster-x-k8s-io-v1beta1-elfmachine + failurePolicy: Fail + name: validation.elfmachine.infrastructure.x-k8s.io + rules: + - apiGroups: + - infrastructure.cluster.x-k8s.io + apiVersions: + - v1beta1 + operations: + - CREATE + - UPDATE + resources: + - elfmachines + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index ac1a04f4..5c561475 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -990,9 +990,18 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx *context.MachineContext case service.IsCloneVMTask(task): releaseTicketForCreateVM(ctx.ElfMachine.Name) + if service.IsVMDuplicateError(errorMessage) { + 
setVMDuplicate(ctx.ElfMachine.Name) + } + if ctx.ElfMachine.RequiresGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) } + case service.IsUpdateVMDiskTask(task, ctx.ElfMachine.Name): + reason := conditions.GetReason(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition) + if reason == infrav1.ExpandingVMDiskReason || reason == infrav1.ExpandingVMDiskFailedReason { + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityInfo, errorMessage) + } case service.IsPowerOnVMTask(task) || service.IsUpdateVMTask(task) || service.IsVMColdMigrationTask(task): if ctx.ElfMachine.RequiresGPUDevices() { unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) diff --git a/controllers/elfmachine_controller_resources.go b/controllers/elfmachine_controller_resources.go index c36efe1a..d6aa0d6f 100644 --- a/controllers/elfmachine_controller_resources.go +++ b/controllers/elfmachine_controller_resources.go @@ -15,6 +15,7 @@ package controllers import ( "fmt" + "time" "github.com/pkg/errors" "github.com/smartxworks/cloudtower-go-sdk/v2/models" @@ -29,77 +30,36 @@ import ( "github.com/smartxworks/cluster-api-provider-elf/pkg/context" "github.com/smartxworks/cluster-api-provider-elf/pkg/hostagent" "github.com/smartxworks/cluster-api-provider-elf/pkg/service" - annotationsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/annotations" machineutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/machine" ) func (r *ElfMachineReconciler) reconcileVMResources(ctx *context.MachineContext, vm *models.VM) (bool, error) { - if !machineutil.IsUpdatingElfMachineResources(ctx.ElfMachine) { - return true, nil - } - if ok, err := r.reconcieVMVolume(ctx, vm, infrav1.ResourcesHotUpdatedCondition); err != nil || !ok { return ok, err } // Agent needs to wait for the node exists before it can run and execute commands. 
- if ctx.Machine.Status.Phase != string(clusterv1.MachinePhaseRunning) { - ctx.Logger.Info("Waiting for node exists for host agent running", "phase", ctx.Machine.Status.Phase) + if machineutil.IsUpdatingElfMachineResources(ctx.ElfMachine) && + ctx.Machine.Status.NodeInfo == nil { + ctx.Logger.Info("Waiting for node exists for host agent expand vm root partition") return false, nil } - kubeClient, err := capiremote.NewClusterClient(ctx, "", ctx.Client, client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name}) - if err != nil { - return false, err - } - - var agentJob *agentv1.HostOperationJob - agentJobName := annotationsutil.HostAgentJobName(ctx.ElfMachine) - if agentJobName != "" { - agentJob, err = hostagent.GetHostJob(ctx, kubeClient, ctx.ElfMachine.Namespace, agentJobName) - if err != nil && !apierrors.IsNotFound(err) { - return false, err - } - } - if agentJob == nil { - agentJob, err = hostagent.AddNewDiskCapacityToRoot(ctx, kubeClient, ctx.ElfMachine) - if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityInfo, err.Error()) - - return false, err - } - - annotationsutil.AddAnnotations(ctx.ElfMachine, map[string]string{infrav1.HostAgentJobNameAnnotation: agentJob.Name}) - - conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionReason, clusterv1.ConditionSeverityInfo, "") - - ctx.Logger.Info("Waiting for disk to be added new disk capacity to root", "hostAgentJob", agentJob.Name) - - return false, nil + if ok, err := r.expandVMRootPartition(ctx); err != nil || !ok { + return ok, err } - switch agentJob.Status.Phase { - case agentv1.PhaseSucceeded: - annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation) + if machineutil.IsUpdatingElfMachineResources(ctx.ElfMachine) { conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition) - 
ctx.Logger.Info("Add new disk capacity to root succeeded", "hostAgentJob", agentJob.Name) - case agentv1.PhaseFailed: - annotationsutil.RemoveAnnotation(ctx.ElfMachine, infrav1.HostAgentJobNameAnnotation) - conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityWarning, agentJob.Status.FailureMessage) - ctx.Logger.Info("Add new disk capacity to root failed, will try again", "hostAgentJob", agentJob.Name) - - return false, nil - default: - ctx.Logger.Info("Waiting for adding new disk capacity to root job done", "jobStatus", agentJob.Status.Phase) - - return false, nil } return true, nil } // reconcieVMVolume ensures that the vm disk size is as expected. +// +// The conditionType param: VMProvisionedCondition/ResourcesHotUpdatedCondition. func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm *models.VM, conditionType clusterv1.ConditionType) (bool, error) { vmDiskIDs := make([]string, len(vm.VMDisks)) for i := 0; i < len(vm.VMDisks); i++ { @@ -108,6 +68,8 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm vmDisks, err := ctx.VMService.GetVMDisks(vmDiskIDs) if err != nil { + return false, errors.Wrapf(err, "failed to get disks for vm %s/%s", *vm.ID, *vm.Name) + } else if len(vmDisks) == 0 { return false, errors.Errorf("no disks found for vm %s/%s", *vm.ID, *vm.Name) } @@ -116,18 +78,13 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm return false, err } - diskSize := service.TowerDisk(ctx.ElfMachine.Spec.DiskGiB) - if *diskSize > *vmVolume.Size { - if service.IsTowerResourcePerformingAnOperation(vmVolume.EntityAsyncStatus) { - ctx.Logger.Info("Waiting for vm volume task done", "volume", fmt.Sprintf("%s/%s", *vmVolume.ID, *vmVolume.Name)) + diskSize := service.ByteToGiB(*vmVolume.Size) + ctx.ElfMachine.Status.Resources.Disk = diskSize - return false, nil - } - - return false, 
r.resizeVMVolume(ctx, vmVolume, *diskSize, conditionType) - } else if *diskSize < *vmVolume.Size { - conditions.MarkTrue(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition) - ctx.Logger.Info(fmt.Sprintf("Current disk capacity is larger than expected, skipping expand vm volume %s/%s", *vmVolume.ID, *vmVolume.Name), "currentSize", *vmVolume.Size, "expectedSize", *diskSize) + if ctx.ElfMachine.Spec.DiskGiB < diskSize { + ctx.Logger.V(3).Info(fmt.Sprintf("Current disk capacity is larger than expected, skipping expand vm volume %s/%s", *vmVolume.ID, *vmVolume.Name), "currentSize", diskSize, "expectedSize", ctx.ElfMachine.Spec.DiskGiB) + } else if ctx.ElfMachine.Spec.DiskGiB > diskSize { + return false, r.resizeVMVolume(ctx, vmVolume, *service.TowerDisk(ctx.ElfMachine.Spec.DiskGiB), conditionType) } return true, nil @@ -135,18 +92,100 @@ func (r *ElfMachineReconciler) reconcieVMVolume(ctx *context.MachineContext, vm // resizeVMVolume sets the volume to the specified size. func (r *ElfMachineReconciler) resizeVMVolume(ctx *context.MachineContext, vmVolume *models.VMVolume, diskSize int64, conditionType clusterv1.ConditionType) error { + reason := conditions.GetReason(ctx.ElfMachine, conditionType) + if reason == "" || + (reason != infrav1.ExpandingVMDiskReason && reason != infrav1.ExpandingVMDiskFailedReason) { + conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskReason, clusterv1.ConditionSeverityInfo, "") + + // Save the conditionType first, and then expand the disk capacity. + // This prevents the disk expansion from succeeding but failing to save the + // conditionType, causing ElfMachine to not record the conditionType. 
+ return nil + } + + if service.IsTowerResourcePerformingAnOperation(vmVolume.EntityAsyncStatus) { + ctx.Logger.Info("Waiting for vm volume task done", "volume", fmt.Sprintf("%s/%s", *vmVolume.ID, *vmVolume.Name)) + + return nil + } + withTaskVMVolume, err := ctx.VMService.ResizeVMVolume(*vmVolume.ID, diskSize) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) return errors.Wrapf(err, "failed to trigger expand size from %d to %d for vm volume %s/%s", *vmVolume.Size, diskSize, *vmVolume.ID, *vmVolume.Name) } - conditions.MarkFalse(ctx.ElfMachine, conditionType, infrav1.ExpandingVMDiskFailedReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*withTaskVMVolume.TaskID) ctx.Logger.Info(fmt.Sprintf("Waiting for the vm volume %s/%s to be expanded", *vmVolume.ID, *vmVolume.Name), "taskRef", ctx.ElfMachine.Status.TaskRef, "oldSize", *vmVolume.Size, "newSize", diskSize) return nil } + +// expandVMRootPartition adds new disk capacity to root partition. 
+func (r *ElfMachineReconciler) expandVMRootPartition(ctx *context.MachineContext) (bool, error) { + reason := conditions.GetReason(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition) + if reason == "" { + return true, nil + } else if reason != infrav1.ExpandingVMDiskReason && + reason != infrav1.ExpandingVMDiskFailedReason && + reason != infrav1.ExpandingRootPartitionReason && + reason != infrav1.ExpandingRootPartitionFailedReason { + return true, nil + } + + if reason != infrav1.ExpandingRootPartitionFailedReason { + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionReason, clusterv1.ConditionSeverityInfo, "") + } + + kubeClient, err := capiremote.NewClusterClient(ctx, "", ctx.Client, client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name}) + if err != nil { + return false, err + } + + agentJob, err := hostagent.GetHostJob(ctx, kubeClient, ctx.ElfMachine.Namespace, hostagent.GetExpandRootPartitionJobName(ctx.ElfMachine)) + if err != nil && !apierrors.IsNotFound(err) { + return false, err + } + + if agentJob == nil { + agentJob, err = hostagent.ExpandRootPartition(ctx, kubeClient, ctx.ElfMachine) + if err != nil { + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityInfo, err.Error()) + + return false, err + } + + ctx.Logger.Info("Waiting for expanding root partition", "hostAgentJob", agentJob.Name) + + return false, nil + } + + switch agentJob.Status.Phase { + case agentv1.PhaseSucceeded: + ctx.Logger.Info("Expand root partition to root succeeded", "hostAgentJob", agentJob.Name) + case agentv1.PhaseFailed: + conditions.MarkFalse(ctx.ElfMachine, infrav1.ResourcesHotUpdatedCondition, infrav1.ExpandingRootPartitionFailedReason, clusterv1.ConditionSeverityWarning, agentJob.Status.FailureMessage) + ctx.Logger.Info("Expand root partition failed, will try again after two minutes", "hostAgentJob", 
agentJob.Name, "failureMessage", agentJob.Status.FailureMessage) + + lastExecutionTime := agentJob.Status.LastExecutionTime + if lastExecutionTime == nil { + lastExecutionTime = &agentJob.CreationTimestamp + } + // Two minutes after the job fails, delete the job and try again. + if time.Now().After(lastExecutionTime.Add(2 * time.Minute)) { + if err := kubeClient.Delete(ctx, agentJob); err != nil { + return false, errors.Wrapf(err, "failed to delete expand root partition job %s/%s for retry", agentJob.Namespace, agentJob.Name) + } + } + + return false, nil + default: + ctx.Logger.Info("Waiting for expanding root partition job done", "hostAgentJob", agentJob.Name, "jobStatus", agentJob.Status.Phase) + + return false, nil + } + + return true, nil +} diff --git a/main.go b/main.go index df55ddf2..1544669c 100644 --- a/main.go +++ b/main.go @@ -200,6 +200,12 @@ func main() { return err } + if err := (&webhooks.ElfMachineValidator{ + Client: mgr.GetClient(), + }).SetupWebhookWithManager(mgr); err != nil { + return err + } + if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), Logger: mgr.GetLogger().WithName("ElfMachineMutation"), diff --git a/pkg/cloudinit/expand_root_partition b/pkg/cloudinit/expand_root_partition index e2151f7d..abbdbbc3 100644 --- a/pkg/cloudinit/expand_root_partition +++ b/pkg/cloudinit/expand_root_partition @@ -1,5 +1,29 @@ runcmd: - - "growpart /dev/vda 2" + - | + ostype=$(cat /etc/os-release | grep "^ID=" | cut -d "=" -f 2) + rootpath="/dev/mapper/rl-root" + if [[ $ostype == '"openEuler"' ]]; then + rootpath="/dev/mapper/openeuler-root" + fi + - | + result=$(growpart /dev/vda 2) + if [[ $? == 0 ]]; then + echo "$result" + elif [[ $result == NOCHANGE* ]]; then + echo "$result" + else + echo "$result" + exit 1 + fi - "pvresize /dev/vda2" - - "lvextend -l+100%FREE -n /dev/mapper/rl-root" - - "resize2fs /dev/mapper/rl-root" \ No newline at end of file + - | + result=$(lvextend -l+100%FREE -n $rootpath 2>&1) + if [[ $? 
== 0 ]]; then + echo "$result" + elif [[ $result == *'matches existing size'* ]]; then + echo "$result" + else + echo "$result" + exit 1 + fi + - "resize2fs $rootpath" \ No newline at end of file diff --git a/pkg/hostagent/service.go b/pkg/hostagent/service.go index ed329333..e8c3feee 100644 --- a/pkg/hostagent/service.go +++ b/pkg/hostagent/service.go @@ -20,7 +20,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apitypes "k8s.io/apimachinery/pkg/types" - capiutil "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/controller-runtime/pkg/client" agentv1 "github.com/smartxworks/cluster-api-provider-elf/api/v1alpha1" @@ -42,10 +41,16 @@ func GetHostJob(ctx goctx.Context, c client.Client, namespace, name string) (*ag return &restartKubeletJob, nil } -func AddNewDiskCapacityToRoot(ctx goctx.Context, c client.Client, elfMachine *infrav1.ElfMachine) (*agentv1.HostOperationJob, error) { +// GetExpandRootPartitionJobName return the expand root partition job name. +// The same disk expansion uses the same job name to reduce duplicate jobs. 
+func GetExpandRootPartitionJobName(elfMachine *infrav1.ElfMachine) string { + return fmt.Sprintf("cape-expand-root-partition-%s-%d", elfMachine.Name, elfMachine.Spec.DiskGiB) +} + +func ExpandRootPartition(ctx goctx.Context, c client.Client, elfMachine *infrav1.ElfMachine) (*agentv1.HostOperationJob, error) { agentJob := &agentv1.HostOperationJob{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("cape-expand-root-rartition-%s-%s", elfMachine.Name, capiutil.RandomString(6)), + Name: GetExpandRootPartitionJobName(elfMachine), Namespace: elfMachine.Namespace, }, Spec: agentv1.HostOperationJobSpec{ diff --git a/pkg/hostagent/tasks/expand_root_partition.yaml b/pkg/hostagent/tasks/expand_root_partition.yaml index 8bc001be..18783f3a 100644 --- a/pkg/hostagent/tasks/expand_root_partition.yaml +++ b/pkg/hostagent/tasks/expand_root_partition.yaml @@ -4,11 +4,38 @@ become: true gather_facts: false tasks: + - name: Get root path + shell: | + ostype=$(cat /etc/os-release | grep "^ID=" | cut -d "=" -f 2) + rootpath="/dev/mapper/rl-root" + if [[ $ostype == '"openEuler"' ]]; then + rootpath="/dev/mapper/openeuler-root" + fi + echo $rootpath + register: rootpath - name: Grow vda2 - shell: growpart /dev/vda 2 + shell: | + result=$(growpart /dev/vda 2) + if [[ $? == 0 ]]; then + echo "$result" + elif [[ $result == NOCHANGE* ]]; then + echo "$result" + else + echo "$result" + exit 1 + fi - name: Resize vda2 shell: pvresize /dev/vda2 - name: Extend root - shell: lvextend -l+100%FREE -n /dev/mapper/rl-root + shell: | + result=$(lvextend -l+100%FREE -n {{ rootpath.stdout }} 2>&1) + if [[ $? 
== 0 ]]; then + echo "$result" + elif [[ $result == *'matches existing size'* ]]; then + echo "$result" + else + echo "$result" + exit 1 + fi - name: Resize root - shell: resize2fs /dev/mapper/rl-root + shell: resize2fs {{ rootpath.stdout }} diff --git a/pkg/service/util.go b/pkg/service/util.go index 79ef3cb6..08f76174 100644 --- a/pkg/service/util.go +++ b/pkg/service/util.go @@ -150,6 +150,10 @@ func TowerCPUSockets(vCPU, cpuCores int32) *int32 { return &cpuSockets } +func ByteToGiB(bytes int64) int32 { + return int32(bytes / 1024 / 1024 / 1024) +} + func IsVMInRecycleBin(vm *models.VM) bool { return vm.InRecycleBin != nil && *vm.InRecycleBin } @@ -198,6 +202,10 @@ func IsUpdateVMTask(task *models.Task) bool { return strings.Contains(GetTowerString(task.Description), "Edit VM") } +func IsUpdateVMDiskTask(task *models.Task, vmName string) bool { + return GetTowerString(task.Description) == fmt.Sprintf("Edit VM %s disk", vmName) +} + func IsVMColdMigrationTask(task *models.Task) bool { return strings.Contains(GetTowerString(task.Description), "performing a cold migration") } diff --git a/pkg/util/annotations/helpers.go b/pkg/util/annotations/helpers.go index 04988953..9007c1fc 100644 --- a/pkg/util/annotations/helpers.go +++ b/pkg/util/annotations/helpers.go @@ -53,15 +53,6 @@ func GetCreatedBy(o metav1.Object) string { return annotations[infrav1.CreatedByAnnotation] } -func HostAgentJobName(o metav1.Object) string { - annotations := o.GetAnnotations() - if annotations == nil { - return "" - } - - return annotations[infrav1.HostAgentJobNameAnnotation] -} - func GetTemplateClonedFromName(o metav1.Object) string { annotations := o.GetAnnotations() if annotations == nil { diff --git a/test/helpers/envtest.go b/test/helpers/envtest.go index c61b5701..e18fc383 100644 --- a/test/helpers/envtest.go +++ b/test/helpers/envtest.go @@ -135,6 +135,12 @@ func NewTestEnvironment() *TestEnvironment { return err } + if err := (&webhooks.ElfMachineValidator{ + Client: 
mgr.GetClient(), + }).SetupWebhookWithManager(mgr); err != nil { + return err + } + if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), Logger: mgr.GetLogger().WithName("ElfMachineMutation"), diff --git a/webhooks/elfmachine_webhook_validation.go b/webhooks/elfmachine_webhook_validation.go new file mode 100644 index 00000000..959089da --- /dev/null +++ b/webhooks/elfmachine_webhook_validation.go @@ -0,0 +1,104 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhooks + +import ( + goctx "context" + "fmt" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation/field" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" + annotationsutil "github.com/smartxworks/cluster-api-provider-elf/pkg/util/annotations" +) + +// Error messages. +const ( + canOnlyModifiedThroughElfMachineTemplate = "virtual machine resources can only be modified through ElfMachineTemplate %s" +) + +func (v *ElfMachineValidator) SetupWebhookWithManager(mgr ctrl.Manager) error { + return ctrl.NewWebhookManagedBy(mgr). + For(&infrav1.ElfMachine{}). + WithValidator(v). 
+ Complete() +} + +//+kubebuilder:webhook:path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-elfmachine,mutating=false,failurePolicy=fail,sideEffects=None,groups=infrastructure.cluster.x-k8s.io,resources=elfmachines,verbs=create;update,versions=v1beta1,name=validation.elfmachine.infrastructure.x-k8s.io,admissionReviewVersions=v1 + +// ElfMachineValidator implements a validation webhook for ElfMachine. +type ElfMachineValidator struct { + client.Client +} + +var _ webhook.CustomValidator = &ElfMachineValidator{} + +// ValidateCreate implements webhook.Validator so a webhook will be registered for the type. +func (v *ElfMachineValidator) ValidateCreate(ctx goctx.Context, obj runtime.Object) (admission.Warnings, error) { + return nil, nil +} + +// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type. +func (v *ElfMachineValidator) ValidateUpdate(ctx goctx.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { + oldElfMachine, ok := oldObj.(*infrav1.ElfMachine) //nolint:forcetypeassert + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected an ElfMachine but got a %T", oldObj)) + } + elfMachine, ok := newObj.(*infrav1.ElfMachine) //nolint:forcetypeassert + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected an ElfMachine but got a %T", newObj)) + } + + var allErrs field.ErrorList + + elfMachineTemplateName := annotationsutil.GetTemplateClonedFromName(elfMachine) + if elfMachineTemplateName == "" { + if elfMachine.Spec.DiskGiB < oldElfMachine.Spec.DiskGiB { + allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "diskGiB"), elfMachine.Spec.DiskGiB, diskCapacityCanOnlyBeExpanded)) + } + + return nil, aggregateObjErrors(elfMachine.GroupVersionKind().GroupKind(), elfMachine.Name, allErrs) + } + + // If the ElfMachine was created using ElfMachineTemplate. ElfMachine's + // resources can only be modified through this ElfMachineTemplate. 
+ + var elfMachineTemplate infrav1.ElfMachineTemplate + if err := v.Client.Get(ctx, client.ObjectKey{ + Namespace: elfMachine.Namespace, + Name: annotationsutil.GetTemplateClonedFromName(elfMachine), + }, &elfMachineTemplate); err != nil { + return nil, apierrors.NewInternalError(err) + } + + if elfMachine.Spec.DiskGiB != elfMachineTemplate.Spec.Template.Spec.DiskGiB { + allErrs = append(allErrs, field.Invalid(field.NewPath("spec", "diskGiB"), elfMachine.Spec.DiskGiB, fmt.Sprintf(canOnlyModifiedThroughElfMachineTemplate, elfMachineTemplateName))) + } + + return nil, aggregateObjErrors(elfMachine.GroupVersionKind().GroupKind(), elfMachine.Name, allErrs) +} + +// ValidateDelete implements webhook.Validator so a webhook will be registered for the type. +func (v *ElfMachineValidator) ValidateDelete(ctx goctx.Context, obj runtime.Object) (admission.Warnings, error) { + return nil, nil +} diff --git a/webhooks/elfmachine_webhook_validation_test.go b/webhooks/elfmachine_webhook_validation_test.go new file mode 100644 index 00000000..9ffa3217 --- /dev/null +++ b/webhooks/elfmachine_webhook_validation_test.go @@ -0,0 +1,136 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package webhooks + +import ( + goctx "context" + "fmt" + "testing" + + . 
"github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/validation/field" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" +) + +func TestElfMachineValidatorValidateUpdate(t *testing.T) { + g := NewWithT(t) + + var tests []elfMachineTestCase + scheme := newScheme(g) + + elfMachineTemplate := &infrav1.ElfMachineTemplate{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Spec: infrav1.ElfMachineTemplateSpec{ + Template: infrav1.ElfMachineTemplateResource{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 1, + }, + }, + }, + } + + tests = append(tests, elfMachineTestCase{ + Name: "Cannot reduce disk capacity", + OldEM: &infrav1.ElfMachine{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 2, + }, + }, + EM: &infrav1.ElfMachine{ + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 1, + }, + }, + Errs: field.ErrorList{ + field.Invalid(field.NewPath("spec", "diskGiB"), 1, diskCapacityCanOnlyBeExpanded), + }, + }) + + tests = append(tests, elfMachineTestCase{ + Name: "Disk cannot be modified directly", + OldEM: nil, + EM: &infrav1.ElfMachine{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + clusterv1.TemplateClonedFromNameAnnotation: elfMachineTemplate.Name, + }, + }, + Spec: infrav1.ElfMachineSpec{ + DiskGiB: 2, + }, + }, + Objs: []client.Object{elfMachineTemplate}, + Errs: field.ErrorList{ + field.Invalid(field.NewPath("spec", "diskGiB"), 2, fmt.Sprintf(canOnlyModifiedThroughElfMachineTemplate, elfMachineTemplate.Name)), + }, + }) + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + validator := &ElfMachineValidator{ + Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(tc.Objs...).Build(), + } + warnings, err := 
validator.ValidateUpdate(goctx.Background(), tc.OldEM, tc.EM) + g.Expect(warnings).To(BeEmpty()) + expectElfMachineTestCase(g, tc, err) + }) + } +} + +func newScheme(g Gomega) *runtime.Scheme { + scheme := runtime.NewScheme() + g.Expect(infrav1.AddToScheme(scheme)).To(Succeed()) + + return scheme +} + +func expectElfMachineTestCase(g Gomega, tc elfMachineTestCase, err error) { + if tc.Errs != nil { + g.Expect(err).To(HaveOccurred()) + statusErr, ok := err.(*apierrors.StatusError) + g.Expect(ok).To(BeTrue()) + g.Expect(statusErr.ErrStatus.Details.Group).To(Equal(tc.EM.GroupVersionKind().Group)) + g.Expect(statusErr.ErrStatus.Details.Kind).To(Equal(tc.EM.GroupVersionKind().Kind)) + g.Expect(statusErr.ErrStatus.Details.Name).To(Equal(tc.EM.Name)) + causes := make([]metav1.StatusCause, 0, len(tc.Errs)) + for i := 0; i < len(tc.Errs); i++ { + causes = append(causes, metav1.StatusCause{ + Type: metav1.CauseType(tc.Errs[i].Type), + Message: tc.Errs[i].ErrorBody(), + Field: tc.Errs[i].Field, + }) + } + g.Expect(statusErr.ErrStatus.Details.Causes).To(Equal(causes)) + } else { + g.Expect(err).NotTo(HaveOccurred()) + } +} + +type elfMachineTestCase struct { + Name string + EM *infrav1.ElfMachine + OldEM *infrav1.ElfMachine + Objs []client.Object + Errs field.ErrorList +} diff --git a/webhooks/elfmachinetemplate_webhook.go b/webhooks/elfmachinetemplate_webhook_validation.go similarity index 99% rename from webhooks/elfmachinetemplate_webhook.go rename to webhooks/elfmachinetemplate_webhook_validation.go index 8b3700a4..b4d5b4c8 100644 --- a/webhooks/elfmachinetemplate_webhook.go +++ b/webhooks/elfmachinetemplate_webhook_validation.go @@ -45,7 +45,7 @@ func (v *ElfMachineTemplateValidator) SetupWebhookWithManager(mgr ctrl.Manager) 
//+kubebuilder:webhook:path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-elfmachinetemplate,mutating=false,failurePolicy=fail,sideEffects=None,groups=infrastructure.cluster.x-k8s.io,resources=elfmachinetemplates,verbs=create;update,versions=v1beta1,name=validation.elfmachinetemplate.infrastructure.x-k8s.io,admissionReviewVersions=v1 -// ElfMachineTemplateValidator implements a validation webhook for VSphereMachineTemplate. +// ElfMachineTemplateValidator implements a validation webhook for ElfMachineTemplate. type ElfMachineTemplateValidator struct{} var _ webhook.CustomValidator = &ElfMachineTemplateValidator{} diff --git a/webhooks/elfmachinetemplate_webhook_test.go b/webhooks/elfmachinetemplate_webhook_validation_test.go similarity index 100% rename from webhooks/elfmachinetemplate_webhook_test.go rename to webhooks/elfmachinetemplate_webhook_validation_test.go