diff --git a/api/v1beta1/conditions_consts.go b/api/v1beta1/conditions_consts.go new file mode 100644 index 000000000..880809aa3 --- /dev/null +++ b/api/v1beta1/conditions_consts.go @@ -0,0 +1,24 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +const ( + // WaitingForClusterInfrastructureReason used when machine is waiting for cluster infrastructure to be ready before proceeding. + WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure" + // WaitingForBootstrapDataReason used when machine is waiting for bootstrap data to be ready before proceeding. + WaitingForBootstrapDataReason = "WaitingForBootstrapData" +) diff --git a/cloud/interfaces.go b/cloud/interfaces.go index 27abdcb42..ba2f8b0bb 100644 --- a/cloud/interfaces.go +++ b/cloud/interfaces.go @@ -89,7 +89,7 @@ type MachineGetter interface { ControlPlaneGroupName() string GetInstanceID() *string GetProviderID() string - GetBootstrapData() (string, error) + GetBootstrapData(ctx context.Context) (string, error) GetInstanceStatus() *infrav1.InstanceStatus } diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index 8173728f2..7d59fdf17 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -25,7 +25,6 @@ import ( "strings" "github.com/go-logr/logr" - "github.com/pkg/errors" "golang.org/x/mod/semver" "google.golang.org/api/compute/v1" @@ -41,6 +40,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +// Constants for GCP OnHostMaintenance values. +// These are not exported, because they are not _our_ API, but they are used in multiple places. +const ( + onHostMaintenanceTerminate = "TERMINATE" + onHostMaintenanceMigrate = "MIGRATE" +) + // MachineScopeParams defines the input parameters used to create a new MachineScope. type MachineScopeParams struct { Client client.Client @@ -327,12 +333,12 @@ func instanceAdditionalDiskSpec(ctx context.Context, spec []infrav1.AttachedDisk } // InstanceNetworkInterfaceSpec returns compute network interface spec. 
-func (m *MachineScope) InstanceNetworkInterfaceSpec() *compute.NetworkInterface { +func InstanceNetworkInterfaceSpec(cluster cloud.ClusterGetter, publicIP *bool, subnet *string) *compute.NetworkInterface { networkInterface := &compute.NetworkInterface{ - Network: path.Join("projects", m.ClusterGetter.NetworkProject(), "global", "networks", m.ClusterGetter.NetworkName()), + Network: path.Join("projects", cluster.NetworkProject(), "global", "networks", cluster.NetworkName()), } - if m.GCPMachine.Spec.PublicIP != nil && *m.GCPMachine.Spec.PublicIP { + if publicIP != nil && *publicIP { networkInterface.AccessConfigs = []*compute.AccessConfig{ { Type: "ONE_TO_ONE_NAT", @@ -341,8 +347,8 @@ func (m *MachineScope) InstanceNetworkInterfaceSpec() *compute.NetworkInterface } } - if m.GCPMachine.Spec.Subnet != nil { - networkInterface.Subnetwork = path.Join("projects", m.ClusterGetter.NetworkProject(), "regions", m.ClusterGetter.Region(), "subnetworks", *m.GCPMachine.Spec.Subnet) + if subnet != nil { + networkInterface.Subnetwork = path.Join("projects", cluster.NetworkProject(), "regions", cluster.Region(), "subnetworks", *subnet) } return networkInterface @@ -366,9 +372,9 @@ func instanceServiceAccountsSpec(serviceAccount *infrav1.ServiceAccount) *comput } // InstanceAdditionalMetadataSpec returns additional metadata spec. -func (m *MachineScope) InstanceAdditionalMetadataSpec() *compute.Metadata { +func InstanceAdditionalMetadataSpec(spec []infrav1.MetadataItem) *compute.Metadata { metadata := new(compute.Metadata) - for _, additionalMetadata := range m.GCPMachine.Spec.AdditionalMetadata { + for _, additionalMetadata := range spec { metadata.Items = append(metadata.Items, &compute.MetadataItems{ Key: additionalMetadata.Key, Value: additionalMetadata.Value, @@ -458,9 +464,9 @@ func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance { if m.GCPMachine.Spec.OnHostMaintenance != nil { switch *m.GCPMachine.Spec.OnHostMaintenance { case infrav1.HostMaintenancePolicyMigrate: - instance.Scheduling.OnHostMaintenance = "MIGRATE" + instance.Scheduling.OnHostMaintenance = onHostMaintenanceMigrate case infrav1.HostMaintenancePolicyTerminate: - instance.Scheduling.OnHostMaintenance = "TERMINATE" + instance.Scheduling.OnHostMaintenance = onHostMaintenanceTerminate default: log.Error(errors.New("Invalid value"), "Unknown OnHostMaintenance value", "Spec.OnHostMaintenance", *m.GCPMachine.Spec.OnHostMaintenance) } @@ -485,12 +491,13 @@ func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance { instance.Disks = append(instance.Disks, m.InstanceImageSpec()) instance.Disks = append(instance.Disks, instanceAdditionalDiskSpec(ctx, m.GCPMachine.Spec.AdditionalDisks, m.GCPMachine.Spec.RootDiskEncryptionKey, m.Zone(), m.ResourceManagerTags())...) 
- instance.Metadata = m.InstanceAdditionalMetadataSpec() + + instance.Metadata = InstanceAdditionalMetadataSpec(m.GCPMachine.Spec.AdditionalMetadata) instance.ServiceAccounts = append(instance.ServiceAccounts, instanceServiceAccountsSpec(m.GCPMachine.Spec.ServiceAccount)) - instance.NetworkInterfaces = append(instance.NetworkInterfaces, m.InstanceNetworkInterfaceSpec()) + instance.NetworkInterfaces = append(instance.NetworkInterfaces, InstanceNetworkInterfaceSpec(m.ClusterGetter, m.GCPMachine.Spec.PublicIP, m.GCPMachine.Spec.Subnet)) instance.GuestAccelerators = instanceGuestAcceleratorsSpec(m.GCPMachine.Spec.GuestAccelerators) if len(instance.GuestAccelerators) > 0 { - instance.Scheduling.OnHostMaintenance = "TERMINATE" + instance.Scheduling.OnHostMaintenance = onHostMaintenanceTerminate } return instance @@ -499,15 +506,20 @@ func (m *MachineScope) InstanceSpec(log logr.Logger) *compute.Instance { // ANCHOR_END: MachineInstanceSpec // GetBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName. -func (m *MachineScope) GetBootstrapData() (string, error) { - if m.Machine.Spec.Bootstrap.DataSecretName == nil { +func (m *MachineScope) GetBootstrapData(ctx context.Context) (string, error) { + return GetBootstrapData(ctx, m.client, m.Machine, m.Machine.Spec.Bootstrap) +} + +// GetBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName. +func GetBootstrapData(ctx context.Context, client client.Client, parent client.Object, bootstrap clusterv1.Bootstrap) (string, error) { + if bootstrap.DataSecretName == nil { return "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } secret := &corev1.Secret{} - key := types.NamespacedName{Namespace: m.Namespace(), Name: *m.Machine.Spec.Bootstrap.DataSecretName} - if err := m.client.Get(context.TODO(), key, secret); err != nil { - return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for GCPMachine %s/%s", m.Namespace(), m.Name()) + key := types.NamespacedName{Namespace: parent.GetNamespace(), Name: *bootstrap.DataSecretName} + if err := client.Get(ctx, key, secret); err != nil { + return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret %s/%s", key.Namespace, key.Name) } value, ok := secret.Data["value"] diff --git a/cloud/scope/machinepool.go b/cloud/scope/machinepool.go new file mode 100644 index 000000000..4989e214a --- /dev/null +++ b/cloud/scope/machinepool.go @@ -0,0 +1,517 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package scope
+
+import (
+	"context"
+	"fmt"
+	"path"
+	"strings"
+
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
+	"github.com/pkg/errors"
+	"golang.org/x/mod/semver"
+	"google.golang.org/api/compute/v1"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/services/shared"
+	expinfrav1 "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/gcp"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/logger"
+	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
+	patch "sigs.k8s.io/cluster-api/util/deprecated/v1beta1/patch"
+)
+
+// MachinePoolScope defines a scope defined around a machine pool and its cluster.
+type MachinePoolScope struct {
+	client                     client.Client
+	patchHelper                *patch.Helper
+	capiMachinePoolPatchHelper *patch.Helper
+
+	ClusterGetter  cloud.ClusterGetter
+	MachinePool    *clusterv1.MachinePool
+	GCPMachinePool *expinfrav1.GCPMachinePool
+}
+
+// MachinePoolScopeParams defines the input parameters used to create a new MachinePoolScope.
+type MachinePoolScopeParams struct {
+	client.Client
+
+	ClusterGetter  cloud.ClusterGetter
+	MachinePool    *clusterv1.MachinePool
+	GCPMachinePool *expinfrav1.GCPMachinePool
+}
+
+// NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters.
+// This is meant to be called for each reconcile iteration.
+func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) {
+	if params.ClusterGetter == nil {
+		return nil, errors.New("clusterGetter is required when creating a MachinePoolScope")
+	}
+	if params.Client == nil {
+		return nil, errors.New("client is required when creating a MachinePoolScope")
+	}
+	if params.MachinePool == nil {
+		return nil, errors.New("machinepool is required when creating a MachinePoolScope")
+	}
+	if params.GCPMachinePool == nil {
+		return nil, errors.New("gcp machine pool is required when creating a MachinePoolScope")
+	}
+
+	ampHelper, err := patch.NewHelper(params.GCPMachinePool, params.Client)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to init GCPMachinePool patch helper")
+	}
+	mpHelper, err := patch.NewHelper(params.MachinePool, params.Client)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to init MachinePool patch helper")
+	}
+
+	return &MachinePoolScope{
+		client:                     params.Client,
+		patchHelper:                ampHelper,
+		capiMachinePoolPatchHelper: mpHelper,
+
+		ClusterGetter:  params.ClusterGetter,
+		MachinePool:    params.MachinePool,
+		GCPMachinePool: params.GCPMachinePool,
+	}, nil
+}
+
+// Cloud returns initialized cloud.
+func (m *MachinePoolScope) Cloud() cloud.Cloud {
+	return m.ClusterGetter.Cloud()
+}
+
+// Name returns the GCPMachinePool name.
+func (m *MachinePoolScope) Name() string {
+	return m.GCPMachinePool.Name
+}
+
+// Namespace returns the namespace name.
+func (m *MachinePoolScope) Namespace() string {
+	return m.GCPMachinePool.Namespace
+}
+
+// getBootstrapData returns the bootstrap data from the secret in the MachinePool's bootstrap.dataSecretName.
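
For orientation, this is roughly how a GCPMachinePool reconciler might construct and release the scope; the surrounding controller wiring (the client, the cluster scope, and the helper name) is assumed here and is not part of this change:

```go
// Illustrative sketch only. Assumes a controller-runtime client and an existing
// cluster scope that satisfies cloud.ClusterGetter; all surrounding names are hypothetical.
func reconcileWithScope(ctx context.Context, c client.Client, clusterScope cloud.ClusterGetter,
	machinePool *clusterv1.MachinePool, gcpMachinePool *expinfrav1.GCPMachinePool) (reterr error) {
	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
		Client:         c,
		ClusterGetter:  clusterScope,
		MachinePool:    machinePool,
		GCPMachinePool: gcpMachinePool,
	})
	if err != nil {
		return errors.Wrap(err, "failed to create machine pool scope")
	}
	// Close patches the GCPMachinePool so spec/status changes are persisted on exit.
	defer func() {
		if closeErr := machinePoolScope.Close(); closeErr != nil && reterr == nil {
			reterr = closeErr
		}
	}()

	// ... reconcile instance templates / instance group managers here ...
	_ = ctx
	return nil
}
```
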
+func (m *MachinePoolScope) getBootstrapData(ctx context.Context) (string, error) { + return GetBootstrapData(ctx, m.client, m.MachinePool, m.MachinePool.Spec.Template.Spec.Bootstrap) +} + +// Zones returns the targeted zones for the machine pool +func (m *MachinePoolScope) Zones() []string { + zones := m.MachinePool.Spec.FailureDomains + if len(zones) == 0 { + failureDomains := m.ClusterGetter.FailureDomains() + for zone := range failureDomains { + zones = append(zones, zone) + } + } + return zones +} + +// Region returns the region for the GCP resources +func (m *MachinePoolScope) Region() string { + return m.ClusterGetter.Region() +} + +// PatchObject persists the machinepool spec and status. +func (m *MachinePoolScope) PatchObject(ctx context.Context) error { + return m.patchHelper.Patch( + ctx, + m.GCPMachinePool, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + expinfrav1.MIGReadyCondition, + expinfrav1.InstanceTemplateReadyCondition, + }}) +} + +// PatchCAPIMachinePoolObject persists the capi machinepool configuration and status. +func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error { + return m.capiMachinePoolPatchHelper.Patch( + ctx, + m.MachinePool, + ) +} + +// Close the MachinePoolScope by updating the machinepool spec, machine status. +func (m *MachinePoolScope) Close() error { + return m.PatchObject(context.TODO()) +} + +// InstanceGroupManagerResourceName is the name to use for the instanceGroupManager GCP resource +func (m *MachinePoolScope) InstanceGroupManagerResourceName() (*meta.Key, error) { + name := m.Name() // FUTURE: Sanitization? + + zones := m.Zones() + if len(zones) != 1 { + // FUTURE: Support regional instanceGroupManagers + return nil, fmt.Errorf("instanceGroupManager must be created in a single zone, got %d zones (%s)", len(zones), strings.Join(zones, ",")) + } + zone := zones[0] + igmKey := meta.ZonalKey(name, zone) + + return igmKey, nil +} + +// InstanceGroupManagerResource is the desired state for the instanceGroupManager GCP resource +func (m *MachinePoolScope) InstanceGroupManagerResource(instanceTemplate *meta.Key) (*compute.InstanceGroupManager, error) { + instanceTemplateSelfLink := gcp.SelfLink("instanceTemplates", instanceTemplate) + baseInstanceName := limitStringLength(m.Name(), 58) // FUTURE: Sanitization + + zones := m.Zones() + if len(zones) == 0 { + return nil, errors.New("must specify at least one zone") + } + + replicas := int64(1) + if p := m.MachinePool.Spec.Replicas; p != nil { + replicas = int64(*p) + } + + desired := &compute.InstanceGroupManager{ + BaseInstanceName: baseInstanceName, + Description: "", // FUTURE + InstanceTemplate: instanceTemplateSelfLink, + // ListManagedInstancesResults: "PAGINATED", // FUTURE + TargetSize: replicas, + } + + // DistributionPolicy can only be used if there are multiple zones + if len(zones) > 1 { + desired.DistributionPolicy = &compute.DistributionPolicy{} + for _, zone := range zones { + zoneSelfLink, err := buildZoneSelfLink(zone) + if err != nil { + return nil, err + } + desired.DistributionPolicy.Zones = append(desired.DistributionPolicy.Zones, &compute.DistributionPolicyZoneConfiguration{ + Zone: zoneSelfLink, + }) + } + } else { + zoneSelfLink, err := buildZoneSelfLink(zones[0]) + if err != nil { + return nil, err + } + desired.Zone = zoneSelfLink + } + + return desired, nil +} + +// buildZoneSelfLink returns a fully-qualified zone link from a user-provided zone +func buildZoneSelfLink(zone string) (string, error) { + tokens := 
strings.Split(zone, "/") + if len(tokens) == 1 { + return "zones/" + tokens[0], nil + } + return "", fmt.Errorf("zone %q was not a recognized format", zone) +} + +// BaseInstanceTemplateResourceName is the base name to use for the instanceTemplate GCP resource. +// The instance template is immutable, so we add a suffix that hash-encodes the version +func (m *MachinePoolScope) BaseInstanceTemplateResourceName() (*meta.Key, error) { + name := m.Name() // FUTURE: Sanitization? + + // We only use the first 46 characters, to leave room for a 16 character hash + // 63 characters max, 16 character hash; 1 hyphen + namePrefix := limitStringLength(name, 63-16-1) + "-" + + region := m.Region() + return meta.RegionalKey(namePrefix, region), nil +} + +// limitStringLength returns the string truncated to the specified maximum length. +func limitStringLength(s string, maxLength int) string { + if len(s) > maxLength { + return s[:maxLength] + } + return s +} + +// InstanceTemplateResource is the desired state for the instanceTemplate GCP resource +func (m *MachinePoolScope) InstanceTemplateResource(ctx context.Context) (*compute.InstanceTemplate, error) { + log := logger.FromContext(ctx) + + bootstrapData, err := m.getBootstrapData(ctx) + if err != nil { + return nil, fmt.Errorf("retrieving bootstrap data for instanceTemplate: %w", err) + } + + instance := &compute.InstanceProperties{ + MachineType: m.GCPMachinePool.Spec.InstanceType, + Tags: &compute.Tags{ + Items: append( + m.GCPMachinePool.Spec.AdditionalNetworkTags, + fmt.Sprintf("%s-%s", m.ClusterGetter.Name(), m.Role()), + m.ClusterGetter.Name(), + ), + }, + ResourceManagerTags: shared.ResourceTagConvert(ctx, m.ResourceManagerTags()), + Labels: infrav1.Build(infrav1.BuildParams{ + ClusterName: m.ClusterGetter.Name(), + Lifecycle: infrav1.ResourceLifecycleOwned, + Role: ptr.To[string](m.Role()), + //nolint: godox + // TODO: Check what needs to be added for the cloud provider label. 
+ Additional: m.ClusterGetter.AdditionalLabels().AddLabels(m.GCPMachinePool.Spec.AdditionalLabels), + }), + Scheduling: &compute.Scheduling{ + Preemptible: m.GCPMachinePool.Spec.Preemptible, + }, + } + + if m.GCPMachinePool.Spec.ProvisioningModel != nil { + // FUTURE: Can we dedup with MachinePool logic - until then we have to keep them in sync manually + + switch *m.GCPMachinePool.Spec.ProvisioningModel { + case infrav1.ProvisioningModelSpot: + instance.Scheduling.ProvisioningModel = "SPOT" + case infrav1.ProvisioningModelStandard: + instance.Scheduling.ProvisioningModel = "STANDARD" + default: + return nil, fmt.Errorf("unknown ProvisioningModel value: %q", *m.GCPMachinePool.Spec.ProvisioningModel) + } + } + + instance.CanIpForward = true + if m.GCPMachinePool.Spec.IPForwarding != nil && *m.GCPMachinePool.Spec.IPForwarding == infrav1.IPForwardingDisabled { + // FUTURE: Can we dedup with MachinePool logic - until then we have to keep them in sync manually + instance.CanIpForward = false + } + if config := m.GCPMachinePool.Spec.ShieldedInstanceConfig; config != nil { + // FUTURE: Can we dedup with MachinePool logic - until then we have to keep them in sync manually + instance.ShieldedInstanceConfig = &compute.ShieldedInstanceConfig{ + EnableSecureBoot: false, + EnableVtpm: true, + EnableIntegrityMonitoring: true, + } + if config.SecureBoot == infrav1.SecureBootPolicyEnabled { + instance.ShieldedInstanceConfig.EnableSecureBoot = true + } + if config.VirtualizedTrustedPlatformModule == infrav1.VirtualizedTrustedPlatformModulePolicyDisabled { + instance.ShieldedInstanceConfig.EnableVtpm = false + } + if config.IntegrityMonitoring == infrav1.IntegrityMonitoringPolicyDisabled { + instance.ShieldedInstanceConfig.EnableIntegrityMonitoring = false + } + } + if onHostMaintenance := ValueOf(m.GCPMachinePool.Spec.OnHostMaintenance); onHostMaintenance != "" { + // FUTURE: Can we dedup with MachinePool logic - until then we have to keep them in sync manually + switch onHostMaintenance { + case infrav1.HostMaintenancePolicyMigrate: + instance.Scheduling.OnHostMaintenance = onHostMaintenanceMigrate + case infrav1.HostMaintenancePolicyTerminate: + instance.Scheduling.OnHostMaintenance = onHostMaintenanceTerminate + default: + log.Error(errors.New("Invalid value"), "Unknown OnHostMaintenance value", "Spec.OnHostMaintenance", onHostMaintenance) + instance.Scheduling.OnHostMaintenance = strings.ToUpper(string(onHostMaintenance)) + } + } + + if confidentialCompute := m.GCPMachinePool.Spec.ConfidentialCompute; confidentialCompute != nil { + // FUTURE: Can we dedup with MachinePool logic - until then we have to keep them in sync manually + enabled := *confidentialCompute != infrav1.ConfidentialComputePolicyDisabled + instance.ConfidentialInstanceConfig = &compute.ConfidentialInstanceConfig{ + EnableConfidentialCompute: enabled, + } + switch *confidentialCompute { + case infrav1.ConfidentialComputePolicySEV: + instance.ConfidentialInstanceConfig.ConfidentialInstanceType = "SEV" + case infrav1.ConfidentialComputePolicySEVSNP: + instance.ConfidentialInstanceConfig.ConfidentialInstanceType = "SEV_SNP" + case infrav1.ConfidentialComputePolicyTDX: + instance.ConfidentialInstanceConfig.ConfidentialInstanceType = "TDX" + default: + } + } + + instance.Disks = append(instance.Disks, m.InstanceImageSpec(ctx)) + instance.Disks = append(instance.Disks, m.InstanceAdditionalDiskSpec()...) 
+	instance.Metadata = InstanceAdditionalMetadataSpec(m.GCPMachinePool.Spec.AdditionalMetadata)
+	instance.ServiceAccounts = append(instance.ServiceAccounts, instanceServiceAccountsSpec(m.GCPMachinePool.Spec.ServiceAccount))
+	instance.NetworkInterfaces = append(instance.NetworkInterfaces, InstanceNetworkInterfaceSpec(m.ClusterGetter, m.GCPMachinePool.Spec.PublicIP, m.GCPMachinePool.Spec.Subnet))
+	instance.GuestAccelerators = instanceGuestAcceleratorsSpec(m.GCPMachinePool.Spec.GuestAccelerators)
+	if len(instance.GuestAccelerators) > 0 {
+		instance.Scheduling.OnHostMaintenance = onHostMaintenanceTerminate
+	}
+
+	instance.Metadata.Items = append(instance.Metadata.Items, &compute.MetadataItems{
+		Key:   "user-data",
+		Value: ptr.To[string](bootstrapData),
+	})
+
+	instanceTemplate := &compute.InstanceTemplate{
+		Region:     m.Region(),
+		Properties: instance,
+	}
+
+	return instanceTemplate, nil
+}
+
+// InstanceImageSpec returns compute instance image attached-disk spec.
+func (m *MachinePoolScope) InstanceImageSpec(ctx context.Context) *compute.AttachedDisk {
+	// FUTURE: Can we dedup with MachinePool InstanceImageSpec - until then we have to keep them in sync manually
+	spec := m.GCPMachinePool.Spec
+
+	version := ""
+	if m.MachinePool.Spec.Template.Spec.Version != nil {
+		version = *m.MachinePool.Spec.Template.Spec.Version
+	}
+
+	image := "capi-ubuntu-1804-k8s-" + strings.ReplaceAll(semver.MajorMinor(version), ".", "-")
+	sourceImage := path.Join("projects", m.ClusterGetter.Project(), "global", "images", "family", image)
+	if spec.Image != nil {
+		sourceImage = *spec.Image
+	} else if spec.ImageFamily != nil {
+		sourceImage = *spec.ImageFamily
+	}
+
+	diskType := infrav1.PdStandardDiskType
+	if t := spec.RootDeviceType; t != nil {
+		diskType = *t
+	}
+
+	// FUTURE: diskType = path.Join("zones", m.Zone(), "diskTypes", string(diskType)),
+
+	disk := &compute.AttachedDisk{
+		AutoDelete: true,
+		Boot:       true,
+		InitializeParams: &compute.AttachedDiskInitializeParams{
+			DiskSizeGb:          spec.RootDeviceSize,
+			DiskType:            string(diskType),
+			ResourceManagerTags: shared.ResourceTagConvert(ctx, spec.ResourceManagerTags),
+			SourceImage:         sourceImage,
+			Labels:              m.ClusterGetter.AdditionalLabels().AddLabels(spec.AdditionalLabels),
+		},
+	}
+
+	if spec.RootDiskEncryptionKey != nil {
+		if spec.RootDiskEncryptionKey.KeyType == infrav1.CustomerManagedKey && spec.RootDiskEncryptionKey.ManagedKey != nil {
+			disk.DiskEncryptionKey = &compute.CustomerEncryptionKey{
+				KmsKeyName: spec.RootDiskEncryptionKey.ManagedKey.KMSKeyName,
+			}
+			if spec.RootDiskEncryptionKey.KMSKeyServiceAccount != nil {
+				disk.DiskEncryptionKey.KmsKeyServiceAccount = *spec.RootDiskEncryptionKey.KMSKeyServiceAccount
+			}
+		} else if spec.RootDiskEncryptionKey.KeyType == infrav1.CustomerSuppliedKey && spec.RootDiskEncryptionKey.SuppliedKey != nil {
+			disk.DiskEncryptionKey = &compute.CustomerEncryptionKey{
+				RawKey:          string(spec.RootDiskEncryptionKey.SuppliedKey.RawKey),
+				RsaEncryptedKey: string(spec.RootDiskEncryptionKey.SuppliedKey.RSAEncryptedKey),
+			}
+			if spec.RootDiskEncryptionKey.KMSKeyServiceAccount != nil {
+				disk.DiskEncryptionKey.KmsKeyServiceAccount = *spec.RootDiskEncryptionKey.KMSKeyServiceAccount
+			}
+		}
+	}
+
+	return disk
+}
+
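As a worked example of the image defaulting above (the project name is made up): for a MachinePool pinned to Kubernetes v1.30.2, semver.MajorMinor yields "v1.30", so the family-based source image resolves as follows.

```go
// Sketch only: mirrors the default-source-image logic in InstanceImageSpec above.
version := "v1.30.2" // from MachinePool.Spec.Template.Spec.Version
image := "capi-ubuntu-1804-k8s-" + strings.ReplaceAll(semver.MajorMinor(version), ".", "-")
// image == "capi-ubuntu-1804-k8s-v1-30"
sourceImage := path.Join("projects", "my-project", "global", "images", "family", image)
// sourceImage == "projects/my-project/global/images/family/capi-ubuntu-1804-k8s-v1-30"
```
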
+// InstanceAdditionalDiskSpec returns compute instance additional attached-disk spec.
+func (m *MachinePoolScope) InstanceAdditionalDiskSpec() []*compute.AttachedDisk {
+	// FUTURE: Can we dedup with MachinePool InstanceImageSpec - until then we have to keep them in sync manually
+
+	spec := m.GCPMachinePool.Spec
+
+	additionalDisks := make([]*compute.AttachedDisk, 0, len(spec.AdditionalDisks))
+	for _, disk := range spec.AdditionalDisks {
+		diskType := string(ValueOf(disk.DeviceType))
+
+		// FUTURE: // path.Join("zones", m.Zone(), "diskTypes", string(*disk.DeviceType))
+
+		additionalDisk := &compute.AttachedDisk{
+			AutoDelete: true,
+			InitializeParams: &compute.AttachedDiskInitializeParams{
+				DiskSizeGb:          ptr.Deref(disk.Size, 30),
+				DiskType:            diskType,
+				ResourceManagerTags: shared.ResourceTagConvert(context.TODO(), spec.ResourceManagerTags),
+			},
+		}
+		if strings.HasSuffix(additionalDisk.InitializeParams.DiskType, string(infrav1.LocalSsdDiskType)) {
+			additionalDisk.Type = "SCRATCH" // Default is PERSISTENT.
+			// Override the Disk size
+			additionalDisk.InitializeParams.DiskSizeGb = 375
+			// For local SSDs set interface to NVME (instead of default SCSI) which is faster.
+			// Most OS images would work with both NVME and SCSI disks but some may work
+			// considerably faster with NVME.
+			// https://cloud.google.com/compute/docs/disks/local-ssd#choose_an_interface
+			additionalDisk.Interface = "NVME"
+		}
+		if disk.EncryptionKey != nil {
+			if disk.EncryptionKey.KeyType == infrav1.CustomerManagedKey && disk.EncryptionKey.ManagedKey != nil {
+				additionalDisk.DiskEncryptionKey = &compute.CustomerEncryptionKey{
+					KmsKeyName: disk.EncryptionKey.ManagedKey.KMSKeyName,
+				}
+				if disk.EncryptionKey.KMSKeyServiceAccount != nil {
+					additionalDisk.DiskEncryptionKey.KmsKeyServiceAccount = *disk.EncryptionKey.KMSKeyServiceAccount
+				}
+			} else if disk.EncryptionKey.KeyType == infrav1.CustomerSuppliedKey && disk.EncryptionKey.SuppliedKey != nil {
+				additionalDisk.DiskEncryptionKey = &compute.CustomerEncryptionKey{
+					RawKey:          string(disk.EncryptionKey.SuppliedKey.RawKey),
+					RsaEncryptedKey: string(disk.EncryptionKey.SuppliedKey.RSAEncryptedKey),
+				}
+				if disk.EncryptionKey.KMSKeyServiceAccount != nil {
+					additionalDisk.DiskEncryptionKey.KmsKeyServiceAccount = *disk.EncryptionKey.KMSKeyServiceAccount
+				}
+			}
+		}
+
+		additionalDisks = append(additionalDisks, additionalDisk)
+	}
+
+	return additionalDisks
+}
+
+// ResourceManagerTags merges ResourceManagerTags from the scope's GCPCluster and GCPMachinePool. If the same key is present in both,
+// the value from GCPMachinePool takes precedence. The returned ResourceManagerTags will never be nil.
+func (m *MachinePoolScope) ResourceManagerTags() infrav1.ResourceManagerTags {
+	tags := infrav1.ResourceManagerTags{}
+
+	// Start with the cluster-wide tags...
+	tags.Merge(m.ClusterGetter.ResourceManagerTags())
+	// ... and merge in the MachinePool's
+	tags.Merge(m.GCPMachinePool.Spec.ResourceManagerTags)
+
+	return tags
+}
+
+// Role returns the machine pool role from the labels.
+func (m *MachinePoolScope) Role() string {
+	// FUTURE: Or template labels?
+	_, isControlPlane := m.MachinePool.Labels[clusterv1.MachineControlPlaneLabel]
+
+	if isControlPlane {
+		// FUTURE: Extract constants
+		return "control-plane"
+	}
+
+	return "node"
+}
+
+// ValueOf is a generic helper function that returns the value of a pointer, or the empty value if the pointer is nil.
+func ValueOf[V any](v *V) V {
+	if v != nil {
+		return *v
+	}
+	var zero V
+	return zero
+}
diff --git a/cloud/services/compute/instancegroupmanagers/instancegroupmanagers_reconcile.go b/cloud/services/compute/instancegroupmanagers/instancegroupmanagers_reconcile.go
new file mode 100644
index 000000000..d7ebc1c19
--- /dev/null
+++ b/cloud/services/compute/instancegroupmanagers/instancegroupmanagers_reconcile.go
@@ -0,0 +1,165 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package instancegroupmanagers
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/filter"
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
+	"google.golang.org/api/compute/v1"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/gcperrors"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/gcp"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// Reconcile reconciles the instanceGroupManager resources for the machine pool.
+func (s *Service) Reconcile(ctx context.Context, instanceTemplateKey *meta.Key) (*compute.InstanceGroupManager, error) {
+	log := log.FromContext(ctx)
+	log.Info("Reconciling instanceGroupManager resources")
+	igm, err := s.createOrGet(ctx, instanceTemplateKey)
+	if err != nil {
+		return nil, err
+	}
+	log.V(2).Info("Reconciled instanceGroupManager", "selfLink", igm.SelfLink)
+
+	return igm, nil
+}
+
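The intended call pattern pairs this service with the instancetemplates service: reconcile the template first, then feed the resulting key into the instanceGroupManager. A hypothetical reconcile step (error handling trimmed, surrounding names assumed) would look roughly like this:

```go
// Illustrative sketch: machinePoolScope is an assumed *scope.MachinePoolScope,
// which satisfies the Scope interface of both services.
func reconcileMachinePoolInfra(ctx context.Context, machinePoolScope *scope.MachinePoolScope) error {
	templateKey, err := instancetemplates.New(machinePoolScope).Reconcile(ctx)
	if err != nil {
		return err
	}
	igm, err := instancegroupmanagers.New(machinePoolScope).Reconcile(ctx, templateKey)
	if err != nil {
		return err
	}
	_ = igm // e.g. surface igm.TargetSize and the instance list in GCPMachinePool status
	return nil
}
```
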
+// Delete deletes the GCP instanceGroupManager resource.
+func (s *Service) Delete(ctx context.Context) error {
+	log := log.FromContext(ctx)
+
+	igmKey, err := s.scope.InstanceGroupManagerResourceName()
+	if err != nil {
+		return err
+	}
+
+	selfLink := gcp.FormatKey("instanceGroupManagers", igmKey)
+
+	log = log.WithValues("instanceGroupManager", selfLink)
+	log.Info("Deleting instanceGroupManager resources")
+
+	log.V(2).Info("Looking for instanceGroupManager before deleting")
+	if _, err := s.instanceGroupManagers.Get(ctx, igmKey); err != nil {
+		if !gcperrors.IsNotFound(err) {
+			log.Error(err, "Error looking for instanceGroupManager before deleting")
+			return fmt.Errorf("getting instanceGroupManager: %w", err)
+		}
+
+		return nil
+	}
+	log.V(2).Info("found instanceGroupManager; will delete")
+
+	log.V(2).Info("Deleting instanceGroupManager")
+	return gcperrors.IgnoreNotFound(s.instanceGroupManagers.Delete(ctx, igmKey))
+}
+
+func (s *Service) createOrGet(ctx context.Context, instanceTemplateKey *meta.Key) (*compute.InstanceGroupManager, error) {
+	log := log.FromContext(ctx)
+
+	igmKey, err := s.scope.InstanceGroupManagerResourceName()
+	if err != nil {
+		return nil, err
+	}
+
+	selfLink := gcp.FormatKey("instanceGroupManagers", igmKey)
+
+	log = log.WithValues("instanceGroupManager", selfLink)
+	log.Info("Getting instanceGroupManager resources")
+
+	desired, err := s.scope.InstanceGroupManagerResource(instanceTemplateKey)
+	if err != nil {
+		return nil, err
+	}
+
+	log.V(2).Info("Looking for instanceGroupManager")
+	actual, err := s.instanceGroupManagers.Get(ctx, igmKey)
+	if err != nil {
+		if !gcperrors.IsNotFound(err) {
+			log.Error(err, "Error looking for instanceGroupManager")
+			return nil, fmt.Errorf("getting instanceGroupManager %v: %w", selfLink, err)
+		}
+
+		log.V(2).Info("Creating instanceGroupManager")
+		if err := s.instanceGroupManagers.Insert(ctx, igmKey, desired); err != nil {
+			log.Error(err, "creating instanceGroupManager")
+			return nil, fmt.Errorf("creating instanceGroupManager %v: %w", selfLink, err)
+		}
+
+		actual, err = s.instanceGroupManagers.Get(ctx, igmKey)
+		if err != nil {
+			return nil, fmt.Errorf("getting instanceGroupManager %v: %w", selfLink, err)
+		}
+	}
+
+	if desired.TargetSize != actual.TargetSize {
+		log.V(2).Info("resizing instanceGroupManager", "targetSize", desired.TargetSize)
+		if err := s.instanceGroupManagers.Resize(ctx, igmKey, desired.TargetSize); err != nil {
+			log.Error(err, "resizing instanceGroupManager")
+			return nil, fmt.Errorf("resizing instanceGroupManager %v: %w", selfLink, err)
+		}
+
+		actual.TargetSize = desired.TargetSize
+	}
+
+	if desired.InstanceTemplate != actual.InstanceTemplate {
+		log.V(2).Info("updating instanceTemplate for instanceGroupManager", "desired.instanceTemplate", desired.InstanceTemplate, "actual.instanceTemplate", actual.InstanceTemplate)
+		if err := s.instanceGroupManagers.SetInstanceTemplate(ctx, igmKey, &compute.InstanceGroupManagersSetInstanceTemplateRequest{
+			InstanceTemplate: desired.InstanceTemplate,
+		}); err != nil {
+			log.Error(err, "updating instanceTemplate for instanceGroupManager")
+			return nil, fmt.Errorf("updating instanceTemplate for instanceGroupManager %v: %w", selfLink, err)
+		}
+
+		actual.InstanceTemplate = desired.InstanceTemplate
+	}
+
+	return actual, nil
+}
+
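For reference, ListInstances below expects the instanceGroup link in the standard compute URL form; a hypothetical value parses like this:

```go
// Sketch of the parsing done in ListInstances (the URL is made up).
instanceGroup := "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-a/instanceGroups/my-pool"
instanceGroup = strings.TrimPrefix(instanceGroup, "https://www.googleapis.com/")
instanceGroup = strings.TrimPrefix(instanceGroup, "compute/v1/")
tokens := strings.Split(instanceGroup, "/")
// tokens == ["projects", "my-project", "zones", "us-central1-a", "instanceGroups", "my-pool"]
igKey := meta.ZonalKey(tokens[5], tokens[3]) // "my-pool" in zone "us-central1-a"
_ = igKey
```
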
+// ListInstances lists instances in the instanceGroup linked to the passed instanceGroupManager.
+func (s *Service) ListInstances(ctx context.Context, instanceGroupManager *compute.InstanceGroupManager) ([]*compute.InstanceWithNamedPorts, error) {
+	log := log.FromContext(ctx)
+
+	var igKey *meta.Key
+	{
+		instanceGroup := instanceGroupManager.InstanceGroup
+		instanceGroup = strings.TrimPrefix(instanceGroup, "https://www.googleapis.com/")
+		instanceGroup = strings.TrimPrefix(instanceGroup, "compute/v1/")
+		tokens := strings.Split(instanceGroup, "/")
+		if len(tokens) == 6 && tokens[0] == "projects" && tokens[2] == "zones" && tokens[4] == "instanceGroups" {
+			igKey = meta.ZonalKey(tokens[5], tokens[3])
+		} else {
+			return nil, fmt.Errorf("unexpected format for instanceGroup: %q", instanceGroup)
+		}
+	}
+
+	log.Info("Listing instances in instanceGroup", "instanceGroup", instanceGroupManager.InstanceGroup)
+	listInstancesRequest := &compute.InstanceGroupsListInstancesRequest{
+		InstanceState: "ALL",
+	}
+	instances, err := s.instanceGroups.ListInstances(ctx, igKey, listInstancesRequest, filter.None)
+	if err != nil {
+		log.Error(err, "Error listing instances in instanceGroup", "instanceGroup", instanceGroupManager.InstanceGroup)
+		return nil, fmt.Errorf("listing instances in instanceGroup %q: %w", instanceGroupManager.InstanceGroup, err)
+	}
+
+	return instances, nil
+}
diff --git a/cloud/services/compute/instancegroupmanagers/service.go b/cloud/services/compute/instancegroupmanagers/service.go
new file mode 100644
index 000000000..6d482a1bb
--- /dev/null
+++ b/cloud/services/compute/instancegroupmanagers/service.go
@@ -0,0 +1,67 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package instancegroupmanagers implements reconciliation for instanceGroupManager GCP resources.
+package instancegroupmanagers
+
+import (
+	"context"
+
+	k8scloud "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
+	compute "google.golang.org/api/compute/v1"
+
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+)
+
+type instanceGroupManagersClient interface {
+	Get(ctx context.Context, key *meta.Key, options ...k8scloud.Option) (*compute.InstanceGroupManager, error)
+	Insert(ctx context.Context, key *meta.Key, obj *compute.InstanceGroupManager, options ...k8scloud.Option) error
+	Delete(ctx context.Context, key *meta.Key, options ...k8scloud.Option) error
+	Resize(context.Context, *meta.Key, int64, ...k8scloud.Option) error
+	SetInstanceTemplate(context.Context, *meta.Key, *compute.InstanceGroupManagersSetInstanceTemplateRequest, ...k8scloud.Option) error
+}
+
+// Scope is an interface that holds used methods.
+type Scope interface { + Cloud() cloud.Cloud + + // InstanceGroupManagerResource returns the desired instanceGroupManager + InstanceGroupManagerResource(instanceTemplateKey *meta.Key) (*compute.InstanceGroupManager, error) + + // InstanceGroupManagerResourceName returns the instanceGroupManager selfLink + InstanceGroupManagerResourceName() (*meta.Key, error) +} + +// Service implements managed instance groups reconciler. +type Service struct { + scope Scope + instanceGroupManagers instanceGroupManagersClient + instanceGroups k8scloud.InstanceGroups +} + +// var _ cloud.Reconciler = &Service{} + +// New returns Service from given scope. +func New(scope Scope) *Service { + cloudScope := scope.Cloud() + + return &Service{ + scope: scope, + instanceGroupManagers: cloudScope.InstanceGroupManagers(), + instanceGroups: cloudScope.InstanceGroups(), + } +} diff --git a/cloud/services/compute/instances/reconcile.go b/cloud/services/compute/instances/reconcile.go index 76277d072..1bedda66a 100644 --- a/cloud/services/compute/instances/reconcile.go +++ b/cloud/services/compute/instances/reconcile.go @@ -128,7 +128,7 @@ func (s *Service) Delete(ctx context.Context) error { func (s *Service) createOrGetInstance(ctx context.Context) (*compute.Instance, error) { log := log.FromContext(ctx) log.V(2).Info("Getting bootstrap data for machine") - bootstrapData, err := s.scope.GetBootstrapData() + bootstrapData, err := s.scope.GetBootstrapData(ctx) if err != nil { log.Error(err, "Error getting bootstrap data for machine") return nil, errors.Wrap(err, "failed to retrieve bootstrap data") diff --git a/cloud/services/compute/instances/service.go b/cloud/services/compute/instances/service.go index c31b8306a..e826275f3 100644 --- a/cloud/services/compute/instances/service.go +++ b/cloud/services/compute/instances/service.go @@ -19,11 +19,10 @@ package instances import ( "context" - "github.com/go-logr/logr" - k8scloud "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud" "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/filter" "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta" + "github.com/go-logr/logr" "google.golang.org/api/compute/v1" "sigs.k8s.io/cluster-api-provider-gcp/cloud" diff --git a/cloud/services/compute/instancetemplates/instancetemplate_reconcile.go b/cloud/services/compute/instancetemplates/instancetemplate_reconcile.go new file mode 100644 index 000000000..b8c8a6c94 --- /dev/null +++ b/cloud/services/compute/instancetemplates/instancetemplate_reconcile.go @@ -0,0 +1,151 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package instancetemplates
+
+import (
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"fmt"
+
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/filter"
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
+	"google.golang.org/api/compute/v1"
+
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/gcperrors"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/gcp"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+)
+
+// Reconcile reconciles the instanceTemplate resources and returns the key of the template to use.
+func (s *Service) Reconcile(ctx context.Context) (*meta.Key, error) {
+	log := log.FromContext(ctx)
+	log.Info("Reconciling instanceTemplate resources")
+	instanceTemplate, instanceTemplateKey, err := s.createOrGetInstanceTemplate(ctx)
+	if err != nil {
+		return nil, err
+	}
+	log.V(2).Info("binding to instanceTemplate", "selfLink", instanceTemplate.SelfLink)
+
+	return instanceTemplateKey, nil
+}
+
+// Delete deletes the instanceTemplate resources.
+func (s *Service) Delete(ctx context.Context) error {
+	log := log.FromContext(ctx)
+
+	baseKey, err := s.scope.BaseInstanceTemplateResourceName()
+	if err != nil {
+		return err
+	}
+
+	selfLink := gcp.FormatKey("instanceTemplates", baseKey)
+	log = log.WithValues("instanceTemplatesPrefix", selfLink)
+
+	log.Info("Deleting instanceTemplate resources")
+
+	log.V(2).Info("Looking for instanceTemplates for deletion")
+	// FUTURE: Create filter
+	var predicate *filter.F
+	instanceTemplates, err := s.instanceTemplates.List(ctx, predicate)
+	if err != nil {
+		log.Error(err, "looking for instanceTemplates for deletion")
+		return err
+	}
+
+	var errs []error
+	for _, instanceTemplate := range instanceTemplates {
+		log.V(2).Info("found instanceTemplate; will delete", "selfLink", instanceTemplate.SelfLink)
+
+		// FUTURE: Verify cluster name through metadata
+
+		instanceName := instanceTemplate.Name
+		log.V(2).Info("Deleting instanceTemplate", "instanceTemplate", instanceTemplate.SelfLink)
+		key := meta.GlobalKey(instanceName)
+		if err := s.instanceTemplates.Delete(ctx, key); err != nil {
+			if gcperrors.IsNotFound(err) {
+				log.V(2).Info("instanceTemplate not found for deletion", "instanceTemplate", instanceTemplate.SelfLink)
+			} else {
+				errs = append(errs, err)
+			}
+		}
+	}
+
+	if len(errs) == 0 {
+		return nil
+	}
+
+	joined := errors.Join(errs...)
+	log.Error(joined, "failed to delete instanceTemplates")
+	return joined
+}
+
+func (s *Service) createOrGetInstanceTemplate(ctx context.Context) (*compute.InstanceTemplate, *meta.Key, error) {
+	log := log.FromContext(ctx)
+
+	baseKey, err := s.scope.BaseInstanceTemplateResourceName()
+	if err != nil {
+		return nil, nil, err
+	}
+
+	desired, err := s.scope.InstanceTemplateResource(ctx)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	desiredJSON, err := json.Marshal(desired)
+	if err != nil {
+		return nil, nil, fmt.Errorf("marshalling instance template to json: %w", err)
+	}
+	encoded := append([]byte(baseKey.Name), desiredJSON...)
+	hash := sha256.Sum256(encoded)
+	hashHex := hex.EncodeToString(hash[:])
+
+	namePrefix := baseKey.Name
+	suffix := hashHex[:16]
+	name := namePrefix + suffix
+
+	// FUTURE: Support regional templates?
+	instanceTemplateKey := meta.GlobalKey(name)
+
+	selfLink := gcp.FormatKey("instanceTemplates", baseKey)
+	log = log.WithValues("instanceTemplate", selfLink)
+
+	log.V(2).Info("Looking for instanceTemplate")
+	instanceTemplate, err := s.instanceTemplates.Get(ctx, instanceTemplateKey)
+	if err != nil {
+		if !gcperrors.IsNotFound(err) {
+			log.Error(err, "Error looking for instanceTemplate")
+			return nil, nil, err
+		}
+
+		log.V(2).Info("Creating instanceTemplate")
+		if err := s.instanceTemplates.Insert(ctx, instanceTemplateKey, desired); err != nil {
+			log.Error(err, "creating instanceTemplate")
+			return nil, nil, err
+		}
+
+		instanceTemplate, err = s.instanceTemplates.Get(ctx, instanceTemplateKey)
+		if err != nil {
+			return nil, nil, err
+		}
+	}
+
+	return instanceTemplate, instanceTemplateKey, nil
+}
diff --git a/cloud/services/compute/instancetemplates/service.go b/cloud/services/compute/instancetemplates/service.go
new file mode 100644
index 000000000..0a1828fdb
--- /dev/null
+++ b/cloud/services/compute/instancetemplates/service.go
@@ -0,0 +1,64 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package instancetemplates implements reconciliation for instanceTemplate GCP resources.
+package instancetemplates
+
+import (
+	"context"
+
+	k8scloud "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/filter"
+	"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
+	compute "google.golang.org/api/compute/v1"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud"
+)
+
+type instancetemplatesInterface interface {
+	Get(ctx context.Context, key *meta.Key, options ...k8scloud.Option) (*compute.InstanceTemplate, error)
+	List(ctx context.Context, fl *filter.F, options ...k8scloud.Option) ([]*compute.InstanceTemplate, error)
+	Insert(ctx context.Context, key *meta.Key, obj *compute.InstanceTemplate, options ...k8scloud.Option) error
+	Delete(ctx context.Context, key *meta.Key, options ...k8scloud.Option) error
+}
+
+// Scope is an interface that holds used methods.
+type Scope interface {
+	Cloud() cloud.Cloud
+
+	// InstanceTemplateResource returns the desired instanceTemplate
+	InstanceTemplateResource(ctx context.Context) (*compute.InstanceTemplate, error)
+
+	// BaseInstanceTemplateResourceName returns the base instanceTemplate selfLink
+	BaseInstanceTemplateResourceName() (*meta.Key, error)
+}
+
+// Service implements the instance templates reconciler.
+type Service struct {
+	scope             Scope
+	instanceTemplates instancetemplatesInterface
+}
+
+// var _ cloud.Reconciler = &Service{}
+
+// New returns Service from given scope.
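To make the template naming in createOrGetInstanceTemplate concrete (the hash value below is fabricated): for a pool named my-pool the base prefix is "my-pool-", and the final name appends the first 16 hex characters of the SHA-256 over the prefix plus the desired-template JSON, so a changed spec yields a new, immutable template while staying within GCP's 63-character name limit.

```go
// Sketch of the name derivation used above.
namePrefix := "my-pool-" // from BaseInstanceTemplateResourceName (name truncated to 46 chars, plus "-")
desiredJSON := []byte(`{"properties":{"machineType":"n1-standard-2"}}`) // stand-in for the real template JSON
hash := sha256.Sum256(append([]byte(namePrefix), desiredJSON...))
name := namePrefix + hex.EncodeToString(hash[:])[:16]
// e.g. name == "my-pool-3f2a9c1d0b7e4a65" (suffix shown here is illustrative, not computed)
```
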
+func New(scope Scope) *Service { + cloudScope := scope.Cloud() + + return &Service{ + scope: scope, + instanceTemplates: cloudScope.InstanceTemplates(), + } +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml new file mode 100644 index 000000000..b1c84b06b --- /dev/null +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml @@ -0,0 +1,572 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: gcpmachinepools.infrastructure.cluster.x-k8s.io +spec: + group: infrastructure.cluster.x-k8s.io + names: + categories: + - cluster-api + kind: GCPMachinePool + listKind: GCPMachinePoolList + plural: gcpmachinepools + shortNames: + - gcpmp + singular: gcpmachinepool + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: MachinePool ready status + jsonPath: .status.ready + name: Ready + type: string + - description: Number of replicas + jsonPath: .status.replicas + name: Replicas + type: integer + name: v1beta1 + schema: + openAPIV3Schema: + description: GCPMachinePool is the Schema for the gcpmachinepools API. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: GCPMachinePoolSpec defines the desired state of GCPMachinePool. + properties: + additionalDisks: + description: AdditionalDisks are optional non-boot attached disks. + items: + description: AttachedDiskSpec degined GCP machine disk. + properties: + deviceType: + description: |- + DeviceType is a device type of the attached disk. + Supported types of non-root attached volumes: + 1. "pd-standard" - Standard (HDD) persistent disk + 2. "pd-ssd" - SSD persistent disk + 3. "local-ssd" - Local SSD disk (https://cloud.google.com/compute/docs/disks/local-ssd). + 4. "pd-balanced" - Balanced Persistent Disk + 5. "hyperdisk-balanced" - Hyperdisk Balanced + Default is "pd-standard". + type: string + encryptionKey: + description: EncryptionKey defines the KMS key to be used to + encrypt the disk. + properties: + keyType: + description: |- + KeyType is the type of encryption key. Must be either Managed, aka Customer-Managed Encryption Key (CMEK) or + Supplied, aka Customer-Supplied EncryptionKey (CSEK). + enum: + - Managed + - Supplied + type: string + kmsKeyServiceAccount: + description: |- + KMSKeyServiceAccount is the service account being used for the encryption request for the given KMS key. + If absent, the Compute Engine default service account is used. For example: + "kmsKeyServiceAccount": "name@project_id.iam.gserviceaccount.com. + The maximum length is based on the Service Account ID (max 30), Project (max 30), and a valid gcloud email + suffix ("iam.gserviceaccount.com"). 
+ maxLength: 85 + pattern: '[-_[A-Za-z0-9]+@[-_[A-Za-z0-9]+.iam.gserviceaccount.com' + type: string + managedKey: + description: ManagedKey references keys managed by the Cloud + Key Management Service. This should be set when KeyType + is Managed. + properties: + kmsKeyName: + description: |- + KMSKeyName is the name of the encryption key that is stored in Google Cloud KMS. For example: + "kmsKeyName": "projects/kms_project_id/locations/region/keyRings/key_region/cryptoKeys/key + maxLength: 160 + pattern: projects\/[-_[A-Za-z0-9]+\/locations\/[-_[A-Za-z0-9]+\/keyRings\/[-_[A-Za-z0-9]+\/cryptoKeys\/[-_[A-Za-z0-9]+ + type: string + required: + - kmsKeyName + type: object + suppliedKey: + description: SuppliedKey provides the key used to create + or manage a disk. This should be set when KeyType is Managed. + maxProperties: 1 + minProperties: 1 + properties: + rawKey: + description: |- + RawKey specifies a 256-bit customer-supplied encryption key, encoded in RFC 4648 + base64 to either encrypt or decrypt this resource. You can provide either the rawKey or the rsaEncryptedKey. + For example: "rawKey": "SGVsbG8gZnJvbSBHb29nbGUgQ2xvdWQgUGxhdGZvcm0=" + format: byte + type: string + rsaEncryptedKey: + description: |- + RSAEncryptedKey specifies an RFC 4648 base64 encoded, RSA-wrapped 2048-bit customer-supplied encryption + key to either encrypt or decrypt this resource. You can provide either the rawKey or the + rsaEncryptedKey. + For example: "rsaEncryptedKey": "ieCx/NcW06PcT7Ep1X6LUTc/hLvUDYyzSZPPVCVPTVEohpeHASqC8uw5TzyO9U+Fka9JFHi + z0mBibXUInrC/jEk014kCK/NPjYgEMOyssZ4ZINPKxlUh2zn1bV+MCaTICrdmuSBTWlUUiFoDi + D6PYznLwh8ZNdaheCeZ8ewEXgFQ8V+sDroLaN3Xs3MDTXQEMMoNUXMCZEIpg9Vtp9x2oe==" + The key must meet the following requirements before you can provide it to Compute Engine: + 1. The key is wrapped using a RSA public key certificate provided by Google. + 2. After being wrapped, the key must be encoded in RFC 4648 base64 encoding. + Gets the RSA public key certificate provided by Google at: https://cloud-certs.storage.googleapis.com/google-cloud-csek-ingress.pem + format: byte + type: string + type: object + required: + - keyType + type: object + size: + description: |- + Size is the size of the disk in GBs. + Defaults to 30GB. For "local-ssd" size is always 375GB. + format: int64 + type: integer + type: object + type: array + additionalLabels: + additionalProperties: + type: string + description: |- + AdditionalLabels is an optional set of tags to add to an instance, in addition to the ones added by default by the + GCP provider. If both the GCPCluster and the GCPMachinePool specify the same tag name with different values, the + GCPMachinePool's value takes precedence. + type: object + additionalMetadata: + description: |- + AdditionalMetadata is an optional set of metadata to add to an instance, in addition to the ones added by default by the + GCP provider. + items: + description: MetadataItem defines a single piece of metadata associated + with an instance. + properties: + key: + description: Key is the identifier for the metadata entry. + type: string + value: + description: Value is the value of the metadata entry. + type: string + required: + - key + type: object + type: array + x-kubernetes-list-map-keys: + - key + x-kubernetes-list-type: map + additionalNetworkTags: + description: |- + AdditionalNetworkTags is a list of network tags that should be applied to the + instance. These tags are set in addition to any network tags defined + at the cluster level or in the actuator. 
+ items: + type: string + type: array + confidentialCompute: + description: |- + ConfidentialCompute Defines whether the instance should have confidential compute enabled or not, and the confidential computing technology of choice. + If Disabled, the machine will not be configured to be a confidential computing instance. + If Enabled, confidential computing will be configured and AMD Secure Encrypted Virtualization will be configured by default. That is subject to change over time. If using AMD Secure Encrypted Virtualization is vital, use AMDEncryptedVirtualization explicitly instead. + If AMDEncryptedVirtualization, it will configure AMD Secure Encrypted Virtualization (AMD SEV) as the confidential computing technology. + If AMDEncryptedVirtualizationNestedPaging, it will configure AMD Secure Encrypted Virtualization Secure Nested Paging (AMD SEV-SNP) as the confidential computing technology. + If IntelTrustedDomainExtensions, it will configure Intel TDX as the confidential computing technology. + If enabled (any value other than Disabled) OnHostMaintenance is required to be set to "Terminate". + If omitted, the platform chooses a default, which is subject to change over time, currently that default is false. + enum: + - Enabled + - Disabled + - AMDEncryptedVirtualization + - AMDEncryptedVirtualizationNestedPaging + - IntelTrustedDomainExtensions + type: string + guestAccelerators: + description: |- + GuestAccelerators is a list of the type and count of accelerator cards + attached to the instance. + items: + description: |- + Accelerator is a specification of the type and number of accelerator + cards attached to the instance. + properties: + count: + description: |- + Count is the number of the guest accelerator cards exposed to this + instance. + format: int64 + type: integer + type: + description: |- + Type is the full or partial URL of the accelerator type resource to + attach to this instance. For example: + projects/my-project/zones/us-central1-c/acceleratorTypes/nvidia-tesla-p100 + If you are creating an instance template, specify only the accelerator name. + See GPUs on Compute Engine for a full list of accelerator types. + type: string + type: object + type: array + image: + description: |- + Image is the full reference to a valid image to be used for this machine. + Takes precedence over ImageFamily. + type: string + imageFamily: + description: ImageFamily is the full reference to a valid image family + to be used for this machine. + type: string + instanceType: + description: 'InstanceType is the type of instance to create. Example: + n1.standard-2' + type: string + ipForwarding: + default: Enabled + description: |- + IPForwarding Allows this instance to send and receive packets with non-matching destination or source IPs. + This is required if you plan to use this instance to forward routes. Defaults to enabled. + enum: + - Enabled + - Disabled + type: string + onHostMaintenance: + description: |- + OnHostMaintenance determines the behavior when a maintenance event occurs that might cause the instance to reboot. + If omitted, the platform chooses a default, which is subject to change over time, currently that default is "Migrate". + enum: + - Migrate + - Terminate + type: string + preemptible: + description: Preemptible defines if instance is preemptible + type: boolean + providerIDList: + description: |- + ProviderIDList are the identification IDs of machine instances provided by the provider. 
+ This field must match the provider IDs as seen on the node objects corresponding to a machine pool's machine instances. + items: + type: string + type: array + provisioningModel: + description: |- + ProvisioningModel defines if instance is spot. + If set to "Standard" while preemptible is true, then the VM will be of type "Preemptible". + If "Spot", VM type is "Spot". When unspecified, defaults to "Standard". + enum: + - Standard + - Spot + type: string + publicIP: + description: |- + PublicIP specifies whether the instance should get a public IP. + Set this to true if you don't have a NAT instances or Cloud Nat setup. + type: boolean + resourceManagerTags: + description: |- + ResourceManagerTags is an optional set of tags to apply to GCP resources managed + by the GCP provider. GCP supports a maximum of 50 tags per resource. + items: + description: ResourceManagerTag is a tag to apply to GCP resources + managed by the GCP provider. + properties: + key: + description: |- + Key is the key part of the tag. A tag key can have a maximum of 63 characters and cannot + be empty. Tag key must begin and end with an alphanumeric character, and must contain + only uppercase, lowercase alphanumeric characters, and the following special + characters `._-`. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z0-9]([0-9A-Za-z_.-]{0,61}[a-zA-Z0-9])?$ + type: string + parentID: + description: |- + ParentID is the ID of the hierarchical resource where the tags are defined + e.g. at the Organization or the Project level. To find the Organization or Project ID ref + https://cloud.google.com/resource-manager/docs/creating-managing-organization#retrieving_your_organization_id + https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects + An OrganizationID must consist of decimal numbers, and cannot have leading zeroes. + A ProjectID must be 6 to 30 characters in length, can only contain lowercase letters, + numbers, and hyphens, and must start with a letter, and cannot end with a hyphen. + maxLength: 32 + minLength: 1 + pattern: (^[1-9][0-9]{0,31}$)|(^[a-z][a-z0-9-]{4,28}[a-z0-9]$) + type: string + value: + description: |- + Value is the value part of the tag. A tag value can have a maximum of 63 characters and + cannot be empty. Tag value must begin and end with an alphanumeric character, and must + contain only uppercase, lowercase alphanumeric characters, and the following special + characters `_-.@%=+:,*#&(){}[]` and spaces. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z0-9]([0-9A-Za-z_.@%=+:,*#&()\[\]{}\-\s]{0,61}[a-zA-Z0-9])?$ + type: string + required: + - key + - parentID + - value + type: object + type: array + rootDeviceSize: + description: |- + RootDeviceSize is the size of the root volume in GB. + Defaults to 30. + format: int64 + type: integer + rootDeviceType: + description: |- + RootDeviceType is the type of the root volume. + Supported types of root volumes: + 1. "pd-standard" - Standard (HDD) persistent disk + 2. "pd-ssd" - SSD persistent disk + 3. "pd-balanced" - Balanced Persistent Disk + 4. "hyperdisk-balanced" - Hyperdisk Balanced + Default is "pd-standard". + type: string + rootDiskEncryptionKey: + description: RootDiskEncryptionKey defines the KMS key to be used + to encrypt the root disk. + properties: + keyType: + description: |- + KeyType is the type of encryption key. Must be either Managed, aka Customer-Managed Encryption Key (CMEK) or + Supplied, aka Customer-Supplied EncryptionKey (CSEK). 
+ enum: + - Managed + - Supplied + type: string + kmsKeyServiceAccount: + description: |- + KMSKeyServiceAccount is the service account being used for the encryption request for the given KMS key. + If absent, the Compute Engine default service account is used. For example: + "kmsKeyServiceAccount": "name@project_id.iam.gserviceaccount.com. + The maximum length is based on the Service Account ID (max 30), Project (max 30), and a valid gcloud email + suffix ("iam.gserviceaccount.com"). + maxLength: 85 + pattern: '[-_[A-Za-z0-9]+@[-_[A-Za-z0-9]+.iam.gserviceaccount.com' + type: string + managedKey: + description: ManagedKey references keys managed by the Cloud Key + Management Service. This should be set when KeyType is Managed. + properties: + kmsKeyName: + description: |- + KMSKeyName is the name of the encryption key that is stored in Google Cloud KMS. For example: + "kmsKeyName": "projects/kms_project_id/locations/region/keyRings/key_region/cryptoKeys/key + maxLength: 160 + pattern: projects\/[-_[A-Za-z0-9]+\/locations\/[-_[A-Za-z0-9]+\/keyRings\/[-_[A-Za-z0-9]+\/cryptoKeys\/[-_[A-Za-z0-9]+ + type: string + required: + - kmsKeyName + type: object + suppliedKey: + description: SuppliedKey provides the key used to create or manage + a disk. This should be set when KeyType is Managed. + maxProperties: 1 + minProperties: 1 + properties: + rawKey: + description: |- + RawKey specifies a 256-bit customer-supplied encryption key, encoded in RFC 4648 + base64 to either encrypt or decrypt this resource. You can provide either the rawKey or the rsaEncryptedKey. + For example: "rawKey": "SGVsbG8gZnJvbSBHb29nbGUgQ2xvdWQgUGxhdGZvcm0=" + format: byte + type: string + rsaEncryptedKey: + description: |- + RSAEncryptedKey specifies an RFC 4648 base64 encoded, RSA-wrapped 2048-bit customer-supplied encryption + key to either encrypt or decrypt this resource. You can provide either the rawKey or the + rsaEncryptedKey. + For example: "rsaEncryptedKey": "ieCx/NcW06PcT7Ep1X6LUTc/hLvUDYyzSZPPVCVPTVEohpeHASqC8uw5TzyO9U+Fka9JFHi + z0mBibXUInrC/jEk014kCK/NPjYgEMOyssZ4ZINPKxlUh2zn1bV+MCaTICrdmuSBTWlUUiFoDi + D6PYznLwh8ZNdaheCeZ8ewEXgFQ8V+sDroLaN3Xs3MDTXQEMMoNUXMCZEIpg9Vtp9x2oe==" + The key must meet the following requirements before you can provide it to Compute Engine: + 1. The key is wrapped using a RSA public key certificate provided by Google. + 2. After being wrapped, the key must be encoded in RFC 4648 base64 encoding. + Gets the RSA public key certificate provided by Google at: https://cloud-certs.storage.googleapis.com/google-cloud-csek-ingress.pem + format: byte + type: string + type: object + required: + - keyType + type: object + serviceAccounts: + description: |- + ServiceAccount specifies the service account email and which scopes to assign to the machine. + Defaults to: email: "default", scope: []{compute.CloudPlatformScope} + properties: + email: + description: 'Email: Email address of the service account.' + type: string + scopes: + description: |- + Scopes: The list of scopes to be made available for this service + account. + items: + type: string + type: array + type: object + shieldedInstanceConfig: + description: ShieldedInstanceConfig is the Shielded VM configuration + for this machine + properties: + integrityMonitoring: + description: |- + IntegrityMonitoring determines whether the instance should have integrity monitoring that verify the runtime boot integrity. 
+ Compares the most recent boot measurements to the integrity policy baseline and return + a pair of pass/fail results depending on whether they match or not. + If omitted, the platform chooses a default, which is subject to change over time, currently that default is Enabled. + enum: + - Enabled + - Disabled + type: string + secureBoot: + description: |- + SecureBoot Defines whether the instance should have secure boot enabled. + Secure Boot verify the digital signature of all boot components, and halting the boot process if signature verification fails. + If omitted, the platform chooses a default, which is subject to change over time, currently that default is Disabled. + enum: + - Enabled + - Disabled + type: string + virtualizedTrustedPlatformModule: + description: |- + VirtualizedTrustedPlatformModule enable virtualized trusted platform module measurements to create a known good boot integrity policy baseline. + The integrity policy baseline is used for comparison with measurements from subsequent VM boots to determine if anything has changed. + If omitted, the platform chooses a default, which is subject to change over time, currently that default is Enabled. + enum: + - Enabled + - Disabled + type: string + type: object + subnet: + description: |- + Subnet is a reference to the subnetwork to use for this instance. If not specified, + the first subnetwork retrieved from the Cluster Region and Network is picked. + type: string + required: + - instanceType + type: object + status: + description: GCPMachinePoolStatus defines the observed state of GCPMachinePool. + properties: + conditions: + description: Conditions defines current service state of the GCPMachinePool. + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when + the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This field may be empty. + maxLength: 10240 + minLength: 1 + type: string + reason: + description: |- + reason is the reason for the condition's last transition in CamelCase. + The specific API may choose whether or not this field is considered a guaranteed API. + This field may be empty. + maxLength: 256 + minLength: 1 + type: string + severity: + description: |- + severity provides an explicit classification of Reason code, so the users or machines can immediately + understand the current situation and act accordingly. + The Severity field MUST be set only when Status=False. + maxLength: 32 + type: string + status: + description: status of the condition, one of True, False, Unknown. + type: string + type: + description: |- + type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources like Available, but because arbitrary conditions + can be useful (see .node.status.conditions), the ability to deconflict is important. 
+ maxLength: 256 + minLength: 1 + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + failureMessage: + description: |- + FailureMessage will be set in the event that there is a terminal problem + reconciling the MachinePool and will contain a more verbose string suitable + for logging and human consumption. + + This field should not be set for transitive errors that a controller + faces that are expected to be fixed automatically over + time (like service outages), but instead indicate that something is + fundamentally wrong with the MachinePool's spec or the configuration of + the controller, and that manual intervention is required. Examples + of terminal errors would be invalid combinations of settings in the + spec, values that are unsupported by the controller, or the + responsible controller itself being critically misconfigured. + + Any transient errors that occur during the reconciliation of MachinePools + can be added as events to the MachinePool object and/or logged in the + controller's output. + type: string + failureReason: + description: |- + FailureReason will be set in the event that there is a terminal problem + reconciling the MachinePool and will contain a succinct value suitable + for machine interpretation. + + This field should not be set for transitive errors that a controller + faces that are expected to be fixed automatically over + time (like service outages), but instead indicate that something is + fundamentally wrong with the MachinePool's spec or the configuration of + the controller, and that manual intervention is required. Examples + of terminal errors would be invalid combinations of settings in the + spec, values that are unsupported by the controller, or the + responsible controller itself being critically misconfigured. + + Any transient errors that occur during the reconciliation of MachinePools + can be added as events to the MachinePool object and/or logged in the + controller's output. + type: string + ready: + description: Ready is true when the provider resource is ready. 
+ type: boolean + replicas: + description: Replicas is the most recently observed number of replicas + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index f28da985c..6787d71d8 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -7,6 +7,7 @@ commonLabels: resources: - bases/infrastructure.cluster.x-k8s.io_gcpmachines.yaml - bases/infrastructure.cluster.x-k8s.io_gcpclusters.yaml +- bases/infrastructure.cluster.x-k8s.io_gcpmachinepools.yaml - bases/infrastructure.cluster.x-k8s.io_gcpmachinetemplates.yaml - bases/infrastructure.cluster.x-k8s.io_gcpclustertemplates.yaml - bases/infrastructure.cluster.x-k8s.io_gcpmanagedclusters.yaml diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 3fb502614..31d9055ca 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -21,7 +21,7 @@ spec: containers: - args: - --leader-elect - - --feature-gates=GKE=${EXP_CAPG_GKE:=false} + - --feature-gates=GKE=${EXP_CAPG_GKE:=false},MachinePool=${EXP_MACHINE_POOL:=false} - "--diagnostics-address=${CAPG_DIAGNOSTICS_ADDRESS:=:8443}" - "--insecure-diagnostics=${CAPG_INSECURE_DIAGNOSTICS:=false}" - "--v=${CAPG_LOGLEVEL:=0}" diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 8f3ff3044..46fa95777 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -62,6 +62,8 @@ rules: resources: - clusters - clusters/status + - machinepools + - machinepools/status verbs: - get - list @@ -70,8 +72,6 @@ rules: - apiGroups: - cluster.x-k8s.io resources: - - machinepools - - machinepools/status - machines - machines/status verbs: @@ -98,6 +98,7 @@ rules: - infrastructure.cluster.x-k8s.io resources: - gcpclusters/status + - gcpmachinepools/status - gcpmachines/status - gcpmanagedclusters/status - gcpmanagedcontrolplanes/status @@ -106,6 +107,17 @@ rules: - get - patch - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - gcpmachinepools + verbs: + - delete + - get + - list + - patch + - update + - watch - apiGroups: - infrastructure.cluster.x-k8s.io resources: diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 038406b07..e3a6fc66e 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -279,6 +279,25 @@ webhooks: resources: - gcpmachinetemplates sideEffects: None +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /validate-infrastructure-cluster-x-k8s-io-v1beta1-gcpmachinepool + failurePolicy: Fail + name: validation.gcpmachinepool.infrastructure.cluster.x-k8s.io + rules: + - apiGroups: + - infrastructure.cluster.x-k8s.io + apiVersions: + - v1beta1 + operations: + - UPDATE + resources: + - gcpmachinepools + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/exp/api/v1beta1/conditions_consts.go b/exp/api/v1beta1/conditions_consts.go index b651dde77..9baf0c129 100644 --- a/exp/api/v1beta1/conditions_consts.go +++ b/exp/api/v1beta1/conditions_consts.go @@ -70,4 +70,22 @@ const ( GKEMachinePoolErrorReason = "GKEMachinePoolError" // GKEMachinePoolReconciliationFailedReason used to report failures while reconciling GKE node pool. GKEMachinePoolReconciliationFailedReason = "GKEMachinePoolReconciliationFailed" + + // MIGReadyCondition reports on current status of the managed instance group. Ready indicates the group is provisioned. 
+	MIGReadyCondition clusterv1.ConditionType = "ManagedInstanceGroupReady"
+	// MIGNotFoundReason used when the managed instance group couldn't be retrieved.
+	MIGNotFoundReason = "ManagedInstanceGroupNotFound"
+	// MIGProvisionFailedReason used for failures during managed instance group provisioning.
+	MIGProvisionFailedReason = "ManagedInstanceGroupProvisionFailed"
+	// MIGDeletionInProgress used when the managed instance group is being deleted.
+	MIGDeletionInProgress = "ManagedInstanceGroupDeletionInProgress"
+
+	// InstanceTemplateReadyCondition reports on the status of a GCPMachinePool's associated instance template.
+	InstanceTemplateReadyCondition clusterv1.ConditionType = "InstanceTemplateReady"
+	// InstanceTemplateNotFoundReason is used when the associated instance template can't be found.
+	InstanceTemplateNotFoundReason = "InstanceTemplateNotFound"
+	// InstanceTemplateCreateFailedReason used for failures during instance template creation.
+	InstanceTemplateCreateFailedReason = "InstanceTemplateCreateFailed"
+	// InstanceTemplateReconcileFailedReason used for failures during instance template reconciliation.
+	InstanceTemplateReconcileFailedReason = "InstanceTemplateReconcileFailed"
 )
diff --git a/exp/api/v1beta1/gcpmachinepool_types.go b/exp/api/v1beta1/gcpmachinepool_types.go
new file mode 100644
index 000000000..fc98da761
--- /dev/null
+++ b/exp/api/v1beta1/gcpmachinepool_types.go
@@ -0,0 +1,270 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+
+	capg "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
+	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
+)
+
+// Constants block.
+const (
+	// LaunchTemplateLatestVersion defines the latest version of the launch template.
+	LaunchTemplateLatestVersion = "$Latest"
+)
+
+// GCPMachinePoolSpec defines the desired state of GCPMachinePool.
+type GCPMachinePoolSpec struct {
+	// ProviderIDList are the identification IDs of machine instances provided by the provider.
+	// This field must match the provider IDs as seen on the node objects corresponding to a machine pool's machine instances.
+	// +optional
+	ProviderIDList []string `json:"providerIDList,omitempty"`
+
+	// InstanceType is the type of instance to create. Example: n1.standard-2
+	InstanceType string `json:"instanceType"`
+
+	// Subnet is a reference to the subnetwork to use for this instance. If not specified,
+	// the first subnetwork retrieved from the Cluster Region and Network is picked.
+	// +optional
+	Subnet *string `json:"subnet,omitempty"`
+
+	// // ProviderID is the unique identifier as specified by the cloud provider.
+	// // +optional
+	// ProviderID *string `json:"providerID,omitempty"`
+
+	// ImageFamily is the full reference to a valid image family to be used for this machine.
+ // +optional + ImageFamily *string `json:"imageFamily,omitempty"` + + // Image is the full reference to a valid image to be used for this machine. + // Takes precedence over ImageFamily. + // +optional + Image *string `json:"image,omitempty"` + + // AdditionalLabels is an optional set of tags to add to an instance, in addition to the ones added by default by the + // GCP provider. If both the GCPCluster and the GCPMachinePool specify the same tag name with different values, the + // GCPMachinePool's value takes precedence. + // +optional + AdditionalLabels capg.Labels `json:"additionalLabels,omitempty"` + + // AdditionalMetadata is an optional set of metadata to add to an instance, in addition to the ones added by default by the + // GCP provider. + // +listType=map + // +listMapKey=key + // +optional + AdditionalMetadata []capg.MetadataItem `json:"additionalMetadata,omitempty"` + + // // IAMInstanceProfile is a name of an IAM instance profile to assign to the instance + // // +optional + // // IAMInstanceProfile string `json:"iamInstanceProfile,omitempty"` + + // PublicIP specifies whether the instance should get a public IP. + // Set this to true if you don't have a NAT instances or Cloud Nat setup. + // +optional + PublicIP *bool `json:"publicIP,omitempty"` + + // AdditionalNetworkTags is a list of network tags that should be applied to the + // instance. These tags are set in addition to any network tags defined + // at the cluster level or in the actuator. + // +optional + AdditionalNetworkTags []string `json:"additionalNetworkTags,omitempty"` + + // ResourceManagerTags is an optional set of tags to apply to GCP resources managed + // by the GCP provider. GCP supports a maximum of 50 tags per resource. + // +maxItems=50 + // +optional + ResourceManagerTags capg.ResourceManagerTags `json:"resourceManagerTags,omitempty"` + + // RootDeviceSize is the size of the root volume in GB. + // Defaults to 30. + // +optional + RootDeviceSize int64 `json:"rootDeviceSize,omitempty"` + + // RootDeviceType is the type of the root volume. + // Supported types of root volumes: + // 1. "pd-standard" - Standard (HDD) persistent disk + // 2. "pd-ssd" - SSD persistent disk + // 3. "pd-balanced" - Balanced Persistent Disk + // 4. "hyperdisk-balanced" - Hyperdisk Balanced + // Default is "pd-standard". + // +optional + RootDeviceType *capg.DiskType `json:"rootDeviceType,omitempty"` + + // AdditionalDisks are optional non-boot attached disks. + // +optional + AdditionalDisks []capg.AttachedDiskSpec `json:"additionalDisks,omitempty"` + + // ServiceAccount specifies the service account email and which scopes to assign to the machine. + // Defaults to: email: "default", scope: []{compute.CloudPlatformScope} + // +optional + ServiceAccount *capg.ServiceAccount `json:"serviceAccounts,omitempty"` + + // Preemptible defines if instance is preemptible + // +optional + Preemptible bool `json:"preemptible,omitempty"` + + // ProvisioningModel defines if instance is spot. + // If set to "Standard" while preemptible is true, then the VM will be of type "Preemptible". + // If "Spot", VM type is "Spot". When unspecified, defaults to "Standard". + // +kubebuilder:validation:Enum=Standard;Spot + // +optional + ProvisioningModel *capg.ProvisioningModel `json:"provisioningModel,omitempty"` + + // IPForwarding Allows this instance to send and receive packets with non-matching destination or source IPs. + // This is required if you plan to use this instance to forward routes. Defaults to enabled. 
+ // +kubebuilder:validation:Enum=Enabled;Disabled + // +kubebuilder:default=Enabled + // +optional + IPForwarding *capg.IPForwarding `json:"ipForwarding,omitempty"` + + // ShieldedInstanceConfig is the Shielded VM configuration for this machine + // +optional + ShieldedInstanceConfig *capg.GCPShieldedInstanceConfig `json:"shieldedInstanceConfig,omitempty"` + + // OnHostMaintenance determines the behavior when a maintenance event occurs that might cause the instance to reboot. + // If omitted, the platform chooses a default, which is subject to change over time, currently that default is "Migrate". + // +kubebuilder:validation:Enum=Migrate;Terminate; + // +optional + OnHostMaintenance *capg.HostMaintenancePolicy `json:"onHostMaintenance,omitempty"` + + // ConfidentialCompute Defines whether the instance should have confidential compute enabled or not, and the confidential computing technology of choice. + // If Disabled, the machine will not be configured to be a confidential computing instance. + // If Enabled, confidential computing will be configured and AMD Secure Encrypted Virtualization will be configured by default. That is subject to change over time. If using AMD Secure Encrypted Virtualization is vital, use AMDEncryptedVirtualization explicitly instead. + // If AMDEncryptedVirtualization, it will configure AMD Secure Encrypted Virtualization (AMD SEV) as the confidential computing technology. + // If AMDEncryptedVirtualizationNestedPaging, it will configure AMD Secure Encrypted Virtualization Secure Nested Paging (AMD SEV-SNP) as the confidential computing technology. + // If IntelTrustedDomainExtensions, it will configure Intel TDX as the confidential computing technology. + // If enabled (any value other than Disabled) OnHostMaintenance is required to be set to "Terminate". + // If omitted, the platform chooses a default, which is subject to change over time, currently that default is false. + // +kubebuilder:validation:Enum=Enabled;Disabled;AMDEncryptedVirtualization;AMDEncryptedVirtualizationNestedPaging;IntelTrustedDomainExtensions + // +optional + ConfidentialCompute *capg.ConfidentialComputePolicy `json:"confidentialCompute,omitempty"` + + // RootDiskEncryptionKey defines the KMS key to be used to encrypt the root disk. + // +optional + RootDiskEncryptionKey *capg.CustomerEncryptionKey `json:"rootDiskEncryptionKey,omitempty"` + + // GuestAccelerators is a list of the type and count of accelerator cards + // attached to the instance. + // +optional + GuestAccelerators []capg.Accelerator `json:"guestAccelerators,omitempty"` +} + +// GCPMachinePoolStatus defines the observed state of GCPMachinePool. +type GCPMachinePoolStatus struct { + // Ready is true when the provider resource is ready. + // +optional + Ready bool `json:"ready"` + + // Replicas is the most recently observed number of replicas + // +optional + Replicas int32 `json:"replicas"` + + // Conditions defines current service state of the GCPMachinePool. + // +optional + Conditions clusterv1.Conditions `json:"conditions,omitempty"` + + // FailureReason will be set in the event that there is a terminal problem + // reconciling the MachinePool and will contain a succinct value suitable + // for machine interpretation. 
+ // + // This field should not be set for transitive errors that a controller + // faces that are expected to be fixed automatically over + // time (like service outages), but instead indicate that something is + // fundamentally wrong with the MachinePool's spec or the configuration of + // the controller, and that manual intervention is required. Examples + // of terminal errors would be invalid combinations of settings in the + // spec, values that are unsupported by the controller, or the + // responsible controller itself being critically misconfigured. + // + // Any transient errors that occur during the reconciliation of MachinePools + // can be added as events to the MachinePool object and/or logged in the + // controller's output. + // +optional + FailureReason *string `json:"failureReason,omitempty"` + + // FailureMessage will be set in the event that there is a terminal problem + // reconciling the MachinePool and will contain a more verbose string suitable + // for logging and human consumption. + // + // This field should not be set for transitive errors that a controller + // faces that are expected to be fixed automatically over + // time (like service outages), but instead indicate that something is + // fundamentally wrong with the MachinePool's spec or the configuration of + // the controller, and that manual intervention is required. Examples + // of terminal errors would be invalid combinations of settings in the + // spec, values that are unsupported by the controller, or the + // responsible controller itself being critically misconfigured. + // + // Any transient errors that occur during the reconciliation of MachinePools + // can be added as events to the MachinePool object and/or logged in the + // controller's output. + // +optional + FailureMessage *string `json:"failureMessage,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:storageversion +// +kubebuilder:subresource:status +// +kubebuilder:resource:path=gcpmachinepools,scope=Namespaced,categories=cluster-api,shortName=gcpmp +// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.ready",description="MachinePool ready status" +// +kubebuilder:printcolumn:name="Replicas",type="integer",JSONPath=".status.replicas",description="Number of replicas" + +// GCPMachinePool is the Schema for the gcpmachinepools API. +type GCPMachinePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec GCPMachinePoolSpec `json:"spec,omitempty"` + Status GCPMachinePoolStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true +// +kubebuilder:storageversion + +// GCPMachinePoolList contains a list of GCPMachinePool. +type GCPMachinePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []GCPMachinePool `json:"items"` +} + +func init() { + SchemeBuilder.Register(&GCPMachinePool{}, &GCPMachinePoolList{}) +} + +// GetConditions returns the observations of the operational state of the GCPMachinePool resource. +func (r *GCPMachinePool) GetConditions() clusterv1.Conditions { + return r.Status.Conditions +} + +// SetConditions sets the underlying service state of the GCPMachinePool to the predescribed clusterv1.Conditions. +func (r *GCPMachinePool) SetConditions(conditions clusterv1.Conditions) { + r.Status.Conditions = conditions +} + +// GetObjectKind will return the ObjectKind of an GCPMachinePool. 
+func (r *GCPMachinePool) GetObjectKind() schema.ObjectKind {
+	return &r.TypeMeta
+}
+
+// GetObjectKind will return the ObjectKind of a GCPMachinePoolList.
+func (r *GCPMachinePoolList) GetObjectKind() schema.ObjectKind {
+	return &r.TypeMeta
+}
diff --git a/exp/api/v1beta1/gcpmachinepool_webhook.go b/exp/api/v1beta1/gcpmachinepool_webhook.go
new file mode 100644
index 000000000..b3819ccdb
--- /dev/null
+++ b/exp/api/v1beta1/gcpmachinepool_webhook.go
@@ -0,0 +1,83 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package v1beta1
+
+import (
+	"context"
+	"fmt"
+
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/runtime"
+	ctrl "sigs.k8s.io/controller-runtime"
+	logf "sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/webhook"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+)
+
+// gcpMachinePoolLog is used for logging in this package.
+var gcpMachinePoolLog = logf.Log.WithName("gcpmachinepool-resource")
+
+// SetupWebhookWithManager sets up and registers the webhook with the manager.
+func (r *GCPMachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error {
+	validator := new(gcpMachinePoolWebhook)
+	return ctrl.NewWebhookManagedBy(mgr).
+		For(r).
+		WithValidator(validator).
+		Complete()
+}
+
+type gcpMachinePoolWebhook struct{}
+
+//+kubebuilder:webhook:verbs=update,path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-gcpmachinepool,mutating=false,failurePolicy=fail,sideEffects=None,groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools,versions=v1beta1,name=validation.gcpmachinepool.infrastructure.cluster.x-k8s.io,admissionReviewVersions=v1
+
+var _ webhook.CustomValidator = &gcpMachinePoolWebhook{}
+
+// ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type.
+func (*gcpMachinePoolWebhook) ValidateCreate(_ context.Context, obj runtime.Object) (admission.Warnings, error) {
+	r, ok := obj.(*GCPMachinePool)
+	if !ok {
+		return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a GCPMachinePool but got a %T", obj))
+	}
+
+	gcpMachinePoolLog.Info("Validating GCPMachinePool create", "name", r.Name)
+
+	return nil, nil
+}
+
+// ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type.
+func (*gcpMachinePoolWebhook) ValidateUpdate(_ context.Context, _, newObj runtime.Object) (admission.Warnings, error) {
+	r, ok := newObj.(*GCPMachinePool)
+	if !ok {
+		return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a GCPMachinePool but got a %T", newObj))
+	}
+
+	gcpMachinePoolLog.Info("Validating GCPMachinePool update", "name", r.Name)
+
+	return nil, nil
+}
+
+// ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type.
+func (*gcpMachinePoolWebhook) ValidateDelete(_ context.Context, obj runtime.Object) (admission.Warnings, error) { + r, ok := obj.(*GCPMachinePool) + if !ok { + return nil, apierrors.NewBadRequest(fmt.Sprintf("expected a GCPMachinePool but got a %T", obj)) + } + + gcpMachinePoolLog.Info("Validating GCPMachinePool delete", "name", r.Name) + + return nil, nil +} diff --git a/exp/api/v1beta1/gcpmanagedmachinepool_types.go b/exp/api/v1beta1/gcpmanagedmachinepool_types.go index e7d11ab73..3f66b1630 100644 --- a/exp/api/v1beta1/gcpmanagedmachinepool_types.go +++ b/exp/api/v1beta1/gcpmanagedmachinepool_types.go @@ -22,6 +22,9 @@ import ( ) const ( + // MachinePoolFinalizer is the finalizer for the machine pool. + MachinePoolFinalizer = "gcpmachinepool.infrastructure.cluster.x-k8s.io" + // ManagedMachinePoolFinalizer allows Reconcile to clean up GCP resources associated with the GCPManagedMachinePool before // removing it from the apiserver. ManagedMachinePoolFinalizer = "gcpmanagedmachinepool.infrastructure.cluster.x-k8s.io" @@ -34,6 +37,7 @@ const ( // +kubebuilder:validation:Enum=pd-standard;pd-ssd;pd-balanced type DiskType string +// FUTURE: Share DiskType with non-experimental API const ( // Standard disk type. Standard DiskType = "pd-standard" diff --git a/exp/api/v1beta1/types_class.go b/exp/api/v1beta1/types_class.go index 95c72de26..8d33398ec 100644 --- a/exp/api/v1beta1/types_class.go +++ b/exp/api/v1beta1/types_class.go @@ -16,7 +16,9 @@ limitations under the License. package v1beta1 -import infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" +import ( + infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" +) // GCPManagedControlPlaneClassSpec defines the GCPManagedControlPlane properties that may be shared across several gcp managed control planes. type GCPManagedControlPlaneClassSpec struct { diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go index 5d6407c41..17311f5d9 100644 --- a/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -126,6 +126,213 @@ func (in *ClusterSecurity) DeepCopy() *ClusterSecurity { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePool) DeepCopyInto(out *GCPMachinePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePool. +func (in *GCPMachinePool) DeepCopy() *GCPMachinePool { + if in == nil { + return nil + } + out := new(GCPMachinePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *GCPMachinePoolList) DeepCopyInto(out *GCPMachinePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]GCPMachinePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolList. +func (in *GCPMachinePoolList) DeepCopy() *GCPMachinePoolList { + if in == nil { + return nil + } + out := new(GCPMachinePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GCPMachinePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolSpec) DeepCopyInto(out *GCPMachinePoolSpec) { + *out = *in + if in.ProviderIDList != nil { + in, out := &in.ProviderIDList, &out.ProviderIDList + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Subnet != nil { + in, out := &in.Subnet, &out.Subnet + *out = new(string) + **out = **in + } + if in.ImageFamily != nil { + in, out := &in.ImageFamily, &out.ImageFamily + *out = new(string) + **out = **in + } + if in.Image != nil { + in, out := &in.Image, &out.Image + *out = new(string) + **out = **in + } + if in.AdditionalLabels != nil { + in, out := &in.AdditionalLabels, &out.AdditionalLabels + *out = make(apiv1beta1.Labels, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.AdditionalMetadata != nil { + in, out := &in.AdditionalMetadata, &out.AdditionalMetadata + *out = make([]apiv1beta1.MetadataItem, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.PublicIP != nil { + in, out := &in.PublicIP, &out.PublicIP + *out = new(bool) + **out = **in + } + if in.AdditionalNetworkTags != nil { + in, out := &in.AdditionalNetworkTags, &out.AdditionalNetworkTags + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ResourceManagerTags != nil { + in, out := &in.ResourceManagerTags, &out.ResourceManagerTags + *out = make(apiv1beta1.ResourceManagerTags, len(*in)) + copy(*out, *in) + } + if in.RootDeviceType != nil { + in, out := &in.RootDeviceType, &out.RootDeviceType + *out = new(apiv1beta1.DiskType) + **out = **in + } + if in.AdditionalDisks != nil { + in, out := &in.AdditionalDisks, &out.AdditionalDisks + *out = make([]apiv1beta1.AttachedDiskSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.ServiceAccount != nil { + in, out := &in.ServiceAccount, &out.ServiceAccount + *out = new(apiv1beta1.ServiceAccount) + (*in).DeepCopyInto(*out) + } + if in.ProvisioningModel != nil { + in, out := &in.ProvisioningModel, &out.ProvisioningModel + *out = new(apiv1beta1.ProvisioningModel) + **out = **in + } + if in.IPForwarding != nil { + in, out := &in.IPForwarding, &out.IPForwarding + *out = new(apiv1beta1.IPForwarding) + **out = **in + } + if in.ShieldedInstanceConfig != nil { + in, out := &in.ShieldedInstanceConfig, &out.ShieldedInstanceConfig + *out = new(apiv1beta1.GCPShieldedInstanceConfig) + **out = **in + } + if in.OnHostMaintenance != nil { + in, out := &in.OnHostMaintenance, &out.OnHostMaintenance + *out = new(apiv1beta1.HostMaintenancePolicy) + **out = **in + } + if in.ConfidentialCompute != nil { + in, out := 
&in.ConfidentialCompute, &out.ConfidentialCompute + *out = new(apiv1beta1.ConfidentialComputePolicy) + **out = **in + } + if in.RootDiskEncryptionKey != nil { + in, out := &in.RootDiskEncryptionKey, &out.RootDiskEncryptionKey + *out = new(apiv1beta1.CustomerEncryptionKey) + (*in).DeepCopyInto(*out) + } + if in.GuestAccelerators != nil { + in, out := &in.GuestAccelerators, &out.GuestAccelerators + *out = make([]apiv1beta1.Accelerator, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolSpec. +func (in *GCPMachinePoolSpec) DeepCopy() *GCPMachinePoolSpec { + if in == nil { + return nil + } + out := new(GCPMachinePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GCPMachinePoolStatus) DeepCopyInto(out *GCPMachinePoolStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(corev1beta1.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.FailureReason != nil { + in, out := &in.FailureReason, &out.FailureReason + *out = new(string) + **out = **in + } + if in.FailureMessage != nil { + in, out := &in.FailureMessage, &out.FailureMessage + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GCPMachinePoolStatus. +func (in *GCPMachinePoolStatus) DeepCopy() *GCPMachinePoolStatus { + if in == nil { + return nil + } + out := new(GCPMachinePoolStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GCPManagedCluster) DeepCopyInto(out *GCPManagedCluster) { *out = *in diff --git a/exp/controllers/gcpmachinepool_controller.go b/exp/controllers/gcpmachinepool_controller.go new file mode 100644 index 000000000..9843fcfde --- /dev/null +++ b/exp/controllers/gcpmachinepool_controller.go @@ -0,0 +1,330 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package controllers provides experimental API controllers. 
+package controllers
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/google/go-cmp/cmp"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta1"
+	"sigs.k8s.io/cluster-api/util/deprecated/v1beta1/conditions"
+	"sigs.k8s.io/cluster-api/util/predicates"
+
+	infrav1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/scope"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/services/compute/instancegroupmanagers"
+	"sigs.k8s.io/cluster-api-provider-gcp/cloud/services/compute/instancetemplates"
+	expinfrav1 "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/capiutils"
+	"sigs.k8s.io/cluster-api-provider-gcp/pkg/logger"
+)
+
+// GCPMachinePoolReconciler reconciles a GCPMachinePool object.
+type GCPMachinePoolReconciler struct {
+	Client           client.Client
+	Recorder         record.EventRecorder
+	WatchFilterValue string
+}
+
+// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools,verbs=get;list;watch;update;patch;delete
+// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=gcpmachinepools/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch
+// +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch
+// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
+
+// Reconcile is the reconciliation loop for GCPMachinePool.
+func (r *GCPMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
+	log := logger.FromContext(ctx)
+
+	// Fetch the GCPMachinePool.
+	gcpMachinePool := &expinfrav1.GCPMachinePool{}
+	err := r.Client.Get(ctx, req.NamespacedName, gcpMachinePool)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			return ctrl.Result{}, nil
+		}
+		return ctrl.Result{}, err
+	}
+
+	// Fetch the owning CAPI MachinePool.
+	machinePool, err := getOwnerMachinePool(ctx, r.Client, gcpMachinePool.ObjectMeta)
+	if err != nil {
+		return reconcile.Result{}, err
+	}
+	if machinePool == nil {
+		log.Info("MachinePool Controller has not yet set OwnerRef")
+		return reconcile.Result{}, nil
+	}
+	log = log.WithValues("machinePool", klog.KObj(machinePool))
+
+	// Fetch the Cluster.
+	clusterObj, err := capiutils.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta)
+	if err != nil {
+		log.Info("MachinePool is missing cluster label or cluster does not exist")
+		return reconcile.Result{}, nil
+	}
+	log = log.WithValues("cluster", klog.KObj(clusterObj))
+
+	if capiutils.IsPaused(clusterObj, gcpMachinePool) {
+		log.Info("GCPMachinePool or linked Cluster is marked as paused.
 Won't reconcile")
+		return ctrl.Result{}, nil
+	}
+
+	_, clusterScope, err := r.getInfraCluster(ctx, clusterObj, gcpMachinePool)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("getting infra provider cluster or control plane object: %w", err)
+	}
+	if clusterScope == nil {
+		log.Info("GCPCluster or GCPManagedControlPlane is not ready yet")
+		return ctrl.Result{}, nil
+	}
+
+	// Create the machine pool scope.
+	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
+		ClusterGetter:  clusterScope,
+		Client:         r.Client,
+		MachinePool:    machinePool,
+		GCPMachinePool: gcpMachinePool,
+	})
+	if err != nil {
+		log.Error(err, "failed to create scope")
+		return ctrl.Result{}, err
+	}
+
+	// Always close the scope when exiting this function so we can persist any GCPMachinePool changes.
+	defer func() {
+		// Set the Ready condition before the GCPMachinePool is patched.
+		conditions.SetSummary(machinePoolScope.GCPMachinePool,
+			conditions.WithConditions(
+				expinfrav1.MIGReadyCondition,
+				expinfrav1.InstanceTemplateReadyCondition,
+			),
+			conditions.WithStepCounterIfOnly(
+				expinfrav1.MIGReadyCondition,
+				expinfrav1.InstanceTemplateReadyCondition,
+			),
+		)
+
+		if err := machinePoolScope.Close(); err != nil && reterr == nil {
+			reterr = err
+		}
+	}()
+
+	if !gcpMachinePool.DeletionTimestamp.IsZero() {
+		return ctrl.Result{}, r.reconcileDelete(ctx, machinePoolScope)
+	}
+
+	return r.reconcile(ctx, machinePoolScope)
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *GCPMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		WithOptions(options).
+		For(&expinfrav1.GCPMachinePool{}).
+		Watches(
+			&clusterv1.MachinePool{},
+			handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(expinfrav1.GroupVersion.WithKind("GCPMachinePool"))),
+		).
+		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), logger.FromContext(ctx).GetLogger(), r.WatchFilterValue)).
+		WithEventFilter(
+			predicate.Funcs{
+				// Skip reconciliation when the event that triggered it is only an incremental
+				// status update on a GCPMachinePool resource.
+				UpdateFunc: func(e event.UpdateEvent) bool {
+					if e.ObjectOld.GetObjectKind().GroupVersionKind().Kind != "GCPMachinePool" {
+						return true
+					}
+
+					oldPool := e.ObjectOld.(*expinfrav1.GCPMachinePool).DeepCopy()
+					newPool := e.ObjectNew.(*expinfrav1.GCPMachinePool).DeepCopy()
+
+					oldPool.Status = expinfrav1.GCPMachinePoolStatus{}
+					newPool.Status = expinfrav1.GCPMachinePoolStatus{}
+
+					oldPool.ObjectMeta.ResourceVersion = ""
+					newPool.ObjectMeta.ResourceVersion = ""
+
+					return !cmp.Equal(oldPool, newPool)
+				},
+			},
+		).
+		Complete(r)
+}
+
+func (r *GCPMachinePoolReconciler) reconcile(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (ctrl.Result, error) {
+	log := logger.FromContext(ctx)
+
+	log.Info("Reconciling GCPMachinePool")
+
+	// If the GCPMachinePool doesn't have our finalizer, add it.
+	if controllerutil.AddFinalizer(machinePoolScope.GCPMachinePool, expinfrav1.MachinePoolFinalizer) {
+		// Register the finalizer immediately to avoid orphaning GCP resources.
+		if err := machinePoolScope.PatchObject(ctx); err != nil {
+			return ctrl.Result{}, err
+		}
+	}
+
+	// // If the GCPMachine is in an error state, return early.
+ // if machinePoolScope.HasFailed() { + // log.Info("Error state detected, skipping reconciliation") + + // // FUTURE: If we are in a failed state, delete the secret regardless of instance state + + // return ctrl.Result{}, nil + // } + + // if !machinePoolScope.Cluster.Status.InfrastructureReady { + // log.Info("Cluster infrastructure is not ready yet") + // conditions.MarkFalse(machinePoolScope.GCPMachinePool, expinfrav1.MIGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") + // return ctrl.Result{}, nil + // } + + // Make sure bootstrap data is available and populated + if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { + log.Info("Bootstrap data secret reference is not yet available") + conditions.MarkFalse(machinePoolScope.GCPMachinePool, expinfrav1.MIGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") + return ctrl.Result{}, nil + } + + instanceTemplateKey, err := instancetemplates.New(machinePoolScope).Reconcile(ctx) + if err != nil { + log.Error(err, "Error reconciling instanceTemplate") + // record.Warnf(machineScope.GCPMachine, "GCPMachineReconcile", "Reconcile error - %v", err) + conditions.MarkUnknown(machinePoolScope.GCPMachinePool, expinfrav1.InstanceTemplateReadyCondition, expinfrav1.InstanceTemplateNotFoundReason, "%s", err.Error()) + return ctrl.Result{}, err + } + + // set the InstanceTemplateReadyCondition condition + conditions.MarkTrue(machinePoolScope.GCPMachinePool, expinfrav1.InstanceTemplateReadyCondition) + + igm, err := instancegroupmanagers.New(machinePoolScope).Reconcile(ctx, instanceTemplateKey) + if err != nil { + log.Error(err, "Error reconciling instanceGroupManager") + // record.Warnf(machineScope.GCPMachine, "GCPMachineReconcile", "Reconcile error - %v", err) + conditions.MarkUnknown(machinePoolScope.GCPMachinePool, expinfrav1.MIGReadyCondition, expinfrav1.MIGNotFoundReason, "%s", err.Error()) + return ctrl.Result{}, err + } + + // set the MIGReadyCondition condition + conditions.MarkTrue(machinePoolScope.GCPMachinePool, expinfrav1.MIGReadyCondition) + + igmInstances, err := instancegroupmanagers.New(machinePoolScope).ListInstances(ctx, igm) + if err != nil { + log.Error(err, "Error listing instances in instanceGroupManager") + return ctrl.Result{}, err + } + + providerIDList := make([]string, len(igmInstances)) + + for i, instance := range igmInstances { + var providerID string + + // Convert instance URL to providerID format + u := instance.Instance + u = strings.TrimPrefix(u, "https://www.googleapis.com/compute/v1/") + tokens := strings.Split(u, "/") + if len(tokens) == 6 && tokens[0] == "projects" && tokens[2] == "zones" && tokens[4] == "instances" { + providerID = fmt.Sprintf("gce://%s/%s/%s", tokens[1], tokens[3], tokens[5]) + } else { + return ctrl.Result{}, fmt.Errorf("unexpected instance URL format: %s", instance.Instance) + } + + providerIDList[i] = providerID + } + + // FUTURE: do we need to verify that the instances are actually running? + machinePoolScope.GCPMachinePool.Spec.ProviderIDList = providerIDList + machinePoolScope.GCPMachinePool.Status.Replicas = int32(len(providerIDList)) + machinePoolScope.GCPMachinePool.Status.Ready = true + + // Requeue so that we can keep the spec.providerIDList and status in sync with the MIG. + // This is important for scaling up and down, as the CAPI MachinePool controller relies on + // the providerIDList to determine which machines belong to the MachinePool. 
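+	// (The periodic resync also picks up any out-of-band changes to the MIG, e.g. a manual resize in the GCP console.)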
+ return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil +} + +func (r *GCPMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope) error { + log := logger.FromContext(ctx) + + log.Info("Handling deleted GCPMachinePool") + + if err := instancegroupmanagers.New(machinePoolScope).Delete(ctx); err != nil { + log.Error(err, "Error deleting instanceGroupManager") + r.Recorder.Eventf(machinePoolScope.GCPMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete instancegroupmanager: %v", err) + + // record.Warnf(machineScope.GCPMachine, "GCPMachineReconcile", "Reconcile error - %v", err) + conditions.MarkUnknown(machinePoolScope.GCPMachinePool, expinfrav1.MIGReadyCondition, expinfrav1.MIGNotFoundReason, "%s", err.Error()) + return err + } + + if err := instancetemplates.New(machinePoolScope).Delete(ctx); err != nil { + log.Error(err, "Error deleting instanceTemplates") + r.Recorder.Eventf(machinePoolScope.GCPMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete instance template: %v", err) + + // record.Warnf(machineScope.GCPMachine, "GCPMachineReconcile", "Reconcile error - %v", err) + conditions.MarkUnknown(machinePoolScope.GCPMachinePool, expinfrav1.InstanceTemplateReadyCondition, expinfrav1.InstanceTemplateReconcileFailedReason, "%s", err.Error()) + return err + } + + // remove finalizer + controllerutil.RemoveFinalizer(machinePoolScope.GCPMachinePool, expinfrav1.MachinePoolFinalizer) + + return nil +} + +func (r *GCPMachinePoolReconciler) getInfraCluster(ctx context.Context, cluster *clusterv1.Cluster, gcpMachinePool *expinfrav1.GCPMachinePool) (*infrav1.GCPCluster, *scope.ClusterScope, error) { + gcpCluster := &infrav1.GCPCluster{} + + gcpClusterKey := client.ObjectKey{ + Namespace: gcpMachinePool.Namespace, + Name: cluster.Spec.InfrastructureRef.Name, + } + + if err := r.Client.Get(ctx, gcpClusterKey, gcpCluster); err != nil { + // GCPCluster is not ready + return nil, nil, nil //nolint:nilerr + } + + // Create the cluster scope + clusterScope, err := scope.NewClusterScope(ctx, scope.ClusterScopeParams{ + Client: r.Client, + Cluster: cluster, + GCPCluster: gcpCluster, + }) + if err != nil { + return nil, nil, err + } + + return gcpCluster, clusterScope, nil +} diff --git a/main.go b/main.go index 37d782d65..eafb0ec22 100644 --- a/main.go +++ b/main.go @@ -27,11 +27,13 @@ import ( "time" // +kubebuilder:scaffold:imports + "github.com/spf13/pflag" "k8s.io/apimachinery/pkg/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" cgrecord "k8s.io/client-go/tools/record" "k8s.io/klog/v2" + "k8s.io/utils/ptr" infrav1beta1 "sigs.k8s.io/cluster-api-provider-gcp/api/v1beta1" "sigs.k8s.io/cluster-api-provider-gcp/controllers" infrav1exp "sigs.k8s.io/cluster-api-provider-gcp/exp/api/v1beta1" @@ -205,6 +207,19 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager) error { return fmt.Errorf("setting up GCPCluster controller: %w", err) } + if feature.Gates.Enabled(capifeature.MachinePool) { + setupLog.Info("Enabling MachinePool reconcilers") + gcpMachinePoolConcurrency := gcpMachineConcurrency // FUTURE: Use our own flag while feature-gated? 
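+		// MachinePool support is gated on Cluster API's MachinePool feature gate
+		// (EXP_MACHINE_POOL in config/manager/manager.yaml), so the reconciler below
+		// is only registered when that gate is enabled.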
+ + if err := (&expcontrollers.GCPMachinePoolReconciler{ + Client: mgr.GetClient(), + Recorder: mgr.GetEventRecorderFor("gcpmachinepool-controller"), + WatchFilterValue: watchFilterValue, + }).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: gcpMachinePoolConcurrency, RecoverPanic: ptr.To[bool](true)}); err != nil { + return fmt.Errorf("creating GCPMachinePool controller: %w", err) + } + } + if feature.Gates.Enabled(feature.GKE) { setupLog.Info("Enabling GKE reconcilers") @@ -260,6 +275,14 @@ func setupWebhooks(mgr ctrl.Manager) error { return fmt.Errorf("setting up GCPMachineTemplate webhook: %w", err) } + if feature.Gates.Enabled(capifeature.MachinePool) { + setupLog.Info("Enabling GCPMachinePool webhooks") + + if err := (&infrav1exp.GCPMachinePool{}).SetupWebhookWithManager(mgr); err != nil { + return fmt.Errorf("creating GCPMachinePool webhook: %w", err) + } + } + if feature.Gates.Enabled(feature.GKE) { setupLog.Info("Enabling GKE webhooks") diff --git a/pkg/gcp/selflink.go b/pkg/gcp/selflink.go new file mode 100644 index 000000000..5454434d2 --- /dev/null +++ b/pkg/gcp/selflink.go @@ -0,0 +1,36 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package gcp implements helper functions for working with GCP. +package gcp + +import "github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta" + +// FormatKey builds a selfLink style string from a meta.Key for logging / human-facing error messages +func FormatKey(resourceType string, key *meta.Key) string { + return SelfLink(resourceType, key) +} + +// SelfLink builds a selfLink for passing to GCP APIs +func SelfLink(resourceType string, key *meta.Key) string { + if key.Region != "" { + return "regions/" + key.Region + "/" + resourceType + "/" + key.Name + } + if key.Zone != "" { + return "zones/" + key.Zone + "/" + resourceType + "/" + key.Name + } + return "global/" + resourceType + "/" + key.Name +} diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go new file mode 100644 index 000000000..fa05ff542 --- /dev/null +++ b/pkg/logger/logger.go @@ -0,0 +1,121 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package logger provides a convenient interface to use to log. +package logger + +import ( + "context" + + "github.com/go-logr/logr" +) + +// These are the log levels used by the logger. 
+// See https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/logging.md#what-method-to-use +const ( + logLevelWarn = 1 + logLevelDebug = 4 + logLevelTrace = 5 +) + +// Wrapper defines a convenient interface to use to log things. +type Wrapper interface { + Info(msg string, keysAndValues ...any) + Debug(msg string, keysAndValues ...any) + Warn(msg string, keysAndValues ...any) + Trace(msg string, keysAndValues ...any) + Error(err error, msg string, keysAndValues ...any) + WithValues(keysAndValues ...any) *Logger + WithName(name string) *Logger + GetLogger() logr.Logger +} + +// Logger is a concrete logger using logr underneath. +type Logger struct { + callStackHelper func() + logger logr.Logger +} + +// NewLogger creates a logger with a passed in logr.Logger implementation directly. +func NewLogger(log logr.Logger) *Logger { + helper, log := log.WithCallStackHelper() + return &Logger{ + callStackHelper: helper, + logger: log, + } +} + +// FromContext retrieves the logr implementation from Context and uses it as underlying logger. +func FromContext(ctx context.Context) *Logger { + helper, log := logr.FromContextOrDiscard(ctx).WithCallStackHelper() + return &Logger{ + callStackHelper: helper, + logger: log, + } +} + +var _ Wrapper = &Logger{} + +// Info logs a message at the info level. +func (c *Logger) Info(msg string, keysAndValues ...any) { + c.callStackHelper() + c.logger.Info(msg, keysAndValues...) +} + +// Debug logs a message at the debug level. +func (c *Logger) Debug(msg string, keysAndValues ...any) { + c.callStackHelper() + c.logger.V(logLevelDebug).Info(msg, keysAndValues...) +} + +// Warn logs a message at the warn level. +func (c *Logger) Warn(msg string, keysAndValues ...any) { + c.callStackHelper() + c.logger.V(logLevelWarn).Info(msg, keysAndValues...) +} + +// Trace logs a message at the trace level. +func (c *Logger) Trace(msg string, keysAndValues ...any) { + c.callStackHelper() + c.logger.V(logLevelTrace).Info(msg, keysAndValues...) +} + +// Error logs a message at the error level. +func (c *Logger) Error(err error, msg string, keysAndValues ...any) { + c.callStackHelper() + c.logger.Error(err, msg, keysAndValues...) +} + +// GetLogger returns the underlying logr.Logger. +func (c *Logger) GetLogger() logr.Logger { + return c.logger +} + +// WithValues adds some key-value pairs of context to a logger. +func (c *Logger) WithValues(keysAndValues ...any) *Logger { + return &Logger{ + callStackHelper: c.callStackHelper, + logger: c.logger.WithValues(keysAndValues...), + } +} + +// WithName adds a new element to the logger's name. 
+func (c *Logger) WithName(name string) *Logger { + return &Logger{ + callStackHelper: c.callStackHelper, + logger: c.logger.WithName(name), + } +} diff --git a/test/e2e/config/gcp-ci.yaml b/test/e2e/config/gcp-ci.yaml index 2789a9ab7..0b9e24073 100644 --- a/test/e2e/config/gcp-ci.yaml +++ b/test/e2e/config/gcp-ci.yaml @@ -75,6 +75,7 @@ providers: - sourcePath: "${PWD}/test/e2e/data/infrastructure-gcp/cluster-template-ci-gke-autopilot.yaml" - sourcePath: "${PWD}/test/e2e/data/infrastructure-gcp/cluster-template-ci-gke-custom-subnet.yaml" - sourcePath: "${PWD}/test/e2e/data/infrastructure-gcp/cluster-template-ci-with-internal-lb.yaml" + - sourcePath: "${PWD}/test/e2e/data/infrastructure-gcp/cluster-template-ci-with-machinepool.yaml" - sourcePath: "${PWD}/test/e2e/data/infrastructure-gcp/withclusterclass/cluster-template-ci-gke-autopilot-topology.yaml" variables: @@ -120,6 +121,7 @@ intervals: default/wait-gpu-nodes: ["30m", "10s"] default/wait-delete-cluster: ["30m", "10s"] default/wait-machine-upgrade: ["60m", "10s"] + default/wait-machine-pool-nodes: ["40m", "10s"] default/wait-machine-pool-upgrade: ["60m", "10s"] default/wait-machine-remediation: ["30m", "10s"] default/wait-deployment: ["15m", "10s"] diff --git a/test/e2e/data/infrastructure-gcp/cluster-template-ci-with-machinepool.yaml b/test/e2e/data/infrastructure-gcp/cluster-template-ci-with-machinepool.yaml new file mode 100644 index 000000000..082b0ae6a --- /dev/null +++ b/test/e2e/data/infrastructure-gcp/cluster-template-ci-with-machinepool.yaml @@ -0,0 +1,164 @@ +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: "${CLUSTER_NAME}" + labels: + cni: "${CLUSTER_NAME}-crs-cni" + ccm: "${CLUSTER_NAME}-crs-ccm" +spec: + clusterNetwork: + pods: + cidrBlocks: ["192.168.0.0/16"] + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: GCPCluster + name: "${CLUSTER_NAME}" + controlPlaneRef: + kind: KubeadmControlPlane + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + name: "${CLUSTER_NAME}-control-plane" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: GCPCluster +metadata: + name: "${CLUSTER_NAME}" +spec: + project: "${GCP_PROJECT}" + region: "${GCP_REGION}" + network: + name: "${GCP_NETWORK_NAME}" +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + machineTemplate: + infrastructureRef: + kind: GCPMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + name: "${CLUSTER_NAME}-control-plane" + kubeadmConfigSpec: + initConfiguration: + nodeRegistration: + name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' + kubeletExtraArgs: + cloud-provider: external + clusterConfiguration: + apiServer: + timeoutForControlPlane: 20m + controllerManager: + extraArgs: + cloud-provider: external + allocate-node-cidrs: "false" + kubernetesVersion: "${KUBERNETES_VERSION}" + files: + - content: | + [Global] + + project-id = "${GCP_PROJECT}" + network-name = "${GCP_NETWORK_NAME}" + multizone = true + owner: root:root + path: /etc/kubernetes/cloud.config + permissions: "0744" + joinConfiguration: + nodeRegistration: + name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' + kubeletExtraArgs: + cloud-provider: external + version: "${KUBERNETES_VERSION}" +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: GCPMachineTemplate +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + template: + spec: + instanceType: 
"${GCP_CONTROL_PLANE_MACHINE_TYPE}" + image: "${IMAGE_ID}" +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachinePool +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + clusterName: "${CLUSTER_NAME}" + replicas: ${WORKER_MACHINE_COUNT} + failureDomains: + # We currently only support a single zone (failure domain) per MachinePool, + # when using GCPMachinePool. + - ${GCP_REGION}-a + template: + spec: + clusterName: "${CLUSTER_NAME}" + version: "${KUBERNETES_VERSION}" + bootstrap: + configRef: + name: "${CLUSTER_NAME}-md-0" + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfig + infrastructureRef: + name: "${CLUSTER_NAME}-md-0" + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: GCPMachinePool +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: GCPMachinePool +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + instanceType: "${GCP_NODE_MACHINE_TYPE}" + image: "${IMAGE_ID}" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfig +metadata: + name: "${CLUSTER_NAME}-md-0" +spec: + joinConfiguration: + nodeRegistration: + name: '{{ ds.meta_data.local_hostname.split(".")[0] }}' + kubeletExtraArgs: + cloud-provider: external +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "${CLUSTER_NAME}-crs-cni" +data: ${CNI_RESOURCES} +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: "${CLUSTER_NAME}-crs-cni" +spec: + strategy: ApplyOnce + clusterSelector: + matchLabels: + cni: "${CLUSTER_NAME}-crs-cni" + resources: + - name: "${CLUSTER_NAME}-crs-cni" + kind: ConfigMap +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: "${CLUSTER_NAME}-crs-ccm" +data: ${CCM_RESOURCES} +--- +apiVersion: addons.cluster.x-k8s.io/v1beta1 +kind: ClusterResourceSet +metadata: + name: "${CLUSTER_NAME}-crs-ccm" +spec: + strategy: ApplyOnce + clusterSelector: + matchLabels: + ccm: "${CLUSTER_NAME}-crs-ccm" + resources: + - name: "${CLUSTER_NAME}-crs-ccm" + kind: ConfigMap diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 3e1823a27..8a255ba30 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -125,6 +125,53 @@ var _ = Describe("Workload cluster creation", func() { }) }) + Context("Creating a single control-plane cluster with MachinePool", func() { + It("Should create a cluster with 1 worker node and can be scaled", func() { + clusterName := fmt.Sprintf("%s-single", clusterNamePrefix) + By("Initializes with 1 worker node") + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: clusterctlLogFolder, + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + Flavor: "ci-with-machinepool", + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: e2eConfig.MustGetVariable(KubernetesVersion), + ControlPlaneMachineCount: ptr.To[int64](1), + WorkerMachineCount: ptr.To[int64](1), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), + WaitForMachinePools: e2eConfig.GetIntervals(specName, "wait-machine-pool-nodes"), + }, result) + + By("Scaling worker node to 3") + clusterctl.ApplyClusterTemplateAndWait(ctx, 
clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: clusterctlLogFolder, + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + Flavor: "ci-with-machinepool", + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: e2eConfig.MustGetVariable(KubernetesVersion), + ControlPlaneMachineCount: ptr.To[int64](1), + WorkerMachineCount: ptr.To[int64](3), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"), + WaitForMachinePools: e2eConfig.GetIntervals(specName, "wait-machine-pool-nodes"), + }, result) + }) + }) + Context("Creating a highly available control-plane cluster", func() { It("Should create a cluster with 3 control-plane and 2 worker nodes", func() { clusterName := fmt.Sprintf("%s-ha", clusterNamePrefix)