Skip to content

Commit 3a92e5c

Browse files
committed
fix-automode-nil-pointer-issue
1 parent 9ed2482 commit 3a92e5c

File tree

2 files changed

+215
-62
lines changed

2 files changed

+215
-62
lines changed

pkg/resource/cluster/hook.go

Lines changed: 33 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -187,69 +187,42 @@ func (rm *resourceManager) clusterInUse(ctx context.Context, r *resource) (bool,
187187
// isAutoModeCluster returns true if the cluster is configured for EKS Auto Mode.
188188
// According to AWS documentation, compute, block storage, and load balancing capabilities
189189
// must all be enabled or disabled together. Any partial configuration is invalid.
190-
func isAutoModeCluster(r *resource) bool {
190+
// Returns an error for invalid configurations.
191+
func isAutoModeCluster(r *resource) (bool, error) {
191192
if r == nil || r.ko == nil {
192-
return false
193+
return false, nil
193194
}
194195

195-
// If ComputeConfig is not specified, this is not an Auto Mode cluster
196-
if r.ko.Spec.ComputeConfig == nil {
197-
return false
198-
}
199-
200-
// Check compute configuration
201-
computeEnabled := r.ko.Spec.ComputeConfig.Enabled != nil && *r.ko.Spec.ComputeConfig.Enabled
196+
hasComputeConfig := r.ko.Spec.ComputeConfig != nil
197+
hasStorageConfig := r.ko.Spec.StorageConfig != nil && r.ko.Spec.StorageConfig.BlockStorage != nil
198+
hasELBConfig := r.ko.Spec.KubernetesNetworkConfig != nil && r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing != nil
202199

203-
// Check storage configuration
204-
storageEnabled := false
205-
if r.ko.Spec.StorageConfig != nil && r.ko.Spec.StorageConfig.BlockStorage != nil {
206-
storageEnabled = r.ko.Spec.StorageConfig.BlockStorage.Enabled != nil && *r.ko.Spec.StorageConfig.BlockStorage.Enabled
200+
// If no Auto Mode configuration is present, it's valid (not an Auto Mode cluster)
201+
if !hasComputeConfig && !hasStorageConfig && !hasELBConfig {
202+
return false, nil
207203
}
208204

209-
// Check elastic load balancing configuration
210-
elbEnabled := false
211-
if r.ko.Spec.KubernetesNetworkConfig != nil && r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing != nil {
212-
elbEnabled = r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled != nil && *r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled
213-
}
214-
215-
// Auto Mode requires all three capabilities to have the same state (all true or all false)
216-
// If they are all true, Auto Mode is enabled
217-
// If they are all false, Auto Mode is being disabled
218-
// Any other combination is invalid
219-
return (computeEnabled && storageEnabled && elbEnabled) || (!computeEnabled && !storageEnabled && !elbEnabled)
220-
}
221-
222-
// validateAutoModeConfig validates that Auto Mode configuration is consistent.
223-
// Returns an error if the configuration is invalid (partial enablement).
224-
func validateAutoModeConfig(r *resource) error {
225-
if r == nil || r.ko == nil || r.ko.Spec.ComputeConfig == nil {
226-
return nil // Not an Auto Mode configuration
205+
// If any Auto Mode configuration is present, ALL must be present
206+
if !hasComputeConfig || !hasStorageConfig || !hasELBConfig {
207+
return false, fmt.Errorf("invalid Auto Mode configuration: when configuring Auto Mode, all three capabilities must be specified (compute=%v, storage=%v, elb=%v)",
208+
hasComputeConfig, hasStorageConfig, hasELBConfig)
227209
}
228210

229-
// Check compute configuration
230211
computeEnabled := r.ko.Spec.ComputeConfig.Enabled != nil && *r.ko.Spec.ComputeConfig.Enabled
231-
232-
// Check storage configuration
233-
storageEnabled := false
234-
if r.ko.Spec.StorageConfig != nil && r.ko.Spec.StorageConfig.BlockStorage != nil {
235-
storageEnabled = r.ko.Spec.StorageConfig.BlockStorage.Enabled != nil && *r.ko.Spec.StorageConfig.BlockStorage.Enabled
236-
}
237-
238-
// Check elastic load balancing configuration
239-
elbEnabled := false
240-
if r.ko.Spec.KubernetesNetworkConfig != nil && r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing != nil {
241-
elbEnabled = r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled != nil && *r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled
242-
}
212+
storageEnabled := r.ko.Spec.StorageConfig.BlockStorage.Enabled != nil && *r.ko.Spec.StorageConfig.BlockStorage.Enabled
213+
elbEnabled := r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled != nil && *r.ko.Spec.KubernetesNetworkConfig.ElasticLoadBalancing.Enabled
243214

244215
// All three must be in the same state
245-
if computeEnabled == storageEnabled && storageEnabled == elbEnabled {
246-
return nil // Valid configuration
216+
if computeEnabled != storageEnabled || storageEnabled != elbEnabled {
217+
return false, fmt.Errorf("invalid Auto Mode configuration: compute, block storage, and load balancing capabilities must all be enabled or disabled together (compute=%v, storage=%v, elb=%v)",
218+
computeEnabled, storageEnabled, elbEnabled)
247219
}
248220

249-
return fmt.Errorf("invalid Auto Mode configuration: compute, block storage, and load balancing capabilities must all be enabled or disabled together (compute=%v, storage=%v, elb=%v)",
250-
computeEnabled, storageEnabled, elbEnabled)
221+
isAutoMode := (computeEnabled && storageEnabled && elbEnabled) || (!computeEnabled && !storageEnabled && !elbEnabled)
222+
return isAutoMode, nil
251223
}
252224

225+
253226
func customPreCompare(
254227
a *resource,
255228
b *resource,
@@ -448,22 +421,25 @@ func (rm *resourceManager) customUpdate(
448421

449422
// Handle computeConfig updates - only for Auto Mode clusters
450423
if delta.DifferentAt("Spec.ComputeConfig") || delta.DifferentAt("Spec.StorageConfig") || delta.DifferentAt("Spec.KubernetesNetworkConfig") {
451-
// Validate Auto Mode configuration consistency before attempting update
452-
if err := validateAutoModeConfig(desired); err != nil {
424+
// Validate Auto Mode configuration and proceed only if cluster is configured for Auto Mode
425+
isAutoMode, err := isAutoModeCluster(desired)
426+
if err != nil {
453427
return nil, ackerr.NewTerminalError(err)
454428
}
455-
456-
// Only proceed with Auto Mode updates if the cluster is actually configured for Auto Mode
457-
if isAutoModeCluster(desired) {
429+
if isAutoMode {
458430
if err := rm.updateComputeConfig(ctx, desired); err != nil {
459431
awsErr, ok := extractAWSError(err)
432+
var awsErrorCode string
433+
if ok && awsErr != nil {
434+
awsErrorCode = awsErr.Code
435+
}
460436
rlog.Info("attempting to update AutoMode config",
461437
"error", err,
462438
"isAWSError", ok,
463-
"awsErrorCode", awsErr.Code)
439+
"awsErrorCode", awsErrorCode)
464440

465441
// Check to see if we've raced an async update call and need to requeue
466-
if ok && awsErr.Code == "ResourceInUseException" {
442+
if ok && awsErr != nil && awsErr.Code == "ResourceInUseException" {
467443
rlog.Info("resource in use, requeueing after async update")
468444
return nil, requeueAfterAsyncUpdate()
469445
}
@@ -473,8 +449,8 @@ func (rm *resourceManager) customUpdate(
473449

474450
return returnClusterUpdating(updatedRes)
475451
}
476-
// If not Auto Mode, ignore the diff (likely elasticLoadBalancing false vs absent)
477-
rlog.Debug("ignoring diff on compute/storage/network config for non-Auto Mode cluster")
452+
// If not Auto Mode, ignore the diff
453+
rlog.Info("ignoring diff on compute/storage/network config for non-Auto Mode cluster")
478454
}
479455

480456
// Handle zonalShiftConfig updates

test/e2e/tests/test_cluster_automode.py

Lines changed: 182 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import logging
1919
import time
2020
import pytest
21+
import json
2122

2223
from acktest.k8s import resource as k8s
2324
from acktest.k8s import condition
@@ -33,9 +34,10 @@
3334
from e2e.common.types import CLUSTER_RESOURCE_PLURAL
3435
from e2e.common.waiter import wait_until_deleted
3536
from e2e.replacement_values import REPLACEMENT_VALUES
37+
from e2e.tests.test_cluster import simple_cluster
3638

37-
MODIFY_WAIT_AFTER_SECONDS = 240
38-
CHECK_STATUS_WAIT_SECONDS = 240
39+
MODIFY_WAIT_AFTER_SECONDS = 60
40+
CHECK_STATUS_WAIT_SECONDS = 30
3941

4042

4143
def wait_for_cluster_active(eks_client, cluster_name):
@@ -93,8 +95,13 @@ def auto_mode_cluster(eks_client):
9395

9496
yield (ref, cr)
9597

96-
pass
97-
98+
# Try to delete, if doesn't already exist
99+
try:
100+
_, deleted = k8s.delete_custom_resource(ref, 9, 10)
101+
assert deleted
102+
wait_until_deleted(cluster_name)
103+
except Exception:
104+
pass
98105

99106
@service_marker
100107
@pytest.mark.canary
@@ -141,6 +148,176 @@ def test_create_auto_mode_cluster(self, eks_client, auto_mode_cluster):
141148
time.sleep(CHECK_STATUS_WAIT_SECONDS)
142149

143150
# Clean up
144-
_, deleted = k8s.delete_custom_resource(ref, 3, 10)
151+
_, deleted = k8s.delete_custom_resource(ref, 9, 10)
145152
assert deleted
146153
wait_until_deleted(cluster_name)
154+
155+
156+
@service_marker
157+
@pytest.mark.canary
158+
class TestAutoModeClusterUpdates:
159+
def test_enable_auto_mode_on_standard_cluster(self, eks_client, simple_cluster):
160+
(ref, cr) = simple_cluster
161+
cluster_name = cr["spec"]["name"]
162+
163+
try:
164+
aws_res = eks_client.describe_cluster(name=cluster_name)
165+
assert aws_res is not None
166+
except eks_client.exceptions.ResourceNotFoundException:
167+
pytest.fail(f"Could not find cluster '{cluster_name}' in EKS")
168+
169+
# Wait for the cluster to be ACTIVE and let controller refresh status
170+
wait_for_cluster_active(eks_client, cluster_name)
171+
time.sleep(CHECK_STATUS_WAIT_SECONDS)
172+
get_and_assert_status(ref, "ACTIVE", True)
173+
174+
# Patch to enable auto-mode
175+
patch_enable_auto_mode = {
176+
"spec": {
177+
"computeConfig": {"enabled": True},
178+
"storageConfig": {"blockStorage": {"enabled": True}},
179+
"kubernetesNetworkConfig": {
180+
"elasticLoadBalancing": {"enabled": True},
181+
"ipFamily": "ipv4",
182+
},
183+
}
184+
}
185+
k8s.patch_custom_resource(ref, patch_enable_auto_mode)
186+
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
187+
get_and_assert_status(ref, "UPDATING", False)
188+
189+
# Wait for cluster to become active after update
190+
wait_for_cluster_active(eks_client, cluster_name)
191+
time.sleep(CHECK_STATUS_WAIT_SECONDS)
192+
get_and_assert_status(ref, "ACTIVE", True)
193+
194+
# Verify auto-mode activation via EKS update history (since DescribeCluster may not reflect the fields immediately)
195+
updates_summary = eks_client.list_updates(name=cluster_name)
196+
197+
update_ids = updates_summary.get("updateIds", [])
198+
assert len(update_ids) == 1, (
199+
f"Expected exactly 1 update, got {len(update_ids)}: {update_ids}"
200+
)
201+
202+
update_id = update_ids[0]
203+
upd_desc = eks_client.describe_update(name=cluster_name, updateId=update_id)
204+
205+
update_info = upd_desc["update"]
206+
207+
# Verify update type and status
208+
assert update_info["type"] == "AutoModeUpdate", (
209+
f"Expected AutoModeUpdate, got: {update_info['type']}"
210+
)
211+
assert update_info["status"] == "Successful", (
212+
f"Expected Successful status, got: {update_info['status']}"
213+
)
214+
215+
def test_disable_auto_mode_incorrectly(self, eks_client, auto_mode_cluster):
216+
(ref, cr) = auto_mode_cluster
217+
cluster_name = cr["spec"]["name"]
218+
219+
try:
220+
aws_res = eks_client.describe_cluster(name=cluster_name)
221+
assert aws_res is not None
222+
except eks_client.exceptions.ResourceNotFoundException:
223+
pytest.fail(f"Could not find cluster '{cluster_name}' in EKS")
224+
225+
wait_for_cluster_active(eks_client, cluster_name)
226+
time.sleep(CHECK_STATUS_WAIT_SECONDS)
227+
get_and_assert_status(ref, "ACTIVE", True)
228+
229+
# Patch with incorrect parameters to disable auto-mode
230+
patch_disable_auto_mode_incorrectly = {
231+
"spec": {
232+
"computeConfig": {"enabled": False},
233+
"storageConfig": {
234+
"blockStorage": {
235+
"enabled": True # Should be False
236+
}
237+
},
238+
"kubernetesNetworkConfig": {"elasticLoadBalancing": {"enabled": False}},
239+
}
240+
}
241+
logging.info(
242+
f"Applying patch with incorrect parameters: {patch_disable_auto_mode_incorrectly}"
243+
)
244+
k8s.patch_custom_resource(ref, patch_disable_auto_mode_incorrectly)
245+
time.sleep(MODIFY_WAIT_AFTER_SECONDS)
246+
247+
# The controller should detect the invalid configuration and set a terminal condition.
248+
terminal_condition = "ACK.Terminal"
249+
cond = k8s.get_resource_condition(ref, terminal_condition)
250+
if cond is None:
251+
pytest.fail(
252+
f"Failed to find {terminal_condition} condition in resource {ref}"
253+
)
254+
255+
cond_status = cond.get("status", None)
256+
if str(cond_status) != str(True):
257+
pytest.fail(
258+
f"Expected {terminal_condition} condition to have status True but found {cond_status}"
259+
)
260+
261+
# Verify the error message contains information about invalid Auto Mode configuration
262+
assert "invalid Auto Mode configuration" in cond.get("message", "")
263+
264+
def test_disable_auto_mode_correctly(self, eks_client, auto_mode_cluster):
265+
(ref, cr) = auto_mode_cluster
266+
cluster_name = cr["spec"]["name"]
267+
268+
try:
269+
aws_res = eks_client.describe_cluster(name=cluster_name)
270+
assert aws_res is not None
271+
except eks_client.exceptions.ResourceNotFoundException:
272+
pytest.fail(f"Could not find cluster '{cluster_name}' in EKS")
273+
274+
wait_for_cluster_active(eks_client, cluster_name)
275+
time.sleep(CHECK_STATUS_WAIT_SECONDS)
276+
get_and_assert_status(ref, "ACTIVE", True)
277+
278+
# Patch to disable auto-mode correctly
279+
patch_disable_auto_mode = {
280+
"spec": {
281+
"computeConfig": {"enabled": False},
282+
"storageConfig": {"blockStorage": {"enabled": False}},
283+
"kubernetesNetworkConfig": {"elasticLoadBalancing": {"enabled": False}},
284+
}
285+
}
286+
logging.info(f"Applying patch to disable auto-mode: {patch_disable_auto_mode}")
287+
k8s.patch_custom_resource(ref, patch_disable_auto_mode)
288+
time.sleep(MODIFY_WAIT_AFTER_SECONDS )
289+
290+
get_and_assert_status(ref, "UPDATING", False)
291+
292+
# Wait for cluster to become active after update
293+
wait_for_cluster_active(eks_client, cluster_name)
294+
time.sleep(CHECK_STATUS_WAIT_SECONDS)
295+
296+
get_and_assert_status(ref, "ACTIVE", True)
297+
298+
# Verify auto-mode is disabled
299+
aws_res = eks_client.describe_cluster(name=cluster_name)
300+
301+
# Check compute config - should be absent or disabled
302+
compute_config = aws_res["cluster"].get("computeConfig")
303+
if compute_config is not None:
304+
assert compute_config.get("enabled") is False, (
305+
f"computeConfig.enabled should be False or absent, got: {compute_config.get('enabled')}"
306+
)
307+
308+
# Check storage config - should be absent or disabled
309+
storage_config = aws_res["cluster"].get("storageConfig")
310+
if storage_config is not None:
311+
block_storage = storage_config.get("blockStorage", {})
312+
if block_storage:
313+
assert block_storage.get("enabled") is False, (
314+
f"storageConfig.blockStorage.enabled should be False or absent, got: {block_storage.get('enabled')}"
315+
)
316+
317+
# Check elastic load balancing config - should be absent or disabled
318+
k8s_network_config = aws_res["cluster"].get("kubernetesNetworkConfig", {})
319+
elb_config = k8s_network_config.get("elasticLoadBalancing")
320+
if elb_config is not None:
321+
assert elb_config.get("enabled") is False, (
322+
f"kubernetesNetworkConfig.elasticLoadBalancing.enabled should be False or absent, got: {elb_config.get('enabled')}"
323+
)

0 commit comments

Comments
 (0)