Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 38 additions & 7 deletions go-controller/pkg/clustermanager/egressip_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors"
"k8s.io/apimachinery/pkg/api/meta"
)

const (
Expand Down Expand Up @@ -211,6 +212,31 @@ func (eIPC *egressIPClusterController) executeCloudPrivateIPConfigOps(egressIPNa
if cloudPrivateIPConfig.GetDeletionTimestamp() != nil && !cloudPrivateIPConfig.GetDeletionTimestamp().IsZero() {
return fmt.Errorf("cloud update request failed, CloudPrivateIPConfig: %s is being deleted", cloudPrivateIPConfigName)
}

// Handle a scenario in which the object exists in a failed state by removing it if the node it was assigned to no longer exists
assignedCondition := meta.FindStatusCondition(cloudPrivateIPConfig.Status.Conditions, string(ocpcloudnetworkapi.Assigned))
if cloudPrivateIPConfig.Status.Node != "" && assignedCondition != nil && assignedCondition.Status == metav1.ConditionFalse {
_, err := eIPC.watchFactory.GetNode(cloudPrivateIPConfig.Status.Node)
if err != nil && apierrors.IsNotFound(err) {
klog.Warningf("CloudPrivateIPConfig: %s is in Failed state (reason: %s) and node %s no longer exists, deleting to allow retry",
cloudPrivateIPConfigName, assignedCondition.Message, cloudPrivateIPConfig.Status.Node)
eIPRef := corev1.ObjectReference{
Kind: "EgressIP",
Name: egressIPName,
}
eIPC.recorder.Eventf(&eIPRef, corev1.EventTypeWarning, "CloudAssignmentRetry",
"egress IP: %s previously failed on deleted node %s (reason: %s), will retry assignment",
egressIP, cloudPrivateIPConfig.Status.Node, assignedCondition.Message)
if err := eIPC.kube.DeleteCloudPrivateIPConfig(cloudPrivateIPConfigName); err != nil {
return fmt.Errorf("failed to delete failed CloudPrivateIPConfig: %s, err: %v", cloudPrivateIPConfigName, err)
}

return fmt.Errorf("deleted failed CloudPrivateIPConfig: %s, will retry creation in next reconciliation", cloudPrivateIPConfigName)
} else if err != nil {
klog.Errorf("Failed to check if node %s exists for CloudPrivateIPConfig %s: %v", cloudPrivateIPConfig.Status.Node, cloudPrivateIPConfigName, err)
}
}

if op.toAdd == cloudPrivateIPConfig.Spec.Node {
klog.Infof("CloudPrivateIPConfig: %s already assigned to node: %s", cloudPrivateIPConfigName, cloudPrivateIPConfig.Spec.Node)
continue
Expand Down Expand Up @@ -1671,21 +1697,21 @@ func cloudPrivateIPConfigNameToIPString(name string) string {
// removePendingOps removes the existing pending CloudPrivateIPConfig operations
// from the cache and returns the EgressIP object which can be re-synced given
// the new assignment possibilities.
func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIP string) ([]*egressipv1.EgressIP, error) {
func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPName, egressIPAddr string) ([]*egressipv1.EgressIP, error) {
eIPC.pendingCloudPrivateIPConfigsMutex.Lock()
defer eIPC.pendingCloudPrivateIPConfigsMutex.Unlock()
ops, pending := eIPC.pendingCloudPrivateIPConfigsOps[egressIPName]
if !pending {
return nil, fmt.Errorf("no pending operation found for EgressIP: %s", egressIPName)
}
op, exists := ops[egressIP]
op, exists := ops[egressIPAddr]
if !exists {
return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIP)
return nil, fmt.Errorf("pending operations found for EgressIP: %s, but not for the finalized IP: %s", egressIPName, egressIPAddr)
}
// Make sure we are dealing with a delete operation, since for update
// operations will still need to process the add afterwards.
if op.toAdd == "" && op.toDelete != "" {
delete(ops, egressIP)
delete(ops, egressIPAddr)
}
if len(ops) == 0 {
delete(eIPC.pendingCloudPrivateIPConfigsOps, egressIPName)
Expand All @@ -1703,10 +1729,15 @@ func (eIPC *egressIPClusterController) removePendingOpsAndGetResyncs(egressIPNam
resyncs := make([]*egressipv1.EgressIP, 0, len(egressIPs))
for _, egressIP := range egressIPs {
egressIP := *egressIP
// Do not process the egress IP object which owns the
// CloudPrivateIPConfig for which we are currently processing the
// deletion for.
if egressIP.Name == egressIPName {
for _, specIP := range egressIP.Spec.EgressIPs {
// Do not process the egress IP object which owns the
// CloudPrivateIPConfig for which we are currently processing the
// deletion for unless it still has the IP in it's spec
if specIP == egressIPAddr {
resyncs = append(resyncs, &egressIP)
}
}
continue
}
unassigned := len(egressIP.Spec.EgressIPs) - len(egressIP.Status.Items)
Expand Down