Skip to content

Commit 246770e

Browse files
committed
feat: Adding support for shared-requestor flow
Signed-off-by: Ido Heyvi <[email protected]>
1 parent 0f2e999 commit 246770e

File tree

9 files changed

+518
-73
lines changed

9 files changed

+518
-73
lines changed

docs/automatic-ofed-upgrade.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,21 @@ controller manager watchers:
114114
> Meaning in case node undergoes upgrade prior to enabling `requestor` mode, node will continue `inplace` upgrade mode. Only after `requestor` mode is set, and upgrade
115115
> controller has set nodes state to be upgrade-required, only then new requestor mode will take place.
116116
117+
###### shared-requestor
118+
The requestor mode supports a `shared-requestor` flow where multiple operators can coordinate node maintenance operations:
119+
Assumptions:
120+
1. Cluster admin, which requires `shared-requestor` flow, needs to make sure that all operators, utilizing maintenance OP, use same upgrade policy specs (same drainSpec).
121+
2. To be able to accommodate both GPU/Network drivers upgrade, `DrainSpec.PodSelector` should be set accordingly (hard-coded).
122+
* podSelector: `nvidia.com/ofed-driver-upgrade-drain.skip!=true,nvidia.com/gpu-driver-upgrade-drain.skip!=true`
123+
3. No custom `NodeMaintenanceNamePrefix` should be used. Requestor will use `DefaultNodeMaintenanceNamePrefix` as a common prefix for nodeMaintenance name.
124+
Flow:
125+
1. Each operator adds its dedicated operator label to the nodeMaintenance object
126+
2. When a nodeMaintenance object exists, additional operators append their requestorID to the spec.AdditionalRequestors list
127+
3. During `uncordon-required` completion:
128+
- Non-owning operators remove themselves from spec.AdditionalRequestors list using optimistic locking
129+
- Each operator removes its dedicated label from the nodeMaintenance object
130+
4. The owning nodeMaintenance operator handles the actual, client side, deletion of the nodeMaintenance object
131+
117132
### Troubleshooting
118133
#### Node is in `upgrade-failed` state
119134
* Drain the node manually by running `kubectl drain <node_name> --ignore-daemonsets`

pkg/upgrade/common_manager.go

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ type CommonUpgradeManagerImpl struct {
9494
ValidationManager ValidationManager
9595
SafeDriverLoadManager SafeDriverLoadManager
9696

97+
shouldSkipUpgradeDoneFunc func(requestorID string, nodeState *NodeUpgradeState) (bool, error)
9798
// optional states
9899
podDeletionStateEnabled bool
99100
validationStateEnabled bool
@@ -104,6 +105,7 @@ func NewCommonUpgradeStateManager(
104105
log logr.Logger,
105106
k8sConfig *rest.Config,
106107
scheme *runtime.Scheme,
108+
skipFunc func(requestorID string, nodeState *NodeUpgradeState) (bool, error),
107109
eventRecorder record.EventRecorder) (*CommonUpgradeManagerImpl, error) {
108110
k8sClient, err := client.New(k8sConfig, client.Options{Scheme: scheme})
109111
if err != nil {
@@ -117,16 +119,17 @@ func NewCommonUpgradeStateManager(
117119

118120
nodeUpgradeStateProvider := NewNodeUpgradeStateProvider(k8sClient, log, eventRecorder)
119121
commonUpgrade := CommonUpgradeManagerImpl{
120-
Log: log,
121-
K8sClient: k8sClient,
122-
K8sInterface: k8sInterface,
123-
EventRecorder: eventRecorder,
124-
DrainManager: NewDrainManager(k8sInterface, nodeUpgradeStateProvider, log, eventRecorder),
125-
PodManager: NewPodManager(k8sInterface, nodeUpgradeStateProvider, log, nil, eventRecorder),
126-
CordonManager: NewCordonManager(k8sInterface, log),
127-
NodeUpgradeStateProvider: nodeUpgradeStateProvider,
128-
ValidationManager: NewValidationManager(k8sInterface, log, eventRecorder, nodeUpgradeStateProvider, ""),
129-
SafeDriverLoadManager: NewSafeDriverLoadManager(nodeUpgradeStateProvider, log),
122+
Log: log,
123+
K8sClient: k8sClient,
124+
K8sInterface: k8sInterface,
125+
EventRecorder: eventRecorder,
126+
DrainManager: NewDrainManager(k8sInterface, nodeUpgradeStateProvider, log, eventRecorder),
127+
PodManager: NewPodManager(k8sInterface, nodeUpgradeStateProvider, log, nil, eventRecorder),
128+
CordonManager: NewCordonManager(k8sInterface, log),
129+
NodeUpgradeStateProvider: nodeUpgradeStateProvider,
130+
ValidationManager: NewValidationManager(k8sInterface, log, eventRecorder, nodeUpgradeStateProvider, ""),
131+
SafeDriverLoadManager: NewSafeDriverLoadManager(nodeUpgradeStateProvider, log),
132+
shouldSkipUpgradeDoneFunc: skipFunc,
130133
}
131134

132135
return &commonUpgrade, nil
@@ -487,7 +490,7 @@ func (m *CommonUpgradeManagerImpl) ProcessPodRestartNodes(
487490
}
488491
if driverPodInSync {
489492
if !m.IsValidationEnabled() {
490-
err = m.updateNodeToUncordonOrDoneState(ctx, nodeState.Node)
493+
err = m.updateNodeToUncordonOrDoneState(ctx, nodeState)
491494
if err != nil {
492495
return err
493496
}
@@ -595,7 +598,7 @@ func (m *CommonUpgradeManagerImpl) ProcessValidationRequiredNodes(
595598
continue
596599
}
597600

598-
err = m.updateNodeToUncordonOrDoneState(ctx, node)
601+
err = m.updateNodeToUncordonOrDoneState(ctx, nodeState)
599602
if err != nil {
600603
return err
601604
}
@@ -670,16 +673,30 @@ func (m *CommonUpgradeManagerImpl) SkipNodeUpgrade(node *corev1.Node) bool {
670673
// updateNodeToUncordonOrDoneState skips moving the node to the UncordonRequired state if the node
671674
// was Unschedulable at the beginning of the upgrade so that the node remains in the same state as
672675
// when the upgrade started. In addition, the annotation tracking this information is removed.
673-
func (m *CommonUpgradeManagerImpl) updateNodeToUncordonOrDoneState(ctx context.Context, node *corev1.Node) error {
676+
func (m *CommonUpgradeManagerImpl) updateNodeToUncordonOrDoneState(ctx context.Context,
677+
nodeState *NodeUpgradeState) error {
678+
var (
679+
shouldSkip bool
680+
err error
681+
)
682+
node := nodeState.Node
674683
newUpgradeState := UpgradeStateUncordonRequired
675684
annotationKey := GetUpgradeInitialStateAnnotationKey()
676-
if _, ok := node.Annotations[annotationKey]; ok {
677-
m.Log.V(consts.LogLevelInfo).Info("Node was Unschedulable at beginning of upgrade, skipping uncordon",
678-
"node", node.Name)
679-
newUpgradeState = UpgradeStateDone
685+
if m.shouldSkipUpgradeDoneFunc != nil {
686+
shouldSkip, err = m.shouldSkipUpgradeDoneFunc(GetRequestorOptsFromEnvs().MaintenanceOPRequestorID, nodeState)
687+
if err != nil {
688+
return err
689+
}
690+
}
691+
if !shouldSkip {
692+
if _, ok := node.Annotations[annotationKey]; ok {
693+
m.Log.V(consts.LogLevelInfo).Info("Node was Unschedulable at beginning of upgrade, skipping uncordon",
694+
"node", node.Name)
695+
newUpgradeState = UpgradeStateDone
696+
}
680697
}
681698

682-
err := m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, newUpgradeState)
699+
err = m.NodeUpgradeStateProvider.ChangeNodeUpgradeState(ctx, node, newUpgradeState)
683700
if err != nil {
684701
m.Log.V(consts.LogLevelError).Error(
685702
err, "Failed to change node upgrade state", "node", node.Name, "state", newUpgradeState)

pkg/upgrade/consts.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ const (
2121
UpgradeStateLabelKeyFmt = "nvidia.com/%s-driver-upgrade-state"
2222
// UpgradeSkipNodeLabelKeyFmt is the format of the node label boolean key indicating to skip driver upgrade
2323
UpgradeSkipNodeLabelKeyFmt = "nvidia.com/%s-driver-upgrade.skip"
24+
// UpgradeSkipDrainDriverSelectorFmt is the format of the pod selector key indicating to skip driver
25+
// in upgrade drain spec
26+
UpgradeSkipDrainDriverSelectorFmt = "nvidia.com/%s-driver-upgrade-drain.skip"
2427
// UpgradeWaitForSafeDriverLoadAnnotationKeyFmt is the format of the node annotation key indicating that
2528
// the driver is waiting for safe load. Meaning node should be cordoned and workloads should be removed from the
2629
// node before the driver can continue to load.
@@ -39,8 +42,12 @@ const (
3942
// (used for orphaned pods)
4043
// Setting this label will trigger setting upgrade state to upgrade-required
4144
UpgradeRequestedAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requested"
42-
// UpgradeRequestorModeAnnotationKeyFmt
45+
// UpgradeRequestorModeAnnotationKeyFmt is the format of the node annotation indicating requestor driver upgrade
46+
// mode is used for underlying node
4347
UpgradeRequestorModeAnnotationKeyFmt = "nvidia.com/%s-driver-upgrade-requestor-mode"
48+
// UpgradeRequestorLabelKeyFmt is the format of the nodeMaintenance label marked by requestor after modifying
49+
// the nodeMaintenance object
50+
UpgradeRequestorLabelKeyFmt = "nvidia.com/%s-requestor"
4451
// UpgradeStateUnknown Node has this state when the upgrade flow is disabled or the node hasn't been processed yet
4552
UpgradeStateUnknown = ""
4653
// UpgradeStateUpgradeRequired is set when the driver pod on the node is not up-to-date and required upgrade

pkg/upgrade/upgrade_inplace.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ func (m *InplaceNodeStateManagerImpl) ProcessUncordonRequiredNodes(
130130
if nodeState.NodeMaintenance != nil {
131131
continue
132132
}
133+
// check if if node upgrade is handled by requestor mode, if so node uncordon will be performed by requestor flow
134+
if _, exists := nodeState.Node.Annotations[GetUpgradeRequestorModeAnnotationKey()]; exists {
135+
continue
136+
}
133137
err := m.CordonManager.Uncordon(ctx, nodeState.Node)
134138
if err != nil {
135139
m.Log.V(consts.LogLevelWarning).Error(

0 commit comments

Comments
 (0)