Add pod anti affinity rule by default
Right now, the Kubernetes scheduler can place replicas of the same
service on the same node. This is undesirable for high availability (HA).
It is also impossible for the user to specify any kind of affinity rules
for the pods.

This PR adds the ability to configure an affinity spec in the config. It
also adds a default anti-affinity rule so that the Kubernetes scheduler
does its best to schedule different replicas of a service onto different
nodes.
rgrandl committed Jun 17, 2024
1 parent b7b4fd5 commit 55c239b
Showing 2 changed files with 50 additions and 3 deletions.
6 changes: 6 additions & 0 deletions internal/impl/config.go
@@ -83,6 +83,12 @@ type kubeConfig struct {
     // [1] https://pkg.go.dev/k8s.io/kubernetes/pkg/apis/autoscaling#HorizontalPodAutoscalerSpec.
     ScalingSpec *autoscalingv2.HorizontalPodAutoscalerSpec

+    // Specs for pod affinity. Note that the affinity specs should satisfy
+    // the format specified in [1].
+    //
+    // [1] https://pkg.go.dev/k8s.io/api/core/v1#Affinity
+    AffinitySpec *corev1.Affinity
+
     // Volumes that should be provided to all the running components.
     StorageSpec volumeSpecs

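For illustration only, here is a hedged sketch of the kind of value the new AffinitySpec field can carry, written with the Kubernetes Go types from k8s.io/api/core/v1 (the zone label value is a made-up example, not part of this change):

    // A hypothetical user-supplied affinity: only schedule pods onto nodes
    // in a particular zone.
    affinity := &corev1.Affinity{
        NodeAffinity: &corev1.NodeAffinity{
            RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
                NodeSelectorTerms: []corev1.NodeSelectorTerm{{
                    MatchExpressions: []corev1.NodeSelectorRequirement{{
                        Key:      "topology.kubernetes.io/zone",
                        Operator: corev1.NodeSelectorOpIn,
                        Values:   []string{"us-central1-a"}, // made-up zone
                    }},
                }},
            },
        },
    }

A value like this would then be merged with the default anti-affinity rule by updateAffinitySpec, shown in kube.go below.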
47 changes: 44 additions & 3 deletions internal/impl/kube.go
@@ -96,6 +96,10 @@ func buildDeployment(d deployment, g group) (*appsv1.Deployment, error) {
         dnsPolicy = corev1.DNSClusterFirstWithHostNet
     }

+    matchLabels := map[string]string{
+        "serviceweaver/name": name,
+    }
+
     // Create container.
     container, err := buildContainer(d, g)
     if err != nil {
@@ -121,9 +125,7 @@ func buildDeployment(d deployment, g group) (*appsv1.Deployment, error) {
         },
         Spec: appsv1.DeploymentSpec{
             Selector: &metav1.LabelSelector{
-                MatchLabels: map[string]string{
-                    "serviceweaver/name": name,
-                },
+                MatchLabels: matchLabels,
             },
             Template: corev1.PodTemplateSpec{
                 ObjectMeta: metav1.ObjectMeta{
@@ -138,6 +140,7 @@ func buildDeployment(d deployment, g group) (*appsv1.Deployment, error) {
                 Containers:  []corev1.Container{container},
                 DNSPolicy:   dnsPolicy,
                 HostNetwork: d.config.UseHostNetwork,
+                Affinity:    updateAffinitySpec(d.config.AffinitySpec, matchLabels),
                 Volumes: []corev1.Volume{
                     {
                         Name: "config",
@@ -711,7 +714,45 @@ func newDeployment(app *protos.AppConfig, cfg *kubeConfig, depId, image string)
         app:    app,
         groups: sorted,
     }, nil
 }

+// updateAffinitySpec updates an affinity spec with a rule that instructs the
+// kubernetes scheduler to try its best to assign different replicas for the same
+// deployment to different nodes.
+//
+// Note that this rule isn't necessarily enforced, and the scheduler can ignore
+// it if there is no way this can be done (e.g., number of replicas is greater or
+// equal to the number of nodes).
+func updateAffinitySpec(spec *corev1.Affinity, labels map[string]string) *corev1.Affinity {
+    updated := &corev1.Affinity{PodAntiAffinity: &corev1.PodAntiAffinity{
+        PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{
+            {
+                Weight: 100,
+                PodAffinityTerm: corev1.PodAffinityTerm{
+                    LabelSelector: &metav1.LabelSelector{
+                        MatchLabels: labels,
+                    },
+                    TopologyKey: corev1.LabelHostname,
+                },
+            },
+        },
+    },
+    }
+    if spec == nil {
+        return updated
+    }
+    if spec.NodeAffinity != nil {
+        updated.NodeAffinity = spec.NodeAffinity
+    }
+    if spec.PodAffinity != nil {
+        updated.PodAffinity = spec.PodAffinity
+    }
+    if spec.PodAntiAffinity != nil {
+        updated.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution = spec.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
+        updated.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
+            updated.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution, spec.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution...)
+    }
+    return updated
+}

 // newListener returns a new listener.
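To make the merge semantics concrete, here is a hypothetical usage sketch (the "cache" and "echo" labels are invented for illustration): a user-supplied PodAntiAffinity is not discarded; its required terms are copied over and its preferred terms are appended after the default spreading rule.

    // Hypothetical user spec: never co-locate these pods with "cache" pods
    // on the same node.
    userSpec := &corev1.Affinity{
        PodAntiAffinity: &corev1.PodAntiAffinity{
            RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{
                LabelSelector: &metav1.LabelSelector{
                    MatchLabels: map[string]string{"app": "cache"},
                },
                TopologyKey: corev1.LabelHostname,
            }},
        },
    }

    merged := updateAffinitySpec(userSpec, map[string]string{"serviceweaver/name": "echo"})
    // merged now holds the user's required anti-affinity term plus the default
    // preferred term that spreads the "echo" replicas across nodes.
    _ = merged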
