Skip to content
This repository has been archived by the owner on Oct 22, 2024. It is now read-only.

Commit

Permalink
deploy: add liveness probes
Browse files Browse the repository at this point in the history
Liveness probes serve as a fallback mechanism for cases where the
container gets stuck but fails to terminate. We piggy-back onto the
metrics endpoint with HTTP probes because that endpoint is readily
available and, in the case of the PMEM-CSI node driver, already does
some work that mirrors what the driver needs to do during
operation (interact with underlying storage to retrieve capacity).

A startup probe is used because the node driver might start up more
slowly when setting up LVM. For the sake of simplicity, the same
startup probe is also added for direct mode.

The liveness probes could be useful, although in practice the node
driver should already terminate if it runs into fatal issues, so we
probably won't need to rely on the probe. To avoid potential false
positives (killing the container although it only had a temporary
outage), the failure threshold of the liveness probe is 5.

Readiness probes make less sense. The node driver does not do any load
balancing. It might make sense if we had more than one instance and
let the scheduler access only ready instances, but right now we only
have one instance.
  • Loading branch information
pohly committed Jun 14, 2021
1 parent 776137c commit f2e1671
Show file tree
Hide file tree
Showing 29 changed files with 1,197 additions and 0 deletions.
45 changes: 45 additions & 0 deletions deploy/kubernetes-1.19/direct/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,15 @@ spec:
fieldPath: metadata.namespace
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand Down Expand Up @@ -463,6 +472,15 @@ spec:
value: /tmp/termination-log
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand All @@ -474,6 +492,15 @@ spec:
securityContext:
privileged: true
runAsUser: 0
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
terminationMessagePath: /tmp/termination-log
volumeMounts:
- mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi
Expand Down Expand Up @@ -534,6 +561,15 @@ spec:
fieldPath: spec.nodeName
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: external-provisioner
ports:
- containerPort: 10011
Expand All @@ -544,6 +580,15 @@ spec:
memory: 128Mi
securityContext:
readOnlyRootFilesystem: true
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- mountPath: /csi
name: socket-dir
Expand Down
45 changes: 45 additions & 0 deletions deploy/kubernetes-1.19/direct/testing/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ spec:
fieldPath: metadata.namespace
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand Down Expand Up @@ -487,6 +496,15 @@ spec:
value: /tmp/termination-log
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand All @@ -498,6 +516,15 @@ spec:
securityContext:
privileged: true
runAsUser: 0
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
terminationMessagePath: /tmp/termination-log
volumeMounts:
- mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi
Expand Down Expand Up @@ -562,6 +589,15 @@ spec:
fieldPath: spec.nodeName
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: external-provisioner
ports:
- containerPort: 10011
Expand All @@ -572,6 +608,15 @@ spec:
memory: 128Mi
securityContext:
readOnlyRootFilesystem: true
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- mountPath: /csi
name: socket-dir
Expand Down
45 changes: 45 additions & 0 deletions deploy/kubernetes-1.19/lvm/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,15 @@ spec:
fieldPath: metadata.namespace
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand Down Expand Up @@ -463,6 +472,15 @@ spec:
value: /tmp/termination-log
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand All @@ -474,6 +492,15 @@ spec:
securityContext:
privileged: true
runAsUser: 0
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
terminationMessagePath: /tmp/termination-log
volumeMounts:
- mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi
Expand Down Expand Up @@ -534,6 +561,15 @@ spec:
fieldPath: spec.nodeName
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: external-provisioner
ports:
- containerPort: 10011
Expand All @@ -544,6 +580,15 @@ spec:
memory: 128Mi
securityContext:
readOnlyRootFilesystem: true
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- mountPath: /csi
name: socket-dir
Expand Down
45 changes: 45 additions & 0 deletions deploy/kubernetes-1.19/lvm/testing/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ spec:
fieldPath: metadata.namespace
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand Down Expand Up @@ -487,6 +496,15 @@ spec:
value: /tmp/termination-log
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand All @@ -498,6 +516,15 @@ spec:
securityContext:
privileged: true
runAsUser: 0
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
terminationMessagePath: /tmp/termination-log
volumeMounts:
- mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi
Expand Down Expand Up @@ -562,6 +589,15 @@ spec:
fieldPath: spec.nodeName
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: external-provisioner
ports:
- containerPort: 10011
Expand All @@ -572,6 +608,15 @@ spec:
memory: 128Mi
securityContext:
readOnlyRootFilesystem: true
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- mountPath: /csi
name: socket-dir
Expand Down
45 changes: 45 additions & 0 deletions deploy/kubernetes-1.19/pmem-csi-direct-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,15 @@ spec:
fieldPath: metadata.namespace
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand Down Expand Up @@ -487,6 +496,15 @@ spec:
value: /tmp/termination-log
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: pmem-driver
ports:
- containerPort: 10010
Expand All @@ -498,6 +516,15 @@ spec:
securityContext:
privileged: true
runAsUser: 0
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
terminationMessagePath: /tmp/termination-log
volumeMounts:
- mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi
Expand Down Expand Up @@ -562,6 +589,15 @@ spec:
fieldPath: spec.nodeName
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 5
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
name: external-provisioner
ports:
- containerPort: 10011
Expand All @@ -572,6 +608,15 @@ spec:
memory: 128Mi
securityContext:
readOnlyRootFilesystem: true
startupProbe:
failureThreshold: 30
httpGet:
path: /metrics
port: metrics
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
volumeMounts:
- mountPath: /csi
name: socket-dir
Expand Down
Loading

0 comments on commit f2e1671

Please sign in to comment.