Skip to content
This repository has been archived by the owner on Oct 22, 2024. It is now read-only.

Commit

Permalink
Merge pull request #604 from intel/full-test-fixes
Browse files Browse the repository at this point in the history
full test fixes
  • Loading branch information
avalluri authored Apr 22, 2020
2 parents 970ad6f + 3c061cd commit dfdd0a0
Show file tree
Hide file tree
Showing 10 changed files with 141 additions and 70 deletions.
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ replace (
k8s.io/sample-controller => k8s.io/sample-controller v0.18.1
)

// Temporary fork based on 1.18.1 with two additional PRs:
// Temporary fork based on 1.18.1 with additional PRs:
// - https://github.com/kubernetes/kubernetes/pull/89819
// - https://github.com/kubernetes/kubernetes/pull/90214
replace k8s.io/kubernetes => github.com/pohly/kubernetes v1.18.1-pmem-csi-20200416
// - https://github.com/kubernetes/kubernetes/pull/90335
replace k8s.io/kubernetes => github.com/pohly/kubernetes v1.18.1-pmem-csi-20200421-2
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@ github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pohly/kubernetes v1.18.1-pmem-csi-20200416 h1:hx31zWWCNLZCH5+mnBgwp18m46HxLXDOMvDvoDsk1kc=
github.com/pohly/kubernetes v1.18.1-pmem-csi-20200416/go.mod h1:z8xjOOO1Ljz+TaHpOxVGC7cxtF32TesIamoQ+BZrVS0=
github.com/pohly/kubernetes v1.18.1-pmem-csi-20200421-2 h1:AQ2rM97cDjKIrTMVFkFtMFbMzu174j3MnII/3b2IFno=
github.com/pohly/kubernetes v1.18.1-pmem-csi-20200421-2/go.mod h1:z8xjOOO1Ljz+TaHpOxVGC7cxtF32TesIamoQ+BZrVS0=
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
github.com/pquerna/ffjson v0.0.0-20180717144149-af8b230fcd20 h1:7sBb9iOkeq+O7AXlVoH/8zpIcRXX523zMkKKspHjjx8=
github.com/pquerna/ffjson v0.0.0-20180717144149-af8b230fcd20/go.mod h1:YARuvh7BUWHNhzDq2OM5tzR2RiCcN2D7sapiKyCel/M=
Expand Down
10 changes: 10 additions & 0 deletions test/e2e/deploy/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,10 +392,15 @@ func EnsureDeployment(deploymentName string) *Deployment {

f := framework.NewDefaultFramework("cluster")
f.SkipNamespaceCreation = true
var prevVol map[string][]string

ginkgo.BeforeEach(func() {
ginkgo.By(fmt.Sprintf("preparing for test %q", ginkgo.CurrentGinkgoTestDescription().FullTestText))
c, err := NewCluster(f.ClientSet)

// Remember list of volumes before test, using out-of-band host commands (i.e. not CSI API).
prevVol = GetHostVolumes(deployment)

framework.ExpectNoError(err, "get cluster information")
running, err := FindPMEMDriver(c)
framework.ExpectNoError(err, "check for PMEM-CSI driver")
Expand Down Expand Up @@ -428,6 +433,11 @@ func EnsureDeployment(deploymentName string) *Deployment {
}
})

ginkgo.AfterEach(func() {
// Check list of volumes after test to detect left-overs
CheckForLeftoverVolumes(deployment, prevVol)
})

return deployment
}

Expand Down
62 changes: 62 additions & 0 deletions test/e2e/deploy/volumeleaks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
Copyright 2020 Intel Corporation.
SPDX-License-Identifier: Apache-2.0
*/

package deploy

import (
"fmt"
"os"
"os/exec"
"strings"

pmemcsidriver "github.com/intel/pmem-csi/pkg/pmem-csi-driver"

. "github.com/onsi/gomega"
)

// GetHostVolumes collects the volumes currently present on the worker nodes,
// using out-of-band host commands (i.e. not the CSI API). The returned map is
// keyed by a label of the form "<kind> on Node <N>"; each value holds the
// lines of the trimmed standard output produced on worker N.
func GetHostVolumes(d *Deployment) map[string][]string {
	var listCmd, label string
	switch d.Mode {
	case pmemcsidriver.LVM:
		// lvs pads its output with many trailing space (0x20) characters.
		// They could be squeezed with tr here, but TrimSpace() below
		// strips them away anyway.
		label = "LVM Volumes"
		listCmd = "sudo lvs --foreign --noheadings"
	case pmemcsidriver.Direct:
		// ndctl produces a multi-line block. Only uuid, mode and size are
		// picked for comparison. Sorting changes the order, so lines are no
		// longer grouped by volume; keeping the volume order would need more
		// complex parsing, and the output is only meant for detecting a
		// change, not for being pretty-printed for humans.
		label = "Namespaces"
		listCmd = "sudo ndctl list |tr -d '\"' |egrep 'uuid|mode|^ *size' |sort |tr -d ' \n'"
	}

	volumes := map[string][]string{}
	// The number of hosts is not determined up front. Instead, the set of
	// ssh.N helper scripts is trusted to match the running hosts, which
	// should be the case in a correctly running tester system. Scripts are
	// invoked with increasing N until one turns out to be "no such file".
	for node := 1; ; node++ {
		script := fmt.Sprintf("%s/_work/%s/ssh.%d", os.Getenv("REPO_ROOT"), os.Getenv("CLUSTER"), node)
		// Output() instead of CombinedOutput() is intentional: it dismisses
		// warnings on stderr. lvs may emit an lvmetad-related WARNING
		// message which cannot be silenced with the -q option.
		stdout, err := exec.Command(script, listCmd).Output()
		if os.IsNotExist(err) {
			// IsNotExist is false for a nil error, so only a missing
			// script ends the enumeration; any other error still records
			// whatever output was captured.
			return volumes
		}
		lines := strings.Split(strings.TrimSpace(string(stdout)), "\n")
		volumes[fmt.Sprintf("%s on Node %d", label, node)] = lines
	}
}

// CheckForLeftoverVolumes lists the host volumes again after a test and
// asserts that they equal volBefore; any difference means the test left
// volumes behind (or removed some).
func CheckForLeftoverVolumes(d *Deployment, volBefore map[string][]string) {
	Expect(GetHostVolumes(d)).To(Equal(volBefore), "same volumes before and after the test")
}
8 changes: 0 additions & 8 deletions test/e2e/storage/csi_volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ var _ = deploy.DescribeForAll("E2E", func(d *deploy.Deployment) {
var (
storageClassLateBindingName = "pmem-csi-sc-late-binding" // from deploy/common/pmem-storageclass-late-binding.yaml
claim v1.PersistentVolumeClaim
prevVol map[string][]string
)
f := framework.NewDefaultFramework("latebinding")
BeforeEach(func() {
Expand All @@ -129,8 +128,6 @@ var _ = deploy.DescribeForAll("E2E", func(d *deploy.Deployment) {
skipper.Skipf("storage class %s not found, late binding not supported", storageClassLateBindingName)
}
framework.ExpectNoError(err, "get storage class %s", storageClassLateBindingName)
// Register list of volumes before test, using out-of-band host commands (i.e. not CSI API).
prevVol = GetHostVolumes(d)

claim = v1.PersistentVolumeClaim{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -151,11 +148,6 @@ var _ = deploy.DescribeForAll("E2E", func(d *deploy.Deployment) {
}
})

AfterEach(func() {
// Check list of volumes after test to detect left-overs
CheckForLeftoverVolumes(d, prevVol)
})

It("works", func() {
TestDynamicLateBindingProvisioning(f.ClientSet, &claim, "latebinding")
})
Expand Down
49 changes: 0 additions & 49 deletions test/e2e/storage/sanity.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,6 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
f.SkipNamespaceCreation = true // We don't need a per-test namespace and skipping it makes the tests run faster.
var execOnTestNode func(args ...string) string
var cleanup func()
var prevVol map[string][]string
var cluster *deploy.Cluster

const socatPort = 9735
Expand Down Expand Up @@ -181,13 +180,9 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
config.CreateStagingDir = mkdir
config.RemoveTargetPath = rmdir
config.RemoveStagingPath = rmdir
// Register list of volumes before test, using out-of-band host commands (i.e. not CSI API).
prevVol = GetHostVolumes(d)
})

AfterEach(func() {
// Check list of volumes after test to detect left-overs
CheckForLeftoverVolumes(d, prevVol)
if cleanup != nil {
cleanup()
}
Expand Down Expand Up @@ -1026,47 +1021,3 @@ func WaitForPodsWithLabelRunningReady(c clientset.Interface, ns string, label la
})
return pods, err
}

// GetHostVolumes registers the list of volumes present on the worker nodes,
// using out-of-band host commands (i.e. not CSI API). The result maps
// "<kind> on Node <N>" to the lines of trimmed stdout obtained on worker N.
func GetHostVolumes(d *deploy.Deployment) map[string][]string {
var cmd string
var hdr string
switch d.Mode {
case pmemcsidriver.LVM:
// lvs adds many space (0x20) chars at end, we could squeeze
// repetitions using tr here, but TrimSpace() below strips those away
cmd = "sudo lvs --foreign --noheadings"
hdr = "LVM Volumes"
case pmemcsidriver.Direct:
// ndctl produces multiline block. We want one line per namespace.
// Pick uuid, mode, size for comparison. Note that sorting changes the order so lines
// are not grouped by volume, but keeping volume order would need more complex parsing
// and this is not meant to be pretty-printed for human, just to detect the change.
cmd = "sudo ndctl list |tr -d '\"' |egrep 'uuid|mode|^ *size' |sort |tr -d ' \n'"
hdr = "Namespaces"
}
result := make(map[string][]string)
// Instead of trying to find out number of hosts, we trust the set of
// ssh.N helper scripts matches running hosts, which should be the case in
// correctly running tester system. We run ssh.N commands until a ssh.N
// script appears to be "no such file".
for worker := 1; ; worker++ {
sshcmd := fmt.Sprintf("%s/_work/%s/ssh.%d", os.Getenv("REPO_ROOT"), os.Getenv("CLUSTER"), worker)
ssh := exec.Command(sshcmd, cmd)
// Intentional Output instead of CombinedOutput to dismiss warnings from stderr.
// lvs may emit lvmetad-related WARNING msg which can't be silenced using -q option.
out, err := ssh.Output()
// Only a missing ssh.N script ends the loop; other errors are ignored
// and whatever output was captured is still recorded below.
if err != nil && os.IsNotExist(err) {
break
}
buf := fmt.Sprintf("%s on Node %d", hdr, worker)
result[buf] = strings.Split(strings.TrimSpace(string(out)), "\n")
}
return result
}

// CheckForLeftoverVolumes lists volumes again after a test and asserts that
// the result equals volBefore; a difference means leftovers.
func CheckForLeftoverVolumes(d *deploy.Deployment, volBefore map[string][]string) {
volNow := GetHostVolumes(d)
Expect(volNow).To(Equal(volBefore), "same volumes before and after the test")
}
6 changes: 2 additions & 4 deletions test/e2e/storage/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,9 @@ var _ testsuites.TestSuite = &schedulerTestSuite{}
// webhook work.
func InitSchedulerTestSuite() testsuites.TestSuite {
// We test with an ephemeral inline volume and a PVC with late
// binding. The webhook works reliably only for the inline
// volume. With PVCs there are race conditions (PVC created,
// but controller not informed yet when webhook is called), so
// we may have to wait until eventually it works.
// binding.
lateBinding := testpatterns.DefaultFsDynamicPV
lateBinding.Name = "Dynamic PV with late binding"
lateBinding.BindingMode = storagev1.VolumeBindingWaitForFirstConsumer

suite := &schedulerTestSuite{
Expand Down
30 changes: 25 additions & 5 deletions test/setup-deployment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ REPO_DIRECTORY="${REPO_DIRECTORY:-$(dirname $(dirname $(readlink -f $0)))}"
CLUSTER_DIRECTORY="${CLUSTER_DIRECTORY:-${REPO_DIRECTORY}/_work/${CLUSTER}}"
SSH="${CLUSTER_DIRECTORY}/ssh.0"
KUBECTL="${SSH} kubectl" # Always use the kubectl installed in the cluster.
KUBERNETES_VERSION="$(${KUBECTL} version --short | grep 'Server Version' | \
sed -e 's/.*: v\([0-9]*\)\.\([0-9]*\)\..*/\1.\2/')"
KUBERNETES_VERSION="$(cat "$CLUSTER_DIRECTORY/kubernetes.version")"
DEPLOYMENT_DIRECTORY="${REPO_DIRECTORY}/deploy/kubernetes-$KUBERNETES_VERSION"
case ${TEST_DEPLOYMENTMODE} in
testing)
Expand Down Expand Up @@ -73,7 +72,6 @@ data:
tls.key: ${NODE_KEY}
EOF

echo "$KUBERNETES_VERSION" > $CLUSTER_DIRECTORY/kubernetes.version
case "$KUBERNETES_VERSION" in
1.1[01234])
# We cannot exclude the PMEM-CSI pods from the webhook because objectSelector
Expand Down Expand Up @@ -113,12 +111,34 @@ patchesJson6902:
version: v1
kind: StatefulSet
name: pmem-csi-controller
path: scheduler-patch.yaml
path: controller-patch.yaml
EOF
${SSH} "cat >'$tmpdir/my-deployment/scheduler-patch.yaml'" <<EOF
${SSH} "cat >'$tmpdir/my-deployment/controller-patch.yaml'" <<EOF
- op: add
path: /spec/template/spec/containers/0/command/-
value: "--schedulerListen=:8000" # Exposed to kube-scheduler via the pmem-csi-scheduler service.
- op: add
path: /spec/template/spec/affinity
value:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
# Do *not* run controller on worker nodes with PMEM. This is
# a workaround for a particular issue on Clear Linux where network
# configuration randomly fails such that the driver which runs on the same
# node as the controller cannot connect to the controller
# (https://github.com/intel/pmem-csi/issues/555).
- key: storage
operator: NotIn
values:
- pmem
- op: add
path: /spec/template/spec/tolerations
value:
- key: "node-role.kubernetes.io/master"
operator: "Exists"
effect: "NoSchedule"
EOF
if [ "${TEST_DEVICEMODE}" = "lvm" ]; then
# Test these options and kustomization by injecting some non-default values.
Expand Down
7 changes: 7 additions & 0 deletions test/start-kubernetes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,13 @@ function init_kubernetes_cluster() (
done
waitall $pids || die "at least one worker failed to join the cluster"

# Determine actual Kubernetes version and record for other tools which need
# to know without being able to call kubectl.
ssh $SSH_ARGS ${CLOUD_USER}@${master_ip} kubectl version --short | \
grep 'Server Version' | \
sed -e 's/.*: v\([0-9]*\)\.\([0-9]*\)\..*/\1.\2/' \
>"${CLUSTER_DIRECTORY}/kubernetes.version"

kubernetes_usage
)

Expand Down
30 changes: 30 additions & 0 deletions test/test.make
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,39 @@ TEST_E2E_SKIP_ALL = $(TEST_E2E_SKIP)
# https://github.com/kubernetes/kubernetes/blob/25ffbe633810609743944edd42d164cd7990071c/test/e2e/storage/testsuites/provisioning.go#L175-L181
TEST_E2E_SKIP_ALL += should.access.volume.from.different.nodes

# This is a test for behavior of kubelet which Kubernetes <= 1.15 doesn't pass.
TEST_E2E_SKIP_1.14 += volumeMode.should.not.mount.*map.unused.volumes.in.a.pod
TEST_E2E_SKIP_1.15 += volumeMode.should.not.mount.*map.unused.volumes.in.a.pod

# It looks like Kubernetes <= 1.15 does not wait for
# NodeUnpublishVolume to complete before deleting the pod:
#
# Apr 21 17:33:12.743: INFO: Wait up to 5m0s for pod "dax-volume-test" to be fully deleted
# pmem-csi-node-4dsmr/[email protected]: I0421 17:33:34.491659 1 tracing.go:19] GRPC call: /csi.v1.Node/NodeGetCapabilities
# pmem-csi-node-4dsmr/[email protected]: I0421 17:33:45.549013 1 tracing.go:19] GRPC call: /csi.v1.Node/NodeUnpublishVolume
# pmem-csi-node-4dsmr/[email protected]: I0421 17:33:45.549189 1 nodeserver.go:295] NodeUnpublishVolume: unmount /var/lib/kubelet/pods/1c5f1fec-b08b-4264-8c55-40a22c1b3d16/volumes/kubernetes.io~csi/vol1/mount
# STEP: delete the pod
# Apr 21 17:33:46.769: INFO: Waiting for pod dax-volume-test to disappear
# Apr 21 17:33:46.775: INFO: Pod dax-volume-test no longer exists
#
# That breaks our volume leak detection because the test continues
# before the volume is truly removed. As a workaround, we disable
# ephemeral volume tests on Kubernetes <= 1.15. That's okay because the feature
# was alpha in those releases and shouldn't be used.
TEST_E2E_SKIP_1.14 += Testpattern:.Ephemeral-volume Testpattern:.inline.ephemeral.CSI.volume
TEST_E2E_SKIP_1.15 += Testpattern:.Ephemeral-volume Testpattern:.inline.ephemeral.CSI.volume

# Add all Kubernetes version-specific suppressions.
TEST_E2E_SKIP_ALL += $(TEST_E2E_SKIP_$(shell cat _work/$(CLUSTER)/kubernetes.version))

# E2E tests which are to be executed (space separated list of regular expressions, default is all that aren't skipped).
TEST_E2E_FOCUS =

# NOTE(review): looks like a leftover debugging target - confirm whether it should stay.
# It prints the name of the version-specific skip variable, that variable's
# value, and the combined TEST_E2E_SKIP_ALL list.
foobar:
echo TEST_E2E_SKIP_$(shell cat _work/$(CLUSTER)/kubernetes.version)
echo $(TEST_E2E_SKIP_$(shell cat _work/$(CLUSTER)/kubernetes.version))
echo $(TEST_E2E_SKIP_ALL)

# E2E Junit output directory (default empty = none). junit_<ginkgo node>.xml files will be written there,
# i.e. usually just junit_01.xml.
TEST_E2E_REPORT_DIR=
Expand Down

0 comments on commit dfdd0a0

Please sign in to comment.