From 612cf381a44a8ac9cd8e42c170b72bf476961f37 Mon Sep 17 00:00:00 2001
From: Patrick Ohly
Date: Fri, 2 Jul 2021 15:41:00 +0200
Subject: [PATCH] test: stop depending on host port for sanity testing

We can connect to the socat port via port forwarding. The advantage is
that the e2e.test binary can run on a host that has no IP routing to
the cluster nodes; it only needs access to the API server. A secondary
benefit is that we avoid potential port conflicts on the host.
---
 .../direct/testing/pmem-csi.yaml              |  4 +-
 .../kubernetes-1.19/lvm/testing/pmem-csi.yaml |  4 +-
 .../pmem-csi-direct-testing.yaml              |  4 +-
 .../kubernetes-1.19/pmem-csi-lvm-testing.yaml |  4 +-
 .../direct/testing/pmem-csi.yaml              |  4 +-
 .../kubernetes-1.20/lvm/testing/pmem-csi.yaml |  4 +-
 .../pmem-csi-direct-testing.yaml              |  4 +-
 .../kubernetes-1.20/pmem-csi-lvm-testing.yaml |  4 +-
 .../direct/testing/pmem-csi.yaml              |  4 +-
 .../kubernetes-1.21/lvm/testing/pmem-csi.yaml |  4 +-
 .../pmem-csi-direct-testing.yaml              |  4 +-
 .../kubernetes-1.21/pmem-csi-lvm-testing.yaml |  4 +-
 .../scheduler/openshift-configmap.yaml        | 23 ++++++++
 deploy/kustomize/testing/socat.yaml           |  6 ++-
 test/e2e/deploy/cluster.go                    |  4 ++
 test/e2e/deploy/deploy.go                     | 50 ++++++++++++-----
 test/e2e/storage/sanity.go                    | 53 ++++++++++++++-----
 17 files changed, 144 insertions(+), 40 deletions(-)
 create mode 100644 deploy/kustomize/scheduler/openshift-configmap.yaml

diff --git a/deploy/kubernetes-1.19/direct/testing/pmem-csi.yaml b/deploy/kubernetes-1.19/direct/testing/pmem-csi.yaml
index 356821f71b..8ce9c96541 100644
--- a/deploy/kubernetes-1.19/direct/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.19/direct/testing/pmem-csi.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.19/lvm/testing/pmem-csi.yaml b/deploy/kubernetes-1.19/lvm/testing/pmem-csi.yaml
index 50359719a3..2cf012632d 100644
--- a/deploy/kubernetes-1.19/lvm/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.19/lvm/testing/pmem-csi.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.19/pmem-csi-direct-testing.yaml b/deploy/kubernetes-1.19/pmem-csi-direct-testing.yaml
index 356821f71b..8ce9c96541 100644
--- a/deploy/kubernetes-1.19/pmem-csi-direct-testing.yaml
+++ b/deploy/kubernetes-1.19/pmem-csi-direct-testing.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.19/pmem-csi-lvm-testing.yaml b/deploy/kubernetes-1.19/pmem-csi-lvm-testing.yaml
index 50359719a3..2cf012632d 100644
--- a/deploy/kubernetes-1.19/pmem-csi-lvm-testing.yaml
+++ b/deploy/kubernetes-1.19/pmem-csi-lvm-testing.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.20/direct/testing/pmem-csi.yaml b/deploy/kubernetes-1.20/direct/testing/pmem-csi.yaml
index 356821f71b..8ce9c96541 100644
--- a/deploy/kubernetes-1.20/direct/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.20/direct/testing/pmem-csi.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.20/lvm/testing/pmem-csi.yaml b/deploy/kubernetes-1.20/lvm/testing/pmem-csi.yaml
index 50359719a3..2cf012632d 100644
--- a/deploy/kubernetes-1.20/lvm/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.20/lvm/testing/pmem-csi.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.20/pmem-csi-direct-testing.yaml b/deploy/kubernetes-1.20/pmem-csi-direct-testing.yaml
index 356821f71b..8ce9c96541 100644
--- a/deploy/kubernetes-1.20/pmem-csi-direct-testing.yaml
+++ b/deploy/kubernetes-1.20/pmem-csi-direct-testing.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.20/pmem-csi-lvm-testing.yaml b/deploy/kubernetes-1.20/pmem-csi-lvm-testing.yaml
index 50359719a3..2cf012632d 100644
--- a/deploy/kubernetes-1.20/pmem-csi-lvm-testing.yaml
+++ b/deploy/kubernetes-1.20/pmem-csi-lvm-testing.yaml
@@ -754,6 +754,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -765,7 +768,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.21/direct/testing/pmem-csi.yaml b/deploy/kubernetes-1.21/direct/testing/pmem-csi.yaml
index 1384b6633f..146b5ed4dc 100644
--- a/deploy/kubernetes-1.21/direct/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.21/direct/testing/pmem-csi.yaml
@@ -763,6 +763,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -774,7 +777,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.21/lvm/testing/pmem-csi.yaml b/deploy/kubernetes-1.21/lvm/testing/pmem-csi.yaml
index 2aecf3d926..87db2732c9 100644
--- a/deploy/kubernetes-1.21/lvm/testing/pmem-csi.yaml
+++ b/deploy/kubernetes-1.21/lvm/testing/pmem-csi.yaml
@@ -763,6 +763,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -774,7 +777,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.21/pmem-csi-direct-testing.yaml b/deploy/kubernetes-1.21/pmem-csi-direct-testing.yaml
index 1384b6633f..146b5ed4dc 100644
--- a/deploy/kubernetes-1.21/pmem-csi-direct-testing.yaml
+++ b/deploy/kubernetes-1.21/pmem-csi-direct-testing.yaml
@@ -763,6 +763,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -774,7 +777,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kubernetes-1.21/pmem-csi-lvm-testing.yaml b/deploy/kubernetes-1.21/pmem-csi-lvm-testing.yaml
index 2aecf3d926..87db2732c9 100644
--- a/deploy/kubernetes-1.21/pmem-csi-lvm-testing.yaml
+++ b/deploy/kubernetes-1.21/pmem-csi-lvm-testing.yaml
@@ -763,6 +763,9 @@ spec:
         - unix-connect:/csi/csi.sock
         image: alpine/socat:1.0.3
         name: socat
+        ports:
+        - containerPort: 9735
+          name: csi-socket
         securityContext:
           privileged: true
         volumeMounts:
@@ -774,7 +777,6 @@ spec:
         - mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv
           mountPropagation: Bidirectional
           name: staging-dir
-      hostNetwork: true
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/deploy/kustomize/scheduler/openshift-configmap.yaml b/deploy/kustomize/scheduler/openshift-configmap.yaml
new file mode 100644
index 0000000000..620efee914
--- /dev/null
+++ b/deploy/kustomize/scheduler/openshift-configmap.yaml
@@ -0,0 +1,23 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: scheduler-policy
+  namespace: openshift-config
+data:
+  policy.cfg: |
+    {
+        "kind" : "Policy",
+        "apiVersion" : "v1",
+        "extenders" : [
+            {"urlPrefix": "https://127.0.0.1:30674",
+             "filterVerb": "filter",
+             "prioritizeVerb": "prioritize",
+             "nodeCacheCapable": true,
+             "weight": 1,
+             "managedResources": [ {
+                 "name": "pmem-csi.intel.com/scheduler",
+                 "ignoredByScheduler": true
+             } ]
+            }
+        ]
+    }
diff --git a/deploy/kustomize/testing/socat.yaml b/deploy/kustomize/testing/socat.yaml
index c9c01a5e87..e86e3ea3c8 100644
--- a/deploy/kustomize/testing/socat.yaml
+++ b/deploy/kustomize/testing/socat.yaml
@@ -22,13 +22,12 @@ spec:
         app.kubernetes.io/instance: pmem-csi.intel.com
         pmem-csi.intel.com/webhook: ignore
     spec:
-      hostNetwork: true
       containers:
       - name: socat
         image: alpine/socat:1.0.3
         args:
         - -s
-        - tcp-listen:9735,fork,reuseaddr # port 9735 *on the host*
+        - tcp-listen:9735,fork,reuseaddr
         - unix-connect:/csi/csi.sock
         securityContext:
           privileged: true
@@ -42,6 +41,9 @@ spec:
         - name: staging-dir
           mountPath: /var/lib/kubelet/plugins/kubernetes.io/csi/pv # preliminary (?), https://github.com/kubernetes-csi/docs/issues/130
           mountPropagation: Bidirectional
+        ports:
+        - name: csi-socket
+          containerPort: 9735
       volumes:
       - hostPath:
           path: /var/lib/kubelet/plugins/pmem-csi.intel.com
diff --git a/test/e2e/deploy/cluster.go b/test/e2e/deploy/cluster.go
index 6b648d0019..a4714068b7 100644
--- a/test/e2e/deploy/cluster.go
+++ b/test/e2e/deploy/cluster.go
@@ -71,6 +71,10 @@ func (c *Cluster) ClientSet() kubernetes.Interface {
 	return c.cs
 }
 
+func (c *Cluster) Config() *rest.Config {
+	return c.cfg
+}
+
 // NumNodes returns the total number of nodes in the cluster.
 // Node #0 is the master node, the rest are workers.
 func (c *Cluster) NumNodes() int {
diff --git a/test/e2e/deploy/deploy.go b/test/e2e/deploy/deploy.go
index 82d81511ef..0891b0334a 100644
--- a/test/e2e/deploy/deploy.go
+++ b/test/e2e/deploy/deploy.go
@@ -11,7 +11,6 @@ import (
 	"context"
 	"fmt"
 	"io/ioutil"
-	"net"
 	"net/http"
 	"os"
 	"os/exec"
@@ -21,6 +20,7 @@ import (
 	"strings"
 	"time"
 
+	"github.com/go-logr/logr"
 	cm "github.com/prometheus/client_model/go"
 	dto "github.com/prometheus/client_model/go"
 	"github.com/prometheus/common/expfmt"
@@ -172,7 +172,7 @@ func WaitForPMEMDriver(c *Cluster, d *Deployment) (metricsURL string) {
 	check := func() error {
 		// Do not linger too long here, we rather want to
 		// abort and print the error instead of getting stuck.
-		const timeout = time.Second
+		const timeout = 10 * time.Second
 		deadline, cancel := context.WithTimeout(deadline, timeout)
 		defer cancel()
 
@@ -329,16 +329,21 @@
 	}
 
 	// For testing deployments, also ensure that the CSI endpoints can be reached.
-	nodeAddress, controllerAddress, err := LookupCSIAddresses(c, d.Namespace)
+	nodeAddress, controllerAddress, err := LookupCSIAddresses(deadline, c, d.Namespace)
 	if err != nil {
 		return fmt.Errorf("look up CSI addresses: %v", err)
 	}
 	tryConnect := func(address string) error {
-		prefix := "dns:///" // triple slash is used by gRPC, which makes the address unparsable with net/url
-		if !strings.HasPrefix(address, prefix) {
-			return fmt.Errorf("unexpected non-DNS URL: %s", address)
+		addr, err := pod.ParseAddr(address)
+		if err != nil {
+			return err
 		}
-		conn, err := net.Dial("tcp", address[len(prefix):])
+		dialer := pod.NewDialer(c.ClientSet(), c.Config())
+		// Here we discard error messages because those are expected while
+		// the driver starts up. Once we update to logr 1.0.0, we could redirect
+		// the output into a string buffer via the sink mechanism and include
+		// it in the error message.
+		conn, err := dialer.DialContainerPort(deadline, logr.Discard(), *addr)
 		if err != nil {
 			return fmt.Errorf("dial %s: %v", address, err)
 		}
@@ -1277,18 +1282,37 @@ func (d Deployment) DeleteAllPods(c *Cluster) error {
 	return nil
 }
 
-// LookupCSIAddresses returns controller and node addresses for gRPC dial.
+// LookupCSIAddresses returns controller and node addresses for pod/dial.go (<namespace>.<pod>:<port>).
 // Only works for testing deployments.
-func LookupCSIAddresses(c *Cluster, namespace string) (nodeAddress, controllerAddress string, err error) {
+func LookupCSIAddresses(ctx context.Context, c *Cluster, namespace string) (nodeAddress, controllerAddress string, err error) {
 	// Node #1 is expected to have a PMEM-CSI node driver
 	// instance. If it doesn't, connecting to the PMEM-CSI
-	// node service will fail.
-	nodeAddress = c.NodeServiceAddress(1, SocatPort)
+	// node service will fail. If we only have one node,
+	// then we use that one.
+	node := 1
+	if node >= c.NumNodes() {
+		node = 0
+	}
+	ip := c.NodeIP(node)
+	pod, err := c.GetAppInstance(ctx, labels.Set{"app.kubernetes.io/component": "node-testing"}, ip, namespace)
+	if err != nil {
+		return "", "", fmt.Errorf("find socat pod on node #%d = %s: %v", node, ip, err)
+	}
 
-	// Also use that same node as controller.
-	controllerAddress = nodeAddress
+	for _, port := range pod.Spec.Containers[0].Ports {
+		if port.Name == "csi-socket" {
+			nodeAddress = fmt.Sprintf("%s.%s:%d", namespace, pod.Name, port.ContainerPort)
+			// Also use that same node as controller.
+			controllerAddress = nodeAddress
+			return
+		}
+	}
+	// Fallback for PMEM-CSI 0.9. Can be removed once we stop testing against it.
+	nodeAddress = fmt.Sprintf("%s.%s:9735", namespace, pod.Name)
+	controllerAddress = nodeAddress
 
 	return
+	// return "", "", fmt.Errorf("container port 'csi-socket' not found in pod %+v", pod)
 }
 
 // DescribeForAll registers tests like gomega.Describe does, except that
diff --git a/test/e2e/storage/sanity.go b/test/e2e/storage/sanity.go
index f2f1f28b51..31ee33479d 100644
--- a/test/e2e/storage/sanity.go
+++ b/test/e2e/storage/sanity.go
@@ -44,8 +44,11 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/util/clock"
+	"k8s.io/client-go/kubernetes"
 	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
 	clientexec "k8s.io/client-go/util/exec"
+	"k8s.io/klog/v2/klogr"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	"k8s.io/kubernetes/test/e2e/framework/skipper"
@@ -54,6 +57,7 @@ import (
 	pmemlog "github.com/intel/pmem-csi/pkg/logger"
 	"github.com/intel/pmem-csi/pkg/pmem-csi-driver/parameters"
 	"github.com/intel/pmem-csi/test/e2e/deploy"
+	"github.com/intel/pmem-csi/test/e2e/pod"
 	pmeme2epod "github.com/intel/pmem-csi/test/e2e/pod"
 
 	. "github.com/onsi/ginkgo"
@@ -74,6 +78,35 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
 	// This is not the case when deployed in production mode.
 	return d.Testing
 }, func(d *deploy.Deployment) {
+	// This must be set before the grpcDialer gets used for the first time.
+	var cfg *rest.Config
+	var cs kubernetes.Interface
+	grpcDialer := func(ctx context.Context, address string) (net.Conn, error) {
+		addr, err := pod.ParseAddr(address)
+		if err != nil {
+			return nil, err
+		}
+		cs, err := kubernetes.NewForConfig(cfg)
+		if err != nil {
+			return nil, err
+		}
+		dialer := pod.NewDialer(cs, cfg)
+		return dialer.DialContainerPort(ctx, klogr.New().WithName("gRPC socat"), *addr)
+	}
+	dialOptions := []grpc.DialOption{
+		// For our restart tests.
+		grpc.WithKeepaliveParams(keepalive.ClientParameters{
+			PermitWithoutStream: true,
+			// This is the minimum. Specifying it explicitly
+			// avoids some log output from gRPC.
+			Time: 10 * time.Second,
+		}),
+		// For plain HTTP.
+		grpc.WithInsecure(),
+		// Connect to socat pods through port-forwarding.
+		grpc.WithContextDialer(grpcDialer),
+	}
+
 	config := sanity.NewTestConfig()
 	// The size has to be large enough that even after rounding up to
 	// the next alignment boundary, the final volume size is still about
@@ -90,17 +123,8 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
 	// and deletes all extra entries that it does not know about.
 	config.TargetPath = "/var/lib/kubelet/plugins/kubernetes.io/csi/pv/pmem-sanity-target.XXXXXX"
 	config.StagingPath = "/var/lib/kubelet/plugins/kubernetes.io/csi/pv/pmem-sanity-staging.XXXXXX"
-	config.ControllerDialOptions = []grpc.DialOption{
-		// For our restart tests.
-		grpc.WithKeepaliveParams(keepalive.ClientParameters{
-			PermitWithoutStream: true,
-			// This is the minimum. Specifying it explicitly
-			// avoids some log output from gRPC.
-			Time: 10 * time.Second,
-		}),
-		// For plain HTTP.
-		grpc.WithInsecure(),
-	}
+	config.DialOptions = dialOptions
+	config.ControllerDialOptions = dialOptions
 
 	f := framework.NewDefaultFramework("pmem")
 	f.SkipNamespaceCreation = true // We don't need a per-test namespace and skipping it makes the tests run faster.
@@ -112,13 +136,16 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
 	const testNode = 1
 
 	BeforeEach(func() {
-		cs := f.ClientSet
+		// Store them for grpcDialer above. We cannot let it reference f itself because
+		// f.ClientSet gets unset at some point.
+		cs = f.ClientSet
+		cfg = f.ClientConfig()
 		var err error
 		cluster, err = deploy.NewCluster(cs, f.DynamicClient, f.ClientConfig())
 		framework.ExpectNoError(err, "query cluster")
 
-		config.Address, config.ControllerAddress, err = deploy.LookupCSIAddresses(cluster, d.Namespace)
+		config.Address, config.ControllerAddress, err = deploy.LookupCSIAddresses(context.Background(), cluster, d.Namespace)
		framework.ExpectNoError(err, "find CSI addresses")
		framework.Logf("sanity: using controller %s and node %s", config.ControllerAddress, config.Address)
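
For reference, and not part of the commit itself: a minimal sketch of how the new test-only
address lookup fits together outside of the diff context. It assumes that the Cluster helper
in test/e2e/deploy behaves as it is used above (GetAppInstance, NodeIP); lookupSocatAddress
is an illustrative name, not an existing function.

// Sketch only: mirrors what LookupCSIAddresses does after this patch. It finds
// the testing socat pod on one node and builds the "<namespace>.<pod>:<port>"
// address that test/e2e/pod/dial.go understands.
package example

import (
	"context"
	"fmt"

	"k8s.io/apimachinery/pkg/labels"

	"github.com/intel/pmem-csi/test/e2e/deploy"
)

// lookupSocatAddress is hypothetical; the label selector, the "csi-socket"
// port name and the address format are taken from the patch above.
func lookupSocatAddress(ctx context.Context, c *deploy.Cluster, namespace, nodeIP string) (string, error) {
	pod, err := c.GetAppInstance(ctx, labels.Set{"app.kubernetes.io/component": "node-testing"}, nodeIP, namespace)
	if err != nil {
		return "", fmt.Errorf("find socat pod on node %s: %v", nodeIP, err)
	}
	for _, port := range pod.Spec.Containers[0].Ports {
		if port.Name == "csi-socket" {
			// Newer deployments declare the named port, so the number is not hard-coded.
			return fmt.Sprintf("%s.%s:%d", namespace, pod.Name, port.ContainerPort), nil
		}
	}
	// Older deployments (PMEM-CSI 0.9) do not declare the port; fall back to the fixed socat port.
	return fmt.Sprintf("%s.%s:9735", namespace, pod.Name), nil
}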
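
The client side then hands such an address to gRPC together with a context dialer that tunnels
the connection through the API server, which is what the sanity test above does. The sketch
below assumes that the test/e2e/pod helpers (ParseAddr, NewDialer, DialContainerPort) behave as
they are used in this patch; dialThroughAPIServer is an illustrative name.

// Sketch only: dial a "<namespace>.<pod>:<port>" address via port forwarding
// instead of a host port, then let gRPC use that connection.
package example

import (
	"context"
	"net"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/keepalive"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/klog/v2/klogr"

	"github.com/intel/pmem-csi/test/e2e/pod"
)

func dialThroughAPIServer(ctx context.Context, cs kubernetes.Interface, cfg *rest.Config, address string) (*grpc.ClientConn, error) {
	dialer := func(ctx context.Context, address string) (net.Conn, error) {
		addr, err := pod.ParseAddr(address)
		if err != nil {
			return nil, err
		}
		// Port forwarding only needs access to the API server,
		// no IP routing to the cluster nodes.
		return pod.NewDialer(cs, cfg).DialContainerPort(ctx, klogr.New().WithName("gRPC socat"), *addr)
	}
	return grpc.DialContext(ctx, address,
		// Route all connections through the port-forwarding dialer.
		grpc.WithContextDialer(dialer),
		// Plain TCP to the socat side-car, no TLS.
		grpc.WithInsecure(),
		// Same keep-alive settings as in the patch, for the restart tests.
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			PermitWithoutStream: true,
			Time:                10 * time.Second,
		}),
	)
}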