From af1468c421fdedfe6c62e731eaac7fd4df720fc7 Mon Sep 17 00:00:00 2001 From: Dusty Mabe Date: Fri, 25 Oct 2024 12:13:58 -0400 Subject: [PATCH] Revert "tests/ingition/kdump: add a remote NFS kdump test" This reverts commit b10d8dcfe730d2e7c8a5366bfcddc7f3081be203. The test passes on F40 but not on F41+ [1] and also it is failing on RHCOS so let's just yank it for now and re-apply when it's confirmed to be passing everywhere. [1] https://github.com/coreos/fedora-coreos-tracker/issues/1820 --- mantle/kola/tests/ignition/kdump.go | 184 ++++------------------------ 1 file changed, 25 insertions(+), 159 deletions(-) diff --git a/mantle/kola/tests/ignition/kdump.go b/mantle/kola/tests/ignition/kdump.go index 3e83ab0a1c..e7266b2aa6 100644 --- a/mantle/kola/tests/ignition/kdump.go +++ b/mantle/kola/tests/ignition/kdump.go @@ -28,63 +28,6 @@ func init() { Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag}, Platforms: []string{"qemu"}, }) - register.RegisterTest(&register.Test{ - Run: kdumpNFSTest, - ClusterSize: 0, - Name: `kdump.crash.nfs`, - Description: "Verifies kdump logs are exported to NFS destination", - Tags: []string{"kdump", kola.SkipBaseChecksTag, kola.NeedsInternetTag}, - Platforms: []string{"qemu"}, - }) -} - -// This function test the remote kdump feature by: -// - making sure kdump is ready -// - crashing machine -// - monitoring the expected vmcore path -func testRemoteKdump(c cluster.TestCluster, kdump_machine platform.Machine, remote_machine platform.Machine, crash_path string) { - - // Wait for kdump to become active - // 3 minutes should be enough to generate the kdump initramfs - err := util.Retry(12, 15*time.Second, func() error { - - kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service") - - if err != nil { - return err - } else if string(kdump_status) == "inactive" { - return fmt.Errorf("Kdump.service is not ready: %s.", string(kdump_status)) - } - return nil - }) - if err != nil { - c.Fatalf("Timed out while 
waiting for kdump.service to be ready: %v", err) - } - - // crash the kernel - // use systemd-run because direclty calling `echo c > ...` will always - // throw an error as the kernel immediately hangs. - _, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'") - if err != nil { - c.Fatalf("failed to queue kernel crash: %v", err) - } - - // Wait for kdump to create vmcore dump on the remote host - err = util.Retry(5, 10*time.Second, func() error { - - // Look for the crash files created on the SSH machine - logs, err := c.SSH(remote_machine, fmt.Sprintf("find %s -type f -name vmcore*", crash_path)) - - if err != nil { - return fmt.Errorf("failed to search for vmcore: %w", err) - } else if logs == nil { - return fmt.Errorf("No vmcore created on remote host") - } - return nil - }) - if err != nil { - c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err) - } } // The destination VM for kdump logs @@ -237,122 +180,45 @@ kernel_arguments: c.Fatalf("Unable to create test machine: %v", err) } - testRemoteKdump(c, kdump_machine, ssh_host.Machine, "/home/core/crash") -} - -// The destination VM for kdump logs over NFS -type NfsServer struct { - Machine platform.Machine - MachineAddress string -} - -func setupNFSMachine(c cluster.TestCluster) NfsServer { - var m platform.Machine - var err error - - options := platform.QemuMachineOptions{ - HostForwardPorts: []platform.HostForwardPort{ - {Service: "ssh", HostPort: 0, GuestPort: 22}, - // Kdump NFS option does not allow a custom port - {Service: "nfs", HostPort: 2049, GuestPort: 2049}, - }, - } + // Wait for kdump to become active + // 3 minutes should be enough to generate the kdump initramfs + err = util.Retry(12, 15*time.Second, func() error { - nfs_server_butane := conf.Butane(`variant: fcos -version: 1.5.0 -storage: - files: - - path: /etc/containers/systemd/nfs.container - overwrite: true - contents: - inline: | - [Container] - 
Image=quay.io/openshifttest/nfs-server - Volume=/var/nfs:/mnt/data - PublishPort=2049:2049 - PodmanArgs=--privileged - [Install] - WantedBy=default.target - directories: - - path: /var/nfs/crash`) + kdump_status, err := c.SSH(kdump_machine, "systemctl is-active kdump.service") - // start the machine - switch c := c.Cluster.(type) { - // These cases have to be separated because when put together to the same case statement - // the golang compiler no longer checks that the individual types in the case have the - // NewMachineWithQemuOptions function, but rather whether platform.Cluster - // does which fails - case *qemu.Cluster: - m, err = c.NewMachineWithQemuOptions(nfs_server_butane, options) - default: - panic("unreachable") - } + if err != nil { + return err + } else if string(kdump_status) == "inactive" { + return fmt.Errorf(fmt.Sprintf("Kdump.service is not ready: %s.", string(kdump_status))) + } + return nil + }) if err != nil { - c.Fatal(err) - } - - return NfsServer{ - Machine: m, - MachineAddress: "10.0.2.2", - } -} - -func kdumpNFSTest(c cluster.TestCluster) { - nfs_host := setupNFSMachine(c) - - butane := conf.Butane(fmt.Sprintf(`variant: fcos -version: 1.5.0 -storage: - files: - - path: /etc/kdump.conf - overwrite: true - contents: - inline: | - nfs %s:/ - path /crash - core_collector makedumpfile -l --message-level 1 -d 31 - extra_bins /sbin/mount.nfs - extra_modules nfs nfsv3 nfs_layout_nfsv41_files blocklayoutdriver nfs_layout_flexfiles nfs_layout_nfsv41_files -systemd: - units: - - name: kdump.service - enabled: true - dropins: - - name: debug.conf - contents: | - [Service] - Environment="debug=1" -kernel_arguments: - should_exist: - - crashkernel=512M`, - nfs_host.MachineAddress)) - - opts := platform.MachineOptions{ - MinMemory: 2048, + c.Fatalf("Timed out while waiting for kdump.service to be ready: %v", err) } - kdump_machine, err := c.NewMachineWithOptions(butane, opts) + // crash the kernel + // use systemd-run because direclty calling `echo 
c...` will alaways + // throw an error as the kernel immediately hangs. + _, err = c.SSH(kdump_machine, "sudo systemd-run sh -c 'sleep 5 && echo c > /proc/sysrq-trigger'") if err != nil { - c.Fatalf("Unable to create test machine: %v", err) + c.Fatalf("failed to queue kernel crash: %v", err) } - // XXX Refactor this - // Wait for nfs server to become active - // 1 minutes should be enough to pull the container image - err = util.Retry(4, 15*time.Second, func() error { + // Wait for kdump to create vmcore dump on the remote host + err = util.Retry(5, 10*time.Second, func() error { - nfs_status, err := c.SSH(nfs_host.Machine, "systemctl is-active nfs.service") + // Look for the crash files created on the SSH machine + logs, err := c.SSH(ssh_host.Machine, "find /home/core/crash -type f -name vmcore*") if err != nil { - return err - } else if string(nfs_status) == "inactive" { - return fmt.Errorf("nfs.service is not ready: %s.", string(nfs_status)) + return fmt.Errorf("failed to search for vmcore: %w", err) + } else if logs == nil { + return fmt.Errorf("No vmcore created on remote SSH host") } return nil }) if err != nil { - c.Fatalf("Timed out while waiting for nfs.service to be ready: %v", err) + c.Fatalf("Timed out while waiting for kdump to create vmcore files: %v", err) } - - testRemoteKdump(c, kdump_machine, nfs_host.Machine, "/var/nfs/crash") }