Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a delay between killing teamd processes #3325

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
93 changes: 33 additions & 60 deletions cfgmgr/teammgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <signal.h>


Expand Down Expand Up @@ -172,17 +174,29 @@ void TeamMgr::cleanTeamProcesses()
SWSS_LOG_NOTICE("Cleaning up LAGs during shutdown...");

std::unordered_map<std::string, pid_t> aliasPidMap;
uint32_t sleepCounter = 0;

for (const auto& alias: m_lagList)
{
std::string res;
pid_t pid;
if (++sleepCounter % 10 == 0) {
// Sleep for 100 milliseconds so as to not overwhelm the netlink
// socket buffers with events about interfaces going down
std::this_thread::sleep_for(std::chrono::milliseconds(100));
sleepCounter = 0;
}

try
{
std::stringstream cmd;
cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid");
EXEC_WITH_ERROR_THROW(cmd.str(), res);
ifstream pidFile("/var/run/teamd/" + alias + ".pid");
if (pidFile.is_open()) {
pidFile >> pid;
aliasPidMap[alias] = pid;
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
} else {
SWSS_LOG_NOTICE("Unable to read pid file for %s, skipping...", alias.c_str());
continue;
}
}
catch (const std::exception &e)
{
Expand All @@ -191,31 +205,11 @@ void TeamMgr::cleanTeamProcesses()
continue;
}

try
{
pid = static_cast<pid_t>(std::stoul(res, nullptr, 10));
aliasPidMap[alias] = pid;

SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what());
continue;
}

try
{
std::stringstream cmd;
cmd << "kill -TERM " << pid;
EXEC_WITH_ERROR_THROW(cmd.str(), res);

SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what());
if (kill(pid, SIGTERM)) {
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno));
aliasPidMap.erase(alias);
} else {
SWSS_LOG_NOTICE("Sent SIGTERM to port channel %s pid %d", alias.c_str(), pid);
}
}

Expand All @@ -228,9 +222,7 @@ void TeamMgr::cleanTeamProcesses()
std::string res;

SWSS_LOG_NOTICE("Waiting for port channel %s pid %d to stop...", alias.c_str(), pid);

cmd << "tail -f --pid=" << pid << " /dev/null";
EXEC_WITH_ERROR_THROW(cmd.str(), res);
waitpid(pid, NULL, 0);
}

SWSS_LOG_NOTICE("LAGs cleanup is done");
Expand Down Expand Up @@ -662,38 +654,19 @@ bool TeamMgr::removeLag(const string &alias)
string res;
pid_t pid;

try
{
std::stringstream cmd;
cmd << "cat " << shellquote("/var/run/teamd/" + alias + ".pid");
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
{
SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str());
return false;
}

try
{
pid = static_cast<pid_t>(std::stoul(res, nullptr, 10));
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to read port channel %s pid: %s", alias.c_str(), e.what());
return false;
ifstream pidfile("/var/run/teamd/" + alias + ".pid");
if (pidfile.is_open()) {
pidfile >> pid;
SWSS_LOG_INFO("Read port channel %s pid %d", alias.c_str(), pid);
} else {
SWSS_LOG_NOTICE("Failed to remove non-existent port channel %s pid...", alias.c_str());
return false;
}
}

try
{
std::stringstream cmd;
cmd << "kill -TERM " << pid;
EXEC_WITH_ERROR_THROW(cmd.str(), res);
}
catch (const std::exception &e)
{
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, e.what());
if (kill(pid, SIGTERM)) {
SWSS_LOG_ERROR("Failed to send SIGTERM to port channel %s pid %d: %s", alias.c_str(), pid, strerror(errno));
return false;
}

Expand Down
4 changes: 2 additions & 2 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ tests_teammgrd_SOURCES = teammgrd/teammgr_ut.cpp \
tests_teammgrd_INCLUDES = $(tests_INCLUDES) -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/lib
tests_teammgrd_CFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI)
tests_teammgrd_CPPFLAGS = $(DBGFLAGS) $(AM_CFLAGS) $(CFLAGS_COMMON) $(CFLAGS_GTEST) $(CFLAGS_SAI) $(tests_teammgrd_INCLUDES)
tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -lnl-genl-3 -lhiredis -lhiredis \
-lswsscommon -lswsscommon -lgtest -lgtest_main -lzmq -lnl-3 -lnl-route-3 -lpthread -lgmock -lgmock_main
tests_teammgrd_LDADD = $(LDADD_GTEST) $(LDADD_SAI) -ldl -lhiredis \
-lswsscommon -lgtest -lgtest_main -lzmq -lpthread -lgmock -lgmock_main

## fpmsyncd unit tests

Expand Down
191 changes: 180 additions & 11 deletions tests/mock_tests/teammgrd/teammgr_ut.cpp
Original file line number Diff line number Diff line change
@@ -1,22 +1,110 @@
#include "gtest/gtest.h"
#include "../mock_table.h"
#include "teammgr.h"
#include <dlfcn.h>

extern int (*callback)(const std::string &cmd, std::string &stdout);
extern std::vector<std::string> mockCallArgs;
static std::vector< std::pair<pid_t, int> > mockKillCommands;
static std::map<std::string, std::FILE*> pidFiles;

static int (*callback_kill)(pid_t pid, int sig) = NULL;
static std::pair<bool, FILE*> (*callback_fopen)(const char *pathname, const char *mode) = NULL;

static int cb_kill(pid_t pid, int sig)
{
mockKillCommands.push_back(std::make_pair(pid, sig));
return 0;
}

int kill(pid_t pid, int sig)
{
if (callback_kill) {
return callback_kill(pid, sig);
}
int (*realfunc)(pid_t, int) =
(int(*)(pid_t, int))(dlsym (RTLD_NEXT, "kill"));
return realfunc(pid, sig);
}

static std::pair<bool, FILE*> cb_fopen(const char *pathname, const char *mode)
{
auto pidFileSearch = pidFiles.find(pathname);
if (pidFileSearch != pidFiles.end()) {
if (!pidFileSearch->second) {
errno = ENOENT;
}
return std::make_pair(true, pidFileSearch->second);
} else {
return std::make_pair(false, (FILE*)NULL);
}
}

FILE* fopen(const char *pathname, const char *mode)
{
if (callback_fopen) {
std::pair<bool, FILE*> callback_fd = callback_fopen(pathname, mode);
if (callback_fd.first) {
return callback_fd.second;
}
}
FILE* (*realfunc)(const char *, const char *) =
(FILE* (*)(const char *, const char *))(dlsym (RTLD_NEXT, "fopen"));
return realfunc(pathname, mode);
}

FILE* fopen64(const char *pathname, const char *mode)
{
if (callback_fopen) {
std::pair<bool, FILE*> callback_fd = callback_fopen(pathname, mode);
if (callback_fd.first) {
return callback_fd.second;
}
}
FILE* (*realfunc)(const char *, const char *) =
(FILE* (*)(const char *, const char *))(dlsym (RTLD_NEXT, "fopen64"));
return realfunc(pathname, mode);
}

int cb(const std::string &cmd, std::string &stdout)
{
mockCallArgs.push_back(cmd);
if (cmd.find("/usr/bin/teamd -r -t PortChannel1") != std::string::npos)
if (cmd.find("/usr/bin/teamd -r -t PortChannel382") != std::string::npos)
{
mkdir("/var/run/teamd", 0755);
std::FILE* pidFile = std::tmpfile();
std::fputs("1234", pidFile);
std::rewind(pidFile);
pidFiles["/var/run/teamd/PortChannel382.pid"] = pidFile;
return 1;
}
else if (cmd.find("cat \"/var/run/teamd/PortChannel1.pid\"") != std::string::npos)
else if (cmd.find("/usr/bin/teamd -r -t PortChannel812") != std::string::npos)
{
stdout = "1234";
pidFiles["/var/run/teamd/PortChannel812.pid"] = NULL;
return 1;
}
else if (cmd.find("/usr/bin/teamd -r -t PortChannel495") != std::string::npos)
{
mkdir("/var/run/teamd", 0755);
std::FILE* pidFile = std::tmpfile();
std::fputs("5678", pidFile);
std::rewind(pidFile);
pidFiles["/var/run/teamd/PortChannel495.pid"] = pidFile;
return 0;
}
else if (cmd.find("/usr/bin/teamd -r -t PortChannel198") != std::string::npos)
{
pidFiles["/var/run/teamd/PortChannel198.pid"] = NULL;
}
else {
for (int i = 600; i < 620; i++)
{
if (cmd.find(std::string("/usr/bin/teamd -r -t PortChannel") + std::to_string(i)) != std::string::npos)
{
pidFiles[std::string("/var/run/teamd/PortChannel") + std::to_string(i) + std::string(".pid")] = NULL;
}
}
}
return 0;
}

Expand Down Expand Up @@ -53,26 +141,107 @@ namespace teammgr_ut

cfg_lag_tables = tables;
mockCallArgs.clear();
mockKillCommands.clear();
pidFiles.clear();
callback = cb;
callback_kill = cb_kill;
callback_fopen = cb_fopen;
}

virtual void TearDown() override
{
callback = NULL;
callback_kill = NULL;
callback_fopen = NULL;
}
};

TEST_F(TeamMgrTest, testProcessKilledAfterAddLagFailure)
{
swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME);
cfg_lag_table.set("PortChannel1", { { "admin_status", "up" },
cfg_lag_table.set("PortChannel382", { { "admin_status", "up" },
{ "mtu", "9100" },
{ "lacp_key", "auto" },
{ "min_links", "2" } });
teammgr.addExistingData(&cfg_lag_table);
teammgr.doTask();
int kill_cmd_called = 0;
for (auto cmd : mockCallArgs){
if (cmd.find("kill -TERM 1234") != std::string::npos){
kill_cmd_called++;
}
ASSERT_NE(mockCallArgs.size(), 0);
EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel382"), std::string::npos);
EXPECT_EQ(mockCallArgs.size(), 1);
EXPECT_EQ(mockKillCommands.front().first, 1234);
EXPECT_EQ(mockKillCommands.size(), 1);
}

TEST_F(TeamMgrTest, testProcessPidFileMissingAfterAddLagFailure)
{
swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME);
cfg_lag_table.set("PortChannel812", { { "admin_status", "up" },
{ "mtu", "9100" },
{ "fallback", "true" },
{ "lacp_key", "auto" },
{ "min_links", "1" } });
teammgr.addExistingData(&cfg_lag_table);
teammgr.doTask();
ASSERT_NE(mockCallArgs.size(), 0);
EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel812"), std::string::npos);
EXPECT_EQ(mockCallArgs.size(), 1);
EXPECT_EQ(mockKillCommands.size(), 0);
}

TEST_F(TeamMgrTest, testProcessCleanupAfterAddLag)
{
swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME);
cfg_lag_table.set("PortChannel495", { { "admin_status", "up" },
{ "mtu", "9100" },
{ "lacp_key", "auto" },
{ "min_links", "2" } });
teammgr.addExistingData(&cfg_lag_table);
teammgr.doTask();
ASSERT_EQ(mockCallArgs.size(), 3);
ASSERT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel495"), std::string::npos);
teammgr.cleanTeamProcesses();
EXPECT_EQ(mockKillCommands.size(), 1);
EXPECT_EQ(mockKillCommands.front().first, 5678);
}

TEST_F(TeamMgrTest, testProcessPidFileMissingDuringCleanup)
{
swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME);
cfg_lag_table.set("PortChannel198", { { "admin_status", "up" },
{ "mtu", "9100" },
{ "fallback", "true" },
{ "lacp_key", "auto" },
{ "min_links", "1" } });
teammgr.addExistingData(&cfg_lag_table);
teammgr.doTask();
ASSERT_NE(mockCallArgs.size(), 0);
EXPECT_NE(mockCallArgs.front().find("/usr/bin/teamd -r -t PortChannel198"), std::string::npos);
EXPECT_EQ(mockCallArgs.size(), 3);
teammgr.cleanTeamProcesses();
EXPECT_EQ(mockKillCommands.size(), 0);
}

TEST_F(TeamMgrTest, testSleepDuringCleanup)
{
swss::TeamMgr teammgr(m_config_db.get(), m_app_db.get(), m_state_db.get(), cfg_lag_tables);
swss::Table cfg_lag_table = swss::Table(m_config_db.get(), CFG_LAG_TABLE_NAME);
for (int i = 600; i < 620; i++)
{
cfg_lag_table.set(std::string("PortChannel") + std::to_string(i), { { "admin_status", "up" },
{ "mtu", "9100" },
{ "lacp_key", "auto" } });
}
ASSERT_EQ(kill_cmd_called, 1);
teammgr.addExistingData(&cfg_lag_table);
teammgr.doTask();
ASSERT_EQ(mockCallArgs.size(), 60);
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
teammgr.cleanTeamProcesses();
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
EXPECT_EQ(mockKillCommands.size(), 0);
EXPECT_GE(std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count(), 200);
}
}
}
Loading