-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for agent self-restarting: Development Phase #386
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,6 @@ target_link_libraries(Agent | |
MultiTypeQueue | ||
ModuleManager | ||
ModuleCommand | ||
CentralizedConfiguration | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. `CentralizedConfiguration` was listed twice in `target_link_libraries`, so I removed the duplicate entry. |
||
Boost::asio | ||
sysinfo | ||
PRIVATE | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,12 +5,13 @@ After=network.target network-online.target | |
|
||
[Service] | ||
Type=simple | ||
|
||
PIDFile=/var/run/wazuh-agent.lock | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Systemd cannot handle the lock file as a PID file. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, you are right. In a previous test, I used […] |
||
ExecStart=/usr/bin/env WAZUH_HOME/wazuh-agent | ||
# Wait for 30 seconds before killing the service | |
TimeoutStopSec=30s | |
|
||
KillSignal=SIGTERM | ||
|
||
KillMode=process | ||
KillMode=mixed | ||
|
||
SendSIGKILL=no | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,40 +1,201 @@ | ||
#include <process_options.hpp> | ||
#include <process_options_unix.hpp> | ||
|
||
#include <agent.hpp> | ||
#include <ctime> | ||
#include <fmt/format.h> | ||
#include <fmt/ranges.h> | ||
#include <fstream> | ||
#include <logger.hpp> | ||
#include <unix_daemon.hpp> | ||
|
||
#include <csignal> | ||
#include <iostream> | ||
#include <thread> | ||
#include <vector> | ||
|
||
void StartAgent(const std::string& configFilePath) | ||
#include <sys/wait.h> | ||
|
||
// Last signal recorded by the handlers (SIGUSR1 or SIGTERM); 0 until one is received | |
volatile sig_atomic_t SIGNAL_RECEIVED = 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As this symbol is not a constant, please write it in lowercase. |
||
|
||
// Signal handler for SIGUSR1 | ||
void sigusr1_handler(int signal) | ||
{ | ||
unix_daemon::LockFileHandler lockFileHandler = unix_daemon::GenerateLockFile(configFilePath); | ||
if (signal == SIGUSR1) | ||
{ | ||
LogDebug("Received SIGUSR1: Restarting child agent process..."); | ||
SIGNAL_RECEIVED = SIGUSR1; // Set flag to indicate restart needed | ||
} | ||
} | ||
|
||
if (!lockFileHandler.isLockFileCreated()) | ||
// Signal handler for SIGCHLD | ||
void sigchld_handler(int signal) | ||
{ | ||
if (signal == SIGCHLD) | ||
{ | ||
std::cout << "wazuh-agent already running\n"; | ||
return; | ||
LogDebug("Received SIGCHLD: Child agent process terminated."); | ||
} | ||
} | ||
|
||
// Signal handler for SIGTERM | ||
void sigterm_handler(int signal) | ||
{ | ||
if (signal == SIGTERM) | ||
{ | ||
LogDebug("Received SIGTERM: Stop the agent process..."); | ||
SIGNAL_RECEIVED = SIGTERM; // Set flag to indicate the agent must be stopped | |
} | ||
} | ||
|
||
LogInfo("Starting wazuh-agent"); | ||
void StartAgent(const std::string& configFilePath) | ||
{ | ||
// Set up signal handlers | ||
struct sigaction sa_usr1 = {}, sa_chld = {}, sa_term = {}; | ||
|
||
sa_usr1.sa_handler = sigusr1_handler; | ||
sa_usr1.sa_flags = 0; | ||
sigaction(SIGUSR1, &sa_usr1, nullptr); | ||
|
||
sa_chld.sa_handler = sigchld_handler; | ||
sa_chld.sa_flags = SA_NOCLDSTOP; // Avoid receiving SIGCHLD for stopped children | ||
sigaction(SIGCHLD, &sa_chld, nullptr); | ||
|
||
try | ||
// Set up SIGTERM handler | ||
sa_term.sa_handler = sigterm_handler; | ||
sa_term.sa_flags = 0; | ||
sigaction(SIGTERM, &sa_term, nullptr); | ||
|
||
pid_t pid = fork(); | ||
|
||
if (pid < 0) | ||
{ | ||
Agent agent(configFilePath); | ||
agent.Run(); | ||
LogError("Restart: Fork failed"); | ||
exit(1); | ||
} | ||
catch (const std::exception& e) | ||
else if (pid == 0) | ||
{ | ||
LogError("Exception thrown in wazuh-agent: {}", e.what()); | ||
unix_daemon::LockFileHandler lockFileHandler = unix_daemon::GenerateLockFile(configFilePath); | ||
|
||
// Child process: Run the agent | ||
if (!lockFileHandler.isLockFileCreated()) | ||
{ | ||
LogInfo("wazuh-agent already running"); | ||
return; | ||
} | ||
|
||
LogInfo("Starting wazuh-agent"); | ||
try | ||
{ | ||
Agent agent(configFilePath); | ||
agent.Run(); | ||
} | ||
catch (const std::exception& e) | ||
{ | ||
LogError("Exception thrown in wazuh-agent: {}", e.what()); | ||
} | ||
|
||
exit(0); | ||
} | ||
else | ||
{ | ||
// Parent process - Monitoring the agent, taking care of self-restart | ||
pause(); // Suspend parent until a signal is received | ||
|
||
// Stop Agent | ||
if (SIGNAL_RECEIVED == SIGTERM) | ||
{ | ||
LogDebug("Received SIGTERM, terminating child process..."); | ||
kill(pid, SIGTERM); | ||
waitpid(pid, nullptr, 0); // Wait for the child to terminate. | ||
} | ||
|
||
// Self-restart agent | ||
if (SIGNAL_RECEIVED == SIGUSR1) | ||
{ | ||
if (using_systemctl()) | ||
{ | ||
LogDebug("Restart: systemctl restarting wazuh agent service."); | ||
std::system("systemctl restart wazuh-agent"); | ||
} | ||
else | ||
{ | ||
StopAgent(pid, configFilePath); | ||
|
||
std::vector<const char*> args = get_command_line_args(); | ||
LogDebug("Restart: starting wazuh agent in a new process."); | ||
if (execve(args[0], const_cast<char* const*>(args.data()), nullptr) == -1) | ||
{ | ||
LogError("Failed to spawn new Wazuh agent process."); | ||
} | ||
} | ||
} | ||
exit(0); // Exit the parent process | ||
} | ||
} | ||
|
||
void StatusAgent(const std::string& configFilePath) | ||
{ | ||
std::cout << fmt::format("wazuh-agent status: {}\n", unix_daemon::GetDaemonStatus(configFilePath)); | ||
} | ||
|
||
std::vector<const char*> get_command_line_args() | ||
{ | ||
std::vector<const char*> args; | ||
std::ifstream cmdline_file("/proc/self/cmdline"); | ||
|
||
if (!cmdline_file) | ||
{ | ||
LogError("Failed to open /proc/self/cmdline"); | ||
return args; | ||
} | ||
|
||
std::string arg; | ||
while (getline(cmdline_file, arg, '\0')) | ||
{ | ||
args.push_back(strdup(arg.c_str())); | ||
} | ||
|
||
args.push_back(nullptr); | ||
|
||
return args; | ||
} | ||
|
||
bool using_systemctl() | ||
{ | ||
return (0 == std::system("which systemctl > /dev/null 2>&1") && nullptr != std::getenv("INVOCATION_ID")); | ||
} | ||
|
||
void StopAgent(pid_t pid, const std::string& configFilePath) | ||
{ | ||
int status {}; | ||
pid_t result {}; | ||
|
||
const int timeout = 30; // Timeout duration (in seconds) for killing the agent child process | ||
time_t start_time = time(nullptr); // Record the start time to track the timeout duration | ||
|
||
// Initiate the process termination by sending SIGTERM | ||
kill(pid, SIGTERM); | ||
|
||
while (true) | ||
{ | ||
result = waitpid(pid, &status, WNOHANG); // Non-blocking check for agent process status | ||
|
||
if (result == pid) | ||
{ | ||
LogDebug("Agent process terminated."); | ||
break; | ||
} | ||
|
||
if (difftime(time(nullptr), start_time) > timeout) | ||
{ | ||
LogError("Timeout reached! Forcing agent process termination."); | ||
unix_daemon::LockFileHandler lockFileHandler = unix_daemon::GenerateLockFile(configFilePath); | ||
kill(pid, SIGKILL); | ||
// Remove lock file | ||
lockFileHandler.~LockFileHandler(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There is a […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would probably be better to just make the method public. |
||
} | ||
|
||
// Sleep for a short time before checking again | ||
std::this_thread::sleep_for(std::chrono::seconds(1)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we replace `-SIGTERM` with `-15`? I think the former is more readable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I mentioned here, this change is intended to avoid the following error: