Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get suspends from logind to ignore watchdog then #29

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 84 additions & 7 deletions rtkit-daemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ static unsigned canary_watchdog_realtime_priority = 99;
/* How long after the canary died shall we refuse further RT requests? */
static unsigned canary_refusal_sec = 5*60;

/* How long after resume do we ignore canary clock jumps? */
static unsigned canary_resume_ignore_sec = 2;

/* Demote root processes? */
static bool canary_demote_root = FALSE;

Expand Down Expand Up @@ -195,6 +198,7 @@ static const char *proc = NULL;
static int quit_fd = -1, canary_fd = -1;
static pthread_t canary_thread_id = 0, watchdog_thread_id = 0;
static volatile uint32_t refuse_until = 0;
static volatile uint32_t ignore_deadlock_until = 0;

static const char *get_proc_path(void) {
/* Useful for chroot environments */
Expand Down Expand Up @@ -1256,6 +1260,40 @@ static int handle_dbus_prop_get(const char* property, DBusMessage *r) {
return 0;
}

/*
 * D-Bus connection filter that reacts to logind's PrepareForSleep signal.
 *
 * The signal carries one boolean: true when the system is about to sleep,
 * false when it has just resumed. On sleep, ignore_deadlock_until is set to
 * UINT32_MAX so that canary clock jumps are ignored indefinitely. On resume
 * (only if a sleep was recorded), it is replaced by a deadline of
 * CLOCK_MONOTONIC "now" plus canary_resume_ignore_sec.
 *
 * c and userdata are unused. Returns DBUS_HANDLER_RESULT_HANDLED for a
 * PrepareForSleep signal (even if its arguments could not be parsed) and
 * DBUS_HANDLER_RESULT_NOT_YET_HANDLED for every other message.
 */
static DBusHandlerResult dbus_sleep_handler(DBusConnection *c, DBusMessage *m, void *userdata) {
        DBusError error;
        /* libdbus stores DBUS_TYPE_BOOLEAN as a 32-bit dbus_bool_t; passing
         * the address of a C bool would let it write past the object. */
        dbus_bool_t is_sleep = FALSE; /* TRUE = going to sleep, FALSE = resuming */

        (void) c;
        (void) userdata;

        if (!dbus_message_is_signal(m, "org.freedesktop.login1.Manager", "PrepareForSleep"))
                return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;

        dbus_error_init(&error);

        if (!dbus_message_get_args(m, &error, DBUS_TYPE_BOOLEAN, &is_sleep, DBUS_TYPE_INVALID)) {
                /* Malformed signal: leave the current state untouched, but say so. */
                syslog(LOG_WARNING, "Failed to parse PrepareForSleep signal: %s\n",
                       error.message ? error.message : "unknown error");
                dbus_error_free(&error);
                return DBUS_HANDLER_RESULT_HANDLED;
        }

        if (is_sleep) {
                /* Ignore deadlocks indefinitely; will fixup on first deadlock or resume */
                ignore_deadlock_until = UINT32_MAX;
        } else if (ignore_deadlock_until == UINT32_MAX) {
                struct timespec now;

                assert_se(clock_gettime(CLOCK_MONOTONIC, &now) == 0);
                ignore_deadlock_until = (uint32_t) now.tv_sec + canary_resume_ignore_sec;
        }

        /* Full barrier so the updated deadline is visible to other threads. */
        __sync_synchronize();

        dbus_error_free(&error);
        return DBUS_HANDLER_RESULT_HANDLED;
}

static DBusHandlerResult dbus_handler(DBusConnection *c, DBusMessage *m, void *userdata) {
DBusError error;
DBusMessage *r = NULL;
Expand Down Expand Up @@ -1477,6 +1515,17 @@ static int setup_dbus(DBusConnection **c) {

assert_se(dbus_connection_register_object_path(*c, RTKIT_OBJECT_PATH, &vtable, NULL));

/* Only listen for suspend/resume when a post-resume grace period is configured. */
if (canary_resume_ignore_sec) {
        /* The interface here must be logind's "org.freedesktop.login1.Manager",
         * the same name dbus_sleep_handler() tests for; otherwise the bus never
         * routes PrepareForSleep to this connection. */
        dbus_bus_add_match(*c, "type='signal',interface='org.freedesktop.login1.Manager',member='PrepareForSleep'", &error);
        if (dbus_error_is_set(&error)) {
                syslog(LOG_ERR, "Failed to add signal on bus: %s\n", error.message);
                goto fail;
        }
        assert_se(dbus_connection_add_filter(*c, dbus_sleep_handler, NULL, NULL));
}

return 0;

fail:
Expand Down Expand Up @@ -1621,14 +1670,26 @@ static void* watchdog_thread(void *data) {

if (TIMESPEC_MSEC(last_cheep) + canary_watchdog_msec <= TIMESPEC_MSEC(now)) {
last_cheep = now;
syslog(LOG_WARNING, "The canary thread is apparently starving. Taking action.\n");
refuse_until = (uint32_t) now.tv_sec + canary_refusal_sec;
__sync_synchronize();

if (canary_demote_unknown)
reset_all();
if (now.tv_sec < (time_t) ignore_deadlock_until)
{
// On sleep-resume, we get a spurious long time interval from when the system was sleeping.
// This only can happen once per resume and real deadlocks that happen here will be detected
// at the next watchdog interval.
syslog(LOG_INFO, "The canary thread is apparently starving; ignoring once due to system resume.\n");
ignore_deadlock_until = 0;
__sync_synchronize();
}
else
reset_known();
{
syslog(LOG_WARNING, "The canary thread is apparently starving. Taking action.\n");
refuse_until = (uint32_t) now.tv_sec + canary_refusal_sec;
__sync_synchronize();

if (canary_demote_unknown)
reset_all();
else
reset_known();
}
continue;
}
}
Expand Down Expand Up @@ -1875,6 +1936,7 @@ enum {
ARG_CANARY_DEMOTE_ROOT,
ARG_CANARY_DEMOTE_UNKNOWN,
ARG_CANARY_REFUSE_SEC,
ARG_CANARY_RESUME_IGNORE_SEC,
ARG_STDERR,
ARG_INTROSPECT
};
Expand Down Expand Up @@ -1904,6 +1966,7 @@ static const struct option long_options[] = {
{ "canary-demote-root", no_argument, 0, ARG_CANARY_DEMOTE_ROOT },
{ "canary-demote-unknown", no_argument, 0, ARG_CANARY_DEMOTE_UNKNOWN },
{ "canary-refuse-sec", required_argument, 0, ARG_CANARY_REFUSE_SEC },
{ "canary-resume-ignore-sec", required_argument, 0, ARG_CANARY_RESUME_IGNORE_SEC },
{ "stderr", no_argument, 0, ARG_STDERR },
{ "introspect", no_argument, 0, ARG_INTROSPECT },
{ NULL, 0, 0, 0}
Expand Down Expand Up @@ -2218,6 +2281,20 @@ static int parse_command_line(int argc, char *argv[], int *ret) {
break;
}

/* --canary-resume-ignore-sec=SECONDS: how long after resume canary clock
 * jumps are ignored. */
case ARG_CANARY_RESUME_IGNORE_SEC: {
        char *e = NULL;
        unsigned long u;

        errno = 0;
        u = strtoul(optarg, &e, 0);
        /* Reject conversion errors, empty input, trailing garbage, and values
         * that would be silently truncated by the uint32_t cast below. */
        if (errno != 0 || !e || e == optarg || *e || u > UINT32_MAX) {
                fprintf(stderr, "--canary-resume-ignore-sec parameter invalid.\n");
                return -1;
        }
        canary_resume_ignore_sec = (uint32_t) u;
        break;
}

case ARG_STDERR:
log_stderr = TRUE;
break;
Expand Down