Skip to content

Commit

Permalink
stress-ng: re-work aggressive pid waiting to use stress_wait_pid
Browse files Browse the repository at this point in the history
Make stress_wait_pid a little more generic with a waitpid flag and
re-work the aggressive pid waiting loop to use this. Add a reaped
flag to the stress_pid_t struct to indicate whether a process has
been reaped using wait* to avoid redundant re-waiting on a reaped
process.

Signed-off-by: Colin Ian King <[email protected]>
  • Loading branch information
ColinIanKing committed Dec 19, 2024
1 parent b249347 commit 76df244
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 85 deletions.
181 changes: 96 additions & 85 deletions stress-ng.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,96 +1066,33 @@ void stress_sync_start_cont_s_pid(stress_pid_t *s_pid)
(void)kill(pid, SIGCONT);
}

#if defined(HAVE_SCHED_GETAFFINITY) && \
NEED_GLIBC(2,3,0)
/*
 *  stress_wait_aggressive()
 *	keep moving the still-running stressor processes between the
 *	CPUs in this process's affinity mask while waiting for them to
 *	finish, so that scheduler placement gets exercised too. Returns
 *	when wait_flag is cleared, when a full sweep finds no live
 *	process, or when the affinity mask cannot be used.
 */
static void stress_wait_aggressive(
	const int32_t ticks_per_sec,
	stress_stressor_t *stressors_list)
{
	cpu_set_t allowed_cpus;
	useconds_t usec_sleep;
	bool procs_alive = true;

	/* Sleep 1/5th of a clock tick per sweep, or 4000us if ticks are unknown */
	if (ticks_per_sec)
		usec_sleep = 1000000 / ((useconds_t)5 * ticks_per_sec);
	else
		usec_sleep = 1000000 / 250;

	pr_dbg("changing stressor cpu affinity every %lu usecs\n", (unsigned long int)usec_sleep);

	while (procs_alive && wait_flag) {
		const int32_t cpus = stress_get_processors_configured();
		stress_stressor_t *ss;

		procs_alive = false;

		/* Without a usable affinity mask there is nothing to twiddle */
		if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) < 0)
			return;
		if (CPU_COUNT(&allowed_cpus) == 0)	/* Highly unlikely */
			return;

		(void)shim_usleep(usec_sleep);

		for (ss = stressors_list; ss; ss = ss->next) {
			int32_t i;

			for (i = 0; i < ss->num_instances; i++) {
				const pid_t pid = ss->stats[i]->s_pid.pid;
				cpu_set_t new_mask;
				int32_t cpu;
				int status;

				if (!pid)
					continue;

				/* Child already gone? then skip it */
				if ((waitpid(pid, &status, WNOHANG) < 0) &&
				    ((errno == ECHILD) || (errno == ESRCH)))
					continue;
				procs_alive = true;

				/* Pick a random CPU that is in the allowed mask */
				for (;;) {
					cpu = (int32_t)stress_mwc32modn(cpus);
					if (CPU_ISSET(cpu, &allowed_cpus))
						break;
				}

				CPU_ZERO(&new_mask);
				CPU_SET(cpu, &new_mask);

				/* may fail if child has just died, just continue */
				(void)sched_setaffinity(pid, sizeof(new_mask), &new_mask);
				(void)shim_sched_yield();
			}
		}
	}
}
#endif

/*
* stress_wait_pid()
* wait for a stressor by its given pid
*/
static void stress_wait_pid(
stress_stressor_t *ss,
const pid_t pid,
const char *stressor_name,
stress_stats_t *stats,
bool *success,
bool *resource_success,
bool *metrics_success)
bool *metrics_success,
const int flag)
{
int status, ret;
bool do_abort = false;
const char *name = ss->stressor->name;

redo:
ret = shim_waitpid(pid, &status, 0);
if (stats->s_pid.reaped)
return;

ret = shim_waitpid(pid, &status, flag);
if (ret > 0) {
int wexit_status = WEXITSTATUS(status);

stats->s_pid.reaped = true;

if (WIFSIGNALED(status)) {
#if defined(WTERMSIG)
const int wterm_signal = WTERMSIG(status);
Expand All @@ -1165,15 +1102,15 @@ static void stress_wait_pid(
const char *signame = strsignal(wterm_signal);

pr_dbg("%s: [%d] terminated on signal: %d (%s)\n",
stressor_name, ret, wterm_signal, signame);
name, ret, wterm_signal, signame);
#else
pr_dbg("%s: [%d] terminated on signal: %d\n",
stressor_name, ret, wterm_signal);
name, ret, wterm_signal);
#endif
}
#else
pr_dbg("%s [%d] terminated on signal\n",
stressor_name, ret);
name, ret);
#endif
/*
* If the stressor got killed by OOM or SIGKILL
Expand All @@ -1183,10 +1120,10 @@ static void stress_wait_pid(
*/
if (stress_process_oomed(ret)) {
pr_dbg("%s: [%d] killed by the OOM killer\n",
stressor_name, ret);
name, ret);
} else if (wterm_signal == SIGKILL) {
pr_dbg("%s: [%d] possibly killed by the OOM killer\n",
stressor_name, ret);
name, ret);
} else if (wterm_signal != SIGALRM) {
*success = false;
}
Expand All @@ -1198,7 +1135,7 @@ static void stress_wait_pid(
case EXIT_NO_RESOURCE:
ss->status[STRESS_STRESSOR_STATUS_SKIPPED]++;
pr_warn_skip("%s: [%d] aborted early, out of system resources\n",
stressor_name, ret);
name, ret);
*resource_success = false;
do_abort = true;
break;
Expand All @@ -1212,13 +1149,13 @@ static void stress_wait_pid(
*success = false;
#if defined(STRESS_REPORT_EXIT_SIGNALED)
pr_dbg("%s: [%d] aborted via a termination signal\n",
stressor_name, ret);
name, ret);
#endif
break;
case EXIT_BY_SYS_EXIT:
ss->status[STRESS_STRESSOR_STATUS_FAILED]++;
pr_dbg("%s: [%d] aborted via exit() which was not expected\n",
stressor_name, ret);
name, ret);
do_abort = true;
break;
case EXIT_METRICS_UNTRUSTWORTHY:
Expand All @@ -1237,7 +1174,7 @@ static void stress_wait_pid(
default:
wexit_status_default:
pr_err("%s: [%d] terminated with an error, exit status=%d (%s)\n",
stressor_name, ret, wexit_status,
name, ret, wexit_status,
stress_exit_status_to_string(wexit_status));
*success = false;
do_abort = true;
Expand All @@ -1251,18 +1188,91 @@ static void stress_wait_pid(

stress_stressor_finished(&stats->s_pid.pid);
pr_dbg("%s: [%d] terminated (%s)\n",
stressor_name, ret,
name, ret,
stress_exit_status_to_string(wexit_status));
} else if (ret == -1) {
/* Somebody interrupted the wait */
if (errno == EINTR)
goto redo;
/* This child did not exist, mark it done anyhow */
if (errno == ECHILD)
if ((errno == ECHILD) || (errno == ESRCH))
stress_stressor_finished(&stats->s_pid.pid);
}
}

#if defined(HAVE_SCHED_GETAFFINITY) && \
NEED_GLIBC(2,3,0)
/*
 *  stress_wait_aggressive()
 *	while waiting for stressors to complete add some aggressive
 *	CPU affinity changing to exercise the scheduler placement
 *
 *	ticks_per_sec	 clock ticks per second, used to derive the
 *			 sleep between sweeps (1/5th of a tick, or
 *			 4000us when zero)
 *	stressors_list	 head of the list of stressors to wait on
 *	success, resource_success, metrics_success
 *			 result flags, passed unchanged to
 *			 stress_wait_pid() for each process polled
 *
 *	Each sweep polls every unreaped stressor process with a
 *	non-blocking stress_wait_pid() and moves the ones still
 *	running onto a random CPU from this process's affinity mask.
 *	Returns when wait_flag is cleared, when a sweep finds no
 *	unreaped process, or when the affinity mask cannot be used.
 */
static void stress_wait_aggressive(
	const int32_t ticks_per_sec,
	stress_stressor_t *stressors_list,
	bool *success,
	bool *resource_success,
	bool *metrics_success)
{
	stress_stressor_t *ss;
	cpu_set_t proc_mask;
	const useconds_t usec_sleep =
		ticks_per_sec ? 1000000 / ((useconds_t)5 * ticks_per_sec) : 1000000 / 250;

	pr_dbg("changing stressor cpu affinity every %lu usecs\n", (unsigned long int)usec_sleep);

	while (wait_flag) {
		const int32_t cpus = stress_get_processors_configured();
		bool procs_alive = false;

		/*
		 *  If we can't get the mask, then don't do
		 *  any affinity twiddling
		 */
		if (sched_getaffinity(0, sizeof(proc_mask), &proc_mask) < 0)
			return;
		if (!CPU_COUNT(&proc_mask))	/* Highly unlikely */
			return;

		(void)shim_usleep(usec_sleep);

		for (ss = stressors_list; ss; ss = ss->next) {
			int32_t j;

			for (j = 0; j < ss->num_instances; j++) {
				stress_stats_t *const stats = ss->stats[j];
				const pid_t pid = stats->s_pid.pid;

				if (pid && !stats->s_pid.reaped) {
					cpu_set_t mask;
					int32_t cpu_num;

					stress_wait_pid(ss, pid, stats,
						success, resource_success,
						metrics_success, WNOHANG);

					/*
					 *  Reaped by the WNOHANG wait: the PID
					 *  has been released and may be recycled
					 *  for an unrelated process, so it must
					 *  not have its affinity changed
					 */
					if (stats->s_pid.reaped)
						continue;
					procs_alive = true;

					/* Pick a random CPU that is in our affinity mask */
					do {
						cpu_num = (int32_t)stress_mwc32modn(cpus);
					} while (!(CPU_ISSET(cpu_num, &proc_mask)));

					CPU_ZERO(&mask);
					CPU_SET(cpu_num, &mask);

					/* may fail if child has just died, just continue */
					(void)sched_setaffinity(pid, sizeof(mask), &mask);
					(void)shim_sched_yield();
				}
			}
		}
		if (!procs_alive)
			break;
	}
}
#endif

void stress_sync_start_cont_list(stress_pid_t *s_pids_head)
{
int unready, n_pids;
Expand Down Expand Up @@ -1337,7 +1347,7 @@ static void stress_wait_stressors(
* try to thrash the system when in aggressive mode
*/
if (g_opt_flags & (OPT_FLAGS_AGGRESSIVE | OPT_FLAGS_TASKSET_RANDOM))
stress_wait_aggressive(ticks_per_sec, stressors_list);
stress_wait_aggressive(ticks_per_sec, stressors_list, success, resource_success, metrics_success);
#else
(void)ticks_per_sec;
#endif
Expand All @@ -1354,7 +1364,8 @@ static void stress_wait_stressors(
if (pid) {
const char *name = ss->stressor->name;

stress_wait_pid(ss, pid, name, stats, success, resource_success, metrics_success);
stress_wait_pid(ss, pid, stats,
success, resource_success, metrics_success, 0);
stress_clean_dir(name, pid, (uint32_t)j);
}
}
Expand Down
1 change: 1 addition & 0 deletions stress-ng.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ typedef struct stress_pid {
pid_t pid; /* PID of process */
pid_t oomable_child; /* oomable child pid, zero = none */
volatile uint8_t state; /* sync start state */
bool reaped; /* successfully waited for */
} stress_pid_t;

typedef struct {
Expand Down

0 comments on commit 76df244

Please sign in to comment.