Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions scheds/rust/scx_mitosis/test/synthetic_workloads/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Synthetic Workloads for scx_mitosis Testing

This directory contains tools and experiments for testing the scx_mitosis scheduler with synthetic workloads.

### `cgroup_cli.sh`
A utility script that creates transient systemd services running busy loops on specified CPU sets.

**Usage:**
```bash
./cgroup_cli.sh start <unit_name> <cpuspec> <nthreads> # Start workload
./cgroup_cli.sh stop [unit_name|all] # Stop workload(s)
./cgroup_cli.sh status [unit_name|all] # Check status
./cgroup_cli.sh list # List active services
./cgroup_cli.sh monitor # Monitor CPU usage
```

### How I've used cgroup_cli.sh to test scx_mitosis
The script makes it easy to create varying numbers of cgroups, each with its own cpuset and thread count.
This is useful for testing hypotheses about why scx_mitosis misbehaved on a macrobenchmark.

One example: before work stealing was added, scx_mitosis could get into states with poor work conservation. This tool made it easy to demonstrate the problem and narrow in on the simplest reproducible experiment. I started by launching 80 threads on 80 CPUs and saw that many CPUs were idle. I then simplified to launching 2 threads on 2 CPUs belonging to different L3 caches, and could show that 50% of the time one CPU would sit idle while the other ran both threads. After work stealing was added, both experiments showed ideal work conservation.

### Future use
This may prove useful for developing and testing dynamic cell creation and destruction.
101 changes: 101 additions & 0 deletions scheds/rust/scx_mitosis/test/synthetic_workloads/cgroup_cli.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env bash

# cgroup_cli.sh — Creates transient systemd services that run busy loops on specified CPU sets.
# ./cgroup_cli.sh start <unit_name> <cpuspec> <nthreads>
# see usage() for more details

set -euo pipefail

# Developed for the mitosis scheduler. All units share this prefix so they can
# be matched, listed, and stopped as a group. readonly: never reassigned.
readonly UNIT_PREFIX="mito-spin"

usage() {
# Print CLI help to stdout. The heredoc expands $0 and $UNIT_PREFIX so the
# help text tracks the script name and the actual unit prefix.
cat <<EOF
Usage:
$0 start <unit_name> <cpuspec> <nthreads>
$0 stop [unit_name|all]
$0 status [unit_name|all]
$0 list
$0 monitor
Notes:
- cpuspec like "0-7,16,18" (cpuset syntax).
- unit name is prefixed with "$UNIT_PREFIX-".
EOF
}

ensure_cpuset() {
  # Verify the cpuset controller is exposed by the cgroup v2 hierarchy;
  # abort with status 2 otherwise (AllowedCPUs= needs it).
  local controllers="/sys/fs/cgroup/cgroup.controllers"
  if [[ -r "$controllers" ]] && grep -qw cpuset "$controllers"; then
    return 0
  fi
  echo "Error: cpuset controller not available (cgroup v2). Enable cpuset on your systemd hierarchy." >&2
  exit 2
}

unit_pattern() {
  # Map a user-supplied name to a systemctl unit (or glob, for "all"/empty):
  # e.g. "foo" -> mito-spin-foo.service, "all" -> mito-spin-*.service
  local target="${1:-}"
  if [[ -z "$target" || "$target" == "all" ]]; then
    target="*"
  fi
  printf '%s-%s.service' "$UNIT_PREFIX" "$target"
}

start_service() {
  # Launch a transient systemd service running <nthreads> busy-loop shells
  # confined to the given cpuset.
  # Arguments: $1 unit name (prefixed), $2 cpuspec ("0-7,16"), $3 thread count.
  local name=${1:?unit name required}
  local cpus=${2:?cpuspec required}
  local n=${3:?nthreads required}

  ensure_cpuset

  # Diagnostics go to stderr so stdout stays clean for scripting.
  [[ "$name" == "all" ]] && { echo "Error: 'all' is not a valid unit name." >&2; exit 2; }
  [[ "$n" =~ ^[0-9]+$ && "$n" -gt 0 ]] || { echo "Error: nthreads must be a positive integer." >&2; exit 2; }

  local unit; unit=$(unit_pattern "$name") # e.g., mito-spin-foo.service

  # Stop any stale instance quietly
  sudo systemctl stop "$unit" >/dev/null 2>&1 || true

  # AllowedCPUs pins the unit's cgroup to the cpuset; --collect garbage-collects
  # the unit once it stops, so repeated runs don't accumulate failed units.
  local props=(-p "AllowedCPUs=$cpus" --collect)

  # Launch N busy loops, then sleep forever so the service stays "active".
  # Plain 'bash -c' (not '-lc'): a login shell would source user profiles
  # inside a system service, which is slow and can have side effects.
  sudo systemd-run \
    --unit="$unit" \
    "${props[@]}" \
    -E N="$n" \
    bash -c '
      for ((i = 0; i < N; i++)); do
        while :; do :; done &
      done
      exec sleep infinity
    '
  echo "Started $unit on CPUs [$cpus] with $n spinner(s)."
}

stop_service() {
  # Stop the matching unit(s); defaults to all. "Nothing to stop" is not an error.
  local glob
  glob=$(unit_pattern "${1:-all}")
  sudo systemctl stop "$glob" || true
}

status_service() {
  # Show systemd status for the matching unit(s); defaults to all.
  # Missing/inactive units are tolerated (status returns non-zero for them).
  local glob
  glob=$(unit_pattern "${1:-all}")
  systemctl --no-pager status "$glob" || true
}

list_services() {
  # Print the active mito-spin unit names, or " (none)" if there are none.
  # Fix: the old `awk ... || echo " (none)"` was dead code — awk exits 0 even
  # on empty input, so "(none)" could never print. Capture first, then test.
  local pattern units
  pattern=$(unit_pattern '*')
  echo "Active $UNIT_PREFIX services:"
  units=$(systemctl --no-pager list-units --type=service --state=active --plain --no-legend "$pattern" | awk '{print $1}') || true
  if [[ -n "$units" ]]; then
    printf '%s\n' "$units"
  else
    echo " (none)"
  fi
}

mpstat_monitor() {
  # Stream per-CPU utilization once per second until interrupted.
  # Fix: check for mpstat up front (the experiment scripts do this, the CLI
  # didn't) so users get a clear install hint instead of "command not found".
  command -v mpstat >/dev/null || { echo "Error: mpstat not found (install 'sysstat')." >&2; exit 1; }
  mpstat --dec=0 -P ALL 1
}

# Subcommand dispatch: missing or unknown verbs fall through to usage.
case "${1:-}" in
  start)
    start_service "${2:-}" "${3:-}" "${4:-}"
    ;;
  stop)
    stop_service "${2:-all}"
    ;;
  status)
    status_service "${2:-all}"
    ;;
  list)
    list_services
    ;;
  monitor)
    mpstat_monitor
    ;;
  *)
    usage
    exit 1
    ;;
esac
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Experiment 5: 1 cgroup, 2 threads, 2 CPUs — verify threads stay on CPUs 7 and 8 (different L3s), log total busy (%) for those CPUs.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CGROUP_CLI="$SCRIPT_DIR/../cgroup_cli.sh"

UNIT_NAME="${UNIT_NAME:-expt5}"
TARGET_CPU1="${TARGET_CPU1:-7}" # override via env if desired, e.g. TARGET_CPU1=7 ./exp5_1cgrp_2th_2cpu.sh
TARGET_CPU2="${TARGET_CPU2:-8}" # override via env if desired, e.g. TARGET_CPU2=8 ./exp5_1cgrp_2th_2cpu.sh

# Diagnostics belong on stderr.
command -v mpstat >/dev/null || { echo "mpstat not found (install 'sysstat')" >&2; exit 1; }

# Stop the transient unit on any exit path (including Ctrl+C during monitoring).
cleanup() {
  echo "Cleaning up..."
  "$CGROUP_CLI" stop "$UNIT_NAME" || true
}

trap cleanup EXIT

printf "=== Experiment 5: 1 cgroup, 2 threads, 2 CPUs — threads on CPUs %s and %s (different L3s) ===\n" "$TARGET_CPU1" "$TARGET_CPU2"
printf "Starting workload:\n"

# Start workload with 2 threads on the two target CPUs
"$CGROUP_CLI" start "$UNIT_NAME" "$TARGET_CPU1,$TARGET_CPU2" 2

printf "Monitoring. Expect CPUs %s and %s ≈ 100%% busy; others mostly idle. Press Ctrl+C to stop.\n\n" "$TARGET_CPU1" "$TARGET_CPU2"
# Fix: terminate with a newline so the mpstat output doesn't start mid-line.
printf "Alternatively, we may see both tasks allocated to the same CPU. Poor work conservation, motivates work stealing.\n"

# Monitor
"$CGROUP_CLI" monitor
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env bash
# Experiment 3: 4 cgroups, 10 threads each, 10 CPUs each — verify thread distribution across multiple cgroups
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CGROUP_CLI="$SCRIPT_DIR/../cgroup_cli.sh"

# 4 cgroups with their respective CPU ranges; all overridable via env
# (UNIT_NAME_1..4, TARGET_CPU_1..4). Arrays replace 4x copy-pasted stanzas.
UNIT_NAMES=(
  "${UNIT_NAME_1:-expt3_1}"
  "${UNIT_NAME_2:-expt3_2}"
  "${UNIT_NAME_3:-expt3_3}"
  "${UNIT_NAME_4:-expt3_4}"
)
CPU_RANGES=(
  "${TARGET_CPU_1:-0-9}"   # CPUs 0-9 for cgroup 1
  "${TARGET_CPU_2:-10-19}" # CPUs 10-19 for cgroup 2
  "${TARGET_CPU_3:-20-29}" # CPUs 20-29 for cgroup 3
  "${TARGET_CPU_4:-30-39}" # CPUs 30-39 for cgroup 4
)
THREADS_PER_CGROUP=10

# Diagnostics belong on stderr.
command -v mpstat >/dev/null || { echo "mpstat not found (install 'sysstat')" >&2; exit 1; }

# Stop every unit on any exit path (including Ctrl+C during monitoring).
cleanup() {
  echo "Cleaning up..."
  local unit
  for unit in "${UNIT_NAMES[@]}"; do
    "$CGROUP_CLI" stop "$unit" || true
  done
}

trap cleanup EXIT

printf "=== Experiment 3: 4 cgroups, 10 threads each, 10 CPUs each ===\n"
printf "Starting workloads:\n"

# Start all 4 cgroups
for i in "${!UNIT_NAMES[@]}"; do
  printf " Cgroup %d: %s on CPUs %s\n" "$((i + 1))" "${UNIT_NAMES[i]}" "${CPU_RANGES[i]}"
  "$CGROUP_CLI" start "${UNIT_NAMES[i]}" "${CPU_RANGES[i]}" "$THREADS_PER_CGROUP"
done

printf "\nMonitoring. Expect load distributed across 4 CPU groups. Press Ctrl+C to stop.\n\n"

# Monitor
"$CGROUP_CLI" monitor