Skip to content

Commit f29f5b9

Browse files
Copilotmikeagundthaler
authored
Add missing multi-threaded stress tests for extension restart scenarios (#4526)
* Initial plan * Add new extension restart stress test scenarios Co-authored-by: mikeagun <[email protected]> * Apply clang-format to stress test code for consistency Co-authored-by: mikeagun <[email protected]> * Fix critical logic bug in stress test thread configuration Co-authored-by: mikeagun <[email protected]> * Fix thread creation timing and object table initialization in stress tests Co-authored-by: mikeagun <[email protected]> * Update code comments to remove scenario references and make them self-descriptive Co-authored-by: mikeagun <[email protected]> * Fix comment punctuation consistency - ensure all comments end with periods Co-authored-by: dthaler <[email protected]> * Revert unrelated formatting changes to keep PR focused on stress tests Co-authored-by: dthaler <[email protected]> * Wrap JIT stress tests with CONFIG_BPF_JIT_DISABLED ifdef guards Co-authored-by: mikeagun <[email protected]> --------- Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: mikeagun <[email protected]> Co-authored-by: D. Michael Agun <[email protected]> Co-authored-by: dthaler <[email protected]>
1 parent 894abc6 commit f29f5b9

File tree

1 file changed

+312
-8
lines changed

1 file changed

+312
-8
lines changed

tests/stress/km/stress_tests_km.cpp

Lines changed: 312 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ static const std::map<std::string, test_program_attributes> _test_program_info =
3030
struct object_table_entry
3131
{
3232
std::unique_ptr<std::mutex> lock{nullptr};
33-
_Guarded_by_(lock) bool available { true };
34-
_Guarded_by_(lock) bpf_object_ptr object { nullptr };
35-
_Guarded_by_(lock) bool loaded { false };
33+
_Guarded_by_(lock) bool available{true};
34+
_Guarded_by_(lock) bpf_object_ptr object{nullptr};
35+
_Guarded_by_(lock) bool loaded{false};
3636
bool attach{false};
3737

3838
// The following fields are for debugging this test itself.
@@ -1634,6 +1634,7 @@ _mt_bindmonitor_tail_call_invoke_program_test(
16341634
WSACleanup();
16351635
}
16361636

1637+
#if !defined(CONFIG_BPF_JIT_DISABLED)
16371638
TEST_CASE("jit_load_attach_detach_unload_random_v4_test", "[jit_mt_stress_test]")
16381639
{
16391640
// This test attempts to load the same JIT'ed ebpf program multiple times in different threads. This test
@@ -1652,6 +1653,7 @@ TEST_CASE("jit_load_attach_detach_unload_random_v4_test", "[jit_mt_stress_test]"
16521653
_print_test_control_info(local_test_control_info);
16531654
_mt_prog_load_stress_test(EBPF_EXECUTION_JIT, local_test_control_info);
16541655
}
1656+
#endif // !defined(CONFIG_BPF_JIT_DISABLED)
16551657

16561658
TEST_CASE("native_load_attach_detach_unload_random_v4_test", "[native_mt_stress_test]")
16571659
{
@@ -1694,13 +1696,16 @@ TEST_CASE("native_unique_load_attach_detach_unload_random_v4_test", "[native_mt_
16941696

16951697
TEST_CASE("native_invoke_v4_v6_programs_restart_extension_test", "[native_mt_stress_test]")
16961698
{
1699+
// Multi-threaded stress test where each thread loads different programs with extension restart.
16971700
// Test layout:
1698-
// 1. Create 2 'monitor' threads:
1699-
// - Thread #1 loads a native ebpf SOCK_ADDR program that attaches to CGROUP/CONNECT4.
1701+
// 1. Create 2 'monitor' threads, each loading a DIFFERENT native eBPF program:
1702+
// - Thread #1 loads a native eBPF SOCK_ADDR program (cgroup_count_connect4.sys) that attaches to
1703+
// CGROUP/CONNECT4.
17001704
// > This program monitors an IPv4 endpoint, 127.0.0.1:<target_port>. On every invocation, the program updates
17011705
// the count (TCP) 'connect' attempts in the 'connect4_count_map' map at its port.
1702-
// - Thread #2 loads another native ebpf SOCK_ADDR program that attaches to CGROUP/CONNECT6.
1703-
// > The behavior of this program is identical to that of the v4 program (loaded by thread #1), except it is
1706+
// - Thread #2 loads a DIFFERENT native eBPF SOCK_ADDR program (cgroup_count_connect6.sys) that attaches to
1707+
// CGROUP/CONNECT6.
1708+
// > The behavior of this program is similar to the v4 program (loaded by thread #1), except it is
17041709
// invoked for IPv6 connection attempts ([::1]:<target_port>).
17051710
//
17061711
// 2 Until the end of test, each test thread will:
@@ -1714,13 +1719,14 @@ TEST_CASE("native_invoke_v4_v6_programs_restart_extension_test", "[native_mt_str
17141719
// 3. In parallel, start the 'extension restart' thread to continuously restart the netebpf extension
17151720
// (if specified on the command line).
17161721
//
1722+
// NOTE: Each thread loads different programs to test eBPF component resiliency with different program instances.
17171723
// NOTE: The '-tt', '-er' and the '-erd' command line parameters are not used by this test.
17181724

17191725
_km_test_init();
17201726
LOG_INFO("\nStarting test *** native_invoke_v4_v6_programs_restart_extension_test ***");
17211727
test_control_info local_test_control_info = _global_test_control_info;
17221728

1723-
// This test needs only 2 threads (one per program).
1729+
// This test needs only 2 threads (one per different program).
17241730
local_test_control_info.threads_count = 2;
17251731

17261732
_print_test_control_info(local_test_control_info);
@@ -1772,6 +1778,7 @@ TEST_CASE("bindmonitor_tail_call_invoke_program_test", "[native_mt_stress_test]"
17721778
_mt_bindmonitor_tail_call_invoke_program_test(EBPF_EXECUTION_NATIVE, local_test_control_info);
17731779
}
17741780

1781+
#if !defined(CONFIG_BPF_JIT_DISABLED)
17751782
TEST_CASE("jit_unique_load_attach_detach_unload_random_v4_test", "[jit_mt_stress_test]")
17761783
{
17771784
// This test attempts to load a unique JIT ebpf program multiple times in different threads. Specifically:
@@ -1872,3 +1879,300 @@ TEST_CASE("jit_bindmonitor_tail_call_invoke_program_test", "[jit_mt_stress_test]
18721879
_print_test_control_info(local_test_control_info);
18731880
_mt_bindmonitor_tail_call_invoke_program_test(EBPF_EXECUTION_JIT, local_test_control_info);
18741881
}
1882+
#endif // !defined(CONFIG_BPF_JIT_DISABLED)
1883+
1884+
static void
1885+
_mt_load_stress_test_with_restart_timing(
1886+
ebpf_execution_type_t program_type, const test_control_info& test_control_info, bool start_restart_before_load)
1887+
{
1888+
constexpr uint32_t OBJECT_TABLE_SIZE{64};
1889+
std::vector<object_table_entry> object_table(OBJECT_TABLE_SIZE);
1890+
for (uint32_t index = 0; auto& entry : object_table) {
1891+
entry.available = true;
1892+
entry.lock = std::make_unique<std::mutex>();
1893+
entry.object.reset();
1894+
entry.attach = !(index % 2) ? true : false;
1895+
entry.index = index++;
1896+
entry.reuse_count = 0;
1897+
entry.tag = 0xC001DEA1;
1898+
}
1899+
1900+
// We have 3 types of threads, so we need (test_control_info.threads_count * 3) total threads.
1901+
size_t total_threads = ((size_t)test_control_info.threads_count * 3);
1902+
std::vector<thread_context> thread_context_table(
1903+
total_threads, {{}, {}, false, {}, thread_role_type::ROLE_NOT_SET, 0, 0, 0, false, 0, 0, object_table});
1904+
std::vector<std::thread> test_thread_table(total_threads);
1905+
1906+
// Extension restart thread setup.
1907+
std::vector<std::string> extension_names;
1908+
std::vector<std::thread> extension_restart_thread_table;
1909+
std::vector<thread_context> extension_restart_thread_context_table;
1910+
1911+
// An incrementing 'compartment Id' to ensure that _each_ 'Attacher' thread gets a unique compartment id.
1912+
uint32_t compartment_id{1};
1913+
1914+
// Get the single program info - we only have one program in km tests.
1915+
const auto& program_info = _test_program_info.begin();
1916+
const auto& program_name = program_info->first;
1917+
const auto& program_attribs = program_info->second;
1918+
extension_names.push_back(program_attribs.extension_name);
1919+
1920+
// Configure context for each thread.
1921+
for (size_t i = 0; i < total_threads; i++) {
1922+
// First, prepare the context for this thread.
1923+
auto& context_entry = thread_context_table[i];
1924+
context_entry.program_name = program_name;
1925+
1926+
if (!(compartment_id % 3)) {
1927+
context_entry.role = thread_role_type::DESTROYER;
1928+
} else if (!(compartment_id % 2)) {
1929+
context_entry.role = thread_role_type::ATTACHER;
1930+
} else {
1931+
context_entry.role = thread_role_type::CREATOR;
1932+
}
1933+
1934+
if (program_type == EBPF_EXECUTION_NATIVE) {
1935+
context_entry.is_native_program = true;
1936+
if (test_control_info.use_unique_native_programs && context_entry.role == thread_role_type::CREATOR) {
1937+
// Create unique native programs for 'creator' threads only.
1938+
context_entry.file_name = _make_unique_file_copy(program_attribs.native_file_name);
1939+
} else {
1940+
context_entry.file_name = program_attribs.native_file_name;
1941+
}
1942+
} else {
1943+
context_entry.file_name = program_attribs.jit_file_name;
1944+
}
1945+
context_entry.thread_index = (uint32_t)i;
1946+
context_entry.compartment_id = compartment_id++;
1947+
context_entry.duration_minutes = test_control_info.duration_minutes;
1948+
context_entry.extension_restart_enabled = test_control_info.extension_restart_enabled;
1949+
}
1950+
1951+
// Handle extension restart timing.
1952+
if (test_control_info.extension_restart_enabled) {
1953+
if (start_restart_before_load) {
1954+
// Start extension restart immediately, then start load threads.
1955+
LOG_INFO("Starting extension restart BEFORE program loading threads");
1956+
configure_extension_restart(
1957+
test_control_info,
1958+
extension_names,
1959+
extension_restart_thread_table,
1960+
extension_restart_thread_context_table,
1961+
object_table);
1962+
// Small delay to ensure restart thread is running before we start load threads.
1963+
std::this_thread::sleep_for(std::chrono::milliseconds(500));
1964+
}
1965+
}
1966+
1967+
// Now create the program loading threads.
1968+
for (size_t i = 0; i < total_threads; i++) {
1969+
auto& context_entry = thread_context_table[i];
1970+
auto& thread_entry = test_thread_table[i];
1971+
thread_entry = std::move(std::thread(_test_thread_function, std::ref(context_entry)));
1972+
}
1973+
1974+
if (test_control_info.extension_restart_enabled && !start_restart_before_load) {
1975+
// Wait for programs to load, then start extension restart.
1976+
LOG_INFO("Waiting for programs to load, then starting extension restart");
1977+
std::this_thread::sleep_for(std::chrono::seconds(5)); // Give programs time to load and attach
1978+
configure_extension_restart(
1979+
test_control_info,
1980+
extension_names,
1981+
extension_restart_thread_table,
1982+
extension_restart_thread_context_table,
1983+
object_table);
1984+
}
1985+
1986+
wait_and_verify_test_threads(
1987+
test_control_info,
1988+
test_thread_table,
1989+
thread_context_table,
1990+
extension_restart_thread_table,
1991+
extension_restart_thread_context_table);
1992+
}
1993+
1994+
static void
1995+
_mt_invoke_stress_test_multiple_programs(ebpf_execution_type_t program_type, const test_control_info& test_control_info)
1996+
{
1997+
WSAData data{};
1998+
auto error = WSAStartup(MAKEWORD(2, 2), &data);
1999+
REQUIRE(error == 0);
2000+
2001+
// For testing different programs per thread, create multiple copies of programs.
2002+
constexpr uint32_t MAX_PROGRAM_COPIES = 4;
2003+
2004+
size_t actual_threads = std::min((size_t)test_control_info.threads_count, (size_t)MAX_PROGRAM_COPIES);
2005+
std::vector<object_table_entry> object_table(actual_threads);
2006+
for (uint32_t index = 0; auto& entry : object_table) {
2007+
entry.available = true;
2008+
entry.lock = std::make_unique<std::mutex>();
2009+
entry.object.reset();
2010+
entry.attach = !(index % 2) ? true : false;
2011+
entry.index = index++;
2012+
entry.reuse_count = 0;
2013+
entry.tag = 0xC001DEA1;
2014+
}
2015+
2016+
std::vector<thread_context> thread_context_table(
2017+
actual_threads, {{}, {}, false, {}, thread_role_type::ROLE_NOT_SET, 0, 0, 0, false, 0, 0, object_table});
2018+
std::vector<std::thread> test_thread_table(actual_threads);
2019+
2020+
for (uint32_t i = 0; i < actual_threads; i++) {
2021+
// First, prepare the context for this thread.
2022+
auto& context_entry = thread_context_table[i];
2023+
2024+
if (program_type == EBPF_EXECUTION_NATIVE) {
2025+
// For native programs, create unique file copies so each thread has a different program.
2026+
context_entry.file_name = _make_unique_file_copy("cgroup_sock_addr.sys");
2027+
context_entry.is_native_program = true;
2028+
} else {
2029+
// For JIT programs, all threads can load the same file (but get different instances).
2030+
context_entry.file_name = "cgroup_sock_addr.o";
2031+
context_entry.is_native_program = false;
2032+
}
2033+
2034+
context_entry.program_name = "cgroup_sock_addr";
2035+
context_entry.role = thread_role_type::CREATOR; // All threads are creators for this test
2036+
context_entry.thread_index = i;
2037+
context_entry.compartment_id = i + 1; // Unique compartment IDs
2038+
context_entry.duration_minutes = test_control_info.duration_minutes;
2039+
context_entry.extension_restart_enabled = test_control_info.extension_restart_enabled;
2040+
}
2041+
2042+
// Now create all the threads after context setup is complete.
2043+
for (uint32_t i = 0; i < actual_threads; i++) {
2044+
auto& context_entry = thread_context_table[i];
2045+
auto& thread_entry = test_thread_table[i];
2046+
thread_entry = std::move(std::thread(_test_thread_function, std::ref(context_entry)));
2047+
}
2048+
2049+
// If requested, start the 'extension stop-and-restart' thread for extension for this program type.
2050+
std::vector<std::string> extension_names = {"netebpfext"};
2051+
std::vector<std::thread> extension_restart_thread_table;
2052+
std::vector<thread_context> extension_restart_thread_context_table;
2053+
if (test_control_info.extension_restart_enabled) {
2054+
configure_extension_restart(
2055+
test_control_info,
2056+
extension_names,
2057+
extension_restart_thread_table,
2058+
extension_restart_thread_context_table,
2059+
object_table);
2060+
}
2061+
2062+
wait_and_verify_test_threads(
2063+
test_control_info,
2064+
test_thread_table,
2065+
thread_context_table,
2066+
extension_restart_thread_table,
2067+
extension_restart_thread_context_table);
2068+
}
2069+
2070+
#if !defined(CONFIG_BPF_JIT_DISABLED)
2071+
TEST_CASE("load_attach_stress_test_restart_during_load_jit", "[jit_mt_stress_test]")
{
    // Exercises the program 'open + load + attach' sequence for JIT programs while the extension is being restarted.
    // Extension restart is requested first; the program loading threads are started afterwards.
    // All loader threads work on the same JIT program.

    // Ask the helper to start the extension restart ahead of the loader threads.
    constexpr bool restart_before_load = true;

    _km_test_init();
    LOG_INFO("\nStarting test *** load_attach_stress_test_restart_during_load_jit ***");

    // Work on a private copy of the global settings, with extension restart forced on.
    test_control_info control_info = _global_test_control_info;
    control_info.extension_restart_enabled = true;

    _print_test_control_info(control_info);
    _mt_load_stress_test_with_restart_timing(EBPF_EXECUTION_JIT, control_info, restart_before_load);
}
2087+
#endif // !defined(CONFIG_BPF_JIT_DISABLED)
2088+
2089+
TEST_CASE("load_attach_stress_test_restart_during_load_native", "[native_mt_stress_test]")
{
    // Exercises the program 'open + load + attach' sequence for native programs while the extension is being
    // restarted. Extension restart is requested first; the program loading threads are started afterwards.
    // Unique native program copies are requested for this test.

    // Ask the helper to start the extension restart ahead of the loader threads.
    constexpr bool restart_before_load = true;

    _km_test_init();
    LOG_INFO("\nStarting test *** load_attach_stress_test_restart_during_load_native ***");

    // Work on a private copy of the global settings, with extension restart and unique native programs forced on.
    test_control_info control_info = _global_test_control_info;
    control_info.extension_restart_enabled = true;
    control_info.use_unique_native_programs = true;

    _print_test_control_info(control_info);
    _mt_load_stress_test_with_restart_timing(EBPF_EXECUTION_NATIVE, control_info, restart_before_load);
}
2106+
2107+
#if !defined(CONFIG_BPF_JIT_DISABLED)
2108+
TEST_CASE("load_attach_stress_test_restart_after_load_jit", "[jit_mt_stress_test]")
{
    // Exercises extension restart after the JIT program 'open + load + attach' sequence has been given time to run.
    // The program loading threads are started first; extension restart is started afterwards.
    // All loader threads work on the same JIT program.

    // Ask the helper to start the loader threads first and the extension restart later.
    constexpr bool restart_before_load = false;

    _km_test_init();
    LOG_INFO("\nStarting test *** load_attach_stress_test_restart_after_load_jit ***");

    // Work on a private copy of the global settings, with extension restart forced on.
    test_control_info control_info = _global_test_control_info;
    control_info.extension_restart_enabled = true;

    _print_test_control_info(control_info);
    _mt_load_stress_test_with_restart_timing(EBPF_EXECUTION_JIT, control_info, restart_before_load);
}
2125+
#endif // !defined(CONFIG_BPF_JIT_DISABLED)
2126+
2127+
TEST_CASE("load_attach_stress_test_restart_after_load_native", "[native_mt_stress_test]")
{
    // Exercises extension restart after the native program 'open + load + attach' sequence has been given time to
    // run. The program loading threads are started first; extension restart is started afterwards.
    // Unique native program copies are requested for this test.

    // Ask the helper to start the loader threads first and the extension restart later.
    constexpr bool restart_before_load = false;

    _km_test_init();
    LOG_INFO("\nStarting test *** load_attach_stress_test_restart_after_load_native ***");

    // Work on a private copy of the global settings, with extension restart and unique native programs forced on.
    test_control_info control_info = _global_test_control_info;
    control_info.extension_restart_enabled = true;
    control_info.use_unique_native_programs = true;

    _print_test_control_info(control_info);
    _mt_load_stress_test_with_restart_timing(EBPF_EXECUTION_NATIVE, control_info, restart_before_load);
}
2145+
2146+
#if !defined(CONFIG_BPF_JIT_DISABLED)
2147+
TEST_CASE("invoke_different_programs_restart_extension_test_jit", "[jit_mt_stress_test]")
{
    // Multi-threaded JIT stress test in which every thread loads a program while extension restart may be running
    // (when enabled in the test settings).

    // Lower bound on the number of threads this test requests.
    constexpr auto minimum_threads = 4u;

    _km_test_init();
    LOG_INFO("\nStarting test *** invoke_different_programs_restart_extension_test_jit ***");

    // Work on a private copy of the global settings, raising the thread count to the minimum when it is lower.
    test_control_info control_info = _global_test_control_info;
    if (control_info.threads_count < minimum_threads) {
        control_info.threads_count = minimum_threads;
    }

    _print_test_control_info(control_info);
    _mt_invoke_stress_test_multiple_programs(EBPF_EXECUTION_JIT, control_info);
}
2162+
#endif // !defined(CONFIG_BPF_JIT_DISABLED)
2163+
2164+
TEST_CASE("invoke_different_programs_restart_extension_test_native", "[native_mt_stress_test]")
{
    // Multi-threaded native stress test in which every thread loads a program while extension restart may be
    // running (when enabled in the test settings).

    // Lower bound on the number of threads this test requests.
    constexpr auto minimum_threads = 4u;

    _km_test_init();
    LOG_INFO("\nStarting test *** invoke_different_programs_restart_extension_test_native ***");

    // Work on a private copy of the global settings, raising the thread count to the minimum when it is lower.
    test_control_info control_info = _global_test_control_info;
    if (control_info.threads_count < minimum_threads) {
        control_info.threads_count = minimum_threads;
    }

    _print_test_control_info(control_info);
    _mt_invoke_stress_test_multiple_programs(EBPF_EXECUTION_NATIVE, control_info);
}

0 commit comments

Comments
 (0)