Skip to content

Commit f555d49

Browse files
committed
olShutDown was not properly calling deinit on the platforms, resulting
in random segfaults on AMD devices.
1 parent 0870c88 commit f555d49

File tree

3 files changed

+43
-6
lines changed

3 files changed

+43
-6
lines changed

offload/liboffload/API/Common.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def : Function {
176176
let desc = "Release the resources in use by Offload";
177177
let details = [
178178
"This decrements an internal reference count. When this reaches 0, all resources will be released",
179-
"Subsequent API calls made after this are not valid"
179+
"Subsequent API calls to methods other than `olInit` made after resources are released will return OL_ERRC_UNINITIALIZED"
180180
];
181181
let params = [];
182182
let returns = [];

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ struct AllocInfo {
9696
// Global shared state for liboffload
9797
struct OffloadContext;
9898
static OffloadContext *OffloadContextVal;
99+
std::mutex OffloadContextValMutex;
99100
struct OffloadContext {
100101
OffloadContext(OffloadContext &) = delete;
101102
OffloadContext(OffloadContext &&) = delete;
@@ -106,6 +107,7 @@ struct OffloadContext {
106107
bool ValidationEnabled = true;
107108
DenseMap<void *, AllocInfo> AllocInfoMap{};
108109
SmallVector<ol_platform_impl_t, 4> Platforms{};
110+
size_t RefCount;
109111

110112
ol_device_handle_t HostDevice() {
111113
// The host platform is always inserted last
@@ -186,15 +188,38 @@ void initPlugins() {
186188
OffloadContextVal = Context;
187189
}
188190

189-
// TODO: We can properly reference count here and manage the resources in a more
190-
// clever way
191191
Error olInit_impl() {
192-
static std::once_flag InitFlag;
193-
std::call_once(InitFlag, initPlugins);
192+
std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
193+
194+
if (!isOffloadInitialized())
195+
initPlugins();
196+
197+
OffloadContext::get().RefCount ++;
194198

195199
return Error::success();
196200
}
197-
Error olShutDown_impl() { return Error::success(); }
201+
202+
Error olShutDown_impl() {
203+
std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
204+
205+
if (--OffloadContext::get().RefCount != 0)
206+
return Error::success();
207+
208+
llvm::Error Result = Error::success();
209+
210+
for (auto &P : OffloadContext::get().Platforms) {
211+
// Host plugin is nullptr and has no deinit
212+
if (!P.Plugin)
213+
continue;
214+
215+
if (auto Res = P.Plugin->deinit())
216+
Result = llvm::joinErrors(std::move(Result), std::move(Res));
217+
}
218+
delete OffloadContextVal;
219+
OffloadContextVal = nullptr;
220+
221+
return Result;
222+
}
198223

199224
Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
200225
ol_platform_info_t PropName, size_t PropSize,

offload/unittests/OffloadAPI/init/olInit.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,20 @@
1515

1616
struct olInitTest : ::testing::Test {};
1717

18+
TEST_F(olInitTest, Success) {
19+
ASSERT_SUCCESS(olInit());
20+
ASSERT_SUCCESS(olShutDown());
21+
}
22+
1823
TEST_F(olInitTest, Uninitialized) {
1924
ASSERT_ERROR(OL_ERRC_UNINITIALIZED,
2025
olIterateDevices(
2126
[](ol_device_handle_t, void *) { return false; }, nullptr));
2227
}
28+
29+
TEST_F(olInitTest, RepeatedInit) {
30+
for (size_t I = 0; I < 10; I++) {
31+
ASSERT_SUCCESS(olInit());
32+
ASSERT_SUCCESS(olShutDown());
33+
}
34+
}

0 commit comments

Comments
 (0)