Skip to content

Commit 364ad9a

Browse files
committed
[Offload] Implement olShutDown
`olShutDown` was not properly calling deinit on the platforms, resulting in random segfaults on AMD devices.
1 parent 39f19f2 commit 364ad9a

File tree

3 files changed

+44
-7
lines changed

3 files changed

+44
-7
lines changed

offload/liboffload/API/Common.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def : Function {
176176
let desc = "Release the resources in use by Offload";
177177
let details = [
178178
"This decrements an internal reference count. When this reaches 0, all resources will be released",
179-
"Subsequent API calls made after this are not valid"
179+
"Subsequent API calls to methods other than `olInit` made after resources are released will return OL_ERRC_UNINITIALIZED"
180180
];
181181
let params = [];
182182
let returns = [];

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ struct AllocInfo {
9797
// Global shared state for liboffload
9898
struct OffloadContext;
9999
static OffloadContext *OffloadContextVal;
100+
std::mutex OffloadContextValMutex;
100101
struct OffloadContext {
101102
OffloadContext(OffloadContext &) = delete;
102103
OffloadContext(OffloadContext &&) = delete;
@@ -107,6 +108,7 @@ struct OffloadContext {
107108
bool ValidationEnabled = true;
108109
DenseMap<void *, AllocInfo> AllocInfoMap{};
109110
SmallVector<ol_platform_impl_t, 4> Platforms{};
111+
size_t RefCount;
110112

111113
ol_device_handle_t HostDevice() {
112114
// The host platform is always inserted last
@@ -191,18 +193,41 @@ Error initPlugins() {
191193
return Plugin::success();
192194
}
193195

194-
// TODO: We can properly reference count here and manage the resources in a more
195-
// clever way
196196
Error olInit_impl() {
197-
static std::once_flag InitFlag;
198-
std::optional<Error> InitResult{};
199-
std::call_once(InitFlag, [&] { InitResult = initPlugins(); });
197+
std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
198+
199+
std::optional<Error> InitResult;
200+
if (!isOffloadInitialized())
201+
InitResult = initPlugins();
202+
203+
OffloadContext::get().RefCount++;
200204

201205
if (InitResult)
202206
return std::move(*InitResult);
203207
return Error::success();
204208
}
205-
Error olShutDown_impl() { return Error::success(); }
209+
210+
/// Drop one reference to the global Offload state and, when the count reaches
/// zero, deinitialize every plugin and destroy the global context.
///
/// Returns success while other references remain. On the final release,
/// returns the errors reported by the plugins' deinit() calls, joined
/// together (success if none failed).
///
/// NOTE(review): OffloadContext::get() is used below without first checking
/// that a context exists; what happens when this is called without a matching
/// olInit depends on get() and on any checks done by the caller — confirm.
Error olShutDown_impl() {
211+
// Held for the whole function so the count update and teardown are not
// interleaved with another holder of this mutex.
std::lock_guard<std::mutex> Lock{OffloadContextValMutex};
212+
213+
// Not the last reference: nothing to tear down yet.
if (--OffloadContext::get().RefCount != 0)
214+
return Error::success();
215+
216+
// Accumulates deinit failures so one failing plugin does not stop the others
// from being deinitialized.
llvm::Error Result = Error::success();
217+
218+
for (auto &P : OffloadContext::get().Platforms) {
219+
// Host plugin is nullptr and has no deinit
220+
if (!P.Plugin)
221+
continue;
222+
223+
if (auto Res = P.Plugin->deinit())
224+
Result = llvm::joinErrors(std::move(Result), std::move(Res));
225+
}
226+
// The context is destroyed unconditionally, even if some deinit() failed.
delete OffloadContextVal;
227+
// Reset the global pointer so it does not dangle after the delete.
OffloadContextVal = nullptr;
228+
229+
return Result;
230+
}
206231

207232
Error olGetPlatformInfoImplDetail(ol_platform_handle_t Platform,
208233
ol_platform_info_t PropName, size_t PropSize,

offload/unittests/OffloadAPI/init/olInit.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,20 @@
1515

1616
struct olInitTest : ::testing::Test {};
1717

18+
// A single olInit followed by a single olShutDown must both report success.
TEST_F(olInitTest, Success) {
19+
ASSERT_SUCCESS(olInit());
20+
ASSERT_SUCCESS(olShutDown());
21+
}
22+
1823
// Calling olIterateDevices here is expected to fail with
// OL_ERRC_UNINITIALIZED. The test itself does not call olInit.
// NOTE(review): presumably relies on no olInit reference being outstanding
// from elsewhere in the test binary — confirm.
TEST_F(olInitTest, Uninitialized) {
1924
ASSERT_ERROR(OL_ERRC_UNINITIALIZED,
2025
olIterateDevices(
2126
[](ol_device_handle_t, void *) { return false; }, nullptr));
2227
}
28+
29+
// Runs ten back-to-back olInit/olShutDown pairs and requires every call to
// succeed, i.e. initialization must keep working after a previous shutdown.
TEST_F(olInitTest, RepeatedInit) {
30+
for (size_t I = 0; I < 10; I++) {
31+
ASSERT_SUCCESS(olInit());
32+
ASSERT_SUCCESS(olShutDown());
33+
}
34+
}

0 commit comments

Comments
 (0)