Skip to content

Commit 58c9d3a

Browse files
[SYCL][L0] Bump minimum shared read-only allocation slab to 2M (#6271)
* Bump minimum shared read-only allocation slab to 2M * Avoid using non-trivially destructible globals Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 82b302e commit 58c9d3a

File tree

4 files changed

+102
-93
lines changed

4 files changed

+102
-93
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7331,17 +7331,11 @@ pi_result USMHostMemoryAlloc::allocateImpl(void **ResultPtr, size_t Size,
73317331
return USMHostAllocImpl(ResultPtr, Context, nullptr, Size, Alignment);
73327332
}
73337333

7334-
SystemMemory::MemType USMSharedMemoryAlloc::getMemTypeImpl() {
7335-
return SystemMemory::Shared;
7336-
}
7334+
MemType USMSharedMemoryAlloc::getMemTypeImpl() { return MemType::Shared; }
73377335

7338-
SystemMemory::MemType USMDeviceMemoryAlloc::getMemTypeImpl() {
7339-
return SystemMemory::Device;
7340-
}
7336+
MemType USMDeviceMemoryAlloc::getMemTypeImpl() { return MemType::Device; }
73417337

7342-
SystemMemory::MemType USMHostMemoryAlloc::getMemTypeImpl() {
7343-
return SystemMemory::Host;
7344-
}
7338+
MemType USMHostMemoryAlloc::getMemTypeImpl() { return MemType::Host; }
73457339

73467340
void *USMMemoryAllocBase::allocate(size_t Size) {
73477341
void *Ptr = nullptr;
@@ -7371,9 +7365,7 @@ void USMMemoryAllocBase::deallocate(void *Ptr, bool OwnZeMemHandle) {
73717365
}
73727366
}
73737367

7374-
SystemMemory::MemType USMMemoryAllocBase::getMemType() {
7375-
return getMemTypeImpl();
7376-
}
7368+
MemType USMMemoryAllocBase::getMemType() { return getMemTypeImpl(); }
73777369

73787370
pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context,
73797371
pi_device Device,

sycl/plugins/level_zero/pi_level_zero.hpp

100644100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ class USMSharedReadOnlyMemoryAlloc : public USMMemoryAllocBase {
402402
protected:
403403
pi_result allocateImpl(void **ResultPtr, size_t Size,
404404
pi_uint32 Alignment) override;
405-
MemType getMemTypeImpl() override { return SystemMemory::SharedReadOnly; }
405+
MemType getMemTypeImpl() override { return MemType::SharedReadOnly; }
406406

407407
public:
408408
USMSharedReadOnlyMemoryAlloc(pi_context Ctx, pi_device Dev)

sycl/plugins/level_zero/usm_allocator.cpp

Lines changed: 95 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,6 @@ constexpr auto operator""_GB(unsigned long long x) -> size_t {
5555
return x * 1024 * 1024 * 1024;
5656
}
5757

58-
// Buckets for Host use a minimum of the cache line size of 64 bytes.
59-
// This prevents two separate allocations residing in the same cache line.
60-
// Buckets for Device and Shared allocations will use starting size of 512.
61-
// This is because memory compression on newer GPUs makes the
62-
// minimum granularity 512 bytes instead of 64.
63-
static constexpr size_t MinBucketSize[SystemMemory::All] = {64, 512, 512, 512};
64-
6558
// The largest size which is allocated via the allocator.
6659
// Allocations with size > CutOff bypass the USM allocator and
6760
// go directly to the runtime.
@@ -72,46 +65,63 @@ static sycl::detail::SpinLock PoolLock;
7265

7366
static class SetLimits {
7467
public:
68+
// String names of memory types for printing in limits traces.
69+
static constexpr const char *MemTypeNames[MemType::All] = {
70+
"Host", "Device", "Shared", "SharedReadOnly"};
71+
7572
// Minimum allocation size that will be requested from the system.
7673
// By default this is the minimum allocation size of each memory type.
77-
size_t SlabMinSize[SystemMemory::All] = {};
74+
size_t SlabMinSize[MemType::All] = {};
7875

7976
// Allocations up to this limit will be subject to chunking/pooling
80-
size_t MaxPoolableSize[SystemMemory::All] = {};
77+
size_t MaxPoolableSize[MemType::All] = {};
8178

8279
// When pooling, each bucket will hold a max of 4 unfreed slabs
83-
size_t Capacity[SystemMemory::All] = {};
80+
size_t Capacity[MemType::All] = {};
81+
82+
// Holds the minimum bucket size valid for allocation of a memory type.
83+
size_t MinBucketSize[MemType::All] = {};
8484

8585
// Maximum memory left unfreed in pool
8686
size_t MaxPoolSize = 16_MB;
8787

8888
size_t CurPoolSize = 0;
89-
size_t CurPoolSizes[SystemMemory::All] = {0, 0, 0, 0};
89+
size_t CurPoolSizes[MemType::All] = {};
9090

9191
size_t EnableBuffers = 1;
9292

9393
// Whether to print pool usage statistics
9494
int PoolTrace = 0;
9595

9696
SetLimits() {
97+
// Buckets for Host use a minimum of the cache line size of 64 bytes.
98+
// This prevents two separate allocations residing in the same cache line.
99+
// Buckets for Device and Shared allocations will use starting size of 512.
100+
// This is because memory compression on newer GPUs makes the
101+
// minimum granularity 512 bytes instead of 64.
102+
MinBucketSize[MemType::Host] = 64;
103+
MinBucketSize[MemType::Device] = 512;
104+
MinBucketSize[MemType::Shared] = 512;
105+
MinBucketSize[MemType::SharedReadOnly] = 512;
106+
97107
// Initialize default pool settings.
98-
MaxPoolableSize[SystemMemory::Host] = 2_MB;
99-
Capacity[SystemMemory::Host] = 4;
100-
SlabMinSize[SystemMemory::Host] = 64_KB;
108+
MaxPoolableSize[MemType::Host] = 2_MB;
109+
Capacity[MemType::Host] = 4;
110+
SlabMinSize[MemType::Host] = 64_KB;
101111

102-
MaxPoolableSize[SystemMemory::Device] = 4_MB;
103-
Capacity[SystemMemory::Device] = 4;
104-
SlabMinSize[SystemMemory::Device] = 64_KB;
112+
MaxPoolableSize[MemType::Device] = 4_MB;
113+
Capacity[MemType::Device] = 4;
114+
SlabMinSize[MemType::Device] = 64_KB;
105115

106116
// Disable pooling of shared USM allocations.
107-
MaxPoolableSize[SystemMemory::Shared] = 0;
108-
Capacity[SystemMemory::Shared] = 0;
109-
SlabMinSize[SystemMemory::Shared] = 2_MB;
117+
MaxPoolableSize[MemType::Shared] = 0;
118+
Capacity[MemType::Shared] = 0;
119+
SlabMinSize[MemType::Shared] = 2_MB;
110120

111121
// Allow pooling of shared allocations that are only modified on host.
112-
MaxPoolableSize[SystemMemory::SharedReadOnly] = 4_MB;
113-
Capacity[SystemMemory::SharedReadOnly] = 4;
114-
SlabMinSize[SystemMemory::SharedReadOnly] = 64_KB;
122+
MaxPoolableSize[MemType::SharedReadOnly] = 4_MB;
123+
Capacity[MemType::SharedReadOnly] = 4;
124+
SlabMinSize[MemType::SharedReadOnly] = 2_MB;
115125

116126
// Parse optional parameters of this form:
117127
// SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=[EnableBuffers][;[MaxPoolSize][;memtypelimits]...]
@@ -180,47 +190,48 @@ static class SetLimits {
180190
return More;
181191
};
182192

183-
auto MemParser = [=](std::string &Params, SystemMemory::MemType M) {
193+
auto MemParser = [=](std::string &Params, MemType M) {
184194
bool ParamWasSet;
185-
SystemMemory::MemType LM = M;
186-
if (M == SystemMemory::All)
187-
LM = SystemMemory::Host;
195+
MemType LM = M;
196+
if (M == MemType::All)
197+
LM = MemType::Host;
188198

189199
bool More = ParamParser(Params, MaxPoolableSize[LM], ParamWasSet);
190-
if (ParamWasSet && M == SystemMemory::All) {
191-
MaxPoolableSize[SystemMemory::Shared] =
192-
MaxPoolableSize[SystemMemory::Device] =
193-
MaxPoolableSize[SystemMemory::Host];
200+
if (ParamWasSet && M == MemType::All) {
201+
MaxPoolableSize[MemType::Shared] = MaxPoolableSize[MemType::Device] =
202+
MaxPoolableSize[MemType::Host];
194203
}
195204
if (More) {
196205
More = ParamParser(Params, Capacity[LM], ParamWasSet);
197-
if (ParamWasSet && M == SystemMemory::All) {
198-
Capacity[SystemMemory::Shared] = Capacity[SystemMemory::Device] =
199-
Capacity[SystemMemory::Host];
206+
if (ParamWasSet && M == MemType::All) {
207+
Capacity[MemType::Shared] = Capacity[MemType::Device] =
208+
Capacity[MemType::Host];
200209
}
201210
}
202211
if (More) {
203212
ParamParser(Params, SlabMinSize[LM], ParamWasSet);
204-
if (ParamWasSet && M == SystemMemory::All) {
205-
SlabMinSize[SystemMemory::Shared] =
206-
SlabMinSize[SystemMemory::Device] =
207-
SlabMinSize[SystemMemory::Host];
213+
if (ParamWasSet && M == MemType::All) {
214+
SlabMinSize[MemType::Shared] = SlabMinSize[MemType::Device] =
215+
SlabMinSize[MemType::Host];
208216
}
209217
}
210218
};
211219

212220
auto MemTypeParser = [=](std::string &Params) {
213221
int Pos = 0;
214-
SystemMemory::MemType M = SystemMemory::All;
222+
MemType M = MemType::All;
215223
if (Params.compare(0, 5, "host:") == 0) {
216224
Pos = 5;
217-
M = SystemMemory::Host;
225+
M = MemType::Host;
218226
} else if (Params.compare(0, 7, "device:") == 0) {
219227
Pos = 7;
220-
M = SystemMemory::Device;
228+
M = MemType::Device;
221229
} else if (Params.compare(0, 7, "shared:") == 0) {
222230
Pos = 7;
223-
M = SystemMemory::Shared;
231+
M = MemType::Shared;
232+
} else if (Params.compare(0, 17, "read_only_shared:") == 0) {
233+
Pos = 17;
234+
M = MemType::SharedReadOnly;
224235
}
225236
if (Pos > 0)
226237
Params.erase(0, Pos);
@@ -274,20 +285,27 @@ static class SetLimits {
274285
return;
275286

276287
std::cout << "USM Pool Settings (Built-in or Adjusted by Environment "
277-
"Variable)\n";
288+
"Variable)"
289+
<< std::endl;
278290

279291
std::cout << std::setw(15) << "Parameter" << std::setw(12) << "Host"
280-
<< std::setw(12) << "Device" << std::setw(12) << "Shared"
281-
<< std::endl;
292+
<< std::setw(12) << "Device" << std::setw(12) << "Shared RW"
293+
<< std::setw(12) << "Shared RO" << std::endl;
282294
std::cout << std::setw(15) << "SlabMinSize" << std::setw(12)
283-
<< SlabMinSize[0] << std::setw(12) << SlabMinSize[1]
284-
<< std::setw(12) << SlabMinSize[2] << std::endl;
295+
<< SlabMinSize[MemType::Host] << std::setw(12)
296+
<< SlabMinSize[MemType::Device] << std::setw(12)
297+
<< SlabMinSize[MemType::Shared] << std::setw(12)
298+
<< SlabMinSize[MemType::SharedReadOnly] << std::endl;
285299
std::cout << std::setw(15) << "MaxPoolableSize" << std::setw(12)
286-
<< MaxPoolableSize[0] << std::setw(12) << MaxPoolableSize[1]
287-
<< std::setw(12) << MaxPoolableSize[2] << std::endl;
288-
std::cout << std::setw(15) << "Capacity" << std::setw(12) << Capacity[0]
289-
<< std::setw(12) << Capacity[1] << std::setw(12) << Capacity[2]
290-
<< std::endl;
300+
<< MaxPoolableSize[MemType::Host] << std::setw(12)
301+
<< MaxPoolableSize[MemType::Device] << std::setw(12)
302+
<< MaxPoolableSize[MemType::Shared] << std::setw(12)
303+
<< MaxPoolableSize[MemType::SharedReadOnly] << std::endl;
304+
std::cout << std::setw(15) << "Capacity" << std::setw(12)
305+
<< Capacity[MemType::Host] << std::setw(12)
306+
<< Capacity[MemType::Device] << std::setw(12)
307+
<< Capacity[MemType::Shared] << std::setw(12)
308+
<< Capacity[MemType::SharedReadOnly] << std::endl;
291309
std::cout << std::setw(15) << "MaxPoolSize" << std::setw(12) << MaxPoolSize
292310
<< std::endl;
293311
std::cout << std::setw(15) << "EnableBuffers" << std::setw(12)
@@ -299,9 +317,6 @@ static class SetLimits {
299317

300318
using namespace settings;
301319

302-
static const char *MemTypeNames[SystemMemory::All] = {
303-
"Host", "Device", "Shared", "SharedReadOnly"};
304-
305320
// Aligns the pointer down to the specified alignment
306321
// (e.g. returns 8 for Size = 13, Alignment = 8)
307322
static void *AlignPtrDown(void *Ptr, const size_t Alignment) {
@@ -468,7 +483,7 @@ class Bucket {
468483

469484
SystemMemory &getMemHandle();
470485

471-
SystemMemory::MemType getMemType();
486+
MemType getMemType();
472487

473488
USMAllocContext::USMAllocImpl &getUsmAllocCtx() { return OwnAllocCtx; }
474489

@@ -500,7 +515,7 @@ class Bucket {
500515
void updateStats(int InUse, int InPool);
501516

502517
// Print bucket statistics
503-
void printStats(bool &TitlePrinted, SystemMemory::MemType MT);
518+
void printStats(bool &TitlePrinted, MemType MT);
504519

505520
private:
506521
void onFreeChunk(Slab &, bool &ToPool);
@@ -534,7 +549,7 @@ class USMAllocContext::USMAllocImpl {
534549

535550
// Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff.
536551
// Powers of 2 and the value halfway between the powers of 2.
537-
auto Size1 = MinBucketSize[MemHandle->getMemType()];
552+
auto Size1 = USMSettings.MinBucketSize[MemHandle->getMemType()];
538553
auto Size2 = Size1 + Size1 / 2;
539554
for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) {
540555
Buckets.push_back(std::make_unique<Bucket>(Size1, *this));
@@ -559,7 +574,7 @@ class USMAllocContext::USMAllocImpl {
559574
};
560575

561576
void printStats(bool &TitlePrinted, size_t &HighBucketSize,
562-
size_t &HighPeakSlabsInUse, SystemMemory::MemType MT);
577+
size_t &HighPeakSlabsInUse, MemType MT);
563578

564579
private:
565580
Bucket &findBucket(size_t Size);
@@ -859,9 +874,7 @@ bool Bucket::CanPool(bool &ToPool) {
859874

860875
SystemMemory &Bucket::getMemHandle() { return OwnAllocCtx.getMemHandle(); }
861876

862-
SystemMemory::MemType Bucket::getMemType() {
863-
return getMemHandle().getMemType();
864-
}
877+
MemType Bucket::getMemType() { return getMemHandle().getMemType(); }
865878

866879
size_t Bucket::SlabMinSize() { return USMSettings.SlabMinSize[getMemType()]; }
867880

@@ -902,10 +915,10 @@ void Bucket::updateStats(int InUse, int InPool) {
902915
USMSettings.CurPoolSizes[getMemType()] += InPool * SlabAllocSize();
903916
}
904917

905-
void Bucket::printStats(bool &TitlePrinted, SystemMemory::MemType MT) {
918+
void Bucket::printStats(bool &TitlePrinted, MemType MT) {
906919
if (allocCount) {
907920
if (!TitlePrinted) {
908-
auto Label = MemTypeNames[MT];
921+
auto Label = USMSettings.MemTypeNames[MT];
909922
std::cout << Label << " memory statistics\n";
910923
std::cout << std::setw(14) << "Bucket Size" << std::setw(12) << "Allocs"
911924
<< std::setw(12) << "Frees" << std::setw(18)
@@ -1046,9 +1059,9 @@ void *USMAllocContext::allocate(size_t size) {
10461059

10471060
if (USMSettings.PoolTrace > 2) {
10481061
auto MT = pImpl->getMemHandle().getMemType();
1049-
std::cout << "Allocated " << std::setw(8) << size << " " << MemTypeNames[MT]
1050-
<< " USM bytes from " << (FromPool ? "Pool" : "USM") << " ->"
1051-
<< Ptr << std::endl;
1062+
std::cout << "Allocated " << std::setw(8) << size << " "
1063+
<< USMSettings.MemTypeNames[MT] << " USM bytes from "
1064+
<< (FromPool ? "Pool" : "USM") << " ->" << Ptr << std::endl;
10521065
}
10531066
return Ptr;
10541067
}
@@ -1059,9 +1072,10 @@ void *USMAllocContext::allocate(size_t size, size_t alignment) {
10591072

10601073
if (USMSettings.PoolTrace > 2) {
10611074
auto MT = pImpl->getMemHandle().getMemType();
1062-
std::cout << "Allocated " << std::setw(8) << size << " " << MemTypeNames[MT]
1063-
<< " USM bytes aligned at " << alignment << " from "
1064-
<< (FromPool ? "Pool" : "USM") << " ->" << Ptr << std::endl;
1075+
std::cout << "Allocated " << std::setw(8) << size << " "
1076+
<< USMSettings.MemTypeNames[MT] << " USM bytes aligned at "
1077+
<< alignment << " from " << (FromPool ? "Pool" : "USM") << " ->"
1078+
<< Ptr << std::endl;
10651079
}
10661080
return Ptr;
10671081
}
@@ -1072,12 +1086,14 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) {
10721086

10731087
if (USMSettings.PoolTrace > 2) {
10741088
auto MT = pImpl->getMemHandle().getMemType();
1075-
std::cout << "Freed " << MemTypeNames[MT] << " USM " << ptr << " to "
1076-
<< (ToPool ? "Pool" : "USM") << ", Current total pool size "
1077-
<< USMSettings.CurPoolSize << ", Current pool sizes ["
1078-
<< USMSettings.CurPoolSizes[SystemMemory::Host] << ", "
1079-
<< USMSettings.CurPoolSizes[SystemMemory::Device] << ", "
1080-
<< USMSettings.CurPoolSizes[SystemMemory::Shared] << "]\n";
1089+
std::cout << "Freed " << USMSettings.MemTypeNames[MT] << " USM " << ptr
1090+
<< " to " << (ToPool ? "Pool" : "USM")
1091+
<< ", Current total pool size " << USMSettings.CurPoolSize
1092+
<< ", Current pool sizes ["
1093+
<< USMSettings.CurPoolSizes[MemType::Host] << ", "
1094+
<< USMSettings.CurPoolSizes[MemType::Device] << ", "
1095+
<< USMSettings.CurPoolSizes[MemType::Shared] << ", "
1096+
<< USMSettings.CurPoolSizes[MemType::SharedReadOnly] << "]\n";
10811097
}
10821098
return;
10831099
}
@@ -1088,11 +1104,11 @@ USMAllocContext::~USMAllocContext() {
10881104
size_t HighBucketSize;
10891105
size_t HighPeakSlabsInUse;
10901106
if (USMSettings.PoolTrace > 1) {
1091-
SystemMemory::MemType MT = pImpl->getMemHandle().getMemType();
1107+
MemType MT = pImpl->getMemHandle().getMemType();
10921108
pImpl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT);
10931109
if (TitlePrinted) {
10941110
std::cout << "Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1095-
const char *Label = MemTypeNames[MT];
1111+
const char *Label = USMSettings.MemTypeNames[MT];
10961112
std::cout << "Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;"
10971113
<< std::string(1, tolower(*Label)) << std::string(Label + 1)
10981114
<< ":" << HighBucketSize << "," << HighPeakSlabsInUse << ",64K"
@@ -1104,7 +1120,7 @@ USMAllocContext::~USMAllocContext() {
11041120
void USMAllocContext::USMAllocImpl::printStats(bool &TitlePrinted,
11051121
size_t &HighBucketSize,
11061122
size_t &HighPeakSlabsInUse,
1107-
SystemMemory::MemType MT) {
1123+
MemType MT) {
11081124
HighBucketSize = 0;
11091125
HighPeakSlabsInUse = 0;
11101126
for (auto &B : Buckets) {

sycl/plugins/level_zero/usm_allocator.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111

1212
#include <memory>
1313

14+
enum MemType { Host, Device, Shared, SharedReadOnly, All };
15+
1416
// USM system memory allocation/deallocation interface.
1517
class SystemMemory {
1618
public:
17-
enum MemType { Host, Device, Shared, SharedReadOnly, All };
1819
virtual void *allocate(size_t size) = 0;
1920
virtual void *allocate(size_t size, size_t aligned) = 0;
2021
virtual void deallocate(void *ptr, bool OwnZeMemHandle) = 0;

0 commit comments

Comments
 (0)