@@ -55,13 +55,6 @@ constexpr auto operator""_GB(unsigned long long x) -> size_t {
55
55
return x * 1024 * 1024 * 1024 ;
56
56
}
57
57
58
- // Buckets for Host use a minimum of the cache line size of 64 bytes.
59
- // This prevents two separate allocations residing in the same cache line.
60
- // Buckets for Device and Shared allocations will use starting size of 512.
61
- // This is because memory compression on newer GPUs makes the
62
- // minimum granularity 512 bytes instead of 64.
63
- static constexpr size_t MinBucketSize[SystemMemory::All] = {64 , 512 , 512 , 512 };
64
-
65
58
// The largest size which is allocated via the allocator.
66
59
// Allocations with size > CutOff bypass the USM allocator and
67
60
// go directly to the runtime.
@@ -72,46 +65,63 @@ static sycl::detail::SpinLock PoolLock;
72
65
73
66
static class SetLimits {
74
67
public:
68
+ // String names of memory types for printing in limits traces.
69
+ static constexpr const char *MemTypeNames[MemType::All] = {
70
+ " Host" , " Device" , " Shared" , " SharedReadOnly" };
71
+
75
72
// Minimum allocation size that will be requested from the system.
76
73
// By default this is the minimum allocation size of each memory type.
77
- size_t SlabMinSize[SystemMemory ::All] = {};
74
+ size_t SlabMinSize[MemType ::All] = {};
78
75
79
76
// Allocations up to this limit will be subject to chunking/pooling
80
- size_t MaxPoolableSize[SystemMemory ::All] = {};
77
+ size_t MaxPoolableSize[MemType ::All] = {};
81
78
82
79
// When pooling, each bucket will hold a max of 4 unfreed slabs
83
- size_t Capacity[SystemMemory::All] = {};
80
+ size_t Capacity[MemType::All] = {};
81
+
82
+ // Holds the minimum bucket size valid for allocation of a memory type.
83
+ size_t MinBucketSize[MemType::All] = {};
84
84
85
85
// Maximum memory left unfreed in pool
86
86
size_t MaxPoolSize = 16_MB;
87
87
88
88
size_t CurPoolSize = 0 ;
89
- size_t CurPoolSizes[SystemMemory ::All] = {0 , 0 , 0 , 0 };
89
+ size_t CurPoolSizes[MemType ::All] = {};
90
90
91
91
size_t EnableBuffers = 1 ;
92
92
93
93
// Whether to print pool usage statistics
94
94
int PoolTrace = 0 ;
95
95
96
96
SetLimits () {
97
+ // Buckets for Host use a minimum of the cache line size of 64 bytes.
98
+ // This prevents two separate allocations residing in the same cache line.
99
+ // Buckets for Device and Shared allocations will use starting size of 512.
100
+ // This is because memory compression on newer GPUs makes the
101
+ // minimum granularity 512 bytes instead of 64.
102
+ MinBucketSize[MemType::Host] = 64 ;
103
+ MinBucketSize[MemType::Device] = 512 ;
104
+ MinBucketSize[MemType::Shared] = 512 ;
105
+ MinBucketSize[MemType::SharedReadOnly] = 512 ;
106
+
97
107
// Initialize default pool settings.
98
- MaxPoolableSize[SystemMemory ::Host] = 2_MB;
99
- Capacity[SystemMemory ::Host] = 4 ;
100
- SlabMinSize[SystemMemory ::Host] = 64_KB;
108
+ MaxPoolableSize[MemType ::Host] = 2_MB;
109
+ Capacity[MemType ::Host] = 4 ;
110
+ SlabMinSize[MemType ::Host] = 64_KB;
101
111
102
- MaxPoolableSize[SystemMemory ::Device] = 4_MB;
103
- Capacity[SystemMemory ::Device] = 4 ;
104
- SlabMinSize[SystemMemory ::Device] = 64_KB;
112
+ MaxPoolableSize[MemType ::Device] = 4_MB;
113
+ Capacity[MemType ::Device] = 4 ;
114
+ SlabMinSize[MemType ::Device] = 64_KB;
105
115
106
116
// Disable pooling of shared USM allocations.
107
- MaxPoolableSize[SystemMemory ::Shared] = 0 ;
108
- Capacity[SystemMemory ::Shared] = 0 ;
109
- SlabMinSize[SystemMemory ::Shared] = 2_MB;
117
+ MaxPoolableSize[MemType ::Shared] = 0 ;
118
+ Capacity[MemType ::Shared] = 0 ;
119
+ SlabMinSize[MemType ::Shared] = 2_MB;
110
120
111
121
// Allow pooling of shared allocations that are only modified on host.
112
- MaxPoolableSize[SystemMemory ::SharedReadOnly] = 4_MB;
113
- Capacity[SystemMemory ::SharedReadOnly] = 4 ;
114
- SlabMinSize[SystemMemory ::SharedReadOnly] = 64_KB ;
122
+ MaxPoolableSize[MemType ::SharedReadOnly] = 4_MB;
123
+ Capacity[MemType ::SharedReadOnly] = 4 ;
124
+ SlabMinSize[MemType ::SharedReadOnly] = 2_MB ;
115
125
116
126
// Parse optional parameters of this form:
117
127
// SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=[EnableBuffers][;[MaxPoolSize][;memtypelimits]...]
@@ -180,47 +190,48 @@ static class SetLimits {
180
190
return More;
181
191
};
182
192
183
- auto MemParser = [=](std::string &Params, SystemMemory:: MemType M) {
193
+ auto MemParser = [=](std::string &Params, MemType M) {
184
194
bool ParamWasSet;
185
- SystemMemory:: MemType LM = M;
186
- if (M == SystemMemory ::All)
187
- LM = SystemMemory ::Host;
195
+ MemType LM = M;
196
+ if (M == MemType ::All)
197
+ LM = MemType ::Host;
188
198
189
199
bool More = ParamParser (Params, MaxPoolableSize[LM], ParamWasSet);
190
- if (ParamWasSet && M == SystemMemory::All) {
191
- MaxPoolableSize[SystemMemory::Shared] =
192
- MaxPoolableSize[SystemMemory::Device] =
193
- MaxPoolableSize[SystemMemory::Host];
200
+ if (ParamWasSet && M == MemType::All) {
201
+ MaxPoolableSize[MemType::Shared] = MaxPoolableSize[MemType::Device] =
202
+ MaxPoolableSize[MemType::Host];
194
203
}
195
204
if (More) {
196
205
More = ParamParser (Params, Capacity[LM], ParamWasSet);
197
- if (ParamWasSet && M == SystemMemory ::All) {
198
- Capacity[SystemMemory ::Shared] = Capacity[SystemMemory ::Device] =
199
- Capacity[SystemMemory ::Host];
206
+ if (ParamWasSet && M == MemType ::All) {
207
+ Capacity[MemType ::Shared] = Capacity[MemType ::Device] =
208
+ Capacity[MemType ::Host];
200
209
}
201
210
}
202
211
if (More) {
203
212
ParamParser (Params, SlabMinSize[LM], ParamWasSet);
204
- if (ParamWasSet && M == SystemMemory::All) {
205
- SlabMinSize[SystemMemory::Shared] =
206
- SlabMinSize[SystemMemory::Device] =
207
- SlabMinSize[SystemMemory::Host];
213
+ if (ParamWasSet && M == MemType::All) {
214
+ SlabMinSize[MemType::Shared] = SlabMinSize[MemType::Device] =
215
+ SlabMinSize[MemType::Host];
208
216
}
209
217
}
210
218
};
211
219
212
220
auto MemTypeParser = [=](std::string &Params) {
213
221
int Pos = 0 ;
214
- SystemMemory:: MemType M = SystemMemory ::All;
222
+ MemType M = MemType ::All;
215
223
if (Params.compare (0 , 5 , " host:" ) == 0 ) {
216
224
Pos = 5 ;
217
- M = SystemMemory ::Host;
225
+ M = MemType ::Host;
218
226
} else if (Params.compare (0 , 7 , " device:" ) == 0 ) {
219
227
Pos = 7 ;
220
- M = SystemMemory ::Device;
228
+ M = MemType ::Device;
221
229
} else if (Params.compare (0 , 7 , " shared:" ) == 0 ) {
222
230
Pos = 7 ;
223
- M = SystemMemory::Shared;
231
+ M = MemType::Shared;
232
+ } else if (Params.compare (0 , 17 , " read_only_shared:" ) == 0 ) {
233
+ Pos = 17 ;
234
+ M = MemType::SharedReadOnly;
224
235
}
225
236
if (Pos > 0 )
226
237
Params.erase (0 , Pos);
@@ -274,20 +285,27 @@ static class SetLimits {
274
285
return ;
275
286
276
287
std::cout << " USM Pool Settings (Built-in or Adjusted by Environment "
277
- " Variable)\n " ;
288
+ " Variable)"
289
+ << std::endl;
278
290
279
291
std::cout << std::setw (15 ) << " Parameter" << std::setw (12 ) << " Host"
280
- << std::setw (12 ) << " Device" << std::setw (12 ) << " Shared"
281
- << std::endl;
292
+ << std::setw (12 ) << " Device" << std::setw (12 ) << " Shared RW "
293
+ << std::setw ( 12 ) << " Shared RO " << std:: endl;
282
294
std::cout << std::setw (15 ) << " SlabMinSize" << std::setw (12 )
283
- << SlabMinSize[0 ] << std::setw (12 ) << SlabMinSize[1 ]
284
- << std::setw (12 ) << SlabMinSize[2 ] << std::endl;
295
+ << SlabMinSize[MemType::Host] << std::setw (12 )
296
+ << SlabMinSize[MemType::Device] << std::setw (12 )
297
+ << SlabMinSize[MemType::Shared] << std::setw (12 )
298
+ << SlabMinSize[MemType::SharedReadOnly] << std::endl;
285
299
std::cout << std::setw (15 ) << " MaxPoolableSize" << std::setw (12 )
286
- << MaxPoolableSize[0 ] << std::setw (12 ) << MaxPoolableSize[1 ]
287
- << std::setw (12 ) << MaxPoolableSize[2 ] << std::endl;
288
- std::cout << std::setw (15 ) << " Capacity" << std::setw (12 ) << Capacity[0 ]
289
- << std::setw (12 ) << Capacity[1 ] << std::setw (12 ) << Capacity[2 ]
290
- << std::endl;
300
+ << MaxPoolableSize[MemType::Host] << std::setw (12 )
301
+ << MaxPoolableSize[MemType::Device] << std::setw (12 )
302
+ << MaxPoolableSize[MemType::Shared] << std::setw (12 )
303
+ << MaxPoolableSize[MemType::SharedReadOnly] << std::endl;
304
+ std::cout << std::setw (15 ) << " Capacity" << std::setw (12 )
305
+ << Capacity[MemType::Host] << std::setw (12 )
306
+ << Capacity[MemType::Device] << std::setw (12 )
307
+ << Capacity[MemType::Shared] << std::setw (12 )
308
+ << Capacity[MemType::SharedReadOnly] << std::endl;
291
309
std::cout << std::setw (15 ) << " MaxPoolSize" << std::setw (12 ) << MaxPoolSize
292
310
<< std::endl;
293
311
std::cout << std::setw (15 ) << " EnableBuffers" << std::setw (12 )
@@ -299,9 +317,6 @@ static class SetLimits {
299
317
300
318
using namespace settings ;
301
319
302
- static const char *MemTypeNames[SystemMemory::All] = {
303
- " Host" , " Device" , " Shared" , " SharedReadOnly" };
304
-
305
320
// Aligns the pointer down to the specified alignment
306
321
// (e.g. returns 8 for Ptr = 13, Alignment = 8)
307
322
static void *AlignPtrDown (void *Ptr , const size_t Alignment) {
@@ -468,7 +483,7 @@ class Bucket {
468
483
469
484
SystemMemory &getMemHandle ();
470
485
471
- SystemMemory:: MemType getMemType ();
486
+ MemType getMemType ();
472
487
473
488
USMAllocContext::USMAllocImpl &getUsmAllocCtx () { return OwnAllocCtx; }
474
489
@@ -500,7 +515,7 @@ class Bucket {
500
515
void updateStats (int InUse, int InPool);
501
516
502
517
// Print bucket statistics
503
- void printStats (bool &TitlePrinted, SystemMemory:: MemType MT);
518
+ void printStats (bool &TitlePrinted, MemType MT);
504
519
505
520
private:
506
521
void onFreeChunk (Slab &, bool &ToPool);
@@ -534,7 +549,7 @@ class USMAllocContext::USMAllocImpl {
534
549
535
550
// Generate buckets starting at the memory type's minimum bucket size, e.g. for a minimum of 64: 64, 96, 128, 192, ..., CutOff.
536
551
// Powers of 2 and the value halfway between the powers of 2.
537
- auto Size1 = MinBucketSize[MemHandle->getMemType ()];
552
+ auto Size1 = USMSettings. MinBucketSize [MemHandle->getMemType ()];
538
553
auto Size2 = Size1 + Size1 / 2 ;
539
554
for (; Size2 < CutOff; Size1 *= 2 , Size2 *= 2 ) {
540
555
Buckets.push_back (std::make_unique<Bucket>(Size1, *this ));
@@ -559,7 +574,7 @@ class USMAllocContext::USMAllocImpl {
559
574
};
560
575
561
576
void printStats (bool &TitlePrinted, size_t &HighBucketSize,
562
- size_t &HighPeakSlabsInUse, SystemMemory:: MemType MT);
577
+ size_t &HighPeakSlabsInUse, MemType MT);
563
578
564
579
private:
565
580
Bucket &findBucket (size_t Size );
@@ -859,9 +874,7 @@ bool Bucket::CanPool(bool &ToPool) {
859
874
860
875
// Forwards to the owning allocator context and returns its memory handle.
SystemMemory &Bucket::getMemHandle() {
  return OwnAllocCtx.getMemHandle();
}
861
876
862
- SystemMemory::MemType Bucket::getMemType () {
863
- return getMemHandle ().getMemType ();
864
- }
877
+ MemType Bucket::getMemType () { return getMemHandle ().getMemType (); }
865
878
866
879
// Looks up the configured minimum slab size for this bucket's memory type.
size_t Bucket::SlabMinSize() {
  const auto Type = getMemType();
  return USMSettings.SlabMinSize[Type];
}
867
880
@@ -902,10 +915,10 @@ void Bucket::updateStats(int InUse, int InPool) {
902
915
USMSettings.CurPoolSizes [getMemType ()] += InPool * SlabAllocSize ();
903
916
}
904
917
905
- void Bucket::printStats (bool &TitlePrinted, SystemMemory:: MemType MT) {
918
+ void Bucket::printStats (bool &TitlePrinted, MemType MT) {
906
919
if (allocCount) {
907
920
if (!TitlePrinted) {
908
- auto Label = MemTypeNames[MT];
921
+ auto Label = USMSettings. MemTypeNames [MT];
909
922
std::cout << Label << " memory statistics\n " ;
910
923
std::cout << std::setw (14 ) << " Bucket Size" << std::setw (12 ) << " Allocs"
911
924
<< std::setw (12 ) << " Frees" << std::setw (18 )
@@ -1046,9 +1059,9 @@ void *USMAllocContext::allocate(size_t size) {
1046
1059
1047
1060
if (USMSettings.PoolTrace > 2 ) {
1048
1061
auto MT = pImpl->getMemHandle ().getMemType ();
1049
- std::cout << " Allocated " << std::setw (8 ) << size << " " << MemTypeNames[MT]
1050
- << " USM bytes from " << (FromPool ? " Pool " : " USM" ) << " -> "
1051
- << Ptr << std::endl;
1062
+ std::cout << " Allocated " << std::setw (8 ) << size << " "
1063
+ << USMSettings. MemTypeNames [MT] << " USM bytes from "
1064
+ << (FromPool ? " Pool " : " USM " ) << " -> " << Ptr << std::endl;
1052
1065
}
1053
1066
return Ptr ;
1054
1067
}
@@ -1059,9 +1072,10 @@ void *USMAllocContext::allocate(size_t size, size_t alignment) {
1059
1072
1060
1073
if (USMSettings.PoolTrace > 2 ) {
1061
1074
auto MT = pImpl->getMemHandle ().getMemType ();
1062
- std::cout << " Allocated " << std::setw (8 ) << size << " " << MemTypeNames[MT]
1063
- << " USM bytes aligned at " << alignment << " from "
1064
- << (FromPool ? " Pool" : " USM" ) << " ->" << Ptr << std::endl;
1075
+ std::cout << " Allocated " << std::setw (8 ) << size << " "
1076
+ << USMSettings.MemTypeNames [MT] << " USM bytes aligned at "
1077
+ << alignment << " from " << (FromPool ? " Pool" : " USM" ) << " ->"
1078
+ << Ptr << std::endl;
1065
1079
}
1066
1080
return Ptr ;
1067
1081
}
@@ -1072,12 +1086,14 @@ void USMAllocContext::deallocate(void *ptr, bool OwnZeMemHandle) {
1072
1086
1073
1087
if (USMSettings.PoolTrace > 2 ) {
1074
1088
auto MT = pImpl->getMemHandle ().getMemType ();
1075
- std::cout << " Freed " << MemTypeNames[MT] << " USM " << ptr << " to "
1076
- << (ToPool ? " Pool" : " USM" ) << " , Current total pool size "
1077
- << USMSettings.CurPoolSize << " , Current pool sizes ["
1078
- << USMSettings.CurPoolSizes [SystemMemory::Host] << " , "
1079
- << USMSettings.CurPoolSizes [SystemMemory::Device] << " , "
1080
- << USMSettings.CurPoolSizes [SystemMemory::Shared] << " ]\n " ;
1089
+ std::cout << " Freed " << USMSettings.MemTypeNames [MT] << " USM " << ptr
1090
+ << " to " << (ToPool ? " Pool" : " USM" )
1091
+ << " , Current total pool size " << USMSettings.CurPoolSize
1092
+ << " , Current pool sizes ["
1093
+ << USMSettings.CurPoolSizes [MemType::Host] << " , "
1094
+ << USMSettings.CurPoolSizes [MemType::Device] << " , "
1095
+ << USMSettings.CurPoolSizes [MemType::Shared] << " , "
1096
+ << USMSettings.CurPoolSizes [MemType::SharedReadOnly] << " ]\n " ;
1081
1097
}
1082
1098
return ;
1083
1099
}
@@ -1088,11 +1104,11 @@ USMAllocContext::~USMAllocContext() {
1088
1104
size_t HighBucketSize;
1089
1105
size_t HighPeakSlabsInUse;
1090
1106
if (USMSettings.PoolTrace > 1 ) {
1091
- SystemMemory:: MemType MT = pImpl->getMemHandle ().getMemType ();
1107
+ MemType MT = pImpl->getMemHandle ().getMemType ();
1092
1108
pImpl->printStats (TitlePrinted, HighBucketSize, HighPeakSlabsInUse, MT);
1093
1109
if (TitlePrinted) {
1094
1110
std::cout << " Current Pool Size " << USMSettings.CurPoolSize << std::endl;
1095
- const char *Label = MemTypeNames[MT];
1111
+ const char *Label = USMSettings. MemTypeNames [MT];
1096
1112
std::cout << " Suggested Setting: SYCL_PI_LEVEL_ZERO_USM_ALLOCATOR=;"
1097
1113
<< std::string (1 , tolower (*Label)) << std::string (Label + 1 )
1098
1114
<< " :" << HighBucketSize << " ," << HighPeakSlabsInUse << " ,64K"
@@ -1104,7 +1120,7 @@ USMAllocContext::~USMAllocContext() {
1104
1120
void USMAllocContext::USMAllocImpl::printStats (bool &TitlePrinted,
1105
1121
size_t &HighBucketSize,
1106
1122
size_t &HighPeakSlabsInUse,
1107
- SystemMemory:: MemType MT) {
1123
+ MemType MT) {
1108
1124
HighBucketSize = 0 ;
1109
1125
HighPeakSlabsInUse = 0 ;
1110
1126
for (auto &B : Buckets) {
0 commit comments