Skip to content

Commit 28fe2d3

Browse files
cyyeverpytorchmergebot
authored andcommitted
Fix clang-tidy warnings on c10/xpu files (pytorch#169231)
This PR fixes clang-tidy warnings on c10/xpu files. Pull Request resolved: pytorch#169231 Approved by: https://github.com/guangyey, https://github.com/EikanWang, https://github.com/albanD
1 parent b870068 commit 28fe2d3

File tree

5 files changed

+44
-39
lines changed

5 files changed

+44
-39
lines changed

c10/xpu/XPUCachingAllocator.cpp

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct BlockPool {
3434

3535
std::set<Block*, Comparison> blocks;
3636
std::set<Block*, Comparison> unmapped;
37+
// NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
3738
const bool is_small;
3839
PrivatePool* owner_PrivatePool;
3940
};
@@ -63,19 +64,14 @@ struct Block {
6364
void* ptr)
6465
: device(device),
6566
queue(queue),
66-
stream_uses(),
6767
size(size),
6868
requested_size(0),
6969
pool(pool),
7070
ptr(ptr) {}
7171

7272
// constructor for search key
7373
Block(DeviceIndex device, sycl::queue* queue, size_t size)
74-
: device(device),
75-
queue(queue),
76-
stream_uses(),
77-
size(size),
78-
requested_size(0) {}
74+
: device(device), queue(queue), size(size), requested_size(0) {}
7975

8076
bool is_split() const {
8177
return (prev != nullptr) || (next != nullptr);
@@ -142,7 +138,8 @@ struct ExpandableSegment {
142138
// The extra 1/8 allows flexibility for remapping or moving pages within the
143139
// segment when unmapping earlier regions.
144140
constexpr float kVirtualMemOversubscriptFactor = 1.125f; // 1 + 1/8
145-
max_handles_ = numSegments(device_total * kVirtualMemOversubscriptFactor);
141+
max_handles_ = numSegments(static_cast<size_t>(
142+
static_cast<float>(device_total) * kVirtualMemOversubscriptFactor));
146143
ptr_ = sycl::ext::oneapi::experimental::reserve_virtual_mem(
147144
segment_size_ * max_handles_, xpu::get_device_context());
148145
}
@@ -168,15 +165,16 @@ struct ExpandableSegment {
168165
// Allocate and map physical memory for each segment.
169166
for (const auto i : c10::irange(begin, end)) {
170167
TORCH_INTERNAL_ASSERT(!handles_.at(i));
168+
auto& handle = handles_.at(i);
171169
try {
172170
// Allocate physical memory for each segment. Construct the physical_mem
173171
// in-place to avoid copies.
174-
handles_.at(i).emplace(
172+
auto& mem = handle.emplace(
175173
xpu::get_raw_device(device_),
176174
xpu::get_device_context(),
177175
segment_size_);
178176
// Map the allocated physical memory into the virtual address space.
179-
handles_.at(i).value().map(
177+
mem.map(
180178
ptr_ + i * segment_size_,
181179
segment_size_,
182180
sycl::ext::oneapi::experimental::address_access_mode::read_write);
@@ -187,13 +185,14 @@ struct ExpandableSegment {
187185
// Note: constructing physical_mem may over-subscribe device memory but
188186
// not immediately trigger OOM. The actual OOM can occur during map().
189187
// Roll back all segments allocated or mapped in this operation.
190-
handles_.at(i) = std::nullopt;
188+
handle.reset();
191189
for (const auto j : c10::irange(begin, i)) {
192190
sycl::ext::oneapi::experimental::unmap(
191+
// NOLINTNEXTLINE(performance-no-int-to-ptr)
193192
reinterpret_cast<void*>(ptr_ + segment_size_ * j),
194193
segment_size_,
195194
xpu::get_device_context());
196-
handles_.at(j) = std::nullopt;
195+
handles_.at(j).reset();
197196
}
198197
trimHandles();
199198
return rangeFromHandles(begin, begin);
@@ -245,6 +244,7 @@ struct ExpandableSegment {
245244
// ranges. Users must explicitly call unmap on all ranges before
246245
// destroying the physical_mem object.
247246
sycl::ext::oneapi::experimental::unmap(
247+
// NOLINTNEXTLINE(performance-no-int-to-ptr)
248248
reinterpret_cast<void*>(ptr_ + segment_size_ * i),
249249
segment_size_,
250250
xpu::get_device_context());
@@ -318,9 +318,9 @@ struct ExpandableSegment {
318318
size_t max_handles_{0};
319319
// Physical memory handles for the segments.
320320
std::vector<std::optional<sycl::ext::oneapi::experimental::physical_mem>>
321-
handles_{};
321+
handles_;
322322
// Peer devices on which this memory could be accessible, reserved.
323-
std::vector<c10::DeviceIndex> peers_{};
323+
std::vector<c10::DeviceIndex> peers_;
324324
};
325325

326326
struct AllocParams {
@@ -330,10 +330,7 @@ struct AllocParams {
330330
sycl::queue* queue,
331331
BlockPool* pool,
332332
size_t alloc_size)
333-
: search_key(device, queue, size),
334-
pool(pool),
335-
alloc_size(alloc_size),
336-
block(nullptr) {}
333+
: search_key(device, queue, size), pool(pool), alloc_size(alloc_size) {}
337334

338335
DeviceIndex device() const {
339336
return search_key.device;
@@ -350,7 +347,7 @@ struct AllocParams {
350347
Block search_key;
351348
BlockPool* pool;
352349
size_t alloc_size;
353-
Block* block;
350+
Block* block{nullptr};
354351
StatTypes stat_types = {};
355352
};
356353

@@ -987,7 +984,7 @@ class DeviceCachingAllocator {
987984
}
988985

989986
Block* alloc_found_block(
990-
AllocParams params,
987+
const AllocParams& params,
991988
size_t orig_size,
992989
bool split_remainder) {
993990
auto size = params.size();
@@ -1151,7 +1148,7 @@ class DeviceCachingAllocator {
11511148
" Please use `empty_cache` to release all unoccupied cached memory.");
11521149
}
11531150
bool split_remainder = should_split(params.block, params.size());
1154-
return alloc_found_block(std::move(params), orig_size, split_remainder);
1151+
return alloc_found_block(params, orig_size, split_remainder);
11551152
}
11561153

11571154
void free(Block* block) {
@@ -1254,7 +1251,8 @@ class DeviceCachingAllocator {
12541251
const auto device_total =
12551252
xpu::get_raw_device(device_index)
12561253
.get_info<sycl::info::device::global_mem_size>();
1257-
allowed_memory_maximum = static_cast<size_t>(fraction * device_total);
1254+
allowed_memory_maximum =
1255+
static_cast<size_t>(fraction * static_cast<double>(device_total));
12581256
set_fraction = true;
12591257
}
12601258

c10/xpu/XPUException.h

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,19 @@
55

66
namespace c10::xpu {
77

8-
static inline sycl::async_handler asyncHandler = [](sycl::exception_list el) {
9-
if (el.size() == 0) {
10-
return;
11-
}
12-
for (const auto& e : el) {
13-
try {
14-
std::rethrow_exception(e);
15-
} catch (sycl::exception& e) {
16-
TORCH_WARN("SYCL Exception: ", e.what());
17-
}
18-
}
19-
throw;
20-
};
8+
static inline sycl::async_handler asyncHandler =
9+
[](const sycl::exception_list& el) {
10+
if (el.size() == 0) {
11+
return;
12+
}
13+
for (const auto& e : el) {
14+
try {
15+
std::rethrow_exception(e);
16+
} catch (sycl::exception& e) {
17+
TORCH_WARN("SYCL Exception: ", e.what());
18+
}
19+
}
20+
throw;
21+
};
2122

2223
} // namespace c10::xpu

c10/xpu/XPUStream.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
#include <atomic>
77
#include <deque>
8-
#include <mutex>
98
#include <vector>
109

1110
namespace c10::xpu {
@@ -30,6 +29,7 @@ std::deque<
3029
std::array<std::atomic<uint32_t>, max_compile_time_stream_priorities>>
3130
priority_counters;
3231

32+
// NOLINTNEXTLINE(*c-arrays)
3333
thread_local std::unique_ptr<StreamId[]> current_streams = nullptr;
3434

3535
/*
@@ -174,6 +174,7 @@ void initXPUStreamsOnce() {
174174
// Inits current streams (thread local) to the last queue in the "normal
175175
// priority" queue pool. Note: the queue pool have not been initialized yet.
176176
// It will be initialized in initDeviceStreamState for the specified device.
177+
// NOLINTNEXTLINE(*c-arrays)
177178
current_streams = std::make_unique<StreamId[]>(num_gpus);
178179
for (const auto i : c10::irange(num_gpus)) {
179180
// Assigning the current stream to the last one in the pool can be
@@ -238,9 +239,11 @@ sycl::queue& XPUStream::queue() const {
238239
switch (st) {
239240
case StreamIdType::NORMAL:
240241
case StreamIdType::HIGH:
242+
// NOLINTNEXTLINE(performance-no-int-to-ptr)
241243
return *streams[device_index][static_cast<uint8_t>(st)][si];
242244
// See Note [External XPU Stream]
243245
case StreamIdType::EXT:
246+
// NOLINTNEXTLINE(performance-no-int-to-ptr)
244247
return *(reinterpret_cast<sycl::queue*>(stream_id));
245248
default:
246249
TORCH_CHECK(

c10/xpu/XPUStream.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class C10_XPU_API XPUStream {
4444
}
4545

4646
/// Construct a XPUStream from a Stream with no error checking.
47-
explicit XPUStream(Unchecked, Stream stream) : stream_(stream) {}
47+
explicit XPUStream(Unchecked /*unused*/, Stream stream) : stream_(stream) {}
4848

4949
bool operator==(const XPUStream& other) const noexcept {
5050
return unwrap() == other.unwrap();

c10/xpu/test/impl/XPUStreamTest.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ TEST(XPUStreamTest, CopyAndMoveTest) {
2020
return;
2121
}
2222

23-
int32_t device = -1;
23+
c10::DeviceIndex device = -1;
2424
sycl::queue queue;
2525
c10::xpu::XPUStream copyStream = c10::xpu::getStreamFromPool();
2626
{
@@ -119,8 +119,10 @@ TEST(XPUStreamTest, MultithreadStreamBehavior) {
119119

120120
c10::xpu::XPUStream cur_stream = c10::xpu::getCurrentXPUStream();
121121

122-
EXPECT_NE(cur_stream, *s0);
123-
EXPECT_NE(cur_stream, *s1);
122+
EXPECT_TRUE(s0);
123+
EXPECT_TRUE(s1);
124+
EXPECT_NE(cur_stream, s0);
125+
EXPECT_NE(cur_stream, s1);
124126
EXPECT_NE(s0, s1);
125127
}
126128

@@ -167,6 +169,7 @@ TEST(XPUStreamTest, StreamFunction) {
167169
}
168170

169171
constexpr int numel = 1024;
172+
// NOLINTNEXTLINE(*-c-arrays)
170173
int hostData[numel];
171174
initHostData(hostData, numel);
172175

0 commit comments

Comments
 (0)