
Commit 2258c1e

Chamberlain0w0 authored and YdrMaster committed

fix: resolve some warnings and move the sync operations out of the operators
1 parent 8bde8c1 commit 2258c1e
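The same pattern repeats across every kernel below: the lambda a kernel's lower() returns now only enqueues work on the CNNL queue, and whoever consumes the outputs (in this commit, the tests) synchronizes explicitly. A minimal sketch of the before/after shape; cnnlSomeOp is a stand-in for whichever CNNL call a given kernel makes, not a real API:

// Before this commit: every routine drained the queue itself.
auto routineBefore = [d](Resources &res, void *workspace,
                         void const *const *inputs, void *const *outputs) {
    CNNL_ASSERT(cnnlSomeOp(/* ... */));// enqueue the op
    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));// block here
};

// After: the routine returns as soon as the work is enqueued;
// the caller syncs once, right before it reads the outputs back.
auto routineAfter = [d](Resources &res, void *workspace,
                        void const *const *inputs, void *const *outputs) {
    CNNL_ASSERT(cnnlSomeOp(/* ... */));// enqueue only
};
// ...at the call site, e.g. in the tests:
kernel::bang::sync();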

File tree

35 files changed: +67 -65 lines changed

src/04kernel/src/kernels/batch_normalization/cnnl_kernel.cc

Lines changed: 2 additions & 3 deletions

@@ -128,8 +128,8 @@ namespace refactor::kernel {
     auto y = outputs[0];

     void *xTrans = workspace;
-    void *yTrans = xTrans + xTransSize;
-    void *cursor = yTrans + xTransSize;
+    void *yTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
+    void *cursor = reinterpret_cast<uint8_t *>(yTrans) + xTransSize;

     // transpose NCHW input to NHWC
     CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->inDesc, x,
@@ -147,7 +147,6 @@ namespace refactor::kernel {
     CNNL_ASSERT(cnnlTranspose_v2(handle, d->NHWC2NCHW, d->inDescTrans, yTrans,
                                  d->inDesc, y, cursor, workspaceSize));

-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };

 return {std::move(routine), totalWorkspaceSize};
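The reinterpret_cast<uint8_t *> edits here (and in the conv and pool kernels below) fix one family of the warnings: arithmetic on void * is a GNU extension, not standard C++, so the workspace is now carved up through a byte-typed pointer. A self-contained sketch of the pattern, with an illustrative size rather than the commit's real values:

#include <cstddef>
#include <cstdint>

void *partition(void *workspace) {
    constexpr std::size_t xTransSize = 1024;// illustrative size, not from the commit
    void *xTrans = workspace;
    // void *yTrans = xTrans + xTransSize;  // non-standard: arithmetic on void*
    void *yTrans = reinterpret_cast<std::uint8_t *>(xTrans) + xTransSize;// well-defined byte offset
    return yTrans;
}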

src/04kernel/src/kernels/cast/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -65,7 +65,6 @@ namespace refactor::kernel {
 return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
     CNNL_ASSERT(cnnlCastDataType(res.fetchOrStore<CnnlContext>()->handle,
                                  d->inDesc, inputs[0], d->cast, d->outDesc, outputs[0]));
-    // BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };
 }

src/04kernel/src/kernels/clip/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -57,7 +57,6 @@ namespace refactor::kernel {
         CNNL_POINTER_MODE_DEVICE, d->t,
         inputs[0], inputs[1], hasMax ? inputs[2] : nullptr,
         d->t, outputs[0]));
-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };
 }

src/04kernel/src/kernels/concat/cnnl_kernel.cc

Lines changed: 2 additions & 2 deletions

@@ -52,7 +52,7 @@ namespace refactor::kernel {
     }
     ~Descriptors() noexcept(false) {
         CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
-        for (auto i = 0; i < out.size(); i++) {
+        for (size_t i = 0; i < out.size(); i++) {
             CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
         }
     }
@@ -62,7 +62,7 @@ namespace refactor::kernel {
 };
 auto d = std::make_shared<Descriptors>(info.num, info.dataType != DT::F64);
 setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
-for (auto i = 0; i < info.outDims.size(); i++) {
+for (size_t i = 0; i < info.outDims.size(); i++) {
     setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
 }
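The loop-index changes in this file (and in the slice and split kernels and the gather test below) fix the other warning family, -Wsign-compare: auto i = 0 deduces int, which is then compared against the unsigned size_t that std::vector::size() returns. A minimal sketch of the warning and the fix:

#include <cstddef>
#include <vector>

void zeroAll(std::vector<int> &out) {
    // for (auto i = 0; i < out.size(); i++)  // warns: signed int vs unsigned size_t
    for (std::size_t i = 0; i < out.size(); i++) {// index type matches size()
        out[i] = 0;
    }
}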

src/04kernel/src/kernels/conv/cnnl_kernel.cc

Lines changed: 3 additions & 3 deletions

@@ -209,9 +209,9 @@ namespace refactor::kernel {
     // }

     void *xTrans = workspace;
-    void *wTrans = xTrans + xTransSize;
-    void *yTrans = wTrans + wTransSize;
-    void *opWorkspace = yTrans + yTransSize;
+    void *wTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
+    void *yTrans = reinterpret_cast<uint8_t *>(wTrans) + wTransSize;
+    void *opWorkspace = reinterpret_cast<uint8_t *>(yTrans) + yTransSize;

     // transpose NCHW input to NHWC
     CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->x, x,

src/04kernel/src/kernels/expand/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -60,7 +60,6 @@ namespace refactor::kernel {
 return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
     CNNL_ASSERT(cnnlExpand(res.fetchOrStore<CnnlContext>()->handle,
                            d->inDesc, inputs[0], d->outDesc, outputs[0]));
-    // BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };
 }
 #endif

src/04kernel/src/kernels/gather/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -79,7 +79,6 @@ namespace refactor::kernel {
         d->inDesc, inputs[0], reinterpret_cast<const int *>(workspace),
         d->indexDesc, reinterpret_cast<const int *>(inputs[1]),
         d->outDesc, outputs[0]));
-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };

 return {std::move(routine), workspaceSize};

src/04kernel/src/kernels/mat_mul/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -141,7 +141,6 @@ namespace refactor::kernel {
         workspace, algoWorkspaceSize));
     }

-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };

 return {std::move(routine), algoWorkspaceSize};

src/04kernel/src/kernels/pool/cnnl_kernel.cc

Lines changed: 2 additions & 2 deletions

@@ -130,7 +130,7 @@ namespace refactor::kernel {
     auto handle = res.fetchOrStore<CnnlContext>()->handle;

     void *extraInputDev = workspace;
-    void *poolWorkSpace = workspace + extraInputSize;
+    void *poolWorkSpace = reinterpret_cast<uint8_t *>(workspace) + extraInputSize;

     void *extraInputHost = malloc(extraInputSize);
     CNNL_ASSERT(cnnlInitPoolingExtraInput(handle, d->pooling, d->x, d->y, extraInputHost));
@@ -145,7 +145,7 @@ namespace refactor::kernel {
         &b, extraInputDev, d->y, outputs[0],
         poolWorkSpace, workspaceSize));

-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
+    res.fetchOrStore<CnnlContext>()->queueSync();

     free(extraInputHost);
 };
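Pooling is the one kernel that keeps a synchronization, now through the context's queueSync() wrapper rather than a raw cnrtQueueSync. The likely reason, inferred from the surrounding code rather than stated in the commit: the routine frees the host-side extraInputHost staging buffer before returning, so it must not return while enqueued work may still depend on data staged from that buffer. A sketch of the constraint; the async helpers are illustrative stand-ins, not real CNRT calls:

#include <cstddef>
#include <cstdlib>

// Stand-ins for the real CNNL/CNRT calls; the names are illustrative only.
void copyToDeviceAsync(void *dst, void const *src, std::size_t n);
void enqueuePoolingAsync(void *extraInputDev);
void queueSync();

void runPooling(void *extraInputDev, std::size_t extraInputSize) {
    void *extraInputHost = std::malloc(extraInputSize);// host staging buffer
    copyToDeviceAsync(extraInputDev, extraInputHost, extraInputSize);// enqueued, not yet done
    enqueuePoolingAsync(extraInputDev);
    queueSync();              // drain the queue while extraInputHost is still alive
    std::free(extraInputHost);// safe only after the sync
}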

src/04kernel/src/kernels/simple_binary/binary_cnnl.cc

Lines changed: 0 additions & 1 deletion

@@ -180,7 +180,6 @@ namespace refactor::kernel {
         workspace, workspaceSize));
     }

-    BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
 };

 return {std::move(routine), workspaceSize};

src/04kernel/src/kernels/slice/cnnl_kernel.cc

Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ namespace refactor::kernel {
 CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));
 CNNL_ASSERT(cnnlSetTensorDescriptor(d->out, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDim.size(), info.outDim.data()));
 std::vector<int> begin, end, stride;
-for (auto i = 0; i < info.dims.size(); i++) {
+for (size_t i = 0; i < info.dims.size(); i++) {
     // [begin, end), end is not inclued
     begin.push_back(info.dims[i].start);
     auto sign = info.dims[i].step > 0 ? 1 : -1;

src/04kernel/src/kernels/softmax/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -80,7 +80,6 @@ namespace refactor::kernel {
         CNNL_COMPUTATION_ULTRAHIGH_PRECISION,
         &a, d->t, inputs[0],
         &b, d->t, outputs[0]));
-    res.fetchOrStore<CnnlContext>()->queueSync();
 };
 }

src/04kernel/src/kernels/split/cnnl_kernel.cc

Lines changed: 2 additions & 2 deletions

@@ -69,7 +69,7 @@ namespace refactor::kernel {
     }
     ~Descriptors() noexcept(false) {
         CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
-        for (auto i = 0; i < out.size(); i++) {
+        for (size_t i = 0; i < out.size(); i++) {
             CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
         }
     }
@@ -81,7 +81,7 @@ namespace refactor::kernel {
 // setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
 CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));

-for (auto i = 0; i < info.outDims.size(); i++) {
+for (size_t i = 0; i < info.outDims.size(); i++) {
     // setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
     CNNL_ASSERT(cnnlSetTensorDescriptor(d->out[i], CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDims[i].size(), info.outDims[i].data()));
 }

src/04kernel/src/kernels/where/cnnl_kernel.cc

Lines changed: 0 additions & 1 deletion

@@ -102,7 +102,6 @@ namespace refactor::kernel {
         d->y, inputs[2], workspace, workspaceSize,
         d->ans, outputs[0]));

-    res.fetchOrStore<CnnlContext>()->queueSync();
 };

 return {std::move(routine), workspaceSize};

src/04kernel/src/utilities/bang/cnrt_functions.cc

Lines changed: 2 additions & 2 deletions

@@ -4,7 +4,7 @@
 #include <cnrt.h>
 #include <cstdio>

-namespace refactor::kernel::cnnl {
+namespace refactor::kernel::bang {

     int currentDevice() {
         int device;
@@ -22,6 +22,6 @@ namespace refactor::kernel::cnnl {
                              CNRT_MEM_TRANS_DIR_DEV2HOST));
     }

-}// namespace refactor::kernel::cnnl
+}// namespace refactor::kernel::bang

 #endif

src/04kernel/src/utilities/bang/cnrt_functions.h

Lines changed: 2 additions & 2 deletions

@@ -3,14 +3,14 @@

 #include "common.h"

-namespace refactor::kernel::cnnl {
+namespace refactor::kernel::bang {

     int currentDevice();

     void sync();

     void copyOut(void *dst, const void *src, size_t size);

-}// namespace refactor::kernel::cnnl
+}// namespace refactor::kernel::bang

 #endif// KERNEL_CNRT_FUNCTIONS_H
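With the namespace renamed to match the directory, kernel::bang::sync() (declared above; its body is untouched by this commit and not shown here) becomes the helper every test below calls between running a routine and copying results off the device. The pattern, sketched with names taken from the test files:

void const *inputs[]{*mluIn};
void *outputs[]{*mluOut};
routine(res, *workspace, inputs, outputs);// enqueue only; returns immediately
kernel::bang::sync();                     // wait for the MLU queue to drain
// only now is it safe to copyToHost() the outputs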

src/04kernel/test/kernels/batch_normalization/test_cnnl.cpp

Lines changed: 2 additions & 0 deletions

@@ -2,6 +2,7 @@

 #include "../../../src/kernels/batch_normalization/cnnl_kernel.hh"
 #include "../../../src/kernels/batch_normalization/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>

@@ -57,6 +58,7 @@ TEST(kernel, BatchNormalizationCnnl) {
     void const *inputs[]{*mluIn, *mluScale, *mluBias, *mluMean, *mluVar};
     void *outputs[]{*mluOut};
     rMlu(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 // take output data
 std::vector<float> result(outTensor->elementsSize());

src/04kernel/test/kernels/cast/test_cnnl.cpp

Lines changed: 3 additions & 1 deletion

@@ -1,7 +1,8 @@
 #ifdef USE_BANG

-#include "../../../src/kernels/cast/cpu_kernel.hh"
 #include "../../../src/kernels/cast/cnnl_kernel.hh"
+#include "../../../src/kernels/cast/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>
 #include <numeric>
@@ -34,6 +35,7 @@ TEST(kernel, CastCnnl) {
     void const *inputs[]{*xMlu};
     void *outputs[]{*yMlu};
     routine(res, nullptr, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{x_.data()};

src/04kernel/test/kernels/clip/test_cnnl.cpp

Lines changed: 3 additions & 1 deletion

@@ -1,7 +1,8 @@
 #ifdef USE_BANG

-#include "../../../src/kernels/clip/cpu_kernel.hh"
 #include "../../../src/kernels/clip/cnnl_kernel.hh"
+#include "../../../src/kernels/clip/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>
 #include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ClipCnnl) {
     void const *inputs[]{*mluMem, *mluMin, *mluMax};
     void *outputs[]{*mluMem};
     routine(res, nullptr, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{value.data(), &min, &max};

src/04kernel/test/kernels/concat/test_cnnl.cpp

Lines changed: 3 additions & 1 deletion

@@ -1,7 +1,8 @@
 #ifdef USE_BANG

-#include "../../../src/kernels/concat/cpu_kernel.hh"
 #include "../../../src/kernels/concat/cnnl_kernel.hh"
+#include "../../../src/kernels/concat/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>
 #include <numeric>
@@ -65,6 +66,7 @@ TEST(kernel, ConcatCnnl) {
     void const *inputs[]{*mluIns[0], *mluIns[1], *mluIns[2], *mluIns[3]};
     void *outputs[]{*mluOut};
     routine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{cpuIns[0].data(), cpuIns[1].data(), cpuIns[2].data(), cpuIns[3].data()};

src/04kernel/test/kernels/conv/test_cnnl.cpp

Lines changed: 2 additions & 9 deletions

@@ -1,6 +1,7 @@
 #ifdef USE_BANG

 #include "../../../src/kernels/conv/cnnl_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>
 #include <numeric>
@@ -39,15 +40,7 @@ void testConvCnnl(int rank, const int64_t *pads, const int64_t *strides, const i
     void const *inputs[]{*xMlu, *wMlu};
     void *outputs[]{*yMlu};
     routine(res, *workspace, inputs, outputs);
-
-    xMlu->copyToHost(xData.data(), xTensor->bytesSize());
-    wMlu->copyToHost(wData.data(), wTensor->bytesSize());
-    // fmt::println("{}", vec2str(xData));
-    // fmt::println("{}", vec2str(wData));
-
-    // std::vector<float> ws(workspaceSize);
-    // workspace->copyToHost(ws.data(), workspaceSize);
-    // fmt::println("{}", vec2str(ws));
+    kernel::bang::sync();

     // take output data
     std::vector<float> result(yTensor->elementsSize());

src/04kernel/test/kernels/expand/test_cnnl.cpp

Lines changed: 2 additions & 0 deletions

@@ -2,6 +2,7 @@

 #include "../../../src/kernels/expand/cnnl_kernel.hh"
 #include "../../../src/kernels/expand/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>
 #include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ExpandCnnl) {
     void const *inputs[]{*mluIn};
     void *outputs[]{*mluOut};
     routine(res, nullptr, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{data.data()};

src/04kernel/test/kernels/gather/test_gather_cnnl.cpp

Lines changed: 5 additions & 1 deletion

@@ -2,6 +2,7 @@

 #include "../src/kernels/gather/cnnl_kernel.hh"
 #include "../src/kernels/gather/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>

@@ -39,6 +40,7 @@ TEST(kernel, GatherCnnl) {
     void const *inputs[]{*aMLU, *bMLU};
     void *outputs[]{*cMLU};
     cnnlRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{a.data(), b.data()};
@@ -81,6 +83,7 @@ TEST(kernel, GatherCnnl) {
     void const *inputs[]{*aMLU, *bMLU};
     void *outputs[]{*cMLU};
     cnnlRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{a.data(), b.data()};
@@ -110,7 +113,7 @@ TEST(kernel, GatherCnnl) {
     auto cpuRoutine = cpuKernel->lower(res).routine;
     // Init inputs and outputs
     std::vector<float> a;
-    for (auto i = 0; i < data->elementsSize(); i++) {
+    for (size_t i = 0; i < data->elementsSize(); i++) {
         a.push_back(i + 0.1f);
     }
     std::vector<int64_t> b(indices->elementsSize(), 0);
@@ -126,6 +129,7 @@ TEST(kernel, GatherCnnl) {
     void const *inputs[]{*aMLU, *bMLU};
     void *outputs[]{*cMLU};
     cnnlRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{a.data(), b.data()};

src/04kernel/test/kernels/mat_mul/test_cnnl.cpp

Lines changed: 5 additions & 0 deletions

@@ -2,6 +2,7 @@

 #include "../src/kernels/mat_mul/cnnl_kernel.hh"
 #include "../src/kernels/mat_mul/cpu_kernel.hh"
+#include "../src/utilities/bang/cnrt_functions.h"
 #include "hardware/device_manager.h"
 #include <gtest/gtest.h>

@@ -48,6 +49,7 @@ TEST(kernel, MatMulCnnl_OnlyBias) {
     void const *inputs[]{*ma, *mb, *mc};
     void *outputs[]{*my};
     routine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
     // take output data
     std::vector<float> result(Y->elementsSize());
     my->copyToHost(result.data(), Y->bytesSize());
@@ -91,6 +93,7 @@ TEST(kernel, MatMulCnnl_Broadcast) {
     void const *inputs[]{*ma, *mb, *mc};
     void *outputs[]{*my};
     mluRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};
@@ -135,6 +138,7 @@ TEST(kernel, MatMulCnnl_TransABNoBias) {
     void const *inputs[]{*ma, *mb};
     void *outputs[]{*my};
     mluRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{dataA.data(), dataB.data()};
@@ -189,6 +193,7 @@ TEST(kernel, MatMulCnnl_Large) {
     void const *inputs[]{*ma, *mb, *mc};
     void *outputs[]{*my};
     mluRoutine(res, *workspace, inputs, outputs);
+    kernel::bang::sync();
 }
 {
     void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};
