Skip to content

Commit 4d645d4

Browse files
authored
Merge pull request #320 from hc235280/support_ib_send_lat
Add support for DMA-buffers in Cambricon devices
2 parents f961e40 + 0490645 commit 4d645d4

File tree

6 files changed

+106
-2
lines changed

6 files changed

+106
-2
lines changed

configure.ac

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,9 @@ AS_IF([test "x$enable_mlu" = xyes], [
447447
[AC_MSG_ERROR([could not find cn_api.h in include path])])
448448
AC_SEARCH_LIBS([cnMalloc], [cndrv], [],
449449
[AC_MSG_ERROR([could not find library, cndrv])])
450+
AC_SEARCH_LIBS([cnMemGetHandleForAddressRange], [cndrv],
451+
[AC_DEFINE([HAVE_MLU_DMABUF], [1], [Enable MLU DMA buffers])],
452+
[])
450453
])
451454

452455
AM_CONDITIONAL([MLU], [test x$enable_mlu = xyes])

man/perftest.1

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,14 @@ many different options and modes.
389389
Not relevant for raw_ethernet_fs_rate.
390390
System support required.
391391
.TP
392+
.B --use_mlu=<mlu device id>
393+
Use MLU specific device for HW accelerator direct RDMA testing.
394+
System support required.
395+
.TP
396+
.B --use_mlu_dmabuf
397+
Use MLU DMA-BUF for HW accelerator direct RDMA testing.
398+
System support required.
399+
.TP
392400
.B --use_opencl=<opencl device id>
393401
Use OpenCl specific device for GPUDirect RDMA testing
394402
Not relevant for raw_ethernet_fs_rate.

src/mlu_memory.c

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct mlu_memory_ctx {
4343
int device_id;
4444
CNdev cnDevice;
4545
CNcontext cnContext;
46-
46+
bool use_dmabuf;
4747
};
4848

4949

@@ -129,6 +129,18 @@ int mlu_memory_init(struct memory_ctx *ctx) {
129129
return FAILURE;
130130
}
131131

132+
#ifdef HAVE_MLU_DMABUF
133+
if (mlu_ctx->use_dmabuf) {
134+
int is_supported = 0;
135+
136+
ERROR_CHECK(cnDeviceGetAttribute(&is_supported, CN_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED, mlu_ctx->cnDevice));
137+
if (!is_supported) {
138+
fprintf(stderr, "DMA-BUF is not supported on this MLU\n");
139+
return FAILURE;
140+
}
141+
}
142+
#endif
143+
132144
return SUCCESS;
133145
}
134146

@@ -144,8 +156,8 @@ int mlu_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t s
144156
uint64_t *dmabuf_offset, void **addr, bool *can_init) {
145157
CNresult error;
146158
size_t buf_size = (size + ACCEL_PAGE_SIZE - 1) & ~(ACCEL_PAGE_SIZE - 1);
147-
148159
CNaddr mlu_addr;
160+
149161
printf("cnMalloc() of a %lu bytes MLU buffer\n", size);
150162

151163
error = cnMalloc(&mlu_addr, buf_size);
@@ -157,6 +169,35 @@ int mlu_memory_allocate_buffer(struct memory_ctx *ctx, int alignment, uint64_t s
157169
printf("allocated %lu bytes of MLU buffer at %ld\n", (unsigned long)buf_size, mlu_addr);
158170
*addr = (void *)mlu_addr;
159171
*can_init = false;
172+
173+
#ifdef HAVE_MLU_DMABUF
174+
struct mlu_memory_ctx *mlu_ctx = container_of(ctx, struct mlu_memory_ctx, base);
175+
176+
{
177+
if (mlu_ctx->use_dmabuf) {
178+
CNaddr aligned_ptr;
179+
const size_t host_page_size = sysconf(_SC_PAGESIZE);
180+
uint64_t offset;
181+
size_t aligned_size;
182+
183+
// Round down to host page size
184+
aligned_ptr = mlu_addr & ~(host_page_size - 1);
185+
offset = mlu_addr - aligned_ptr;
186+
aligned_size = (size + offset + host_page_size - 1) & ~(host_page_size - 1);
187+
188+
printf("using DMA-BUF for MLU buffer address at %#lx aligned at %#lx with aligned size %zu\n", mlu_addr, aligned_ptr, aligned_size);
189+
*dmabuf_fd = 0;
190+
error = cnMemGetHandleForAddressRange((void *)dmabuf_fd, aligned_ptr, aligned_size, CN_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD, 0);
191+
if (error != CN_SUCCESS) {
192+
printf("cnMemGetHandleForAddressRange error=%d\n", error);
193+
return FAILURE;
194+
}
195+
196+
*dmabuf_offset = offset;
197+
}
198+
}
199+
#endif
200+
160201
return SUCCESS;
161202
}
162203

@@ -182,6 +223,14 @@ bool mlu_memory_supported() {
182223
return true;
183224
}
184225

226+
/*
 * Report whether this binary was built with MLU DMA-BUF support.
 *
 * This is a compile-time answer only: it returns true when HAVE_MLU_DMABUF
 * is defined and false otherwise. It does not query the device.
 *
 * The parameter list is spelled (void) so the definition is a real
 * prototype; with an empty list, pre-C23 compilers would silently accept
 * calls that pass arguments.
 */
bool mlu_memory_dmabuf_supported(void) {
#ifdef HAVE_MLU_DMABUF
	return true;
#else
	return false;
#endif
}
233+
185234
struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) {
186235
struct mlu_memory_ctx *ctx;
187236

@@ -194,6 +243,7 @@ struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) {
194243
ctx->base.copy_buffer_to_host = mlu_memory_copy_host_buffer;
195244
ctx->base.copy_buffer_to_buffer = mlu_memory_copy_buffer_to_buffer;
196245
ctx->device_id = params->mlu_device_id;
246+
ctx->use_dmabuf = params->use_mlu_dmabuf;
197247

198248
return &ctx->base;
199249
}

src/mlu_memory.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ struct perftest_parameters;
1616

1717
bool mlu_memory_supported();
1818

19+
bool mlu_memory_dmabuf_supported();
20+
1921
struct memory_ctx *mlu_memory_create(struct perftest_parameters *params);
2022

2123

@@ -25,6 +27,10 @@ inline bool mlu_memory_supported() {
2527
return false;
2628
}
2729

30+
/*
 * Stub variant of mlu_memory_dmabuf_supported(): always reports that MLU
 * DMA-BUF is unavailable.
 * NOTE(review): presumably this sits in the branch of the header used when
 * MLU support is compiled out - confirm against the surrounding #if/#else.
 *
 * Declared static inline rather than plain inline: in C99/C11 a plain
 * inline definition does not emit an external symbol, so any call the
 * compiler chooses not to inline (e.g. at -O0) would fail to link.
 */
static inline bool mlu_memory_dmabuf_supported(void) {
	return false;
}
33+
2834
inline struct memory_ctx *mlu_memory_create(struct perftest_parameters *params) {
2935
return NULL;
3036
}

src/perftest_parameters.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,11 @@ static void usage(const char *argv0, VerbType verb, TestType tst, int connection
668668
if (mlu_memory_supported()) {
669669
printf(" --use_mlu=<mlu device id>");
670670
printf(" Use selected MLU device for MLUDirect RDMA testing\n");
671+
672+
if (mlu_memory_dmabuf_supported()) {
673+
printf(" --use_mlu_dmabuf");
674+
printf(" Use DMA-BUF for HW accelerator direct RDMA testing\n");
675+
}
671676
}
672677

673678
if (opencl_memory_supported()) {
@@ -913,6 +918,7 @@ static void init_perftest_params(struct perftest_parameters *user_param)
913918
user_param->rocm_device_id = 0;
914919
user_param->neuron_core_id = 0;
915920
user_param->mlu_device_id = 0;
921+
user_param->use_mlu_dmabuf = 0;
916922
user_param->opencl_platform_id = 0;
917923
user_param->opencl_device_id = 0;
918924
user_param->gpu_touch = GPU_NO_TOUCH;
@@ -1897,6 +1903,18 @@ static void force_dependecies(struct perftest_parameters *user_param)
18971903
exit(1);
18981904
}
18991905

1906+
if (user_param->memory_type == MEMORY_MLU && user_param->tst == LAT && (user_param->verb == WRITE || user_param->verb == WRITE_IMM)) {
1907+
printf(RESULT_LINE);
1908+
fprintf(stderr,"Perftest supports MLU latency tests with read/send verbs only\n");
1909+
exit(1);
1910+
}
1911+
1912+
if (user_param->memory_type == MEMORY_MLU && (int)user_param->size <= user_param->inline_size) {
1913+
printf(RESULT_LINE);
1914+
fprintf(stderr,"Perftest doesn't support MLU tests with inline messages\n");
1915+
exit(1);
1916+
}
1917+
19001918
if (user_param->use_data_direct) {
19011919
user_param->use_cuda_pcie_mapping = 1;
19021920
}
@@ -2333,6 +2351,12 @@ static void ctx_set_max_inline(struct ibv_context *context,struct perftest_param
23332351
return;
23342352
}
23352353

2354+
if (user_param->memory_type == MEMORY_MLU){
2355+
user_param->inline_size = 0;
2356+
printf("Perftest doesn't supports MLU tests with inline messages: inline size set to 0\n");
2357+
return;
2358+
}
2359+
23362360
if (user_param->tst == LAT) {
23372361
switch(user_param->verb) {
23382362
case WRITE_IMM:
@@ -2434,6 +2458,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
24342458
static int use_neuron_dmabuf_flag = 0;
24352459
static int use_hl_flag = 0;
24362460
static int use_mlu_flag = 0;
2461+
static int use_mlu_dmabuf_flag = 0;
24372462
static int use_opencl_flag = 0;
24382463
static int opencl_platform_id_flag = 0;
24392464
static int gpu_touch_flag = 0;
@@ -2614,6 +2639,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
26142639
{ .name = "use_neuron_dmabuf", .has_arg = 0, .flag = &use_neuron_dmabuf_flag, .val = 1},
26152640
{ .name = "use_hl", .has_arg = 1, .flag = &use_hl_flag, .val = 1},
26162641
{ .name = "use_mlu", .has_arg = 1, .flag = &use_mlu_flag, .val = 1},
2642+
{ .name = "use_mlu_dmabuf", .has_arg = 0, .flag = &use_mlu_dmabuf_flag, .val = 1},
26172643
{ .name = "use_opencl", .has_arg = 1, .flag = &use_opencl_flag, .val = 1},
26182644
{ .name = "opencl_platform_id", .has_arg = 1, .flag = &opencl_platform_id_flag, .val = 1},
26192645
{ .name = "gpu_touch", .has_arg = 1, .flag = &gpu_touch_flag, .val = 1},
@@ -3061,6 +3087,7 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
30613087
(use_neuron_dmabuf_flag && !neuron_memory_dmabuf_supported()) ||
30623088
(use_hl_flag && !hl_memory_supported()) ||
30633089
(use_mlu_flag && !mlu_memory_supported()) ||
3090+
(use_mlu_dmabuf_flag && !mlu_memory_dmabuf_supported()) ||
30643091
(use_opencl_flag && !opencl_memory_supported())) {
30653092
printf(" Unsupported memory type\n");
30663093
return FAILURE;
@@ -3175,6 +3202,15 @@ int parser(struct perftest_parameters *user_param,char *argv[], int argc)
31753202
user_param->memory_create = mlu_memory_create;
31763203
use_mlu_flag = 0;
31773204
}
3205+
if (use_mlu_dmabuf_flag) {
3206+
user_param->use_mlu_dmabuf = 1;
3207+
if (user_param->memory_type != MEMORY_MLU) {
3208+
fprintf(stderr, "MLU DMA-BUF cannot be used without MLU device\n");
3209+
free(duplicates_checker);
3210+
return FAILURE;
3211+
}
3212+
use_mlu_dmabuf_flag = 0;
3213+
}
31783214

31793215
if (use_opencl_flag) {
31803216
CHECK_VALUE_NON_NEGATIVE(user_param->opencl_device_id,int,"OPENCL device",not_int_ptr);

src/perftest_parameters.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,7 @@ struct perftest_parameters {
599599
int use_neuron_dmabuf;
600600
char *hl_device_bus_id;
601601
int mlu_device_id;
602+
int use_mlu_dmabuf;
602603
int opencl_platform_id;
603604
int opencl_device_id;
604605
int gpu_touch;

0 commit comments

Comments
 (0)