Skip to content

Commit

Permalink
Speed up aclnn matmul 10x by reusing a cached workspace buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
wangshuai09 committed Jul 5, 2024
1 parent 28b97e8 commit 34f957f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
9 changes: 6 additions & 3 deletions ggml/src/ggml-cann/aclnn_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1903,12 +1903,15 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
ACL_CHECK(aclnnMatmulGetWorkspaceSize(acl_input, acl_weight, acl_dst,
cube_math_type, &workspaceSize,
&executor));
if (workspaceSize > 0) {
workspaceAddr = ctx.alloc_buffer(bind_tensor, workspaceSize);
if (workspaceSize > ctx.aclnn_workspace_size) {
aclrtFree(ctx.aclnn_buffer);
ACL_CHECK(aclrtMalloc(&ctx.aclnn_buffer, workspaceSize,
ACL_MEM_MALLOC_HUGE_FIRST));
ctx.aclnn_workspace_size = workspaceSize;
}

aclrtStream main_stream = ctx.stream();
ACL_CHECK(aclnnMatmul(workspaceAddr, workspaceSize, executor,
ACL_CHECK(aclnnMatmul(ctx.aclnn_buffer, workspaceSize, executor,
main_stream));
}

Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-cann/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ struct ggml_backend_cann_context {
std::string name;
aclrtEvent copy_event = nullptr;

// Cached workspace reused across aclnn matmul calls instead of allocating a
// temporary buffer per call; it is reallocated only when a larger size is needed.
// Size in bytes of the allocation currently held in aclnn_buffer (0 = none yet).
uint64_t aclnn_workspace_size = 0;
// Must start as nullptr: aclrtFree is called on this pointer both when the
// workspace grows and when the context is torn down, which can happen before
// any allocation was ever made. Without an initializer that would free an
// indeterminate pointer.
void* aclnn_buffer = nullptr;

aclrtStream streams[GGML_CANN_MAX_STREAMS] = {{nullptr}};

// bind temp buffers to stream. Free after sync.
Expand All @@ -73,6 +76,7 @@ struct ggml_backend_cann_context {
GGML_ASSERT(buffers[i].size() == 0);
}
}
aclrtFree(aclnn_buffer);
}

void* alloc_buffer(ggml_tensor* dst, size_t size, int stream) {
Expand Down

0 comments on commit 34f957f

Please sign in to comment.