From 759a92f398d9e6b93041c30a5a7bd82a95f65987 Mon Sep 17 00:00:00 2001
From: huafengchun
Date: Thu, 11 Jul 2024 08:02:12 +0000
Subject: [PATCH] use CPU rope operator to avoid bugs; revert after the NPU
 bug is fixed

---
 ggml/include/ggml.h              | 17 +++++++++++++++++
 ggml/src/ggml-cann/aclnn_ops.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ggml/src/ggml.c                  | 12 +-----------
 3 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 2e534f3f871102..eeeff0715903f4 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -2425,6 +2425,23 @@ extern "C" {
 
     GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
 
+    // Temporarily exposed so the CANN backend can call the CPU rope
+    // implementation directly; remove together with that fallback.
+    struct ggml_compute_params {
+        // ith = thread index, nth = number of threads
+        int ith, nth;
+
+        // work buffer for all threads
+        size_t wsize;
+        void * wdata;
+
+        struct ggml_compute_state_shared * shared;
+    };
+
+    void ggml_compute_forward_rope(
+            const struct ggml_compute_params * params,
+            struct ggml_tensor * dst);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
index 175949c2109c72..800a7b265c5798 100644
--- a/ggml/src/ggml-cann/aclnn_ops.cpp
+++ b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -2247,10 +2247,64 @@ static void aclnn_roll(ggml_backend_cann_context& ctx, aclTensor* acl_src,
 }
 
 void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+    // TODO: ROPE on the NPU has bugs that have not been tracked down yet.
+    // Fall back to the CPU implementation so the rest of the NPU backend
+    // can be exercised; revert this once the NPU kernel is fixed.
+    aclrtSynchronizeStream(ctx.stream());
+
     ggml_tensor* src0 = dst->src[0];  // input
     ggml_tensor* src1 = dst->src[1];  // position
     ggml_tensor* src2 = dst->src[2];  // freq_factors
 
+    // freq_factors (src2) is not copied to the host, so reject models
+    // that use it rather than let the CPU kernel read a device pointer.
+    GGML_ASSERT(src2 == NULL);
+
+    size_t src0_len = ggml_nbytes(src0);
+    size_t src1_len = ggml_nbytes(src1);
+    size_t dst_len  = ggml_nbytes(dst);
+    void* src0_host = malloc(src0_len);
+    void* src1_host = malloc(src1_len);
+    void* dst_host  = malloc(dst_len);
+
+    // Keep the device pointers so they can be restored afterwards.
+    void* src0_dev_ptr = src0->data;
+    void* src1_dev_ptr = src1->data;
+    void* dst_dev_ptr  = dst->data;
+
+    // Copy the inputs to the host and point the tensors at host memory.
+    aclrtMemcpy(src0_host, src0_len, src0_dev_ptr, src0_len, ACL_MEMCPY_DEVICE_TO_HOST);
+    aclrtMemcpy(src1_host, src1_len, src1_dev_ptr, src1_len, ACL_MEMCPY_DEVICE_TO_HOST);
+
+    src0->data = src0_host;
+    src1->data = src1_host;
+    dst->data  = dst_host;
+
+    // Run the CPU kernel single-threaded. ROPE does not use the work
+    // buffer, but initialize every field rather than leave garbage.
+    ggml_compute_params param1;
+    param1.ith    = 0;
+    param1.nth    = 1;
+    param1.wsize  = 102400;
+    param1.wdata  = malloc(param1.wsize);
+    param1.shared = nullptr;
+
+    ggml_compute_forward_rope(&param1, dst);
+
+    // Copy the result back and restore the device pointers.
+    aclrtMemcpy(dst_dev_ptr, dst_len, dst_host, dst_len, ACL_MEMCPY_HOST_TO_DEVICE);
+
+    src0->data = src0_dev_ptr;
+    src1->data = src1_dev_ptr;
+    dst->data  = dst_dev_ptr;
+
+    free(src0_host);
+    free(src1_host);
+    free(dst_host);
+    free(param1.wdata);
+
+    return;
+
     // TODO: with freq_factors
     GGML_ASSERT(src2 == NULL);
 
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index fd3321eb635026..3190b44b040d8a 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1754,16 +1754,6 @@ struct ggml_compute_state {
     struct ggml_compute_state_shared * shared;
 };
 
-struct ggml_compute_params {
-    // ith = thread index, nth = number of threads
-    int ith, nth;
-
-    // work buffer for all threads
-    size_t wsize;
-    void * wdata;
-
-    struct ggml_compute_state_shared * shared;
-};
 
 //
 // fundamental operations
@@ -14055,7 +14045,7 @@ static void ggml_compute_forward_rope_f16(
         }
     }
 }
 
-static void ggml_compute_forward_rope(
+void ggml_compute_forward_rope(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
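
Note: ggml_cann_rope above is one instance of a generic "borrow the CPU
kernel" pattern: synchronize the stream, copy the operands to the host,
swap the tensors' data pointers, run the CPU implementation, copy the
result back, and restore the pointers. The sketch below generalizes that
pattern. It is illustrative only: the helper name cann_run_op_on_cpu and
the compute_fn callback are assumptions made for this note (they exist
neither in ggml nor in the CANN backend), it assumes it lives in
aclnn_ops.cpp where the ACL and ggml headers are already included, and
ops other than rope may need a properly sized work buffer.

    #include <cstdlib>
    #include <vector>

    static void cann_run_op_on_cpu(ggml_backend_cann_context& ctx, ggml_tensor* dst,
            void (*compute_fn)(const struct ggml_compute_params*, ggml_tensor*)) {
        // Finish all pending device work before touching tensor memory.
        aclrtSynchronizeStream(ctx.stream());

        std::vector<void*> dev_ptrs;   // original device pointers, per source
        std::vector<void*> host_bufs;  // temporary host copies, per source

        // Copy every source tensor to the host and swap its data pointer.
        for (int i = 0; i < GGML_MAX_SRC && dst->src[i] != nullptr; i++) {
            ggml_tensor* src = dst->src[i];
            size_t len  = ggml_nbytes(src);
            void*  host = malloc(len);
            aclrtMemcpy(host, len, src->data, len, ACL_MEMCPY_DEVICE_TO_HOST);
            dev_ptrs.push_back(src->data);
            host_bufs.push_back(host);
            src->data = host;
        }

        // The output only needs a host buffer; the CPU kernel fills it.
        size_t dst_len = ggml_nbytes(dst);
        void*  dst_dev = dst->data;
        dst->data = malloc(dst_len);

        // Single-threaded params; rope ignores the work buffer, but other
        // ops may require a properly sized wsize/wdata.
        struct ggml_compute_params params = {};
        params.ith = 0;
        params.nth = 1;

        compute_fn(&params, dst);

        // Copy the result back to the device and restore every pointer.
        aclrtMemcpy(dst_dev, dst_len, dst->data, dst_len, ACL_MEMCPY_HOST_TO_DEVICE);
        free(dst->data);
        dst->data = dst_dev;
        for (size_t i = 0; i < host_bufs.size(); i++) {
            dst->src[i]->data = dev_ptrs[i];
            free(host_bufs[i]);
        }
    }

With such a helper, the body of ggml_cann_rope would reduce to a single
call, cann_run_op_on_cpu(ctx, dst, ggml_compute_forward_rope); the patch
keeps the expanded form so each step of the workaround stays visible.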