Skip to content

Commit

Permalink
add q8_t transform
Browse files Browse the repository at this point in the history
  • Loading branch information
hipudding committed Apr 26, 2024
1 parent 0c159d8 commit 9461051
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 3 deletions.
71 changes: 68 additions & 3 deletions ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor, cons
GGML_ASSERT(tensor->extra == nullptr);
GGML_ASSERT(tensor->op == GGML_OP_NONE);

void *buffer_host;
size_t n_bytes = ggml_nbytes(tensor);
int64_t n_elems = ggml_nelements(tensor);
int64_t groups = n_elems / QK4_0;
Expand Down Expand Up @@ -176,7 +175,6 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0(const ggml_tensor* t
GGML_ASSERT(tensor->extra == nullptr);
GGML_ASSERT(tensor->op == GGML_OP_NONE);

void *buffer_host;
size_t n_bytes = ggml_nbytes(tensor);
int64_t n_elems = ggml_nelements(tensor);
int64_t groups = n_elems / QK4_0;
Expand Down Expand Up @@ -206,12 +204,66 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0(const ggml_tensor* t
}
}

// Number of quantized elements per q8_0 group.
#define QK8_0 32
// One q8_0 quantization group as stored on the host side: a per-group scale
// ("delta") kept as raw fp16 bits in a uint16_t, followed by QK8_0 signed
// 8-bit quantized values. Layout must match the ggml q8_0 block format.
typedef struct {
    uint16_t d;        // delta (per-group scale, raw fp16 bits)
    int8_t qs[QK8_0];  // quants (QK8_0 signed 8-bit values)
} block_q8_0;

/**
 * Repacks a q8_0 tensor from the interleaved host layout into the split
 * layout used by the CANN backend.
 *
 * src layout: `groups` consecutive block_q8_0 structs (scale + quants
 *             interleaved per group).
 * dst layout: all quantized values first (n_elems bytes), then all raw
 *             fp16 scales (`groups` uint16_t values) immediately after.
 *
 * @param tensor  tensor being transformed; must not yet have backend
 *                extra data and must not be an op node.
 * @param src     read-only interleaved q8_0 data.
 * @param dst     destination buffer, at least ggml_nbytes(tensor) bytes.
 */
GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor, const void *src, void* dst) {
    GGML_ASSERT(tensor->extra == nullptr);
    GGML_ASSERT(tensor->op == GGML_OP_NONE);

    int64_t n_elems = ggml_nelements(tensor);
    int64_t groups = n_elems / QK8_0;
    // One byte per quantized element; the scale area starts right after.
    size_t quant_bytes = n_elems * sizeof(uint8_t);

    uint8_t* quant_offset = (uint8_t*)dst;
    uint16_t* scale_offset = (uint16_t*)((char*)dst + quant_bytes);

    // int64_t index: `groups` is 64-bit, an int counter could overflow on
    // very large tensors.
    for (int64_t i = 0; i < groups; i++) {
        // Keep const-correctness instead of casting it away from `src`.
        const block_q8_0* group = (const block_q8_0*)src + i;
        *scale_offset = group->d;
        scale_offset++;
        memcpy(quant_offset, group->qs, QK8_0 * sizeof(uint8_t));
        quant_offset += QK8_0;
    }
}

/**
 * Inverse of ggml_backend_cann_transform_q8_0: repacks split backend data
 * (all quants, then all raw fp16 scales) back into interleaved block_q8_0
 * groups on the host.
 *
 * @param tensor  tensor being transformed back; must not have backend
 *                extra data and must not be an op node.
 * @param src     read-only split-layout data (quants then scales).
 * @param dst     destination buffer receiving `groups` block_q8_0 structs.
 */
GGML_CALL static void ggml_backend_cann_transform_back_q8_0(const ggml_tensor* tensor, const void *src, void* dst) {
    GGML_ASSERT(tensor->extra == nullptr);
    GGML_ASSERT(tensor->op == GGML_OP_NONE);

    int64_t n_elems = ggml_nelements(tensor);
    int64_t groups = n_elems / QK8_0;
    // One byte per quantized element; scales follow the quant area.
    size_t quant_bytes = n_elems * sizeof(uint8_t);

    // `src` is read-only: use const pointers rather than casting const away.
    const uint8_t* quant_offset = (const uint8_t*)src;
    const uint16_t* scale_offset = (const uint16_t*)((const char*)src + quant_bytes);

    // int64_t index: `groups` is 64-bit, an int counter could overflow on
    // very large tensors.
    for (int64_t i = 0; i < groups; i++) {
        block_q8_0* group = (block_q8_0*)dst + i;
        group->d = *scale_offset;
        scale_offset++;
        memcpy(group->qs, quant_offset, QK8_0 * sizeof(uint8_t));
        quant_offset += QK8_0;
    }
}


GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor, const void* src, void *dst) {
std::cout<<"Transform tensor:"<<tensor->name<<std::endl;
switch (tensor->type) {
case GGML_TYPE_Q4_0:
ggml_backend_cann_transform_q4_0(tensor, src, dst);
break;
case GGML_TYPE_Q8_0:
ggml_backend_cann_transform_q8_0(tensor, src, dst);
break;
default:
break;
}
Expand All @@ -223,6 +275,9 @@ GGML_CALL static void ggml_backend_cann_transform_back(const ggml_tensor* tensor
case GGML_TYPE_Q4_0:
ggml_backend_cann_transform_back_q4_0(tensor, src, dst);
break;
case GGML_TYPE_Q8_0:
ggml_backend_cann_transform_back_q8_0(tensor, src, dst);
break;
default:
break;
}
Expand All @@ -231,6 +286,7 @@ GGML_CALL static void ggml_backend_cann_transform_back(const ggml_tensor* tensor
GGML_CALL static bool need_transform(ggml_type type) {
switch (type) {
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q8_0:
return true;
default:
return false;
Expand Down Expand Up @@ -820,7 +876,16 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
case GGML_OP_MUL_MAT_ID:
// embedding
case GGML_OP_GET_ROWS:
return false;
{
switch (op->src[0]->type) {
//case GGML_TYPE_Q4_0:
case GGML_TYPE_Q8_0:
return true;
default:
return false;
}
}
break;
case GGML_OP_CPY:
case GGML_OP_DUP:
case GGML_OP_REPEAT:
Expand Down
7 changes: 7 additions & 0 deletions ggml-cann/aclnn_ops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1687,4 +1687,11 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ACL_CHECK(aclDestroyScalar(acl_scale));
ACL_CHECK(aclDestroyTensor(temp_tensor));
ACL_CHECK(aclDestroyTensor(temp_output_tensor));
}

// Handler for GGML_OP_GET_ROWS on the CANN backend — presumably meant to
// gather rows of src0 selected by the indices in src1 into dst (verify
// against the ggml GET_ROWS contract when implementing).
// NOTE(review): currently an unimplemented stub — the sources are fetched
// but no ACL kernel is launched and dst is left untouched; src0/src1 are
// therefore unused for now. TODO: implement.
void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src0 = dst->src[0];
    ggml_tensor* src1 = dst->src[1];


}

0 comments on commit 9461051

Please sign in to comment.