From f9e1b9a1369e6e57570cc095b8898940551ea418 Mon Sep 17 00:00:00 2001
From: "zhou.weiguo"
Date: Wed, 24 Apr 2024 18:26:49 +0800
Subject: [PATCH] make GitHub CI happy

---
 ggml-qnn.cpp | 13 +++++++++++++
 ggml-qnn.h   |  4 ----
 ggml.c       |  3 +--
 llama.cpp    | 11 +----------
 4 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/ggml-qnn.cpp b/ggml-qnn.cpp
index 5d698f184c25df..4eeb048f2ef9b2 100644
--- a/ggml-qnn.cpp
+++ b/ggml-qnn.cpp
@@ -89,6 +89,19 @@ class qnn_instance;
 
 //TODO: should be removed because this is a workaround method during development stage
+//a minor modification is required during the development stage to validate the QNN backend on an Android phone:
+//
+//change
+//
+//static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
+//
+//to
+//
+//void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
+//
+//in the source file ggml.c#L16156
+//
+//this workaround will no longer be needed once the final QNN backend is complete
 extern "C" void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
 #if (defined __ANDROID__) || (defined ANDROID)
 //Qualcomm's QNN could running on Windows over ARM(aka WoA)
diff --git a/ggml-qnn.h b/ggml-qnn.h
index 51f02d4ba3078f..dae4b476134299 100644
--- a/ggml-qnn.h
+++ b/ggml-qnn.h
@@ -46,10 +46,6 @@ GGML_API void ggml_backend_qnn_get_device_description(int device, char
 
 GGML_API ggml_backend_buffer_type_t ggml_backend_qnn_buffer_type(size_t dev_num);
 
-//temporary API, should be removed in the future
-GGML_API bool ggml_qnn_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
-
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/ggml.c b/ggml.c
index 919eb0b7b1ff16..086db96af7fcd1 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16153,8 +16153,7 @@ static void ggml_compute_forward_cross_entropy_loss_back(
 
 /////////////////////////////////
 
-//workaround for Qualcomm QNN backend
-void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     GGML_ASSERT(params);
 
     if (tensor->op == GGML_OP_NONE || ggml_is_empty(tensor)) {
diff --git a/llama.cpp b/llama.cpp
index a10c3e1fc8488b..451d0aadb51a05 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15403,7 +15403,7 @@ struct llama_context * llama_new_context_with_model(
 #elif defined(GGML_USE_QNN)
     if (model->n_gpu_layers > 0) {
         //the second param is package name of Andorid app, can be got by JNI from Java layer
-        ggml_backend_t backend = ggml_backend_qnn_init(QNN_CPU, "/data/data/com.ggml.llamacpp/");
+        ggml_backend_t backend = ggml_backend_qnn_init(model->main_gpu, "/data/data/com.ggml.llamacpp/");
         if (nullptr == backend) {
             LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
             llama_free(ctx);
@@ -17577,14 +17577,6 @@ void llama_reset_timings(struct llama_context * ctx) {
     ctx->t_p_eval_us = ctx->n_p_eval = 0;
 }
 
-static int llama_has_qnn(void) {
-#ifdef GGML_USE_QNN
-    return 1;
-#else
-    return 0;
-#endif
-}
-
 const char * llama_print_system_info(void) {
     static std::string s;
 
@@ -17606,7 +17598,6 @@ const char * llama_print_system_info(void) {
     s += "SSSE3 = "       + std::to_string(ggml_cpu_has_ssse3())       + " | ";
     s += "VSX = "         + std::to_string(ggml_cpu_has_vsx())         + " | ";
     s += "MATMUL_INT8 = " + std::to_string(ggml_cpu_has_matmul_int8()) + " | ";
-    s += "QNN = "         + std::to_string(llama_has_qnn())            + " | ";
 
     return s.c_str();
 }
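--
Note (after the signature delimiter, so not part of the applied patch): a minimal,
hypothetical usage sketch of the llama.cpp C API as of this commit, showing how the
patched branch in llama_new_context_with_model() is reached now that the QNN device
id comes from model->main_gpu instead of the hard-coded QNN_CPU. The model path and
the device id value below are illustrative assumptions, not values taken from the
patch; the API calls used are the public llama.h entry points of this period.

#include "llama.h"

int main(void) {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = 1; // must be > 0, otherwise the GGML_USE_QNN branch is skipped
    mparams.main_gpu     = 0; // forwarded to ggml_backend_qnn_init() as the QNN device id

    // the model path here is a placeholder for illustration
    llama_model * model = llama_load_model_from_file("/sdcard/ggml-model.gguf", mparams);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    // runs the patched #elif defined(GGML_USE_QNN) branch above, which calls
    // ggml_backend_qnn_init(model->main_gpu, "/data/data/com.ggml.llamacpp/")
    llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());
    if (ctx == NULL) {
        llama_free_model(model);
        llama_backend_free();
        return 1;
    }

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}

Routing the device id through main_gpu lets the caller select among whatever QNN
device values the backend defines (only QNN_CPU is visible in this diff) via the
existing llama.cpp model-parameter plumbing, rather than a compile-time constant.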