feat: support QNN #1

Open · wants to merge 27 commits into main

Commits (27):
5e9ac90  feat: trying support QNN (hans00, Apr 30, 2024)
d5a62f7  fix: fix parameter fill (hans00, Apr 30, 2024)
a7a3099  fix: fix windows build (hans00, May 1, 2024)
71eefd4  fix: fix binary copy files (hans00, May 1, 2024)
a044dbc  feat: setup env before binding load (hans00, May 1, 2024)
6058861  feat: increase limit to capable Llama 7B (hans00, May 3, 2024)
e0693de  chore: reduce log (hans00, May 3, 2024)
2b7cd15  chore: change backend order (hans00, May 3, 2024)
6a7ed61  ci: test all platforms (hans00, May 3, 2024)
03571c7  fix: fix missing file for Metal (hans00, May 3, 2024)
e1c07b8  fix: only copy `libqnnhtpv73.cat` for Windows (hans00, May 3, 2024)
6467e34  fix: force GGML artifacts output to bin dir (hans00, May 3, 2024)
7b0a5ad  fix: fix lib output (hans00, May 3, 2024)
7d2a054  chore: remove set CMAKE_RUNTIME_OUTPUT_DIRECTORY (hans00, May 3, 2024)
b65e516  fix: fix metal binary deploy (hans00, May 3, 2024)
478cc73  test: change test model to bypass OP problem (hans00, May 3, 2024)
3a4c2ef  feat: support multiple variant load (hans00, May 3, 2024)
149ba90  test: disable GPU to fixed test result (hans00, May 3, 2024)
e507eae  Revert "test: change test model to bypass OP problem" (hans00, May 3, 2024)
fa7321f  revert: use post build command to copy binary (hans00, May 3, 2024)
3fd2471  feat: support auto setup Windows env (hans00, May 3, 2024)
12d1e3a  refactor: let system auto find so/dll (hans00, May 3, 2024)
d4d4e97  refactor: make QNN log to stderr (sync behavior with llama.cpp) (hans00, May 3, 2024)
38d4de9  Merge branch 'main' into qnn (hans00, May 4, 2024)
225c7c6  Merge branch 'main' into qnn (hans00, May 5, 2024)
0777434  Merge branch 'main' into qnn (hans00, May 7, 2024)
575219e  fix: fix patch step (hans00, May 7, 2024)
9 changes: 8 additions & 1 deletion .github/workflows/ci.yml
@@ -1,9 +1,16 @@
 name: CI
 on: push
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   build:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
62 changes: 61 additions & 1 deletion CMakeLists.txt
@@ -5,6 +5,9 @@ cmake_policy(SET CMP0042 NEW)
 project (llama-node)
 
 set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED true)
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED true)
 
 if(NOT DEFINED napi_build_version)
   set(napi_build_version 6)
@@ -68,7 +71,7 @@ find_program(PATCH patch REQUIRED)
 
 add_custom_target(
   patch ALL
-  COMMAND ${PATCH} -p1 -n -i ${CMAKE_SOURCE_DIR}/patches/llama.patch
+  COMMAND ${PATCH} -p1 -N -i ${CMAKE_SOURCE_DIR}/patches/llama.patch
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
   COMMENT "Applying patches"
 )
@@ -92,6 +95,63 @@ file(
   "src/SaveSessionWorker.h"
 )
 
+if (LLAMA_QNN)
+  if (PLATFORM STREQUAL "linux" AND ARCH STREQUAL "x64")
+    set(QNN_PLATFORM "x86_64-linux-clang")
+  elseif (PLATFORM STREQUAL "linux" AND ARCH STREQUAL "arm64")
+    set(QNN_PLATFORM "aarch64-ubuntu-gcc7.5")
+  elseif (PLATFORM STREQUAL "win32" AND ARCH STREQUAL "x64")
+    set(QNN_PLATFORM "x86_64-windows-msvc")
+  elseif (PLATFORM STREQUAL "win32" AND ARCH STREQUAL "arm64")
+    set(QNN_PLATFORM "aarch64-windows-msvc")
+  endif()
+
+  if (NOT QNN_PLATFORM)
+    message(FATAL_ERROR "QNN is not supported on this platform")
+  endif()
+  set(QNN_LIB_PATH ${QNN_ROOT}/lib/${QNN_PLATFORM})
+  message(STATUS "QNN_LIB_PATH: ${QNN_LIB_PATH}")
+
+  file(
+    GLOB QNN_SO_FILES
+    "${QNN_LIB_PATH}/libc++*"
+    "${QNN_LIB_PATH}/libQnn*.so"
+    "${QNN_LIB_PATH}/Htp*.dll"
+    "${QNN_LIB_PATH}/Qnn*"
+  )
+
+  file(COPY ${QNN_SO_FILES} DESTINATION ${PLATFORM_BINARY_DIR})
+
+  file(GLOB QNN_EXTRA_FILES "${QNN_ROOT}/lib/hexagon-v*/unsigned/libQnn*Skel.so")
+
+  if (PLATFORM STREQUAL "win32")
+    list(APPEND QNN_EXTRA_FILES "${QNN_ROOT}/lib/hexagon-v73/unsigned/libqnnhtpv73.cat")
+  endif()
+
+  file(COPY ${QNN_EXTRA_FILES} DESTINATION ${PLATFORM_BINARY_DIR})
+
+  list(APPEND LINKS ${QNN_SO_FILES})
+
+  file(
+    GLOB QNN_HEADER_FILES
+    "src/ggml-qnn/ggml-qnn.h"
+  )
+
+  file(
+    GLOB QNN_SOURCE_FILES
+    "src/ggml-qnn/pthread-shim.h"
+    "src/ggml-qnn/ggml-qnn.cpp"
+  )
+
+  target_compile_definitions(ggml PUBLIC GGML_USE_QNN)
+  target_include_directories(ggml PUBLIC ${QNN_ROOT}/include ${QNN_ROOT}/include/QNN)
+  target_sources(ggml PRIVATE ${QNN_SOURCE_FILES} ${QNN_HEADER_FILES})
+  target_include_directories(llama PRIVATE "src/ggml-qnn")
+  set_target_properties(ggml PROPERTIES CXX_STANDARD 17)
+  set_target_properties(ggml PROPERTIES CXX_STANDARD_REQUIRED ON)
+  set_target_properties(ggml PROPERTIES C_STANDARD 11)
+endif()
+
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
1 change: 1 addition & 0 deletions package.json
@@ -39,6 +39,7 @@
   },
   "files": [
     "bin/**/*",
+    "patches/*",
     "src/**/*.{c,cc,cpp,h,hh,hpp,txt,cmake}",
     "lib/*.js",
     "lib/*.ts",
64 changes: 64 additions & 0 deletions patches/llama.patch
@@ -1,3 +1,67 @@
diff --git a/ggml-backend.c b/ggml-backend.c
index e91d97cd..39d4efec 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -445,6 +445,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
     extern GGML_CALL void ggml_backend_kompute_reg_devices(void);
     ggml_backend_kompute_reg_devices();
 #endif
+
+#ifdef GGML_USE_QNN
+    extern GGML_CALL void ggml_backend_qnn_reg_devices(void);
+    ggml_backend_qnn_reg_devices();
+#endif
 }
 
 GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
diff --git a/llama.cpp b/llama.cpp
index a25d115c..7dedb2a1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17,6 +17,8 @@
 # include "ggml-sycl.h"
 #elif defined(GGML_USE_KOMPUTE)
 # include "ggml-kompute.h"
+#elif defined(GGML_USE_QNN)
+# include "ggml-qnn.h"
 #endif
 
 #ifdef GGML_USE_METAL
@@ -1658,6 +1660,8 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(int gpu) {
     buft = ggml_backend_opencl_buffer_type();
 #elif defined(GGML_USE_KOMPUTE)
     buft = ggml_backend_kompute_buffer_type(gpu);
+#elif defined(GGML_USE_QNN)
+    buft = ggml_backend_qnn_buffer_type(gpu);
     if (buft == nullptr) {
         LLAMA_LOG_WARN("%s: cannot use GPU %d, check `vulkaninfo --summary`\n", __func__, gpu);
     }
@@ -14916,7 +14920,7 @@ bool llama_supports_mlock(void) {
 
 bool llama_supports_gpu_offload(void) {
 #if defined(GGML_USE_CUDA) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \
-    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE)
+    defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_QNN)
     // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
     return true;
 #else
@@ -15143,6 +15147,16 @@ struct llama_context * llama_new_context_with_model(
             ctx->backends.push_back(backend);
         }
     }
+#elif defined(GGML_USE_QNN)
+    if (model->n_gpu_layers > 0) {
+        auto * backend = ggml_backend_qnn_init(model->main_gpu);
+        if (backend == nullptr) {
+            LLAMA_LOG_ERROR("%s: failed to initialize QNN backend\n", __func__);
+            llama_free(ctx);
+            return nullptr;
+        }
+        ctx->backends.push_back(backend);
+    }
 #elif defined(GGML_USE_VULKAN)
     if (model->split_mode == LLAMA_SPLIT_MODE_ROW) {
         LLAMA_LOG_ERROR("%s: Row split not supported. Failed to initialize Vulkan backend\n", __func__);
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 1736ab73..55831936 100644
--- a/ggml-vulkan.cpp
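The hunks above only register the new backend with ggml; the backend implementation itself ships in src/ggml-qnn/ggml-qnn.cpp, which this diff does not show. As a hedged illustration, modeled on how other ggml backends register their devices rather than on the actual ggml-qnn.cpp in this PR, the registration hook could look roughly like this:

```cpp
// Hypothetical sketch only: the real ggml-qnn.cpp in this PR may enumerate
// QNN devices differently. Assumes ggml_backend_register/ggml_backend_init_fn
// come from ggml-backend-impl.h and that ggml-qnn.h declares
// ggml_backend_qnn_init and ggml_backend_qnn_buffer_type, as the patch implies.
#include <cstdint>
#include <cstdio>

#include "ggml-backend-impl.h"
#include "ggml-qnn.h"

static ggml_backend_t ggml_backend_reg_qnn_init(const char * params, void * user_data) {
    (void) params; // unused: the device index is carried in user_data
    return ggml_backend_qnn_init((int) (intptr_t) user_data);
}

GGML_CALL void ggml_backend_qnn_reg_devices(void) {
    const int n_devices = 1; // assumption: a single QNN device is exposed
    for (int i = 0; i < n_devices; i++) {
        char name[16];
        snprintf(name, sizeof(name), "QNN%d", i);
        // Signature matches the ggml_backend_register context line in the
        // ggml-backend.c hunk above.
        ggml_backend_register(name, ggml_backend_reg_qnn_init,
                              ggml_backend_qnn_buffer_type(i),
                              (void *) (intptr_t) i);
    }
}
```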
1 change: 1 addition & 0 deletions src/LlamaContext.cpp
@@ -57,6 +57,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
   params.numa =
       static_cast<ggml_numa_strategy>(get_option<uint32_t>(options, "numa", 0));
+  params.main_gpu = get_option<int32_t>(options, "main_gpu", 0);
 
   llama_backend_init();
   llama_numa_init(params.numa);
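The new main_gpu option follows the same get_option pattern as the surrounding lines and feeds ggml_backend_qnn_init(model->main_gpu) in the patched llama.cpp. For context, a helper of this shape could be written roughly as below; this is a sketch based only on how it is called here, not the repository's actual implementation:

```cpp
// Rough sketch of a get_option<T> helper matching the call sites above.
// The real helper in this repository may differ; treat names and behavior
// here as assumptions.
#include <napi.h>
#include <string>
#include <type_traits>

template <typename T>
static T get_option(Napi::Object options, const std::string &name, const T &default_value) {
  if (!options.Has(name) || options.Get(name).IsUndefined()) {
    return default_value; // option not supplied from JavaScript
  }
  Napi::Value value = options.Get(name);
  if constexpr (std::is_same_v<T, bool>) {
    return value.ToBoolean().Value();
  } else if constexpr (std::is_same_v<T, std::string>) {
    return value.ToString().Utf8Value();
  } else {
    // Numeric options such as "main_gpu" and "numa" arrive as JS numbers.
    return static_cast<T>(value.ToNumber().DoubleValue());
  }
}
```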