Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,4 @@ python/setup.py

# Xmake
.xmake/
CMakePresets.json
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,15 @@ option(NCNN_SIMPLEVK "minimal in-house vulkan loader" ON)
option(NCNN_SYSTEM_GLSLANG "use system glslang library" OFF)
option(NCNN_RUNTIME_CPU "runtime dispatch cpu routines" ON)
option(NCNN_DISABLE_PIC "disable position-independent code" OFF)
option(NCNN_BUILD_TESTS "build tests" OFF)
option(NCNN_BUILD_TESTS "build tests" ON)
option(NCNN_COVERAGE "build for coverage" OFF)
option(NCNN_ASAN "build for address sanitizer" OFF)
option(NCNN_BUILD_BENCHMARK "build benchmark" ON)
option(NCNN_PYTHON "build python api" OFF)
option(NCNN_INT8 "int8 inference" ON)
option(NCNN_BF16 "bf16 inference" ON)
option(NCNN_FORCE_INLINE "force inline some function" ON)
option(NCNN_MUTITHREAD "enable multi thread beta" ON)

if(ANDROID OR IOS OR NCNN_SIMPLESTL)
option(NCNN_DISABLE_RTTI "disable rtti" ON)
Expand Down
12 changes: 12 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ if(ANDROID)
list(APPEND ncnn_SRCS mat_pixel_android.cpp)
endif()

if(NCNN_MUTITHREAD)
list(APPEND ncnn_SRCS thread.cpp)
if(WIN32)
list(APPEND ncnn_SRCS TheadInfo.cpp)
endif()
endif()

ncnn_src_group(ncnn_SRCS "sources")

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}")
Expand Down Expand Up @@ -266,6 +273,11 @@ if(NCNN_THREADS)
target_link_libraries(ncnn PUBLIC pthread)
endif()
endif()
if(NCNN_MUTITHREAD)
if(NOT WIN32 AND (NOT NCNN_SIMPLEOMP) AND (NOT NCNN_SIMPLESTL))
target_link_libraries(ncnn PUBLIC -pthread)
endif()
endif()

if(NCNN_VULKAN)
if(NCNN_SIMPLEVK)
Expand Down
69 changes: 69 additions & 0 deletions src/TheadInfo.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#ifdef NCNN_MUTITHREAD
#ifdef _WIN32

#include "TheadInfo.h"
namespace ncnn {

// Singleton instance storage; created lazily by ThreadInfo::get()
ThreadInfo* ThreadInfo::thread_info = nullptr;

ThreadInfo::ThreadInfo(/* args */)
{
    // Enumerate every logical processor in every Windows processor group,
    // so machines with more than 64 logical cpus are fully covered.
    int groupCount = GetActiveProcessorGroupCount();
    for (WORD group = 0; group < groupCount; group++)
    {
        DWORD processorsInGroup = GetActiveProcessorCount(group);
        for (int i = 0; i < static_cast<int>(processorsInGroup); i++)
        {
            CoreInfo info;
            info.group = group;
            // bug fix: the previous `i + core_infos.size()` double-counted
            // within a group (ids went 0,2,4,...). The running total of
            // already-recorded cores is the correct global sequential id.
            info.id = static_cast<int>(core_infos.size());
            // single-bit affinity mask for this cpu within its group
            info.affinity = (static_cast<DWORD_PTR>(1) << i);
            core_infos.push_back(info);
        }
    }
}

ThreadInfo* ThreadInfo::get()
{
    // Serialize first-time construction of the singleton; later calls
    // just take the lock briefly and return the cached instance.
    static Mutex singleton_lock;
    AutoLock guard(singleton_lock);

    if (thread_info == nullptr)
    {
        thread_info = new ThreadInfo();
    }

    return thread_info;
}

CoreInfo ThreadInfo::getCurrentCore()
{
    // Return the cached CoreInfo for the processor the calling thread is
    // executing on right now (processor-group aware).
    // Note: the previous GetProcessAffinityMask call was removed — its
    // results were never used (dead code).
    PROCESSOR_NUMBER proc_num;
    GetCurrentProcessorNumberEx(&proc_num);

    for (const auto& core : core_infos)
    {
        // match both the group number and the bit for the in-group index
        if (core.group == proc_num.Group && (core.affinity & (1ULL << proc_num.Number)))
        {
            return core;
        }
    }

    // not found: return a sentinel (id = -1, group = -1, empty affinity)
    return {-1, -1, 0};
}

void ThreadInfo::getAllCore(std::vector<CoreInfo>& out)
{
out = core_infos;
}
} // namespace ncnn

#endif
#endif
30 changes: 30 additions & 0 deletions src/TheadInfo.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef THREAD_INFO_H
#define THREAD_INFO_H
#ifdef NCNN_MUTITHREAD
#if defined _WIN32
#include "cpu.h"
namespace ncnn {
// Describes one logical processor on Windows.
struct CoreInfo
{
public:
    int id;             // global sequential index across all processor groups
    int group;          // Windows processor group this cpu belongs to
    DWORD_PTR affinity; // single-bit affinity mask within that group
};
// Process-wide singleton caching the logical-processor topology
// (Windows processor groups) for the multithread scheduler.
// NOTE(review): the file name "TheadInfo.h" looks like a typo of
// "ThreadInfo.h" — consider renaming before merge.
class ThreadInfo
{
private:
    static ThreadInfo* thread_info;   // singleton instance, created by get()
    std::vector<CoreInfo> core_infos; // one entry per logical processor
    ThreadInfo(/* args */);           // private: enumerates cores once

public:
    static ThreadInfo* get();                     // lazily create and return the singleton
    CoreInfo getCurrentCore();                    // core the calling thread currently runs on
    void getAllCore(std::vector<CoreInfo>& out);  // copy of the full core table
};
} // namespace ncnn

#endif
#endif
#endif
54 changes: 51 additions & 3 deletions src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1424,12 +1424,21 @@ static std::vector<int> get_max_freq_mhz()

// Bind the calling thread to the cpus described by thread_affinity_mask.
// Returns 0 on success, -1 on failure.
// NOTE(review): both branches call Win32 APIs, so this overload appears to
// live in a _WIN32-only region of cpu.cpp — confirm the inner #ifdef is
// actually needed.
static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask)
{
#ifdef _WIN32
    // processor-group-aware path: honors cpu_group for machines with > 64 cpus
    GROUP_AFFINITY groupAffinity;
    ZeroMemory(&groupAffinity, sizeof(groupAffinity));
    groupAffinity.Group = static_cast<WORD>(thread_affinity_mask.cpu_group);
    groupAffinity.Mask = thread_affinity_mask.mask;

    // bug fix: the return value was previously ignored; report failure the
    // same way the legacy path below does
    if (!SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, NULL))
    {
        NCNN_LOGE("SetThreadGroupAffinity failed %d", GetLastError());
        return -1;
    }
#else
    DWORD_PTR prev_mask = SetThreadAffinityMask(GetCurrentThread(), thread_affinity_mask.mask);
    if (prev_mask == 0)
    {
        NCNN_LOGE("SetThreadAffinityMask failed %d", GetLastError());
        return -1;
    }
#endif

    return 0;
}
Expand Down Expand Up @@ -2266,22 +2275,27 @@ CpuSet::CpuSet()

void CpuSet::enable(int cpu)
{
    // Map a global cpu index onto Windows processor groups: each group
    // holds up to 64 logical processors, so the group is cpu / 64 and the
    // bit within that group is cpu % 64.
    // (Removed the stale pre-change line `mask |= ((ULONG_PTR)1 << cpu);`
    // left over from the diff — it shifted by the ungrouped index.)
    // NOTE(review): a single cpu_group member means this set can represent
    // only one group at a time; bits set for a previous group stay in
    // `mask` after the group changes — confirm callers never mix groups.
    cpu_group = cpu / 64;
    mask |= ((ULONG_PTR)1 << (cpu - cpu_group * 64));
}

void CpuSet::disable(int cpu)
{
    // Clear the bit for `cpu` inside its processor group (group = cpu / 64,
    // bit = cpu % 64). Also selects that group, mirroring enable().
    // (Removed the stale pre-change line `mask &= ~((ULONG_PTR)1 << cpu);`
    // left over from the diff.)
    cpu_group = cpu / 64;
    mask &= ~((ULONG_PTR)1 << (cpu - cpu_group * 64));
}

// Reset to an empty set: group 0 selected, no bits set.
void CpuSet::disable_all()
{
    cpu_group = 0;
    mask = 0;
}

bool CpuSet::is_enabled(int cpu) const
{
    // A cpu can only be enabled if it belongs to the currently selected
    // processor group — the set tracks one group at a time.
    // (Removed the stale pre-change line `return mask & ((ULONG_PTR)1 << cpu);`
    // left over from the diff; it would have made the group check unreachable.)
    if (cpu_group != cpu / 64)
        return false;
    return mask & ((ULONG_PTR)1 << (cpu - cpu_group * 64));
}

int CpuSet::num_enabled() const
Expand Down Expand Up @@ -3266,4 +3280,38 @@ int set_flush_denormals(int flush_denormals)
#endif
}

// Return the number of physical processor cores to use for multithread
// batch inference; falls back to get_cpu_count() off-Windows or when the
// multithread feature is disabled. Returns 0 on query failure.
int get_multi_thread_batch()
{
// bug fix: was `#if defined(_NCNN_MUTITHREAD)` — that underscore-prefixed
// macro is never defined (platform.h uses `#cmakedefine01 NCNN_MUTITHREAD`),
// so the Win32 branch was dead code. Test the 0/1 macro value instead.
#if NCNN_MUTITHREAD && defined(_WIN32)
    DWORD length = 0;
    GetLogicalProcessorInformation(NULL, &length);
    if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
        return 0;

    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(length);
    if (!buffer)
        return 0;

    int count = 0;
    if (GetLogicalProcessorInformation(buffer, &length))
    {
        // bug fix: the previous version advanced `buffer` while iterating and
        // then called free() on the advanced pointer — heap corruption.
        // Index from the original pointer and free that.
        const DWORD n = length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
        for (DWORD i = 0; i < n; i++)
        {
            if (buffer[i].Relationship == RelationProcessorCore)
                count++;
        }
    }
    free(buffer);
    return count;
#else
    return get_cpu_count();
#endif
}

} // namespace ncnn
5 changes: 5 additions & 0 deletions src/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#if defined _WIN32
#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0601 // Windows 7+
#include <windows.h>
#endif
#if defined __ANDROID__ || defined __linux__
Expand All @@ -30,6 +31,7 @@ class NCNN_EXPORT CpuSet

public:
#if defined _WIN32
int cpu_group;
ULONG_PTR mask;
#endif
#if defined __ANDROID__ || defined __linux__
Expand Down Expand Up @@ -172,6 +174,9 @@ NCNN_EXPORT void set_kmp_blocktime(int time_ms);
NCNN_EXPORT int get_flush_denormals();
NCNN_EXPORT int set_flush_denormals(int flush_denormals);

// multi thread batch inference
NCNN_EXPORT int get_multi_thread_batch();

} // namespace ncnn

#endif // NCNN_CPU_H
5 changes: 5 additions & 0 deletions src/layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ int Layer::forward_inplace(Mat& /*bottom_top_blob*/, const Option& /*opt*/) cons
return -1;
}

// Default multithread work function: not implemented by the base layer,
// so return -1 (same convention as the default forward_inplace above).
// bug fix: dropped the trailing `const` — layer.h declares
// `virtual int forward_thread(void* workspace);` (non-const), and
// overrides such as AbsVal::forward_thread are non-const too; the const
// definition would not match any declaration.
int Layer::forward_thread(void* /*workspace*/)
{
    return -1;
}

#if NCNN_VULKAN
int Layer::upload_model(VkTransfer& /*cmd*/, const Option& /*opt*/)
{
Expand Down
5 changes: 5 additions & 0 deletions src/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ class NCNN_EXPORT Layer
// return 0 if success
virtual int forward_inplace(std::vector<Mat>& bottom_top_blobs, const Option& opt) const;
virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
/// @brief multithread work function
/// @param workspace thread information
/// @return 0 if success
virtual int forward_thread(void* workspace);

#if NCNN_VULKAN
public:
Expand Down Expand Up @@ -139,6 +143,7 @@ class NCNN_EXPORT Layer
// layer factory function
typedef Layer* (*layer_creator_func)(void*);
typedef void (*layer_destroyer_func)(Layer*, void*);
typedef int (*layer_work_func)(Layer*, void*);

struct layer_registry_entry
{
Expand Down
54 changes: 54 additions & 0 deletions src/layer/absval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: BSD-3-Clause

#include "absval.h"
#include "thread.h"

namespace ncnn {

Expand All @@ -17,6 +18,16 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
int h = bottom_top_blob.h;
int channels = bottom_top_blob.c;
int size = w * h;
if (opt.num_threads > 64)
{
ThreadWorkspace workspace;
workspace.layer = (Layer*)this;
MutilThread thread(workspace, opt);
std::vector<Mat> workspace_blobs;
workspace_blobs.push_back(bottom_top_blob);
thread.join(workspace_blobs);
return 0;
}

#pragma omp parallel for num_threads(opt.num_threads)
for (int q = 0; q < channels; q++)
Expand All @@ -33,4 +44,47 @@ int AbsVal::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
return 0;
}

// Multithread worker: applies |x| in place to the element range
// [start_index, end_index) of the first blob in the thread workspace.
// The element width selects the data interpretation:
//   elemsize 1 -> int8, elemsize 2 -> 16-bit float storage (bf16/fp16),
//   otherwise  -> fp32.
int AbsVal::forward_thread(void* workspace)
{
    ThreadInfoExc* info = (ThreadInfoExc*)workspace;
    Mat& bottom_top_blob = info->mats->at(0);

    if (bottom_top_blob.elemsize == 1)
    {
        // int8 path. NOTE(review): -128 has no positive counterpart in
        // int8; negation leaves it at -128 (same as the original bit-test
        // version) — confirm that is acceptable for quantized data.
        int8_t* ptr = (int8_t*)bottom_top_blob.data;
        for (size_t i = info->start_index; i < info->end_index; i++)
        {
            if (ptr[i] < 0)
            {
                ptr[i] = -ptr[i];
            }
        }
    }
    else if (bottom_top_blob.elemsize == 2)
    {
        // bug fix: for sign/magnitude 16-bit float storage (bf16/fp16),
        // abs() means clearing the sign bit. The previous code negated the
        // raw bits as a two's-complement integer, which produces a wrong
        // value (e.g. bf16 -1.0 = 0xBF80 became 0x4080, not 1.0 = 0x3F80).
        uint16_t* ptr = (uint16_t*)bottom_top_blob.data;
        for (size_t i = info->start_index; i < info->end_index; i++)
        {
            ptr[i] = (uint16_t)(ptr[i] & 0x7fff);
        }
    }
    else
    {
        // fp32 path
        float* ptr = (float*)bottom_top_blob.data;
        for (size_t i = info->start_index; i < info->end_index; i++)
        {
            if (ptr[i] < 0)
            {
                ptr[i] = -ptr[i];
            }
        }
    }

    return 0;
}

} // namespace ncnn
1 change: 1 addition & 0 deletions src/layer/absval.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class AbsVal : public Layer
AbsVal();

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;
virtual int forward_thread(void* workspace);
};

} // namespace ncnn
Expand Down
2 changes: 2 additions & 0 deletions src/layer/batchnorm.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ class BatchNorm : public Layer

virtual int forward_inplace(Mat& bottom_top_blob, const Option& opt) const;

virtual int forward_thread(void* workspace);

public:
// param
int channels;
Expand Down
1 change: 1 addition & 0 deletions src/platform.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#cmakedefine01 NCNN_INT8
#cmakedefine01 NCNN_BF16
#cmakedefine01 NCNN_FORCE_INLINE
#cmakedefine01 NCNN_MUTITHREAD

#cmakedefine NCNN_VERSION_STRING "@NCNN_VERSION_STRING@"

Expand Down
Loading