diff --git a/ggml/include/ggml-cann.h b/ggml/include/ggml-cann.h index a15d6020bbaac0..99a813affd9b59 100644 --- a/ggml/include/ggml-cann.h +++ b/ggml/include/ggml-cann.h @@ -1,46 +1,148 @@ +/* + * Copyright (c) 2023-2024 The ggml authors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + #pragma once #include "ggml-backend.h" #include "ggml.h" -#define GGML_CANN_NAME "CANN" - #ifdef __cplusplus extern "C" { #endif +/** + * @def GGML_CANN_NAME + * @brief Define for the name of the CANN backend. + */ +#define GGML_CANN_NAME "CANN" + +/** + * @brief Maximum number of CANN devices supported. + */ #define GGML_CANN_MAX_DEVICES 16 +/** + * @brief Structure for QK4_0 data format. + */ #define QK4_0 32 typedef struct { - uint16_t d; // delta - uint8_t qs[QK4_0 / 2]; // nibbles / quants + uint16_t d; /**< Delta */ + uint8_t qs[QK4_0 / 2]; /**< Nibbles / quants */ } block_q4_0; - +/** + * @brief Structure for QK8_0 data format. 
+ */ #define QK8_0 32 typedef struct { - uint16_t d; // delta - int8_t qs[QK8_0]; // quants + uint16_t d; /**< Delta */ + int8_t qs[QK8_0]; /**< Quants */ } block_q8_0; -// backend API +/** + * @brief Initializes the CANN backend for a specified device. + * + * This function initializes the CANN backend for the given device. + * It verifies the device index, allocates a context, and creates a backend + * instance. + * + * @param device The index of the device to initialize. + * @return A pointer to the initialized backend instance, or nullptr on failure. + */ GGML_API GGML_CALL ggml_backend_t ggml_backend_cann_init(int32_t device); +/** + * @brief Checks if a given backend is a CANN backend. + * + * This function verifies if the provided backend is a CANN backend by comparing + * its GUID with the CANN backend's GUID. + * + * @param backend The backend instance to check. + * @return True if the backend is a CANN backend, false otherwise. + */ GGML_API GGML_CALL bool ggml_backend_is_cann(ggml_backend_t backend); -// device buffer +/** + * @brief Retrieves the CANN buffer type for a specified device. + * + * This function initializes and returns the buffer type interface associated + * with the given device. It ensures thread-safe access using a mutex. + * + * @param device The device index for which to retrieve the buffer type. + * @return A pointer to the buffer type interface for the specified device, or + * nullptr if the device index is out of range. + */ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_buffer_type(int32_t device); +/** + * @brief Retrieves the number of CANN devices available. + * + * This function returns the number of CANN devices available based on + * information obtained from `ggml_cann_info()`. + * + * @return The number of CANN devices available. + */ GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void); + +/** + * @brief Retrieves the description of a specific CANN device. 
+ * + * This function sets the specified device, retrieves the SoC name, + * and writes it into the provided description buffer. + * + * @param device The device index to retrieve the description for. + * @param description Pointer to a buffer where the description will be written. + * @param description_size Size of the description buffer. + */ GGML_API GGML_CALL void ggml_backend_cann_get_device_description( int32_t device, char* description, size_t description_size); + +/** + * @brief Retrieves the memory information of a specific CANN device. + * + * This function sets the specified device, retrieves the free and total + * memory information of the specified type (ACL_HBM_MEM), and stores them + * in the provided pointers. + * + * @param device The device index to retrieve memory information for. + * @param free Pointer to a variable where the free memory size will be stored. + * @param total Pointer to a variable where the total memory size will be + * stored. + */ GGML_API GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device, size_t* free, size_t* total); + +/** + * @brief Initializes resources required by the CANN backend. + */ void ggml_cann_backend_init(void); + +/** + * @brief Frees resources used by the CANN backend. 
+ */ void ggml_cann_backend_free(void); + #ifdef __cplusplus } -#endif +#endif \ No newline at end of file diff --git a/ggml/src/ggml-cann.cpp b/ggml/src/ggml-cann.cpp index 844f497252a4db..15465bc422c601 100644 --- a/ggml/src/ggml-cann.cpp +++ b/ggml/src/ggml-cann.cpp @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2023-2024 The ggml authors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + #include "ggml-cann.h" #include @@ -11,6 +33,15 @@ #include "ggml-cann/aclnn_ops.h" #include "ggml-cann/common.h" +/** + * @brief Handles CANN errors by printing an error message and aborting. + * + * @param stmt The statement that caused the error. + * @param func The function in which the error occurred. + * @param file The file in which the error occurred. + * @param line The line number where the error occurred. + * @param msg The error message. 
+ */ [[noreturn]] void ggml_cann_error(const char* stmt, const char* func, const char* file, int line, const char* msg) { int32_t id = -1; @@ -24,6 +55,11 @@ GGML_ASSERT(!"CANN error"); } +/** + * @brief Sets the device to be used by CANN. + * + * @param device The device ID to set. + */ void ggml_cann_set_device(const int32_t device) { // TODO: uncomment these lines after empty context has fixed. // int current_device; @@ -35,12 +71,25 @@ void ggml_cann_set_device(const int32_t device) { ACL_CHECK(aclrtSetDevice(device)); } +/** + * @brief Retrieves the current device ID. + * + * @return The current device ID. + */ int32_t ggml_cann_get_device() { int32_t id; ACL_CHECK(aclrtGetDevice(&id)); return id; } +/** + * @brief Initialize the CANN device information. + * + * This function initializes the CANN device information by obtaining the + * device count and setting the memory allocation granularity for each device. + * + * @return A structure containing the device information. + */ static ggml_cann_device_info ggml_cann_init() { ggml_cann_device_info info = {}; @@ -62,41 +111,78 @@ static ggml_cann_device_info ggml_cann_init() { prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; prop.location.id = id; prop.reserve = 0; - ACL_CHECK(aclrtMemGetAllocationGranularity(&prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED, &info.devices[id].vmm_granularity)); + ACL_CHECK(aclrtMemGetAllocationGranularity( + &prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED, + &info.devices[id].vmm_granularity)); } // TODO: add more device info later. return info; } +/** + * @brief Retrieve the CANN device information. + * + * This function returns a reference to a structure containing the CANN device + * information. The device information is initialized once and reused on + * subsequent calls. + * + * @return A reference to the structure containing the device information. 
+ */ const ggml_cann_device_info& ggml_cann_info() { static ggml_cann_device_info info = ggml_cann_init(); return info; } -//#define DEBUG_CANN_MALLOC - -// buffer pool for cann (legacy) +// #define DEBUG_CANN_MALLOC +/** + * @brief A pool of CANN buffers(legacy). + * + * This class manages a pool of CANN buffers for a specific device. + */ struct ggml_cann_pool_leg : public ggml_cann_pool { + /** + * @brief The maximum number of buffers in the pool. + */ static const int MAX_BUFFERS = 256; + /** + * @brief The device ID associated with this buffer pool. + */ int device; + + /** + * @brief Structure representing a CANN buffer. + */ struct ggml_cann_buffer { - void * ptr = nullptr; - size_t size = 0; + void* ptr = nullptr; ///< Pointer to the buffer memory. + size_t size = 0; ///< Size of the buffer. }; + /** + * @brief Array of CANN buffers in the pool. + */ ggml_cann_buffer buffer_pool[MAX_BUFFERS] = {}; + + /** + * @brief Total size of all buffers in the pool. + */ size_t pool_size = 0; - explicit ggml_cann_pool_leg(int device) : - device(device) { - } + /** + * @brief Constructor to initialize the buffer pool for a specific device. + * + * @param device The device ID to associate with this buffer pool. + */ + explicit ggml_cann_pool_leg(int device) : device(device) {} + /** + * @brief Destructor to free all buffers in the pool. + */ ~ggml_cann_pool_leg() { ggml_cann_set_device(device); for (int i = 0; i < MAX_BUFFERS; ++i) { - ggml_cann_buffer & b = buffer_pool[i]; + ggml_cann_buffer& b = buffer_pool[i]; if (b.ptr != nullptr) { ACL_CHECK(aclrtFree(b.ptr)); pool_size -= b.size; @@ -105,7 +191,15 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { GGML_ASSERT(pool_size == 0); } - void * alloc(size_t size, size_t * actual_size) override { + /** + * @brief Allocate a buffer of the given size. + * + * @param size The size of the buffer to allocate. + * @param actual_size A pointer to a variable to receive the actual size of + * the allocated buffer. 
+ * @return A pointer to the allocated buffer. + */ + void* alloc(size_t size, size_t* actual_size) override { #ifdef DEBUG_CANN_MALLOC int nnz = 0; size_t max_size = 0; @@ -125,7 +219,7 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { best_diff = diff; ibest = i; if (!best_diff) { - void * ptr = b.ptr; + void* ptr = b.ptr; *actual_size = b.size; b.ptr = nullptr; b.size = 0; @@ -137,27 +231,38 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { } if (ibest >= 0) { ggml_cann_buffer& b = buffer_pool[ibest]; - void * ptr = b.ptr; + void* ptr = b.ptr; *actual_size = b.size; b.ptr = nullptr; b.size = 0; return ptr; } - void * ptr; - size_t look_ahead_size = (size_t) (1.05 * size); - look_ahead_size = 256 * ((look_ahead_size + 255)/256); + void* ptr; + size_t look_ahead_size = (size_t)(1.05 * size); + look_ahead_size = 256 * ((look_ahead_size + 255) / 256); ggml_cann_set_device(device); - ACL_CHECK(aclrtMalloc(&ptr, look_ahead_size, ACL_MEM_MALLOC_HUGE_FIRST)); + ACL_CHECK( + aclrtMalloc(&ptr, look_ahead_size, ACL_MEM_MALLOC_HUGE_FIRST)); *actual_size = look_ahead_size; pool_size += look_ahead_size; #ifdef DEBUG_CANN_MALLOC - printf("%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, requested %u MB\n", __func__, device, nnz, - (uint32_t)(max_size / 1024 / 1024), (uint32_t)(pool_size / 1024 / 1024), (uint32_t)(size / 1024 / 1024)); + printf( + "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, " + "requested %u MB\n", + __func__, device, nnz, (uint32_t)(max_size / 1024 / 1024), + (uint32_t)(pool_size / 1024 / 1024), + (uint32_t)(size / 1024 / 1024)); #endif return ptr; } - void free(void * ptr, size_t size) override { + /** + * @brief Free a buffer and return it to the pool. + * + * @param ptr Pointer to the buffer to free. + * @param size Size of the buffer to free. 
+ */ + void free(void* ptr, size_t size) override { for (int i = 0; i < MAX_BUFFERS; ++i) { ggml_cann_buffer& b = buffer_pool[i]; if (b.ptr == nullptr) { @@ -173,26 +278,69 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { } }; -// pool with virtual memory +/** + * @brief A pool of CANN buffers with virtual memory. + * + * This class manages a pool of CANN buffers with virtual memory for a specific + * device. + */ struct ggml_cann_pool_vmm : public ggml_cann_pool { - static const size_t CANN_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB - + /** + * @brief The maximum size of the virtual memory pool (32 GB). + */ + static const size_t CANN_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB + + /** + * @brief The device ID associated with this buffer pool. + */ int device; + + /** + * @brief Pointer to the start of the virtual memory pool. + */ void* pool_addr = 0; + + /** + * @brief Amount of virtual memory used in the pool. + */ size_t pool_used = 0; + + /** + * @brief Total size of the virtual memory pool. + */ size_t pool_size = 0; + + /** + * @brief Allocation granularity for the virtual memory pool. + */ size_t granularity; + + /** + * @brief Handles for the physical memory allocated. + */ std::vector handles; + + /** + * @brief Offsets for the mapped memory regions. + */ std::vector map_offsets; - explicit ggml_cann_pool_vmm(int device) : - device(device), - granularity(ggml_cann_info().devices[device].vmm_granularity) { - } + /** + * @brief Constructor to initialize the buffer pool with virtual memory for + * a specific device. + * + * @param device The device ID to associate with this buffer pool. + */ + explicit ggml_cann_pool_vmm(int device) + : device(device), + granularity(ggml_cann_info().devices[device].vmm_granularity) {} + /** + * @brief Destructor to free all buffers in the virtual memory pool. 
+ */ ~ggml_cann_pool_vmm() { if (pool_addr != 0) { - for(auto& offset : map_offsets){ + for (auto& offset : map_offsets) { ACL_CHECK(aclrtUnmapMem(offset)); } for (auto& handle : handles) { @@ -202,8 +350,17 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { } } - void * alloc(size_t size, size_t * actual_size) override { - // round up the allocation size to the alignment to ensure that all allocations are aligned for all data types + /** + * @brief Allocate a buffer of the given size in the virtual memory pool. + * + * @param size The size of the buffer to allocate. + * @param actual_size A pointer to a variable to receive the actual size of + * the allocated buffer. + * @return A pointer to the allocated buffer. + */ + void* alloc(size_t size, size_t* actual_size) override { + // round up the allocation size to the alignment to ensure that all + // allocations are aligned for all data types const size_t alignment = 128; size = alignment * ((size + alignment - 1) / alignment); @@ -212,7 +369,8 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { if (size > avail) { // round up to the next multiple of the granularity size_t reserve_size = size - avail; - reserve_size = granularity * ((reserve_size + granularity - 1) / granularity); + reserve_size = + granularity * ((reserve_size + granularity - 1) / granularity); GGML_ASSERT(pool_size + reserve_size <= CANN_POOL_VMM_MAX_SIZE); @@ -229,11 +387,13 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { // reserve virtual address space (if not already reserved) if (pool_addr == 0) { - ACL_CHECK(aclrtReserveMemAddress(&pool_addr, CANN_POOL_VMM_MAX_SIZE, 0, NULL, 1)); + ACL_CHECK(aclrtReserveMemAddress( + &pool_addr, CANN_POOL_VMM_MAX_SIZE, 0, NULL, 1)); } // map at the end of the pool - ACL_CHECK(aclrtMapMem((char*)pool_addr + pool_size, reserve_size, 0, handle, 0)); + ACL_CHECK(aclrtMapMem((char*)pool_addr + pool_size, reserve_size, 0, + handle, 0)); handles.push_back(handle); 
map_offsets.push_back((char*)pool_addr + pool_size); @@ -241,56 +401,98 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { // add to the pool pool_size += reserve_size; - //printf("cann pool[%d]: size increased to %llu MB (reserved %llu MB)\n", + // printf("cann pool[%d]: size increased to %llu MB (reserved %llu + // MB)\n", // device, (unsigned long long) (pool_size/1024/1024), // (unsigned long long) (reserve_size/1024/1024)); } GGML_ASSERT(pool_addr != 0); - void * ptr = (void *) ((char*)pool_addr + pool_used); + void* ptr = (void*)((char*)pool_addr + pool_used); *actual_size = size; pool_used += size; #ifdef DEBUG_CANN_MALLOC - printf("cann pool[%d]: allocated %llu bytes at %llx\n", device, (unsigned long long) size, (unsigned long long) ptr); + printf("cann pool[%d]: allocated %llu bytes at %llx\n", device, + (unsigned long long)size, (unsigned long long)ptr); #endif return ptr; } - void free(void * ptr, size_t size) override { + /** + * @brief Free a buffer and return it to the virtual memory pool. + * + * @param ptr Buffer to free; must be the latest live allocation. + * @param size Size of the buffer to free. + */ + void free(void* ptr, size_t size) override { #ifdef DEBUG_CANN_MALLOC - printf("cann pool[%d]: freed %llu bytes at %llx\n", device, (unsigned long long) size, (unsigned long long) ptr); + printf("cann pool[%d]: freed %llu bytes at %llx\n", device, + (unsigned long long)size, (unsigned long long)ptr); #endif pool_used -= size; // all deallocations must be in reverse order of the allocations - GGML_ASSERT(ptr == (void *) ((char*)pool_addr + pool_used)); + GGML_ASSERT(ptr == (void*)((char*)pool_addr + pool_used)); } }; -std::unique_ptr ggml_backend_cann_context::new_pool_for_device(int device) { - //return std::unique_ptr(new ggml_cann_pool_leg(device)); +/** + * @brief Create a new CANN pool for a specific device. + * + * Factory method; currently always creates a virtual-memory (VMM) pool. + * + * @param device The device ID for which to create the pool. 
+ * @return A unique pointer to the created CANN pool. + */ +std::unique_ptr ggml_backend_cann_context::new_pool_for_device( + int device) { + // return std::unique_ptr(new ggml_cann_pool_leg(device)); return std::unique_ptr(new ggml_cann_pool_vmm(device)); } // cann buffer +/** + * @brief Context for managing a CANN buffer associated with a specific device. + * + * This structure holds information about a CANN buffer, including the device + * ID, device pointer, and a name derived from GGML_CANN_NAME and the device ID. + */ struct ggml_backend_cann_buffer_context { - int32_t device; - void* dev_ptr = nullptr; - std::string name; - + int32_t device; ///< The device ID associated with this buffer context. + void* dev_ptr = + nullptr; ///< Pointer to the device memory allocated for the buffer. + std::string name; ///< Name of the buffer context. + + /** + * @brief Constructor to initialize the CANN buffer context. + * + * @param device The device ID associated with this buffer context. + * @param dev_ptr Pointer to the device memory allocated for the buffer. + */ ggml_backend_cann_buffer_context(int32_t device, void* dev_ptr) : device(device), dev_ptr(dev_ptr), name(GGML_CANN_NAME + std::to_string(device)) {} - ~ggml_backend_cann_buffer_context() { - ACL_CHECK(aclrtFree(dev_ptr)); - } + /** + * @brief Destructor to free the device memory allocated for the buffer. + */ + ~ggml_backend_cann_buffer_context() { ACL_CHECK(aclrtFree(dev_ptr)); } }; +/** + * @brief Retrieve the name associated with a CANN buffer. + * + * This function returns the name of a CANN buffer, which is stored in the + * context of the buffer. + * + * @param buffer The CANN buffer whose name is to be retrieved. + * @return A pointer to a C-string containing the name of the buffer. 
+ */ + GGML_CALL static const char* ggml_backend_cann_buffer_get_name( ggml_backend_buffer_t buffer) { ggml_backend_cann_buffer_context* ctx = @@ -298,11 +500,28 @@ GGML_CALL static const char* ggml_backend_cann_buffer_get_name( return ctx->name.c_str(); } +/** + * @brief Check if a buffer is a CANN buffer. + * + * This function checks if a given buffer is a CANN buffer by comparing its + * `get_name` function pointer to `ggml_backend_cann_buffer_get_name`. + * + * @param buffer The buffer to check. + * @return true if the buffer is a CANN buffer, false otherwise. + */ GGML_CALL static bool ggml_backend_buffer_is_cann( ggml_backend_buffer_t buffer) { return buffer->iface.get_name == ggml_backend_cann_buffer_get_name; } +/** + * @brief Free resources associated with a CANN buffer. + * + * This function frees the resources associated with a CANN buffer, including + * its context. + * + * @param buffer The CANN buffer to free. + */ GGML_CALL static void ggml_backend_cann_buffer_free_buffer( ggml_backend_buffer_t buffer) { ggml_backend_cann_buffer_context* ctx = @@ -310,6 +529,15 @@ GGML_CALL static void ggml_backend_cann_buffer_free_buffer( delete ctx; } +/** + * @brief Retrieve the base pointer of a CANN buffer. + * + * This function returns the base pointer of a CANN buffer, which points to the + * device memory allocated for the buffer. + * + * @param buffer The CANN buffer whose base pointer is to be retrieved. + * @return A pointer to the base of the device memory allocated for the buffer. + */ GGML_CALL static void* ggml_backend_cann_buffer_get_base( ggml_backend_buffer_t buffer) { ggml_backend_cann_buffer_context* ctx = @@ -317,6 +545,19 @@ GGML_CALL static void* ggml_backend_cann_buffer_get_base( return ctx->dev_ptr; } +/** + * @brief Transform quantized Q4.0 tensor data into a format suitable for CANN + * processing. + * + * This function transforms quantized Q4.0 tensor data into a format suitable + * for CANN processing. 
It extracts quantization values and scales from the + * source data and prepares them in a format expected by CANN operations. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the source data in Q4.0 format. + * @param dst Pointer to the destination buffer where transformed data will be + * stored. + */ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor, const void* src, void* dst) { @@ -330,7 +571,8 @@ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor, uint16_t* scale_offset = (uint16_t*)((char*)dst + quant_bytes); for (int i = 0; i < groups; i++) { - const block_q4_0* group = (const block_q4_0*)((const char*)src + i * sizeof(block_q4_0)); + const block_q4_0* group = + (const block_q4_0*)((const char*)src + i * sizeof(block_q4_0)); *scale_offset = group->d; scale_offset++; @@ -356,6 +598,19 @@ GGML_CALL static void ggml_backend_cann_transform_q4_0(ggml_tensor* tensor, } } +/** + * @brief Transform CANN processed data back into quantized Q4.0 format. + * + * This function transforms CANN processed data back into quantized Q4.0 format. + * It reverses the transformation performed by + * ggml_backend_cann_transform_q4_0(), converting the data back into its + * original quantized form. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the transformed source data; modified in place. + * @param dst Pointer to the destination buffer where the Q4.0 formatted data + * will be stored. 
+ */ GGML_CALL static void ggml_backend_cann_transform_back_q4_0( const ggml_tensor* tensor, void* src, void* dst) { GGML_ASSERT(tensor->op == GGML_OP_NONE); @@ -367,7 +622,7 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0( uint8_t* quant_offset = (uint8_t*)src; uint16_t* scale_offset = (uint16_t*)((char*)src + quant_bytes); - for (;quant_offset < (uint8_t*)src + quant_bytes; quant_offset++) { + for (; quant_offset < (uint8_t*)src + quant_bytes; quant_offset++) { (*quant_offset) ^= 0x88; } quant_offset = (uint8_t*)src; @@ -393,6 +648,19 @@ GGML_CALL static void ggml_backend_cann_transform_back_q4_0( } } +/** + * @brief Transform quantized Q8.0 tensor data into a format suitable for CANN + * processing. + * + * This function transforms quantized Q8.0 tensor data into a format suitable + * for CANN processing. It extracts quantization values and scales from the + * source data and prepares them in a format expected by CANN operations. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the source data in Q8.0 format. + * @param dst Pointer to the destination buffer where transformed data will be + * stored. + */ GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor, const void* src, void* dst) { @@ -404,7 +672,8 @@ GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor, uint16_t* scale_offset = (uint16_t*)((char*)dst + quant_bytes); for (int i = 0; i < groups; i++) { - const block_q8_0* group = (const block_q8_0*)((const char*)src + i * sizeof(block_q8_0)); + const block_q8_0* group = + (const block_q8_0*)((const char*)src + i * sizeof(block_q8_0)); *scale_offset = group->d; scale_offset++; size_t group_quant_size = QK8_0 * sizeof(uint8_t); @@ -413,6 +682,19 @@ GGML_CALL static void ggml_backend_cann_transform_q8_0(ggml_tensor* tensor, } } +/** + * @brief Transform CANN processed data back into quantized Q8.0 format. 
+ * + * This function transforms CANN processed data back into quantized Q8.0 format. + * It reverses the transformation performed by + * ggml_backend_cann_transform_q8_0(), converting the data back into its + * original quantized form. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the source buffer containing transformed data. + * @param dst Pointer to the destination buffer where the Q8.0 formatted data + * will be stored. + */ GGML_CALL static void ggml_backend_cann_transform_back_q8_0( const ggml_tensor* tensor, const void* src, void* dst) { int64_t n_elems = ggml_nelements(tensor); @@ -420,7 +702,8 @@ GGML_CALL static void ggml_backend_cann_transform_back_q8_0( size_t quant_bytes = n_elems * sizeof(uint8_t); const uint8_t* quant_offset = (const uint8_t*)src; - const uint16_t* scale_offset = (const uint16_t*)((const char*)src + quant_bytes); + const uint16_t* scale_offset = + (const uint16_t*)((const char*)src + quant_bytes); for (int i = 0; i < groups; i++) { block_q8_0* group = (block_q8_0*)((char*)dst + i * sizeof(block_q8_0)); @@ -432,6 +715,18 @@ GGML_CALL static void ggml_backend_cann_transform_back_q8_0( } } +/** + * @brief Transform tensor data based on its type for CANN processing. + * + * This function transforms tensor data based on its quantization type for CANN + * processing. It dispatches the transformation based on the tensor's type to + * specialized functions handling Q4.0 and Q8.0 formats. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the source data to be transformed. + * @param dst Pointer to the destination buffer where transformed data will be + * stored. + */ GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor, const void* src, void* dst) { switch (tensor->type) { @@ -446,6 +741,18 @@ GGML_CALL static void ggml_backend_cann_transform(ggml_tensor* tensor, } } +/** + * @brief Transform CANN processed data back into tensor data based on its type. 
+ * + * This function transforms CANN processed data back into tensor data based on + * its quantization type for Q4.0 and Q8.0 formats. It dispatches the + * transformation based on the tensor's type to specialized functions. + * + * @param tensor Pointer to the tensor information. + * @param src Pointer to the source data containing CANN processed data. + * @param dst Pointer to the destination buffer where transformed tensor data + * will be stored. + */ GGML_CALL static void ggml_backend_cann_transform_back( const ggml_tensor* tensor, void* src, void* dst) { switch (tensor->type) { @@ -460,6 +767,15 @@ GGML_CALL static void ggml_backend_cann_transform_back( } } +/** + * @brief Check if transformation is needed for a given tensor type. + * + * This function checks if transformation is needed for a given tensor type + * to prepare data for CANN processing. + * + * @param type The tensor type to check. + * @return true if transformation is needed, false otherwise. + */ GGML_CALL static bool need_transform(ggml_type type) { switch (type) { case GGML_TYPE_Q4_0: @@ -470,6 +786,15 @@ GGML_CALL static bool need_transform(ggml_type type) { } } +/** + * @brief Initialize a tensor using data from a CANN buffer. + * + * This function initializes a tensor using data from a CANN buffer. + * It handles special cases such as views and quantization. + * + * @param buffer The CANN buffer from which to initialize the tensor. + * @param tensor Pointer to the tensor to be initialized. + */ GGML_CALL static void ggml_backend_cann_buffer_init_tensor( ggml_backend_buffer_t buffer, ggml_tensor* tensor) { if (tensor->view_src != NULL && tensor->view_offs == 0) { @@ -493,11 +818,23 @@ GGML_CALL static void ggml_backend_cann_buffer_init_tensor( } } -// TODO: need handle tensor which pas paddings. +// TODO: handle tensors that have padding. +/** + * @brief Set tensor data in a CANN buffer. 
+ * + * This function sets tensor data in a CANN buffer, handling transformations + * if needed based on the tensor's type. + * + * @param buffer The CANN buffer where the tensor data will be set. + * @param tensor Pointer to the tensor whose data will be set. + * @param data Pointer to the source data to be copied into the tensor. + * @param offset Offset in the source data from where to start copying. + * @param size Size of the data to be copied, in bytes. + */ GGML_CALL static void ggml_backend_cann_buffer_set_tensor( ggml_backend_buffer_t buffer, ggml_tensor* tensor, const void* data, size_t offset, size_t size) { - //GGML_ASSERT(size == ggml_nbytes(tensor)); + // GGML_ASSERT(size == ggml_nbytes(tensor)); ggml_backend_cann_buffer_context* ctx = (ggml_backend_cann_buffer_context*)buffer->context; @@ -507,8 +844,8 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor( // Why aclrtSynchronizeDevice? if (!need_transform(tensor->type)) { - ACL_CHECK(aclrtMemcpy(tensor->data, size, (const char*)data + offset, size, - ACL_MEMCPY_HOST_TO_DEVICE)); + ACL_CHECK(aclrtMemcpy(tensor->data, size, (const char*)data + offset, + size, ACL_MEMCPY_HOST_TO_DEVICE)); } else { void* transform_buffer = malloc(size); ggml_backend_cann_transform(tensor, (const char*)data + offset, @@ -518,7 +855,8 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor( void* check_buffer = malloc(size); ggml_backend_cann_transform_back(tensor, transform_buffer, check_buffer); - GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size) == 0); + GGML_ASSERT(memcmp((const char*)data + offset, check_buffer, size) == + 0); free(check_buffer); #endif ACL_CHECK(aclrtMemcpy(tensor->data, size, transform_buffer, size, @@ -527,6 +865,19 @@ GGML_CALL static void ggml_backend_cann_buffer_set_tensor( } } +/** + * @brief Get tensor data from a CANN buffer. + * + * This function retrieves tensor data from a CANN buffer, handling + * transformations if needed based on the tensor's type. 
+ * + * @param buffer The CANN buffer from which to retrieve tensor data. + * @param tensor Pointer to the tensor whose data will be retrieved. + * @param data Pointer to the destination buffer where the tensor data will be + * copied. + * @param offset Offset in the destination buffer where to start copying. + * @param size Size of the data to be copied, in bytes. + */ GGML_CALL static void ggml_backend_cann_buffer_get_tensor( ggml_backend_buffer_t buffer, const ggml_tensor* tensor, void* data, size_t offset, size_t size) { @@ -549,6 +900,19 @@ GGML_CALL static void ggml_backend_cann_buffer_get_tensor( } } +/** + * @brief Copy tensor data between CANN buffers if possible. + * + * This function copies tensor data between CANN buffers if the source and + * destination buffers are CANN buffers and they meet the necessary conditions + * (same device or devices can access each other). + * + * @param buffer The destination CANN buffer where the tensor data will be + * copied. + * @param src Pointer to the source tensor whose data will be copied. + * @param dst Pointer to the destination tensor where the data will be copied. + * @return true if the copy operation succeeded, false otherwise. + */ GGML_CALL static bool ggml_backend_cann_buffer_cpy_tensor( ggml_backend_buffer_t buffer, const ggml_tensor* src, ggml_tensor* dst) { if (ggml_backend_buffer_is_cann(src->buffer)) { @@ -582,6 +946,15 @@ GGML_CALL static bool ggml_backend_cann_buffer_cpy_tensor( return false; } +/** + * @brief Clear a CANN buffer by setting all its memory to a specified value. + * + * This function clears a CANN buffer by setting all its memory to a specified + * value. + * + * @param buffer The CANN buffer to be cleared. + * @param value The value to which each byte in the buffer will be set. 
+ */ GGML_CALL static void ggml_backend_cann_buffer_clear( ggml_backend_buffer_t buffer, uint8_t value) { ggml_backend_cann_buffer_context* ctx = @@ -591,6 +964,12 @@ GGML_CALL static void ggml_backend_cann_buffer_clear( ACL_CHECK(aclrtMemset(ctx->dev_ptr, buffer->size, value, buffer->size)); } +/** + * @brief Interface for a CANN buffer in the backend. + * + * This structure defines function pointers to operations that can be performed + * on a CANN buffer within the backend. + */ static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = { /* .get_name = */ ggml_backend_cann_buffer_get_name, /* .free_buffer = */ ggml_backend_cann_buffer_free_buffer, @@ -604,11 +983,25 @@ static ggml_backend_buffer_i ggml_backend_cann_buffer_interface = { }; // cann buffer type +/** + * @brief Structure representing context information for a specific backend + * buffer type. + */ struct ggml_backend_cann_buffer_type_context { - int32_t device; - std::string name; + int32_t + device; /**< Device identifier associated with the buffer context. */ + std::string name; /**< Name associated with the buffer context. */ }; +/** + * @brief Retrieves the name associated with a CANN buffer type. + * + * This function returns the descriptive name associated with the specified + * CANN buffer type context. + * + * @param buft Pointer to the buffer type context. + * @return Const pointer to the C-style string containing the name. + */ GGML_CALL static const char* ggml_backend_cann_buffer_type_name( ggml_backend_buffer_type_t buft) { ggml_backend_cann_buffer_type_context* ctx = @@ -617,6 +1010,16 @@ GGML_CALL static const char* ggml_backend_cann_buffer_type_name( return ctx->name.c_str(); } +/** + * @brief Allocates a new CANN buffer of the specified type and size. + * + * This function allocates a new CANN buffer on the specified device with the + * given size. + * + * @param buft Pointer to the buffer type context. + * @param size Size in bytes of the buffer to allocate. 
+ * @return Pointer to the allocated buffer, or nullptr if allocation fails. + */ GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { @@ -645,6 +1048,18 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, ctx, size); } +/** + * @brief Retrieves the memory alignment requirement for CANN buffers of this + * type. + * + * This function returns the alignment requirement in bytes for memory allocated + * by the CANN buffer type. + * + * @param buft Pointer to the buffer type context (unused in this + * implementation). + * @return The alignment requirement in bytes (fixed at 128 bytes for CANN + * buffers). + */ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alignment( ggml_backend_buffer_type_t buft) { return 128; @@ -652,6 +1067,20 @@ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alignment( GGML_UNUSED(buft); } +/** + * @brief Calculates the allocation size required for a tensor in a CANN buffer. + * + * Computes the total allocation size needed for storing the tensor's data in a + * CANN buffer, considering any necessary padding or adjustments for quantized + * types. + * + * @param buft Pointer to the buffer type context (unused in this + * implementation). + * @param tensor Pointer to the tensor for which the allocation size is + * calculated. + * @return The total allocation size in bytes required for the tensor in the + * CANN buffer. + */ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alloc_size( ggml_backend_buffer_type_t buft, const ggml_tensor* tensor) { size_t size = ggml_nbytes(tensor); @@ -678,6 +1107,12 @@ GGML_CALL static size_t ggml_backend_cann_buffer_type_get_alloc_size( GGML_UNUSED(buft); } +/** + * @brief Interface for managing CANN buffer types in the GGML backend. + * + * Provides function pointers for allocating, querying properties, and managing + * memory for CANN buffer types in the GGML backend. 
+ */ static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = { /* .get_name = */ ggml_backend_cann_buffer_type_name, /* .alloc_buffer = */ ggml_backend_cann_buffer_type_alloc_buffer, @@ -687,6 +1122,16 @@ static ggml_backend_buffer_type_i ggml_backend_cann_buffer_type_interface = { /* .is_host = */ NULL, }; +/** + * @brief Retrieves the CANN buffer type for a specified device. + * + * This function initializes and returns the buffer type interface associated + * with the given device. It ensures thread-safe access using a mutex. + * + * @param device The device index for which to retrieve the buffer type. + * @return A pointer to the buffer type interface for the specified device, or + * nullptr if the device index is out of range. + */ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_buffer_type(int32_t device) { static std::mutex mutex; @@ -716,6 +1161,19 @@ ggml_backend_cann_buffer_type(int32_t device) { return &ggml_backend_cann_buffer_types[device]; } +/** + * @brief Computes the forward operation for a given tensor using CANN + * operations. + * + * This function selects the appropriate CANN operation based on the type of + * operation specified in the tensor and performs the computation. + * + * @param ctx The CANN context containing necessary resources and + * configurations. + * @param dst The destination tensor where the result of the computation will be + * stored. + * @return true if the computation was successful; false otherwise. + */ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx, struct ggml_tensor* dst) { switch (dst->op) { @@ -857,6 +1315,15 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx, } // backend +/** + * @brief Retrieves the name associated with the CANN backend. + * + * This function returns the name assigned to the CANN backend, which is stored + * in the context of the provided backend structure. + * + * @param backend Pointer to the CANN backend structure. 
+ * @return A pointer to a constant string representing the backend name. + */ GGML_CALL static const char* ggml_backend_cann_name(ggml_backend_t backend) { ggml_backend_cann_context* cann_ctx = (ggml_backend_cann_context*)backend->context; @@ -864,6 +1331,14 @@ GGML_CALL static const char* ggml_backend_cann_name(ggml_backend_t backend) { return cann_ctx->name.c_str(); } +/** + * @brief Frees resources associated with the CANN backend. + * + * This function releases resources associated with the CANN backend context + * and resets the device associated with the backend to its initial state. + * + * @param backend Pointer to the CANN backend structure to be freed. + */ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) { ggml_backend_cann_context* cann_ctx = (ggml_backend_cann_context*)backend->context; @@ -873,6 +1348,16 @@ GGML_CALL static void ggml_backend_cann_free(ggml_backend_t backend) { delete backend; } +/** + * @brief Retrieves the default buffer type associated with the CANN backend. + * + * This function returns the buffer type specific to the device associated + * with the CANN backend. It is used to allocate buffers for computations + * performed by the backend. + * + * @param backend Pointer to the CANN backend structure. + * @return Pointer to the buffer type structure for the CANN backend. + */ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) { ggml_backend_cann_context* cann_ctx = @@ -881,6 +1366,19 @@ ggml_backend_cann_get_default_buffer_type(ggml_backend_t backend) { return ggml_backend_cann_buffer_type(cann_ctx->device); } +/** + * @brief Sets tensor data asynchronously in the CANN backend. + * + * This function asynchronously sets tensor data in the CANN backend. Depending + * on the tensor type, it may perform data transformations before copying data + * to the device. + * + * @param backend Pointer to the CANN backend structure. 
+ * @param tensor Pointer to the tensor structure to set data for. + * @param data Pointer to the host data to copy to the tensor. + * @param offset Offset in bytes within the host data. + * @param size Size of the data to copy in bytes. + */ GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend, ggml_tensor* tensor, const void* data, @@ -890,9 +1388,9 @@ GGML_CALL static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend, (ggml_backend_cann_context*)backend->context; if (!need_transform(tensor->type)) { - ACL_CHECK(aclrtMemcpyAsync(tensor->data, size, (const char*)data + offset, - size, ACL_MEMCPY_HOST_TO_DEVICE, - cann_ctx->stream())); + ACL_CHECK(aclrtMemcpyAsync( + tensor->data, size, (const char*)data + offset, size, + ACL_MEMCPY_HOST_TO_DEVICE, cann_ctx->stream())); } else { void* transform_buffer = malloc(size); ggml_backend_cann_transform(tensor, (const char*)data + offset, @@ -940,6 +1438,19 @@ GGML_CALL static void ggml_backend_cann_get_tensor_async( } } +/** + * @brief Asynchronously copies tensor data between CANN backends. + * + * This function copies tensor data asynchronously between two CANN backends. It + * checks if both tensors reside in CANN buffers and whether the devices support + * peer-to-peer access for direct copying. If not, it returns false. + * + * @param backend_src Pointer to the source CANN backend structure. + * @param backend_dst Pointer to the destination CANN backend structure. + * @param src Pointer to the source tensor to copy data from. + * @param dst Pointer to the destination tensor to copy data to. + * @return true if the copy operation succeeds, false otherwise. + */ GGML_CALL static bool ggml_backend_cann_cpy_tensor_async( ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor* src, ggml_tensor* dst) { @@ -1005,6 +1516,14 @@ GGML_CALL static bool ggml_backend_cann_cpy_tensor_async( return true; } +/** + * @brief Synchronizes a CANN backend. 
+ * + * This function synchronizes the specified CANN backend by waiting for all + * operations in its associated stream to complete. + * + * @param backend Pointer to the CANN backend structure to synchronize. + */ GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) { ggml_backend_cann_context* cann_ctx = (ggml_backend_cann_context*)backend->context; @@ -1014,6 +1533,18 @@ GGML_CALL static void ggml_backend_cann_synchronize(ggml_backend_t backend) { ACL_CHECK(aclrtSynchronizeStream(cann_ctx->stream())); } +/** + * @brief Computes a computational graph using a CANN backend. + * + * This function computes the operations defined in the computational graph + * using the specified CANN backend. + * + * @param backend Pointer to the CANN backend structure to use for computation. + * @param cgraph Pointer to the computational graph structure containing nodes + * representing operations to be computed. + * @return enum ggml_status Returns GGML_STATUS_SUCCESS if computation + * completes successfully, otherwise an appropriate error status. + */ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute( ggml_backend_t backend, ggml_cgraph* cgraph) { ggml_backend_cann_context* cann_ctx = @@ -1040,6 +1571,18 @@ GGML_CALL static enum ggml_status ggml_backend_cann_graph_compute( return GGML_STATUS_SUCCESS; } +/** + * @brief Checks if the CANN backend supports a specific operation. + * + * This function checks whether the specified operation is supported by the + * CANN backend. + * + * @param backend Pointer to the CANN backend structure to check support for + * the operation. + * @param op Pointer to the tensor representing the operation to check. + * @return bool Returns true if the operation is supported by the backend, + * otherwise false. 
+ */ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend, const ggml_tensor* op) { switch (op->op) { @@ -1130,20 +1673,61 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend, GGML_UNUSED(backend); } +/** + * @brief Checks if the backend buffer type is associated with the CANN backend. + * + * This function checks whether the provided backend buffer type is associated + * with the CANN backend based on the comparison of its name retrieval function + * pointer. + * + * @param buft Pointer to the backend buffer type to check. + * @return bool Returns true if the buffer type is associated with the CANN + * backend, otherwise false. + */ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) { return buft->iface.get_name == ggml_backend_cann_buffer_type_name; } -GGML_CALL static bool ggml_backend_cann_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) { +/** + * @brief Checks if the CANN backend supports a specific backend buffer type. + * + * This function determines whether the CANN backend supports the given backend + * buffer type by comparing the device context of the backend and buffer type. + * It returns true if the device associated with the buffer type matches the + * device associated with the backend. + * + * @param backend Pointer to the CANN backend. + * @param buft Pointer to the backend buffer type to check. + * @return bool Returns true if the CANN backend supports the buffer type, + * otherwise false. 
+ */ +GGML_CALL static bool ggml_backend_cann_supports_buft( + ggml_backend_t backend, ggml_backend_buffer_type_t buft) { if (ggml_backend_buft_is_cann(buft)) { - ggml_backend_cann_context * cann_ctx = (ggml_backend_cann_context *)backend->context; - ggml_backend_cann_buffer_type_context * buft_ctx = (ggml_backend_cann_buffer_type_context *)buft->context; + ggml_backend_cann_context* cann_ctx = + (ggml_backend_cann_context*)backend->context; + ggml_backend_cann_buffer_type_context* buft_ctx = + (ggml_backend_cann_buffer_type_context*)buft->context; return buft_ctx->device == cann_ctx->device; } return false; } +/** + * @brief Determines if a tensor operation should be offloaded to the CANN + * backend. + * + * This function checks if a given tensor operation should be offloaded to the + * CANN backend based on the operation type and the size of the tensor. It + * returns true if the second dimension (ne[1]) of the tensor is greater than or + * equal to the minimum batch size and the operation is not GGML_OP_GET_ROWS. + * + * @param backend Pointer to the CANN backend. + * @param op Pointer to the tensor operation to check. + * @return bool Returns true if the operation should be offloaded, otherwise + * false. + */ GGML_CALL static bool ggml_backend_cann_offload_op(ggml_backend_t backend, const ggml_tensor* op) { const int min_batch_size = 32; @@ -1152,6 +1736,16 @@ GGML_CALL static bool ggml_backend_cann_offload_op(ggml_backend_t backend, return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS; } +/** + * @brief Creates a new event for the CANN backend. + * + * This function initializes a new event for the CANN backend by setting the + * device and creating an ACL runtime event. The created event is then wrapped + * in a ggml_backend_event structure and returned. + * + * @param backend Pointer to the CANN backend. + * @return ggml_backend_event_t Returns a pointer to the new event structure. 
+ */ static ggml_backend_event_t ggml_backend_cann_event_new( ggml_backend_t backend) { ggml_backend_cann_context* cann_ctx = @@ -1168,12 +1762,28 @@ static ggml_backend_event_t ggml_backend_cann_event_new( }; } +/** + * @brief Frees a CANN backend event. + * + * This function destroys the ACL runtime event associated with the given CANN + * backend event and then deletes the event structure itself. + * + * @param event Pointer to the event structure to be freed. + */ static void ggml_backend_cann_event_free(ggml_backend_event_t event) { ACL_CHECK(aclrtDestroyEvent((aclrtEvent)event->context)); delete event; } +/** + * @brief Records an event on the CANN backend stream. + * + * This function records the given event on the ACL runtime stream associated + * with the backend context. + * + * @param event Pointer to the event structure to be recorded. + */ static void ggml_backend_cann_event_record(ggml_backend_event_t event) { ggml_backend_cann_context* cann_ctx = (ggml_backend_cann_context*)event->backend->context; @@ -1181,6 +1791,16 @@ static void ggml_backend_cann_event_record(ggml_backend_event_t event) { ACL_CHECK(aclrtRecordEvent((aclrtEvent)event->context, cann_ctx->stream())); } +/** + * @brief Waits for a recorded event to complete on the CANN backend stream. + * + * This function makes the given backend wait for the event to complete on its + * ACL runtime stream. + * + * @param backend Pointer to the backend structure. + * @param event Pointer to the event structure that the backend needs to wait + * for. + */ static void ggml_backend_cann_event_wait(ggml_backend_t backend, ggml_backend_event_t event) { ggml_backend_cann_context* cann_ctx = @@ -1194,10 +1814,24 @@ static void ggml_backend_cann_event_wait(ggml_backend_t backend, } } +/** + * @brief Synchronizes the given event on the CANN backend. + * + * This function waits for the specified event to complete on the ACL runtime. + * + * @param event Pointer to the event structure to be synchronized. 
+ */ static void ggml_backend_cann_event_synchronize(ggml_backend_event_t event) { ACL_CHECK(aclrtSynchronizeEvent((aclrtEvent)event->context)); } +/** + * @brief Structure defining the interface for the CANN backend. + * + * This structure contains function pointers for various operations + * supported by the CANN backend, including name retrieval, memory + * management, tensor operations, synchronization, and event handling. + */ static ggml_backend_i ggml_backend_cann_interface = { /* .get_name = */ ggml_backend_cann_name, /* .free = */ ggml_backend_cann_free, @@ -1221,6 +1855,14 @@ static ggml_backend_i ggml_backend_cann_interface = { /* .event_synchronize = */ ggml_backend_cann_event_synchronize, }; +/** + * @brief Return the hardcoded GUID for the CANN backend. + * + * This function returns a static GUID which uniquely identifies the CANN + * backend. + * + * @return A pointer to the static GUID. + */ static ggml_guid_t ggml_backend_cann_guid() { static ggml_guid guid = {0xa1, 0x94, 0xaf, 0xac, 0xbd, 0x4f, 0x47, 0x34, 0xbe, 0x1a, 0x9e, 0x71, 0x1f, 0x9e, 0xed, 0x64}; @@ -1270,6 +1912,17 @@ GGML_CALL void ggml_backend_cann_get_device_memory(int32_t device, size_t* free, } // backend registry +/** + * @brief Initializes a CANN backend based on the provided parameters. + * + * This function initializes a CANN backend using the device index and then + * initializes the backend using `ggml_backend_cann_init`. + * + * @param params Parameters for initialization (unused in this implementation). + * @param user_data User data containing the device index to initialize the + * backend. + * @return ggml_backend_t The initialized CANN backend. 
+ */ GGML_CALL static ggml_backend_t ggml_backend_reg_cann_init(const char* params, void* user_data) { ggml_backend_t cann_backend = @@ -1281,6 +1934,16 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cann_init(const char* params, extern "C" GGML_CALL int ggml_backend_cann_reg_devices(); +/** + * @brief Registers CANN (Ascend) devices as backend options. + * + * This function initializes ACL, retrieves the number of available CANN + * devices, and registers each device as a backend option using + * `ggml_backend_register`. Each device is given a unique name based on + * `GGML_CANN_NAME` followed by its index. + * + * @return int The number of CANN devices registered. + */ GGML_CALL int ggml_backend_cann_reg_devices() { aclInit(nullptr); uint32_t device_count = ggml_backend_cann_get_device_count(); diff --git a/ggml/src/ggml-cann/acl_tensor.cpp b/ggml/src/ggml-cann/acl_tensor.cpp index 40bbfeb5475115..17d1f56836eee8 100644 --- a/ggml/src/ggml-cann/acl_tensor.cpp +++ b/ggml/src/ggml-cann/acl_tensor.cpp @@ -1,4 +1,4 @@ -/** +/* * Copyright (c) 2023-2024 The ggml authors * * Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/ggml/src/ggml-cann/acl_tensor.h b/ggml/src/ggml-cann/acl_tensor.h index 3210684149c42a..3856695b889af7 100644 --- a/ggml/src/ggml-cann/acl_tensor.h +++ b/ggml/src/ggml-cann/acl_tensor.h @@ -1,15 +1,4 @@ -#ifndef CANN_ACL_TENSOR_H -#define CANN_ACL_TENSOR_H - -/** - * @file acl_tensor - * @brief This file contains related functions of ggml_tensor and acl_tensor. - * Contains conversion from ggml_tensor to acl_tensor, broadcast and other - * functions. - * @author hipudding - * @author wangshuai09 <391746016@qq.com> - * @date July 15, 2024 - * +/* * Copyright (c) 2023-2024 The ggml authors * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -31,6 +20,9 @@ * IN THE SOFTWARE. 
*/ +#ifndef CANN_ACL_TENSOR_H +#define CANN_ACL_TENSOR_H + #include #include "common.h" @@ -42,7 +34,7 @@ * does not match any of the predefined ggml_types, the function returns * ACL_DT_UNDEFINED. * - * @param type The ggml_type to be mapped. + * @param type The ggml_type to be mapped. * @return The corresponding aclDataType. If the input type is not recognized, * ACL_DT_UNDEFINED is returned. */ @@ -140,6 +132,7 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); * that src1 can be element-wise broadcasted to src0's shape. * * How it works: + * * if dim0 has padding. * a -> (2, 2) padding = 2 * a: [[1, 2, *, *] @@ -161,8 +154,9 @@ bool need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); * b' : [[[1, 2], [3, 4], *, *] * [[5, 6], [7, 8], *, *]] * nb = (12, 6, 2, 1) + * \endcode * - * because dim1 in a inserted dim, should add nb for dim1, + * dim1 in a inserted dim, should add nb for dim1, * and all other nb moves to next in order. */ int64_t get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1, diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index 11f8da1a3a3d0a..0989fc058cf3fa 100644 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2023-2024 The ggml authors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + #ifndef CANN_COMMON_H #define CANN_COMMON_H