Skip to content

fork ProcessGroupNCCL #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# NVIDIA NCCL sources, checked out under third_party/nccl.
# The submodule name matches its path, which is what `git submodule add` generates by default.
[submodule "third_party/nccl"]
	path = third_party/nccl
	url = https://github.com/NVIDIA/nccl.git
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tonic = "0.12.2"

# Crates that are only needed by build.rs.
[build-dependencies]
# build.rs calls tonic_build::configure() to compile proto/torchft.proto.
tonic-build = "0.12.2"
# build.rs calls cmake::build("csrc") to build the CMake project in csrc/.
cmake = "0.1"

[lib]
name = "torchft"
Expand Down
3 changes: 3 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
tonic_build::configure()
.protoc_arg("--experimental_allow_proto3_optional")
.compile_protos(&["proto/torchft.proto"], &["proto"])?;

let dst = cmake::build("csrc");

Ok(())
}
75 changes: 75 additions & 0 deletions cmake/External/nccl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Defines the INTERFACE target `__caffe2_nccl`, which carries the NCCL link
# libraries and include directories.
#
# Inputs read from the including scope:
#   USE_SYSTEM_NCCL       - when true, locate NCCL with find_package(NCCL)
#                           instead of building the bundled sources.
#   TORCH_CUDA_ARCH_LIST  - CUDA architectures passed to
#                           cuda_select_nvcc_arch_flags() for the bundled build.
#   ENV{MAX_JOBS}         - optional job/load limit for the bundled build.
#
# Variables set for the including scope:
#   NCCL_FOUND, NCCL_LIBRARIES, NCCL_INCLUDE_DIRS
#
# The bundled build relies on cuda_select_nvcc_arch_flags(), so
# find_package(CUDA) must have been called before this file is included.
if(NOT __NCCL_INCLUDED)
  set(__NCCL_INCLUDED TRUE)

  if(USE_SYSTEM_NCCL)
    # NCCL_ROOT, NCCL_LIB_DIR, NCCL_INCLUDE_DIR will be accounted in the following line.
    find_package(NCCL REQUIRED)
    if(NCCL_FOUND)
      add_library(__caffe2_nccl INTERFACE)
      target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
      target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
    endif()
  else()
    # Do not rely on the includer having loaded ExternalProject already;
    # including it again is harmless.
    include(ExternalProject)

    # Fail with an actionable message instead of "Unknown CMake command".
    if(NOT COMMAND cuda_select_nvcc_arch_flags)
      message(FATAL_ERROR
        "cuda_select_nvcc_arch_flags() is not available; "
        "call find_package(CUDA) before including nccl.cmake.")
    endif()

    # Resolve the submodule relative to this file (cmake/External/) so the
    # location does not depend on which project includes it.
    set(__NCCL_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../../third_party/nccl")
    if(NOT EXISTS "${__NCCL_SOURCE_DIR}/Makefile")
      message(FATAL_ERROR
        "NCCL sources not found at ${__NCCL_SOURCE_DIR}. "
        "Run `git submodule update --init --recursive` or set USE_SYSTEM_NCCL=ON.")
    endif()

    cuda_select_nvcc_arch_flags(NVCC_GENCODE ${TORCH_CUDA_ARCH_LIST})

    # Turn the CMake list into a single space-separated string of
    # "-gencode=..." options suitable for passing as one make variable.
    string(REPLACE "-gencode;" "-gencode=" NVCC_GENCODE "${NVCC_GENCODE}")
    # this second replacement is needed when there are multiple archs
    string(REPLACE ";-gencode" " -gencode" NVCC_GENCODE "${NVCC_GENCODE}")

    if(DEFINED ENV{MAX_JOBS})
      set(MAX_JOBS "$ENV{MAX_JOBS}")
    else()
      include(ProcessorCount)
      ProcessorCount(NUM_HARDWARE_THREADS)
      # Assume 2 hardware threads per cpu core
      math(EXPR MAX_JOBS "${NUM_HARDWARE_THREADS} / 2")
      # ProcessorCount might return 0, set to a positive number
      if(MAX_JOBS LESS 2)
        set(MAX_JOBS 2)
      endif()
    endif()

    if("${CMAKE_GENERATOR}" MATCHES "Make")
      # Recursive make with jobserver for parallelism, and also put a load limit
      # here to avoid flaky OOM, https://www.gnu.org/software/make/manual/html_node/Parallel.html
      set(MAKE_COMMAND "$(MAKE)" "-l${MAX_JOBS}")
    else()
      # Parallel build with CPU load limit to avoid oversubscription
      set(MAKE_COMMAND "make" "-j${MAX_JOBS}" "-l${MAX_JOBS}")
    endif()

    set(__NCCL_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/nccl")
    # NCCL is built with its own Makefile; there is no configure or install
    # step, and BUILDDIR redirects the outputs into the binary tree.
    ExternalProject_Add(nccl_external
      SOURCE_DIR "${__NCCL_SOURCE_DIR}"
      BUILD_IN_SOURCE 1
      CONFIGURE_COMMAND ""
      BUILD_COMMAND
        ${MAKE_COMMAND}
        "CXX=${CMAKE_CXX_COMPILER}"
        "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}"
        "NVCC=${CUDA_NVCC_EXECUTABLE}"
        "NVCC_GENCODE=${NVCC_GENCODE}"
        "BUILDDIR=${__NCCL_BUILD_DIR}"
        "VERBOSE=0"
        "DEBUG=0"
      # Declared so that generators such as Ninja know which step produces the
      # static library that is linked below.
      BUILD_BYPRODUCTS "${__NCCL_BUILD_DIR}/lib/libnccl_static.a"
      INSTALL_COMMAND ""
    )

    set(__NCCL_LIBRARY_DEP nccl_external)
    set(NCCL_LIBRARIES "${__NCCL_BUILD_DIR}/lib/libnccl_static.a")

    set(NCCL_FOUND TRUE)
    add_library(__caffe2_nccl INTERFACE)
    # The following old-style variables are set so that other libs, such as Gloo,
    # can still use it.
    set(NCCL_INCLUDE_DIRS "${__NCCL_BUILD_DIR}/include")
    # Make sure the external build has run before anything links the target.
    add_dependencies(__caffe2_nccl ${__NCCL_LIBRARY_DEP})
    target_link_libraries(__caffe2_nccl INTERFACE ${NCCL_LIBRARIES})
    target_include_directories(__caffe2_nccl INTERFACE ${NCCL_INCLUDE_DIRS})
    # nccl includes calls to shm_open/shm_close and therefore must depend on librt on Linux
    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
      target_link_libraries(__caffe2_nccl INTERFACE rt)
    endif()
  endif()
endif()
54 changes: 54 additions & 0 deletions csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Builds the `_torchft_cpp` Python extension module (ProcessGroupNCCL fork)
# against the PyTorch found in the active Python environment, CUDA, and NCCL.
#
# Python3_SOABI (used for the module suffix below) requires CMake >= 3.17.
cmake_minimum_required(VERSION 3.17...3.18)
project(_torchft_cpp)

# Python
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)
set(PYTORCH_ROOT "${Python3_SITELIB}")

# CUDA
# FindCUDA also provides cuda_select_nvcc_arch_flags(), which nccl.cmake uses.
find_package(CUDA REQUIRED)
message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")

# Default architecture list; can be overridden with -DTORCH_CUDA_ARCH_LIST=...
if(NOT DEFINED TORCH_CUDA_ARCH_LIST)
  set(TORCH_CUDA_ARCH_LIST "5.0;6.0;7.0;7.5;8.0;8.6;9.0")
endif()

# NCCL
include(ExternalProject)
include("${CMAKE_CURRENT_LIST_DIR}/../cmake/External/nccl.cmake")

# torch
# if pytorch was installed in develop mode we need to resolve the egg-link
set(PYTORCH_EGG_LINK "${PYTORCH_ROOT}/torch.egg-link")
if(EXISTS "${PYTORCH_EGG_LINK}")
  # The first line of the egg-link file replaces PYTORCH_ROOT.
  file(STRINGS "${PYTORCH_EGG_LINK}" PYTORCH_ROOT LIMIT_COUNT 1)
endif()

message(STATUS "PYTORCH_ROOT: ${PYTORCH_ROOT}")

add_library(${PROJECT_NAME} SHARED
  ProcessGroupNCCL.cpp
  init.cpp
  NCCLUtils.cpp
  cuda_utils.cpp
)

target_compile_definitions(${PROJECT_NAME} PRIVATE USE_C10D_NCCL)

# Same search order the directory-scoped include_directories(BEFORE ...) calls
# produced: torch API headers, torch headers, CUDA, then Python.
target_include_directories(${PROJECT_NAME} PRIVATE
  "${PYTORCH_ROOT}/torch/include/torch/csrc/api/include/"
  "${PYTORCH_ROOT}/torch/include"
  ${CUDA_INCLUDE_DIRS}
  ${Python3_INCLUDE_DIRS}
)

# torch_cpu / torch_cuda / c10_cuda are resolved by name from this directory.
target_link_directories(${PROJECT_NAME} PRIVATE "${PYTORCH_ROOT}/torch/lib")

target_link_libraries(${PROJECT_NAME} PRIVATE
  ${CUDA_LIBRARIES}
  __caffe2_nccl
  torch_cpu
  torch_cuda
  c10_cuda
)

# Name the module "<name>.<SOABI>.so"; fall back to plain ".so" when the
# interpreter reports no SOABI, instead of producing "<name>..so".
if(Python3_SOABI)
  set(torchft_module_suffix ".${Python3_SOABI}.so")
else()
  set(torchft_module_suffix ".so")
endif()

set_target_properties(${PROJECT_NAME} PROPERTIES
  PREFIX ""
  SUFFIX "${torchft_module_suffix}"
)

# The destination is an absolute path, so the module is installed into the
# torchft directory next to this one regardless of CMAKE_INSTALL_PREFIX.
install(
  TARGETS ${PROJECT_NAME}
  DESTINATION "${PROJECT_SOURCE_DIR}/../torchft"
)
Loading
Loading