diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index afeda954..e002d1d5 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -3,7 +3,6 @@ # To get started with mdBook see: https://rust-lang.github.io/mdBook/index.html # name: Deploy sphinx site to Pages -run-name: Deploy sphinx site to Pages on: # Runs on pushes targeting the default branch diff --git a/.github/workflows/linux-compileonly.yaml b/.github/workflows/linux-compileonly.yaml index 43a199e2..78cfa52c 100644 --- a/.github/workflows/linux-compileonly.yaml +++ b/.github/workflows/linux-compileonly.yaml @@ -66,5 +66,5 @@ jobs: cmake --build "$KOKKOS_BUILD" --parallel $(nproc) -t install - name: Build Kokkos Comm run: | - cmake -S "$COMM_SRC" -B "$COMM_BUILD" -DCMAKE_CXX_COMPILER="$KOKKOS_SRC/bin/nvcc_wrapper" -DKokkos_ROOT="$KOKKOS_INSTALL" -DCMAKE_BUILD_TYPE=Release + cmake -S "$COMM_SRC" -B "$COMM_BUILD" -DCMAKE_CXX_COMPILER="$KOKKOS_SRC/bin/nvcc_wrapper" -DKokkos_ROOT="$KOKKOS_INSTALL" -DCMAKE_BUILD_TYPE=Release -DKokkosComm_ENABLE_TESTS=ON -DKokkosComm_ENABLE_PERFTESTS=ON VERBOSE=1 cmake --build "$COMM_BUILD" diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index ef29f8fb..2d058ef8 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -62,7 +62,7 @@ jobs: COMM_SRC: ${{ github.workspace }} COMM_BUILD: ${{ github.workspace }}/build runs-on: ubuntu-latest - timeout-minutes: 5 + timeout-minutes: 10 steps: - name: Install MPI run: | diff --git a/CMakeLists.txt b/CMakeLists.txt index b5ef9c82..1644baf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,11 +6,13 @@ project(KokkosComm VERSION 0.0.2) option(KokkosComm_ENABLE_PERFTESTS "Build KokkosComm perf tests" OFF) option(KokkosComm_ENABLE_TESTS "Build KokkosComm perf tests" OFF) +option(KokkosComm_ENABLE_MPI "Build KokkosComm with MPI transport" ON) ## resolve options set(KOKKOSCOMM_ENABLE_PERFTESTS ${KokkosComm_ENABLE_PERFTESTS} CACHE BOOL "" FORCE) set(KOKKOSCOMM_ENABLE_TESTS ${KokkosComm_ENABLE_TESTS} CACHE BOOL "" FORCE) +set(KOKKOSCOMM_ENABLE_MPI ${KokkosComm_ENABLE_MPI} CACHE BOOL "" FORCE) find_package(Kokkos REQUIRED) find_package(MPI REQUIRED) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 877f3e24..47388a45 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -8,6 +8,8 @@ include(CMakeFindDependencyMacro) find_dependency(MPI) find_dependency(Kokkos) +set(KOKKOSCOMM_ENABLE_MPI @KOKKOSCOMM_ENABLE_MPI@) + ## FIXME: do we need this? set(KokkosComm_INCLUDE_DIR "@CMAKE_INSTALL_FULL_INCLUDEDIR@" ) set(KokkosComm_DATA_DIR "@CMAKE_INSTALL_PREFIX@/@RELATIVE_DATA_INSTALL_DIR@" ) diff --git a/cmake/KokkosComm_config.hpp.in b/cmake/KokkosComm_config.hpp.in index 9664a347..81bf9a1c 100644 --- a/cmake/KokkosComm_config.hpp.in +++ b/cmake/KokkosComm_config.hpp.in @@ -19,3 +19,5 @@ #define KOKKOSCOMM_VERSION_MAJOR @KOKKOSCOMM_VERSION_MAJOR@ #define KOKKOSCOMM_VERSION_MINOR @KOKKOSCOMM_VERSION_MINOR@ #define KOKKOSCOMM_VERSION_PATCH @KOKKOSCOMM_VERSION_PATCH@ + +#cmakedefine KOKKOSCOMM_ENABLE_MPI diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index 7638de84..4393b0f3 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -27,7 +27,7 @@ Alternatively, you can use docker/podman: (expects $PWD to be the kokkos-comm tr .. code-block:: bash shopt -s globstar - podman run -v $PWD:/src xianpengshen/clang-tools:14 clang-format -i {src,unit_tests,perf_tests}/**/*.[ch]pp + podman run --rm -v ${PWD}:/src ghcr.io/cwpearson/clang-format-14 clang-format -i {src,unit_tests,perf_tests}/**/*.[ch]pp Site-Specific Documentation --------------------------- diff --git a/docs/api/core.rst b/docs/api/core.rst index f998ed91..743ea41b 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -1,162 +1,118 @@ Core ==== -.. list-table:: MPI API Support - :widths: 40 30 15 - :header-rows: 1 - - * - MPI - - ``KokkosComm::`` - - ``Kokkos::View`` - * - ``MPI_Send`` - - ``send`` or ``send(KokkosComm::DefaultCommMode{}, ...)`` - - ✓ - * - ``MPI_Rsend`` - - ``send(KokkosComm::ReadyCommMode{}, ...)`` - - ✓ - * - ``MPI_Recv`` - - ``recv`` - - ✓ - * - ``MPI_Ssend`` - - ``send(KokkosComm::SynchronousCommMode{}, ...)`` - - ✓ - * - ``MPI_Isend`` - - ``isend`` or ``isend(KokkosComm::DefaultCommMode{}, ...)`` - - ✓ - * - ``MPI_Irsend`` - - ``isend(KokkosComm::ReadyCommMode{}, ...)`` - - ✓ - * - ``MPI_Issend`` - - ``isend(KokkosComm::SynchronousCommMode{}, ...)`` - - ✓ - * - ``MPI_Reduce`` - - ``reduce`` - - ✓ - Point-to-point -------------- -.. cpp:function:: template \ - Req KokkosComm::isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) +.. cpp:namespace:: KokkosComm - Wrapper for ``MPI_Isend``, ``MPI_Irsend`` and ``MPI_Issend``. +.. cpp:function:: template Req send(Handle &h, SendView &sv, int dest) - :param mode: The communication mode to use - :param space: The execution space to operate in - :param sv: The data to send - :param dest: the destination rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam IsendMode: A communication mode to use, one of: ``KokkosComm::DefaultCommMode``, ``KokkosComm::StandardCommMode``, ``KokkosComm::SynchronousCommMode`` or ``KokkosComm::ReadyCommMode`` (modeled with the ``KokkosComm::CommunicationMode`` concept) - :tparam SendView: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in - :returns: A KokkosComm::Req representing the asynchronous communication and any lifetime-extended views. + Initiates a non-blocking send operation. -.. cpp:function:: template \ - void KokkosComm::send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) + .. warning:: + This is not a blocking operation despite being named like ``MPI_Send``. - Wrapper for ``MPI_Send``, ``MPI_Rsend`` and ``MPI_Ssend``. + :tparam SendView: The type of the Kokkos view to send. + :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. + :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. - :param mode: The communication mode to use - :param space: The execution space to operate in - :param sv: The data to send - :param dest: the destination rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam SendMode: A communication mode to use, one of: ``KokkosComm::DefaultCommMode``, ``KokkosComm::StandardCommMode``, ``KokkosComm::SynchronousCommMode`` or ``KokkosComm::ReadyCommMode`` (modeled with the ``KokkosComm::CommunicationMode`` concept) - :tparam SendView: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in + :param h: A handle to the execution space and transport mechanism. + :param sv: The Kokkos view to send. + :param dest: The destination rank. -.. cpp:function:: template \ - void KokkosComm::recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) + :return: A request object for the non-blocking send operation. - MPI_Recv wrapper +.. cpp:function:: template Req send(SendView &sv, int dest) - :param space: The execution space to operate in - :param srv: The data to recv - :param src: the source rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam Recv: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in + Initiates a non-blocking send operation using a default handle. + .. warning:: + This is not a blocking operation despite being named like ``MPI_Send``. -Collective ----------- + :tparam SendView: The type of the Kokkos view to send. + :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. + :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. -.. cpp:function:: template \ - void KokkosComm::reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + :param sv: The Kokkos view to send. + :param dest: The destination rank. - MPI_Reduce wrapper + :return: A request object for the non-blocking send operation. - :param space: The execution space to operate in - :param sv: The data to send - :param rv: The view to receive into - :param op: The MPI_Op to use in the reduction - :param root: The root rank for the reduction - :param comm: the MPI communicator - :tparam SendView: A Kokkos::View to send - :tparam RecvView: A Kokkos::View to recv - :tparam ExecSpace: A Kokkos execution space to operate in + Example usage: +.. literalinclude:: core_send.cpp + :language: cpp -.. cpp:function:: template \ - void KokkosComm::allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) - MPI_Allgather wrapper - :param space: The execution space to operate in - :param sv: The data to send - :param rv: The view to receive into - :param comm: the MPI communicator - :tparam SendView: A Kokkos::View to send. Contiguous and rank less than 2. - :tparam RecvView: A Kokkos::View to recv. Contiguous and rank 1. - :tparam ExecSpace: A Kokkos execution space to operate in +.. cpp:function:: template Req recv(Handle &h, RecvView &rv, int src) - If ``sv`` is a rank-0 view, the value from the jth rank will be placed in index j of ``rv``. + Initiates a non-blocking receive operation. -Related Types -------------- + .. warning:: + This is not a blocking operation despite being named like ``MPI_Recv``. + + :tparam RecvView: The type of the Kokkos view for receiving data. + :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. + + :param h: A handle to the execution space and transport mechanism. + :param rv: The Kokkos view where the received data will be stored. + :param src: The source rank from which to receive data. + + :return: A request object of type `Req` representing the non-blocking receive operation. + + This function initiates a non-blocking receive operation using the specified execution space and transport mechanism. The data will be received into the provided view from the specified source rank and message tag. The function returns a request object that can be used to check the status of the receive operation or to wait for its completion. -Communication Modes -^^^^^^^^^^^^^^^^^^^ + Example usage: -Structures to specify the mode of an operation. Buffered mode is not supported. +.. literalinclude:: core_recv.cpp + :language: cpp -.. cpp:struct:: KokkosComm::StandardCommMode - Let the MPI implementation decide whether outgoing messages will be buffered. Send operations can be started whether or not a matching receive has been started. They may complete before a matching receive begins. Standard mode is non-local: successful completion of the send operation may depend on the occurrence of a matching receive. -.. cpp:struct:: KokkosComm::SynchronousCommMode - Send operations complete successfully only if a matching receive is started, and the receive operation has started to receive the message sent. -.. cpp:struct:: KokkosComm::ReadyCommMode +.. cpp:function:: template Req recv(RecvView &rv, int src) - Send operations may be started only if the matching receive is already started. + Initiates a non-blocking receive operation using a default handle. -.. cpp:struct:: KokkosComm::DefaultCommMode + .. warning:: + This is not a blocking operation despite being named like ``MPI_Recv``. - The default mode is aliased as ``Standard`` but lets users override the behavior of operations at compile-time using the ``KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE`` pre-processor definition. The latter forces ``Synchronous`` mode for all "default-mode" operations, which can be helpful for debugging purposes, e.g., asserting that the communication scheme is correct. + :tparam RecvView: The type of the Kokkos view for receiving data. + :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. + :param rv: The Kokkos view where the received data will be stored. + :param src: The source rank from which to receive data. -Requests -^^^^^^^^ + :return: A request object of type `Req` representing the non-blocking receive operation. -.. cpp:class:: KokkosComm::Req - A wrapper around an MPI_Request that can also extend the lifetime of Views. +Collective +---------- + +.. cpp:namespace:: KokkosComm + +.. cpp:function:: template void barrier(Handle &&h) + + A function to create a barrier using the given execution space and transport handle. - .. cpp:function:: MPI_Request &KokkosComm::Req::mpi_req() + :tparam ExecSpace: The execution space to be used. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. + :param h: A handle of type `Handle` to be forwarded to the barrier implementation. - Retrieve a reference to the held MPI_Request. - .. cpp:function:: void KokkosComm::Req::wait() - Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::keep_until_wait(). +Related Types +------------- + +.. cpp:namespace:: KokkosComm + +.. cpp:class:: template Req - .. cpp:function:: template \ - void KokkosComm::Req::keep_until_wait(const View &v) + A template class to handle requests with different transport types. - Extend the lifetime of v at least until Req::wait() is called. - This is useful to prevent a View from being destroyed during an asynchronous MPI operation. + :tparam TRANSPORT: The type of transport. Defaults to :cpp:enumerator:`KokkosComm::DefaultTransport`. diff --git a/docs/api/core_recv.cpp b/docs/api/core_recv.cpp new file mode 100644 index 00000000..8b949bca --- /dev/null +++ b/docs/api/core_recv.cpp @@ -0,0 +1,4 @@ +Handle<> handle; +Kokkos::View recv_view("recv_view", 100); +auto req = recv(handle, recv_view, 1/*src*/); +KokkosComm::wait(req); \ No newline at end of file diff --git a/docs/api/core_send.cpp b/docs/api/core_send.cpp new file mode 100644 index 00000000..b23d28fe --- /dev/null +++ b/docs/api/core_send.cpp @@ -0,0 +1,29 @@ +#include + +// Define the execution space and transport +using ExecSpace = Kokkos::DefaultExecutionSpace; +using Transport = DefaultTransport; + +// Create a Kokkos view +Kokkos::View data("data", 100); + +// Fill the view with some data +Kokkos::parallel_for("fill_data", Kokkos::RangePolicy(0, 100), KOKKOS_LAMBDA(int i) { + data(i) = static_cast(i); +}); + +// Destination rank and message tag +int dest = 1; + +// Create a handle +KokkosComm::Handle<> handle; // Same as Handle + +// Initiate a non-blocking send with a handle +auto req1 = send(handle, data, dest); + +// Initiate a non-blocking send with a default handle +auto req2 = send(data, dest); + +// Wait for the requests to complete (assuming a wait function exists) +KokkosComm::wait(req1); +KokkosComm::wait(req2); \ No newline at end of file diff --git a/docs/api/mpi.rst b/docs/api/mpi.rst new file mode 100644 index 00000000..9f6b5748 --- /dev/null +++ b/docs/api/mpi.rst @@ -0,0 +1,225 @@ +MPI +==== + +.. list-table:: MPI API Support + :widths: 40 30 15 + :header-rows: 1 + + * - MPI + - ``KokkosComm::mpi::`` + - ``Kokkos::View`` + * - ``MPI_Send`` + - ``send`` or ``send`` + - ✓ + * - ``MPI_Rsend`` + - ``send`` + - ✓ + * - ``MPI_Recv`` + - ``recv`` + - ✓ + * - ``MPI_Ssend`` + - ``send`` + - ✓ + * - ``MPI_Isend`` + - ``isend`` or ``isend`` + - ✓ + * - ``MPI_Irsend`` + - ``isend`` + - ✓ + * - ``MPI_Issend`` + - ``isend`` + - ✓ + * - ``MPI_Reduce`` + - ``reduce`` + - ✓ + +Point-to-point +-------------- + +.. cpp:namespace:: KokkosComm::mpi + +.. cpp:function:: template Req isend(Handle &h, const SendView &sv, int dest, int tag) + + Initiates a non-blocking send operation. + + :tparam SendMode: The communication mode. + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :param h: The handle for the execution space and MPI. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :return: A request object for the non-blocking send operation. + +.. cpp:function:: template void irecv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) + + Initiates a non-blocking receive operation. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + :param req: The MPI request object for the non-blocking receive operation. + :throws std::runtime_error: If the view is not contiguous. + +.. cpp:function:: template void send(const SendView &sv, int dest, int tag, MPI_Comm comm) + + Initiates a blocking send operation. + + :tparam SendView: The type of the view to be sent. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :param comm: The MPI communicator. + +.. cpp:function:: template void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) + + Initiates a blocking send operation with a specified execution space and communication mode. + + :tparam SendMode: The communication mode (default is CommMode::Default). + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :param space: The execution space. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :param comm: The MPI communicator. + +.. cpp:function:: template void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *status) + + Initiates a blocking receive operation. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + :param status: The MPI status object for the blocking receive operation. + +.. cpp:function:: template void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) + + Initiates a blocking receive operation with a specified execution space. + + :tparam ExecSpace: The execution space. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + + + +Collective +---------- + +.. cpp:function:: template \ + void KokkosComm::reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + MPI_Reduce wrapper + + :param space: The execution space to operate in + :param sv: The data to send + :param rv: The view to receive into + :param op: The MPI_Op to use in the reduction + :param root: The root rank for the reduction + :param comm: the MPI communicator + :tparam SendView: A Kokkos::View to send + :tparam RecvView: A Kokkos::View to recv + :tparam ExecSpace: A Kokkos execution space to operate in + +.. cpp:function:: template void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) + + Performs an allgather operation, gathering data from all processes and distributing it to all processes. + + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param sv: The view to be sent. + :param rv: The view to be received. + :param comm: The MPI communicator. + + If ``sv`` is a rank-0 view, the value from the jth rank will be placed in index j of ``rv``. + +.. cpp:function:: template void allgather(const RecvView &rv, MPI_Comm comm) + + Performs an in-place allgather operation, gathering data from all processes and distributing it to all processes. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param comm: The MPI communicator. + +.. cpp:function:: template void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) + + Performs an allgather operation with a specified execution space, gathering data from all processes and distributing it to all processes. + + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param sv: The view to be sent. + :param rv: The view to be received. + :param comm: The MPI communicator. + +.. cpp:function:: inline void barrier(MPI_Comm comm) + + Blocks until all processes in the communicator have reached this routine. + + :param comm: The MPI communicator. + + +.. cpp:function:: template void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + Performs a reduction operation, combining data from all processes and distributing the result to the root process. + + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param sv: The view to be sent. + :param rv: The view to be received. + :param op: The MPI operation to be applied. + :param root: The rank of the root process. + :param comm: The MPI communicator. + +.. cpp:function:: template void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + Performs a reduction operation with a specified execution space, combining data from all processes and distributing the result to the root process. + + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param sv: The view to be sent. + :param rv: The view to be received. + :param op: The MPI operation to be applied. + :param root: The rank of the root process. + :param comm: The MPI communicator. + + +Related Types +------------- + +.. cpp:namespace:: KokkosComm::mpi + +.. _CommMode: + +.. cpp:enum-class:: CommMode + + A scoped enum to specify the mode of an operation. Buffered mode is not supported. + + .. cpp:enumerator:: Standard + + Standard mode: the MPI implementation decides whether outgoing messages will be buffered. Send operations can be started whether or not a matching receive has been started. They may complete before a matching receive is started. Standard mode is non-local: successful completion of the send operation may depend on the occurrence of a matching receive. + + .. cpp:enumerator:: Ready + + Ready mode: Send operations may be started only if the matching receive is already started. + + .. cpp:enumerator:: Synchronous + + Synchronous mode: Send operations complete successfully only if a matching receive is started, and the receive operation has started to receive the message sent. + + .. cpp:enumerator:: Default + + Default mode is an alias for ``Standard`` mode, but lets users override the behavior of operations at compile-time using the ``KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE`` pre-processor define. This forces ``Synchronous`` mode for all "default-mode" operations, which can be useful for debugging purposes, e.g., for asserting that the communication scheme is correct. + + diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst index 7d8a6783..af1dfe46 100644 --- a/docs/dev/testing.rst +++ b/docs/dev/testing.rst @@ -53,7 +53,7 @@ Testing the Install cmake -S "$COMM_SRC"/unit_tests -B "$COMM_UNIT_TESTS_BUILD" -DKokkos_ROOT="$KOKKOS_INSTALL" -DKokkosComm_ROOT="$COMM_INSTALL" -DCMAKE_BUILD_TYPE=RelWithDebInfo echo "==== BUILD UNIT TESTS ====" - VERBOSE=1 cmake --build "$COMM_UNIT_TESTS_BUILD" --parallel 4 + VERBOSE=1 cmake --build "$COMM_UNIT_TESTS_BUILD" --parallel $(nproc) echo "==== RUN UNIT TESTS ====" ctest -V --test-dir "$COMM_UNIT_TESTS_BUILD" @@ -63,7 +63,7 @@ Testing the Install cmake -S "$COMM_SRC"/perf_tests -B "$COMM_PERF_TESTS_BUILD" -DKokkos_ROOT="$KOKKOS_INSTALL" -DKokkosComm_ROOT="$COMM_INSTALL" -DCMAKE_BUILD_TYPE=RelWithDebInfo echo "==== BUILD PERF TESTS ====" - VERBOSE=1 cmake --build "$COMM_PERF_TESTS_BUILD" --parallel 4 + VERBOSE=1 cmake --build "$COMM_PERF_TESTS_BUILD" --parallel $(nproc) echo "==== RUN PERF TESTS ====" ctest -V --test-dir "$COMM_PERF_TESTS_BUILD" diff --git a/docs/index.rst b/docs/index.rst index fed07349..c18762fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ API Reference api/core api/traits api/packing + api/mpi Design ------ diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index e45ed897..82194239 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.12) # same as Kokkos Comm project(KokkosCommPerfTests VERSION 0.0.2) + enable_testing() # Treat the perf tests as a separate project @@ -30,18 +31,6 @@ if (NOT benchmark_POPULATED) endif() unset(BENCHMARK_ENABLE_TESTING) - -add_executable(perf_test-main test_main.cpp - test_sendrecv.cpp - test_2dhalo.cpp - test_osu_latency_sendrecv.cpp - test_osu_latency_isendirecv.cpp -) -if(KOKKOSCOMM_ENABLE_TESTS) - kokkoscomm_add_cxx_flags(TARGET perf_test-main) -endif() -target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) -add_test(NAME perf_test-main - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) - - +if (KOKKOSCOMM_ENABLE_MPI) + add_subdirectory(mpi) +endif(KOKKOSCOMM_ENABLE_MPI) diff --git a/perf_tests/mpi/CMakeLists.txt b/perf_tests/mpi/CMakeLists.txt new file mode 100644 index 00000000..0a08c203 --- /dev/null +++ b/perf_tests/mpi/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable(perf_test-main test_main.cpp + test_sendrecv.cpp + test_2dhalo.cpp + test_osu_latency.cpp + ) + if(KOKKOSCOMM_ENABLE_TESTS) + kokkoscomm_add_cxx_flags(TARGET perf_test-main) + endif() + target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) + add_test(NAME perf_test-main + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) \ No newline at end of file diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/mpi/test_2dhalo.cpp similarity index 71% rename from perf_tests/test_2dhalo.cpp rename to perf_tests/mpi/test_2dhalo.cpp index 1203d4e0..4cb4f324 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/mpi/test_2dhalo.cpp @@ -16,15 +16,17 @@ #include "test_utils.hpp" -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" #include void noop(benchmark::State, MPI_Comm) {} -template -void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int nx, int ny, int rx, int ry, - int rs, const View &v) { +template +void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int nx, int ny, int rx, int ry, int rs, + const View &v) { + KokkosComm::Handle<> h{space, comm}; + // 2D index of nbrs in minus and plus direction (periodic) const int xm1 = (rx + rs - 1) % rs; const int ym1 = (ry + rs - 1) % rs; @@ -46,22 +48,20 @@ void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space auto ym1_s = Kokkos::subview(v, make_pair(1, nx + 1), 1, Kokkos::ALL); auto ym1_r = Kokkos::subview(v, make_pair(1, nx + 1), 0, Kokkos::ALL); - std::vector reqs; + std::vector> reqs; // std::cerr << get_rank(rx, ry) << " -> " << get_rank(xp1, ry) << "\n"; - reqs.push_back(KokkosComm::isend(mode, space, xp1_s, get_rank(xp1, ry), 0, comm)); - reqs.push_back(KokkosComm::isend(mode, space, xm1_s, get_rank(xm1, ry), 1, comm)); - reqs.push_back(KokkosComm::isend(mode, space, yp1_s, get_rank(rx, yp1), 2, comm)); - reqs.push_back(KokkosComm::isend(mode, space, ym1_s, get_rank(rx, ym1), 3, comm)); + reqs.push_back(KokkosComm::send(h, xp1_s, get_rank(xp1, ry))); + reqs.push_back(KokkosComm::send(h, xm1_s, get_rank(xm1, ry))); + reqs.push_back(KokkosComm::send(h, yp1_s, get_rank(rx, yp1))); + reqs.push_back(KokkosComm::send(h, ym1_s, get_rank(rx, ym1))); - KokkosComm::recv(space, xm1_r, get_rank(xm1, ry), 0, comm); - KokkosComm::recv(space, xp1_r, get_rank(xp1, ry), 1, comm); - KokkosComm::recv(space, ym1_r, get_rank(rx, ym1), 2, comm); - KokkosComm::recv(space, yp1_r, get_rank(rx, yp1), 3, comm); + reqs.push_back(KokkosComm::recv(h, xm1_r, get_rank(xm1, ry))); + reqs.push_back(KokkosComm::recv(h, xp1_r, get_rank(xp1, ry))); + reqs.push_back(KokkosComm::recv(h, ym1_r, get_rank(rx, ym1))); + reqs.push_back(KokkosComm::recv(h, yp1_r, get_rank(rx, yp1))); // wait for comm - for (KokkosComm::Req &req : reqs) { - req.wait(); - } + KokkosComm::wait_all(reqs); } void benchmark_2dhalo(benchmark::State &state) { @@ -82,14 +82,12 @@ void benchmark_2dhalo(benchmark::State &state) { const int ry = rank / rs; if (rank < rs * rs) { - auto mode = KokkosComm::DefaultCommMode(); auto space = Kokkos::DefaultExecutionSpace(); // grid of elements, each with 3 properties, and a radius-1 halo grid_type grid("", nx + 2, ny + 2, nprops); while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, - send_recv, mode, space, nx, - ny, rx, ry, rs, grid); + do_iteration(state, MPI_COMM_WORLD, send_recv, space, nx, ny, rx, ry, + rs, grid); } } else { while (state.KeepRunning()) { diff --git a/perf_tests/test_main.cpp b/perf_tests/mpi/test_main.cpp similarity index 97% rename from perf_tests/test_main.cpp rename to perf_tests/mpi/test_main.cpp index cbfc7d2a..cc087348 100644 --- a/perf_tests/test_main.cpp +++ b/perf_tests/mpi/test_main.cpp @@ -14,7 +14,7 @@ // //@HEADER -#include "impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" #include #include diff --git a/perf_tests/mpi/test_osu_latency.cpp b/perf_tests/mpi/test_osu_latency.cpp new file mode 100644 index 00000000..e9ce1f0f --- /dev/null +++ b/perf_tests/mpi/test_osu_latency.cpp @@ -0,0 +1,152 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +// Adapted from the OSU Benchmarks +// Copyright (c) 2002-2024 the Network-Based Computing Laboratory +// (NBCL), The Ohio State University. + +#include "test_utils.hpp" +#include "KokkosComm/KokkosComm.hpp" + +template +void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm, KokkosComm::Handle<> &h, const View &v) { + if (h.rank() == 0) { + KokkosComm::wait(KokkosComm::send(h, v, 1)); + } else if (h.rank() == 1) { + KokkosComm::wait(KokkosComm::recv(h, v, 0)); + } +} + +void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { + KokkosComm::Handle<> h; + if (h.size() != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, h.mpi_comm(), osu_latency_Kokkos_Comm_sendrecv, h, a); + } + state.counters["bytes"] = a.size() * 2; +} + +template +void osu_latency_Kokkos_Comm_mpi_sendrecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, + const View &v) { + if (rank == 0) { + KokkosComm::mpi::send(space, v, 1, 0, comm); + } else if (rank == 1) { + KokkosComm::mpi::recv(space, v, 0, 0, comm); + } +} + +void benchmark_osu_latency_Kokkos_Comm_mpi_sendrecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + auto space = Kokkos::DefaultExecutionSpace(); + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_mpi_sendrecv, + space, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +template +void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + MPI_Request sendreq, recvreq; + if (rank == 0) { + MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, &recvreq); + MPI_Wait(&recvreq, MPI_STATUS_IGNORE); + } else if (rank == 1) { + MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm, &sendreq); + MPI_Wait(&sendreq, MPI_STATUS_IGNORE); + } +} + +void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +template +void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + if (rank == 0) { + MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, + MPI_STATUS_IGNORE); + } else if (rank == 1) { + MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm); + } +} + +void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_sendrecv, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + +BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); +BENCHMARK(benchmark_osu_latency_Kokkos_Comm_mpi_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); +BENCHMARK(benchmark_osu_latency_MPI_isendirecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); +BENCHMARK(benchmark_osu_latency_MPI_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); \ No newline at end of file diff --git a/perf_tests/test_sendrecv.cpp b/perf_tests/mpi/test_sendrecv.cpp similarity index 66% rename from perf_tests/test_sendrecv.cpp rename to perf_tests/mpi/test_sendrecv.cpp index b138df24..c81ae24f 100644 --- a/perf_tests/test_sendrecv.cpp +++ b/perf_tests/mpi/test_sendrecv.cpp @@ -16,16 +16,16 @@ #include "test_utils.hpp" -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" -template -void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, const View &v) { +template +void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, const View &v) { if (0 == rank) { - KokkosComm::send(mode, space, v, 1, 0, comm); - KokkosComm::recv(space, v, 1, 0, comm); + KokkosComm::mpi::send(space, v, 1, 0, comm, Mode{}); + KokkosComm::mpi::recv(space, v, 1, 0, comm); } else if (1 == rank) { - KokkosComm::recv(space, v, 0, 0, comm); - KokkosComm::send(mode, space, v, 0, 0, comm); + KokkosComm::mpi::recv(space, v, 0, 0, comm); + KokkosComm::mpi::send(space, v, 0, 0, comm, Mode{}); } } @@ -39,15 +39,13 @@ void benchmark_sendrecv(benchmark::State &state) { using Scalar = double; - auto mode = KokkosComm::DefaultCommMode(); + using Mode = KokkosComm::mpi::DefaultCommMode; auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; view_type a("", 1000000); while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, - send_recv, mode, space, rank, - a); + do_iteration(state, MPI_COMM_WORLD, send_recv, space, rank, a); } state.SetBytesProcessed(sizeof(Scalar) * state.iterations() * a.size() * 2); diff --git a/perf_tests/test_utils.hpp b/perf_tests/mpi/test_utils.hpp similarity index 96% rename from perf_tests/test_utils.hpp rename to perf_tests/mpi/test_utils.hpp index 1f3dddd9..c022f91c 100644 --- a/perf_tests/test_utils.hpp +++ b/perf_tests/mpi/test_utils.hpp @@ -20,7 +20,7 @@ #include -#include "impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" // F is a function that takes (state, MPI_Comm, args...) template diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp deleted file mode 100644 index dd026528..00000000 --- a/perf_tests/test_osu_latency_isendirecv.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -// Adapted from the OSU Benchmarks -// Copyright (c) 2002-2024 the Network-Based Computing Laboratory -// (NBCL), The Ohio State University. - -#include "test_utils.hpp" -#include "KokkosComm.hpp" - -template -void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, - int rank, const View &v) { - if (rank == 0) { - KokkosComm::Req sendreq = KokkosComm::isend(mode, space, v, 1, 1, comm); - sendreq.wait(); - } else if (rank == 1) { - KokkosComm::Req recvreq = KokkosComm::irecv(v, 0, 1, comm); - recvreq.wait(); - } -} - -template -void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { - MPI_Request sendreq, recvreq; - if (rank == 0) { - MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, &recvreq); - MPI_Wait(&recvreq, MPI_STATUS_IGNORE); - } else if (rank == 1) { - MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm, &sendreq); - MPI_Wait(&sendreq, MPI_STATUS_IGNORE); - } -} - -void benchmark_osu_latency_KokkosComm_isendirecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); - } - - auto mode = KokkosComm::DefaultCommMode(); - auto space = Kokkos::DefaultExecutionSpace(); - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration( - state, MPI_COMM_WORLD, - osu_latency_Kokkos_Comm_isendirecv, mode, - space, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); - } - - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); -BENCHMARK(benchmark_osu_latency_MPI_isendirecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp deleted file mode 100644 index e38d6057..00000000 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -// Adapted from the OSU Benchmarks -// Copyright (c) 2002-2024 the Network-Based Computing Laboratory -// (NBCL), The Ohio State University. - -#include "test_utils.hpp" -#include "KokkosComm.hpp" - -template -void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, - const View &v) { - if (rank == 0) { - KokkosComm::send(mode, space, v, 1, 0, comm); - } else if (rank == 1) { - KokkosComm::recv(space, v, 0, 0, comm); - } -} - -template -void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { - if (rank == 0) { - MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, - MPI_STATUS_IGNORE); - } else if (rank == 1) { - MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm); - } -} - -void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); - } - - auto mode = KokkosComm::DefaultCommMode(); - auto space = Kokkos::DefaultExecutionSpace(); - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration( - state, MPI_COMM_WORLD, - osu_latency_Kokkos_Comm_sendrecv, mode, - space, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); - } - - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_sendrecv, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); -BENCHMARK(benchmark_osu_latency_MPI_sendrecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46d64cb6..138f7e34 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,3 +9,4 @@ target_include_directories(KokkosComm INTERFACE $ ) target_link_libraries(KokkosComm INTERFACE MPI::MPI_CXX Kokkos::kokkos) + diff --git a/src/KokkosComm/KokkosComm.hpp b/src/KokkosComm/KokkosComm.hpp new file mode 100644 index 00000000..b69f58bc --- /dev/null +++ b/src/KokkosComm/KokkosComm.hpp @@ -0,0 +1,44 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "fwd.hpp" + +// transport declarations +// TODO: could probably be moved to a per-transport file to be included +#if defined(KOKKOSCOMM_ENABLE_MPI) +#include "mpi/mpi.hpp" +#include "mpi/send.hpp" +#include "mpi/allgather.hpp" +#include "mpi/alltoall.hpp" +#include "mpi/barrier.hpp" +#include "mpi/handle.hpp" +#include "mpi/irecv.hpp" +#include "mpi/isend.hpp" +#include "mpi/recv.hpp" +#include "mpi/reduce.hpp" +#else +#error at least one transport must be defined +#endif + +#include "concepts.hpp" +#include "point_to_point.hpp" +#include "collective.hpp" + +#include + +namespace KokkosComm {} // namespace KokkosComm diff --git a/src/KokkosComm.hpp b/src/KokkosComm/collective.hpp similarity index 57% rename from src/KokkosComm.hpp rename to src/KokkosComm/collective.hpp index 15c1b7e1..c7125c0c 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm/collective.hpp @@ -16,26 +16,19 @@ #pragma once -#include "KokkosComm_config.hpp" -#include "KokkosComm_collective.hpp" -#include "impl/KokkosComm_isend.hpp" -#include "impl/KokkosComm_irecv.hpp" -#include "impl/KokkosComm_recv.hpp" -#include "impl/KokkosComm_send.hpp" -#include "impl/KokkosComm_alltoall.hpp" -#include "impl/KokkosComm_barrier.hpp" -#include "impl/KokkosComm_concepts.hpp" -#include "KokkosComm_comm_modes.hpp" +#include #include +#include "fwd.hpp" +#include "concepts.hpp" + namespace KokkosComm { -using Impl::alltoall; -using Impl::barrier; -using Impl::irecv; -using Impl::isend; -using Impl::recv; -using Impl::send; +template +void barrier(Handle &&h) { + Impl::Barrier{std::forward>(h)}; +} } // namespace KokkosComm diff --git a/src/KokkosComm_comm_modes.hpp b/src/KokkosComm/comm_modes.hpp similarity index 100% rename from src/KokkosComm_comm_modes.hpp rename to src/KokkosComm/comm_modes.hpp diff --git a/src/impl/KokkosComm_concepts.hpp b/src/KokkosComm/concepts.hpp similarity index 63% rename from src/impl/KokkosComm_concepts.hpp rename to src/KokkosComm/concepts.hpp index 8a2e2bce..19be6697 100644 --- a/src/impl/KokkosComm_concepts.hpp +++ b/src/KokkosComm/concepts.hpp @@ -16,34 +16,25 @@ #pragma once -#include "KokkosComm_comm_modes.hpp" +#include #include namespace KokkosComm { +namespace Impl { +// fallback - most types are not a KokkosComm transport template -concept KokkosView = Kokkos::is_view_v; +struct is_communication_space : public std::false_type {}; +} // namespace Impl template -concept KokkosExecutionSpace = Kokkos::is_execution_space_v; - -template -struct is_communication_mode : std::false_type {}; - -template <> -struct is_communication_mode : std::true_type {}; - -template <> -struct is_communication_mode : std::true_type {}; - -template <> -struct is_communication_mode : std::true_type {}; +concept KokkosView = Kokkos::is_view_v; template -inline constexpr bool is_communication_mode_v = is_communication_mode::value; +concept KokkosExecutionSpace = Kokkos::is_execution_space_v; template -concept CommunicationMode = KokkosComm::is_communication_mode_v; +concept CommunicationSpace = KokkosComm::Impl::is_communication_space::value; } // namespace KokkosComm diff --git a/src/KokkosComm/fwd.hpp b/src/KokkosComm/fwd.hpp new file mode 100644 index 00000000..0bd3c255 --- /dev/null +++ b/src/KokkosComm/fwd.hpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "concepts.hpp" +#include "KokkosComm_config.hpp" + +namespace KokkosComm { +#if defined(KOKKOSCOMM_ENABLE_MPI) +class Mpi; +using DefaultCommunicationSpace = Mpi; +using FallbackCommunicationSpace = Mpi; +#else +#error at least one transport must be defined +#endif + +template +class Req; + +template +class Handle; + +namespace Impl { + +template +struct Recv; +template +struct Send; +template +struct Barrier; + +} // namespace Impl + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/KokkosComm/impl/KokkosComm_contiguous.hpp b/src/KokkosComm/impl/KokkosComm_contiguous.hpp new file mode 100644 index 00000000..4caa7f10 --- /dev/null +++ b/src/KokkosComm/impl/KokkosComm_contiguous.hpp @@ -0,0 +1,63 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include + +#include "concepts.hpp" +#include "traits.hpp" + +namespace KokkosComm::Impl { + +template +struct contiguous_view { + using type = Kokkos::View; +}; + +template +using contiguous_view_t = contiguous_view::type; + +template +auto allocate_contiguous_for(const Space &space, const std::string &label, View &v) { + using non_const_packed_view_type = contiguous_view_t; + + if constexpr (KokkosComm::rank() == 1) { + return non_const_packed_view_type(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, label), v.extent(0)); + } else if constexpr (KokkosComm::rank() == 2) { + return non_const_packed_view_type(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, label), v.extent(0), + v.extent(1)); + } else { + static_assert(std::is_void_v, "allocate_contiguous_for for views > rank 2 not implemented"); + } +} + +template +auto resize_contiguous_for(const Space &space, DstView &out, const SrcView &in) { + static_assert(DstView::rank == SrcView::rank, ""); + + if constexpr (KokkosComm::rank() == 1) { + Kokkos::realloc(Kokkos::view_alloc(space, Kokkos::WithoutInitializing), out, in.extent(0)); + } else if constexpr (KokkosComm::rank() == 2) { + Kokkos::realloc(Kokkos::view_alloc(space, Kokkos::WithoutInitializing), out, in.extent(0), in.extent(1)); + } else { + static_assert(std::is_void_v, "realloc_contiguous_for for views > rank 2 not implemented"); + } +} + +} // namespace KokkosComm::Impl \ No newline at end of file diff --git a/src/impl/KokkosComm_allgather.hpp b/src/KokkosComm/mpi/allgather.hpp similarity index 74% rename from src/impl/KokkosComm_allgather.hpp rename to src/KokkosComm/mpi/allgather.hpp index c9f089d5..f3e930dc 100644 --- a/src/impl/KokkosComm_allgather.hpp +++ b/src/KokkosComm/mpi/allgather.hpp @@ -18,18 +18,16 @@ #include -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_traits.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using SendScalar = typename SendView::value_type; using RecvScalar = typename RecvView::value_type; @@ -44,8 +42,8 @@ void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { throw std::runtime_error("low-level allgather requires contiguous recv view"); } const int count = KokkosComm::span(sv); // all ranks send/recv same count - MPI_Allgather(KokkosComm::data_handle(sv), count, mpi_type_v, KokkosComm::data_handle(rv), count, - mpi_type_v, comm); + MPI_Allgather(KokkosComm::data_handle(sv), count, KokkosComm::Impl::mpi_type_v, + KokkosComm::data_handle(rv), count, KokkosComm::Impl::mpi_type_v, comm); Kokkos::Tools::popRegion(); } @@ -53,7 +51,7 @@ void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { // in-place allgather template void allgather(const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using RT = KokkosComm::Traits; using RecvScalar = typename RecvView::value_type; @@ -63,24 +61,25 @@ void allgather(const RecvView &rv, MPI_Comm comm) { if (!RT::is_contiguous(rv)) { throw std::runtime_error("low-level allgather requires contiguous recv view"); } - MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, RT::data_handle(rv), RT::span(rv), mpi_type_v, comm); + MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, RT::data_handle(rv), RT::span(rv), + KokkosComm::Impl::mpi_type_v, comm); Kokkos::Tools::popRegion(); } template void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using SPT = KokkosComm::PackTraits; using RPT = KokkosComm::PackTraits; - if (SPT::needs_pack(sv) || RPT::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) { throw std::runtime_error("allgather for non-contiguous views not implemented"); } else { - space.fence(); // work in space may have been used to produce send view data + space.fence("fence before allgather"); // work in space may have been used to produce send view data allgather(sv, rv, comm); } Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/impl/KokkosComm_alltoall.hpp b/src/KokkosComm/mpi/alltoall.hpp similarity index 92% rename from src/impl/KokkosComm_alltoall.hpp rename to src/KokkosComm/mpi/alltoall.hpp index 1bc6aa93..b2b29512 100644 --- a/src/impl/KokkosComm_alltoall.hpp +++ b/src/KokkosComm/mpi/alltoall.hpp @@ -36,12 +36,10 @@ #include -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_traits.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" namespace KokkosComm::Impl { template @@ -58,7 +56,7 @@ void alltoall(const ExecSpace &space, const SendView &sv, const size_t sendCount // Make sure views are ready space.fence("KokkosComm::Impl::alltoall"); - if (KokkosComm::PackTraits::needs_pack(sv) || KokkosComm::PackTraits::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) { throw std::runtime_error("alltoall for non-contiguous views not implemented"); } else { int size; @@ -96,7 +94,7 @@ void alltoall(const ExecSpace &space, const RecvView &rv, const size_t recvCount // Make sure views are ready space.fence("KokkosComm::Impl::alltoall"); - if (KokkosComm::PackTraits::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(rv)) { throw std::runtime_error("alltoall for non-contiguous views not implemented"); } else { int size; diff --git a/src/impl/KokkosComm_barrier.hpp b/src/KokkosComm/mpi/barrier.hpp similarity index 58% rename from src/impl/KokkosComm_barrier.hpp rename to src/KokkosComm/mpi/barrier.hpp index 44580086..736f5c86 100644 --- a/src/impl/KokkosComm_barrier.hpp +++ b/src/KokkosComm/mpi/barrier.hpp @@ -16,26 +16,26 @@ #pragma once -#include +#include "KokkosComm/concepts.hpp" -#include "KokkosComm_concepts.hpp" +namespace KokkosComm { -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { +namespace Impl { +template +struct Barrier { + Barrier(Handle &&h) { + h.space().fence("KokkosComm::Impl::Barrier"); + MPI_Barrier(h.mpi_comm()); + } +}; +} // namespace Impl +namespace mpi { inline void barrier(MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::barrier"); + Kokkos::Tools::pushRegion("KokkosComm::mpi::barrier"); MPI_Barrier(comm); Kokkos::Tools::popRegion(); } +} // namespace mpi -// a barrier in the provided space. For MPI, we have to fence the space and do a host barrier -template -void barrier(const ExecSpace &space, MPI_Comm comm) { - space.fence("KokkosComm::Impl::barrier"); - barrier(comm); -} - -} // namespace KokkosComm::Impl +} // namespace KokkosComm diff --git a/src/KokkosComm/mpi/commmode.hpp b/src/KokkosComm/mpi/commmode.hpp new file mode 100644 index 00000000..d0249d9d --- /dev/null +++ b/src/KokkosComm/mpi/commmode.hpp @@ -0,0 +1,67 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +// See section 3.4 of the MPI standard for a complete specification. + +namespace KokkosComm::mpi { +// Standard mode: MPI implementation decides whether outgoing messages will +// be buffered. Send operations can be started whether or not a matching +// receive has been started. They may complete before a matching receive is +// started. Standard mode is non-local: successful completion of the send +// operation may depend on the occurrence of a matching receive. +struct CommModeStandard {}; + +// Ready mode: Send operations may be started only if the matching receive is +// already started. +struct CommModeReady {}; + +// Synchronous mode: Send operations complete successfully only if a matching +// receive is started, and the receive operation has started to receive the +// message sent. +struct CommModeSynchronous {}; + +// Default mode: lets the user override the send operations behavior at +// compile-time. E.g., this can be set to mode "Synchronous" for debug +// builds by defining KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE. +#ifdef KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE +using DefaultCommMode = CommModeSynchronous; +#else +using DefaultCommMode = CommModeStandard; +#endif + +template +struct is_communication_mode : std::false_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template +inline constexpr bool is_communication_mode_v = is_communication_mode::value; + +template +concept CommunicationMode = is_communication_mode_v; + +} // namespace KokkosComm::mpi \ No newline at end of file diff --git a/src/KokkosComm/mpi/handle.hpp b/src/KokkosComm/mpi/handle.hpp new file mode 100644 index 00000000..192beeb5 --- /dev/null +++ b/src/KokkosComm/mpi/handle.hpp @@ -0,0 +1,65 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "KokkosComm/fwd.hpp" + +#include "req.hpp" + +namespace KokkosComm { + +/* +- init_fence +- allocations +- pre_copies +- pre_comm_fence +- comm +- wait +- post-wait +*/ +template +class Handle { + public: + using execution_space = ExecSpace; + using transport_type = Mpi; + using size_type = int; + + explicit Handle(const execution_space &space, MPI_Comm comm) : space_(space), comm_(comm) {} + explicit Handle(MPI_Comm comm) : Handle(Kokkos::DefaultExecutionSpace{}, comm) {} + Handle() : Handle(Kokkos::DefaultExecutionSpace{}, MPI_COMM_WORLD) {} + + MPI_Comm &mpi_comm() { return comm_; } + const execution_space &space() const { return space_; } + + size_type size() { + size_type ret; + MPI_Comm_size(comm_, &ret); + return ret; + } + + size_type rank() { + size_type ret; + MPI_Comm_rank(comm_, &ret); + return ret; + } + + private: + execution_space space_; + MPI_Comm comm_; +}; + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/KokkosComm/mpi/impl/include_mpi.hpp b/src/KokkosComm/mpi/impl/include_mpi.hpp new file mode 100644 index 00000000..1219c04e --- /dev/null +++ b/src/KokkosComm/mpi/impl/include_mpi.hpp @@ -0,0 +1,28 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#define KOKKOSCOMM_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +#if KOKKOSCOMM_GCC_VERSION >= 11400 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" +#include +#pragma GCC diagnostic pop +#else +#include +#endif \ No newline at end of file diff --git a/src/KokkosComm_pack_traits.hpp b/src/KokkosComm/mpi/impl/pack_traits.hpp similarity index 68% rename from src/KokkosComm_pack_traits.hpp rename to src/KokkosComm/mpi/impl/pack_traits.hpp index 39a74eac..e5c8c0cb 100644 --- a/src/KokkosComm_pack_traits.hpp +++ b/src/KokkosComm/mpi/impl/pack_traits.hpp @@ -16,29 +16,22 @@ #pragma once -#include "KokkosComm_traits.hpp" +#include "KokkosComm/traits.hpp" +#include "KokkosComm/concepts.hpp" -#include "impl/KokkosComm_concepts.hpp" -#include "impl/KokkosComm_packer.hpp" - -/*! \brief Defines a common interface for packing and unpacking - Kokkos::View-like types \file KokkosComm_traits.hpp -*/ +#include "packer.hpp" namespace KokkosComm { template struct PackTraits { - static_assert(std::is_void_v, "KokkosComm::PackTraits not specialized for type"); + static_assert(std::is_void_v, "KokkosComm::PackTraits not specialized for requested type"); }; /*! \brief This can be specialized to do custom behavior for a particular view*/ template struct PackTraits { using packer_type = Impl::Packer::DeepCopy; - - static bool needs_unpack(const View &v) { return !KokkosComm::is_contiguous(v); } - static bool needs_pack(const View &v) { return !KokkosComm::is_contiguous(v); } }; } // namespace KokkosComm diff --git a/src/impl/KokkosComm_packer.hpp b/src/KokkosComm/mpi/impl/packer.hpp similarity index 96% rename from src/impl/KokkosComm_packer.hpp rename to src/KokkosComm/mpi/impl/packer.hpp index 3429b1ed..e972d6ad 100644 --- a/src/impl/KokkosComm_packer.hpp +++ b/src/KokkosComm/mpi/impl/packer.hpp @@ -16,10 +16,12 @@ #pragma once -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_traits.hpp" -#include "KokkosComm_types.hpp" -#include "KokkosComm_include_mpi.hpp" +#include "KokkosComm/concepts.hpp" +#include "KokkosComm/traits.hpp" +#include "types.hpp" +#include "include_mpi.hpp" + +// todo: redo this using KokkosComm_contiguous namespace KokkosComm::Impl { namespace Packer { diff --git a/src/KokkosComm/mpi/impl/tags.hpp b/src/KokkosComm/mpi/impl/tags.hpp new file mode 100644 index 00000000..e1230bb0 --- /dev/null +++ b/src/KokkosComm/mpi/impl/tags.hpp @@ -0,0 +1,5 @@ +#pragma once + +namespace KokkosComm::Impl { +constexpr int POINTTOPOINT_TAG = 17; +} \ No newline at end of file diff --git a/src/impl/KokkosComm_types.hpp b/src/KokkosComm/mpi/impl/types.hpp similarity index 74% rename from src/impl/KokkosComm_types.hpp rename to src/KokkosComm/mpi/impl/types.hpp index 548f21de..a2b798db 100644 --- a/src/impl/KokkosComm_types.hpp +++ b/src/KokkosComm/mpi/impl/types.hpp @@ -16,7 +16,7 @@ #pragma once -#include "KokkosComm_include_mpi.hpp" +#include "include_mpi.hpp" #include @@ -107,4 +107,43 @@ MPI_Datatype mpi_type() { template inline MPI_Datatype mpi_type_v = mpi_type(); + +template +MPI_Datatype view_mpi_type(const View &view) { +#define USE_CACHE + +#if defined(USE_CACHE) + using Key = std::array; + static std::map cache; + + Key key; + for (size_t d = 0; d < View::rank; d++) { + key[2 * d] = view.extent(d); + key[2 * d + 1] = view.stride(d); + } + if (cache.count(key) > 0) { + return cache[key]; + } +#endif + + using value_type = typename View::non_const_value_type; + MPI_Datatype type = mpi_type_v; + + // This doesn't work for 1D contiguous views into reduce because it + // represents the whole 1D view as 1 Hvector, rather than N elements. + // FIXME: is there a more generic way to handle this, maybe by treating + // the last dimension specially under certain circumstances? + for (size_t d = 0; d < KokkosComm::rank(); ++d) { + MPI_Datatype newtype; + MPI_Type_create_hvector(KokkosComm::extent(view, d) /*count*/, 1 /*block length*/, + KokkosComm::stride(view, d) * sizeof(value_type), type, &newtype); + type = newtype; + } + MPI_Type_commit(&type); +#if defined(USE_CACHE) + cache[key] = type; +#endif + return type; +} + }; // namespace KokkosComm::Impl diff --git a/src/KokkosComm/mpi/irecv.hpp b/src/KokkosComm/mpi/irecv.hpp new file mode 100644 index 00000000..cd167a9c --- /dev/null +++ b/src/KokkosComm/mpi/irecv.hpp @@ -0,0 +1,70 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "mpi.hpp" +#include "impl/tags.hpp" + +namespace KokkosComm { + +namespace Impl { +// Recv implementation for Mpi +template +struct Recv { + static Req execute(Handle &h, const RecvView &rv, int src) { + using KCT = KokkosComm::Traits; + using KCPT = KokkosComm::PackTraits; + using Packer = typename KCPT::packer_type; + using Args = typename Packer::args_type; + + const ExecSpace &space = h.space(); + + Req req; + if (KokkosComm::is_contiguous(rv)) { + space.fence("fence before irecv"); + MPI_Irecv(KokkosComm::data_handle(rv), 1, view_mpi_type(rv), src, POINTTOPOINT_TAG, h.mpi_comm(), + &req.mpi_request()); // TODO: probably best to just use the scalar type + req.extend_view_lifetime(rv); + } else { + Args args = Packer::allocate_packed_for(space, "TODO", rv); + space.fence("fence before irecv"); + MPI_Irecv(args.view.data(), args.count, args.datatype, src, POINTTOPOINT_TAG, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(rv); + // implicitly extends args.view lifetime since lambda holds a copy + req.call_after_mpi_wait([=]() { Packer::unpack_into(space, rv, args.view); }); + } + return req; + } +}; +} // namespace Impl + +namespace mpi { +template +void irecv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) { + Kokkos::Tools::pushRegion("KokkosComm::mpi::irecv"); + + if (KokkosComm::is_contiguous(rv)) { + using RecvScalar = typename RecvView::non_const_value_type; + MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), Impl::mpi_type_v, src, tag, comm, &req); + } else { + throw std::runtime_error("Only contiguous irecv viewsupported"); + } + Kokkos::Tools::popRegion(); +} +} // namespace mpi + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/KokkosComm/mpi/isend.hpp b/src/KokkosComm/mpi/isend.hpp new file mode 100644 index 00000000..09b301b7 --- /dev/null +++ b/src/KokkosComm/mpi/isend.hpp @@ -0,0 +1,100 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "KokkosComm/traits.hpp" + +#include "mpi.hpp" +#include "impl/types.hpp" +#include "impl/tags.hpp" +#include "commmode.hpp" + +namespace KokkosComm { + +namespace Impl { + +template +Req isend_impl(Handle &h, const SendView &sv, int dest, int tag, SendMode) { + auto mpi_isend_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, + MPI_Comm mpi_comm, MPI_Request *mpi_req) { + if constexpr (std::is_same_v) { + MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else if constexpr (std::is_same_v) { + MPI_Irsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else if constexpr (std::is_same_v) { + MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else { + static_assert(std::is_void_v, "unexpected communication mode"); + } + }; + + Req req; + if (KokkosComm::is_contiguous(sv)) { + h.space().fence("fence before isend"); + mpi_isend_fn(KokkosComm::data_handle(sv), 1, view_mpi_type(sv), dest, tag, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(sv); + } else { + using Packer = typename KokkosComm::PackTraits::packer_type; + using Args = typename Packer::args_type; + + Args args = Packer::pack(h.space(), sv); + h.space().fence("fence before isend"); + mpi_isend_fn(args.view.data(), args.count, args.datatype, dest, tag, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(args.view); + req.extend_view_lifetime(sv); + } + return req; +} + +// Implementation of KokkosComm::Send +template +struct Send { + static Req execute(Handle &h, const SendView &sv, int dest) { + return isend_impl(h, sv, dest, POINTTOPOINT_TAG, mpi::DefaultCommMode{}); + } +}; + +} // namespace Impl + +namespace mpi { + +template +Req isend(Handle &h, const SendView &sv, int dest, int tag, SendMode) { + return KokkosComm::Impl::isend_impl(h, sv, dest, tag, SendMode{}); +} + +template +Req isend(Handle &h, const SendView &sv, int dest, int tag) { + return isend(h, sv, dest, tag, DefaultCommMode{}); +} + +template +void isend(const SendView &sv, int dest, int tag, MPI_Comm comm, MPI_Request &req) { + Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); + + if (KokkosComm::is_contiguous(sv)) { + using SendScalar = typename SendView::non_const_value_type; + MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), Impl::mpi_type_v, dest, tag, comm, &req); + } else { + throw std::runtime_error("only contiguous views supported for low-level isend"); + } + Kokkos::Tools::popRegion(); +} + +} // namespace mpi + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/KokkosComm/mpi/mpi.hpp b/src/KokkosComm/mpi/mpi.hpp new file mode 100644 index 00000000..9a484db1 --- /dev/null +++ b/src/KokkosComm/mpi/mpi.hpp @@ -0,0 +1,48 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "../concepts.hpp" +#include "impl/include_mpi.hpp" + +namespace KokkosComm { + +// TODO: not sure what members this thing needs +struct Mpi { + // TODO: just an example + static int world_size() { + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + return size; + } + + // TODO: just an example + static int world_rank() { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + return rank; + } + +}; // struct Mpi + +// KokkosComm::Mpi is a KokkosComm::CommunicationSpace +template <> +struct Impl::is_communication_space : public std::true_type {}; + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_recv.hpp b/src/KokkosComm/mpi/recv.hpp similarity index 78% rename from src/impl/KokkosComm_recv.hpp rename to src/KokkosComm/mpi/recv.hpp index f3d2039e..10a9c97a 100644 --- a/src/impl/KokkosComm_recv.hpp +++ b/src/KokkosComm/mpi/recv.hpp @@ -18,14 +18,12 @@ #include -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_traits.hpp" +#include "KokkosComm/concepts.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *status) { @@ -34,7 +32,8 @@ void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *statu if (KokkosComm::is_contiguous(rv)) { using ScalarType = typename RecvView::non_const_value_type; - MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), mpi_type_v, src, tag, comm, status); + MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), KokkosComm::Impl::mpi_type_v, src, tag, + comm, status); } else { throw std::runtime_error("only contiguous views supported for low-level recv"); } @@ -45,13 +44,12 @@ template void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { Kokkos::Tools::pushRegion("KokkosComm::Impl::recv"); - using KCT = KokkosComm::Traits; - using KCPT = KokkosComm::PackTraits; - - if (KCPT::needs_unpack(rv)) { - using Packer = typename KCPT::packer_type; - using Args = typename Packer::args_type; + using KCT = KokkosComm::Traits; + using KCPT = KokkosComm::PackTraits; + using Packer = typename KCPT::packer_type; + using Args = typename Packer::args_type; + if (!KokkosComm::is_contiguous(rv)) { Args args = Packer::allocate_packed_for(space, "packed", rv); space.fence(); // make sure allocation is complete before recv MPI_Recv(KokkosComm::data_handle(args.view), args.count, args.datatype, src, tag, comm, MPI_STATUS_IGNORE); @@ -64,4 +62,4 @@ void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/impl/KokkosComm_reduce.hpp b/src/KokkosComm/mpi/reduce.hpp similarity index 78% rename from src/impl/KokkosComm_reduce.hpp rename to src/KokkosComm/mpi/reduce.hpp index 9c28acc6..056a2f03 100644 --- a/src/impl/KokkosComm_reduce.hpp +++ b/src/KokkosComm/mpi/reduce.hpp @@ -18,14 +18,12 @@ #include -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_traits.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) { @@ -35,7 +33,8 @@ void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Com if (SPT::is_contiguous(sv) && RPT::is_contiguous(rv)) { using SendScalar = typename SendView::non_const_value_type; - MPI_Reduce(SPT::data_handle(sv), RPT::data_handle(rv), SPT::span(sv), mpi_type_v, op, root, comm); + MPI_Reduce(SPT::data_handle(sv), RPT::data_handle(rv), SPT::span(sv), KokkosComm::Impl::mpi_type_v, op, + root, comm); } else { throw std::runtime_error("only contiguous views supported for low-level reduce"); } @@ -55,10 +54,10 @@ void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_ using SendPacker = typename KokkosComm::PackTraits::packer_type; using RecvPacker = typename KokkosComm::PackTraits::packer_type; - if (KokkosComm::PackTraits::needs_pack(sv)) { + if (!KokkosComm::is_contiguous(sv)) { auto sendArgs = SendPacker::pack(space, sv); space.fence(); - if ((root == rank) && KokkosComm::PackTraits::needs_unpack(rv)) { + if ((root == rank) && !KokkosComm::is_contiguous(rv)) { auto recvArgs = RecvPacker::allocate_packed_for(space, "reduce recv", rv); space.fence(); MPI_Reduce(sendArgs.view.data(), recvArgs.view.data(), sendArgs.count, sendArgs.datatype, op, root, comm); @@ -69,17 +68,17 @@ void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_ } } else { using SendScalar = typename SendView::value_type; - if ((root == rank) && KokkosComm::PackTraits::needs_unpack(rv)) { + if ((root == rank) && !KokkosComm::is_contiguous(rv)) { auto recvArgs = RecvPacker::allocate_packed_for(space, "reduce recv", rv); space.fence(); - MPI_Reduce(sv.data(), recvArgs.view.data(), sv.span(), mpi_type_v, op, root, comm); + MPI_Reduce(sv.data(), recvArgs.view.data(), sv.span(), KokkosComm::Impl::mpi_type_v, op, root, comm); RecvPacker::unpack_into(space, rv, recvArgs.view); } else { space.fence(); - MPI_Reduce(sv.data(), rv.data(), sv.span(), mpi_type_v, op, root, comm); + MPI_Reduce(sv.data(), rv.data(), sv.span(), KokkosComm::Impl::mpi_type_v, op, root, comm); } } Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm/mpi/req.hpp similarity index 54% rename from src/KokkosComm_request.hpp rename to src/KokkosComm/mpi/req.hpp index 1e3d57ae..72d4b1cb 100644 --- a/src/KokkosComm_request.hpp +++ b/src/KokkosComm/mpi/req.hpp @@ -16,14 +16,18 @@ #pragma once -#include #include +#include +#include -#include "impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/fwd.hpp" + +#include "mpi.hpp" namespace KokkosComm { -class Req { +template <> +class Req { // a type-erased view. Request uses these to keep temporary views alive for // the lifetime of "Immediate" MPI operations struct ViewHolderBase { @@ -38,30 +42,56 @@ class Req { struct Record { Record() : req_(MPI_REQUEST_NULL) {} MPI_Request req_; - std::vector> until_waits_; + std::vector> postWaits_; }; public: Req() : record_(std::make_shared()) {} - MPI_Request &mpi_req() { return record_->req_; } - - void wait() { - MPI_Wait(&(record_->req_), MPI_STATUS_IGNORE); - record_->until_waits_.clear(); // drop any views we're keeping alive until wait() - } + MPI_Request &mpi_request() { return record_->req_; } // keep a reference to this view around until wait() is called template - void keep_until_wait(const View &v) { + void extend_view_lifetime(const View &v) { // unmanaged views don't own the underlying buffer, so no need to extend lifetime if (v.use_count() != 0) { - record_->until_waits_.push_back(std::make_shared>(v)); + record_->postWaits_.push_back([v]() {}); } } + void call_after_mpi_wait(std::function &&f) { record_->postWaits_.push_back(f); } + private: std::shared_ptr record_; + + friend void wait(Req req); + friend void wait_all(std::vector> &reqs); + friend int wait_any(std::vector> &reqs); }; -} // namespace KokkosComm +inline void wait(Req req) { + MPI_Wait(&req.mpi_request(), MPI_STATUS_IGNORE); + for (auto &f : req.record_->postWaits_) { + f(); + } + req.record_->postWaits_.clear(); +} + +inline void wait_all(std::vector> &reqs) { + for (Req &req : reqs) { + wait(req); + } +} + +inline int wait_any(std::vector> &reqs) { + for (size_t i = 0; i < reqs.size(); ++i) { + int completed; + MPI_Test(&(reqs[i].mpi_request()), &completed, MPI_STATUS_IGNORE); + if (completed) { + return true; + } + } + return false; +} + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_send.hpp b/src/KokkosComm/mpi/send.hpp similarity index 61% rename from src/impl/KokkosComm_send.hpp rename to src/KokkosComm/mpi/send.hpp index 2ea5dfe6..70a05f17 100644 --- a/src/impl/KokkosComm_send.hpp +++ b/src/KokkosComm/mpi/send.hpp @@ -18,69 +18,66 @@ #include -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_comm_modes.hpp" +#include "commmode.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" +namespace KokkosComm::mpi { -namespace KokkosComm::Impl { - -template -void send(const SendMode &, const SendView &sv, int dest, int tag, MPI_Comm comm) { +template +void send(const SendView &sv, int dest, int tag, MPI_Comm comm, SendMode) { Kokkos::Tools::pushRegion("KokkosComm::Impl::send"); using KCT = typename KokkosComm::Traits; auto mpi_send_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, MPI_Comm mpi_comm) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { MPI_Send(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Rsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Ssend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); + } else { + static_assert(std::is_void_v, "unexpected communication mode"); } }; if (KokkosComm::is_contiguous(sv)) { using SendScalar = typename SendView::non_const_value_type; - mpi_send_fn(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm); + MPI_Send(KokkosComm::data_handle(sv), KokkosComm::span(sv), KokkosComm::Impl::mpi_type_v, dest, tag, + comm); } else { throw std::runtime_error("only contiguous views supported for low-level send"); } Kokkos::Tools::popRegion(); } -template -void send(const SendView &sv, int dest, int tag, MPI_Comm comm) { - send(KokkosComm::DefaultCommMode(), sv, dest, tag, comm); -} - -template -void send(const SendMode &, const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { +template +void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm, SendMode) { Kokkos::Tools::pushRegion("KokkosComm::Impl::send"); using Packer = typename KokkosComm::PackTraits::packer_type; auto mpi_send_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, MPI_Comm mpi_comm) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { MPI_Send(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Rsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Ssend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); + } else { + static_assert(std::is_void_v, "unexpected communication mode"); } }; - if (KokkosComm::PackTraits::needs_pack(sv)) { + if (KokkosComm::is_contiguous(sv)) { + using SendScalar = typename SendView::value_type; + mpi_send_fn(sv.data(), sv.span(), KokkosComm::Impl::mpi_type_v, dest, tag, comm); + } else { auto args = Packer::pack(space, sv); space.fence(); mpi_send_fn(args.view.data(), args.count, args.datatype, dest, tag, comm); - } else { - using SendScalar = typename SendView::value_type; - mpi_send_fn(sv.data(), sv.span(), mpi_type_v, dest, tag, comm); } Kokkos::Tools::popRegion(); @@ -88,7 +85,7 @@ void send(const SendMode &, const ExecSpace &space, const SendView &sv, int dest template void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - send(KokkosComm::DefaultCommMode(), space, sv, dest, tag, comm); + send(space, sv, dest, tag, comm, DefaultCommMode{}); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/KokkosComm/point_to_point.hpp b/src/KokkosComm/point_to_point.hpp new file mode 100644 index 00000000..64486d91 --- /dev/null +++ b/src/KokkosComm/point_to_point.hpp @@ -0,0 +1,50 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "fwd.hpp" +#include "concepts.hpp" + +namespace KokkosComm { + +template +Req recv(Handle &h, RecvView &rv, int src) { + return Impl::Recv::execute(h, rv, src); +} + +template +Req recv(RecvView &rv, int src) { + return recv(Handle{}, rv, src); +} + +template +Req send(Handle &h, SendView &sv, int dest) { + return Impl::Send::execute(h, sv, dest); +} + +template +Req send(SendView &sv, int dest) { + return send(Handle{}, sv, dest); +} + +} // namespace KokkosComm diff --git a/src/KokkosComm_traits.hpp b/src/KokkosComm/traits.hpp similarity index 96% rename from src/KokkosComm_traits.hpp rename to src/KokkosComm/traits.hpp index d66e6be9..55cba517 100644 --- a/src/KokkosComm_traits.hpp +++ b/src/KokkosComm/traits.hpp @@ -15,15 +15,15 @@ //@HEADER /*! \brief Defines a common interface for Kokkos::View-like types - \file KokkosComm_traits.hpp + \file traits.hpp */ #pragma once -#include "impl/KokkosComm_concepts.hpp" - #include +#include "concepts.hpp" + namespace KokkosComm { template diff --git a/src/KokkosComm_collective.hpp b/src/KokkosComm_collective.hpp deleted file mode 100644 index a99ca83e..00000000 --- a/src/KokkosComm_collective.hpp +++ /dev/null @@ -1,38 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include - -#include "impl/KokkosComm_concepts.hpp" -#include "impl/KokkosComm_alltoall.hpp" -#include "impl/KokkosComm_reduce.hpp" -#include "impl/KokkosComm_allgather.hpp" - -namespace KokkosComm { - -template -void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) { - return Impl::reduce(space, sv, rv, op, root, comm); -} - -template -void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) { - return Impl::allgather(space, sv, rv, comm); -} - -} // namespace KokkosComm diff --git a/src/impl/KokkosComm_include_mpi.hpp b/src/impl/KokkosComm_include_mpi.hpp deleted file mode 100644 index c955521a..00000000 --- a/src/impl/KokkosComm_include_mpi.hpp +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#define KOKKOSCOMM_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -#if KOKKOSCOMM_GCC_VERSION >= 11400 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wcast-function-type" -#include -#pragma GCC diagnostic pop -#else -#include -#endif \ No newline at end of file diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp deleted file mode 100644 index 91c36bb0..00000000 --- a/src/impl/KokkosComm_irecv.hpp +++ /dev/null @@ -1,55 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include - -#include - -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_request.hpp" -#include "KokkosComm_traits.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { - -// low-level API -template -void irecv(RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); - - if (KokkosComm::is_contiguous(rv)) { - using RecvScalar = typename RecvView::value_type; - MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), mpi_type_v, src, tag, comm, &req); - } else { - throw std::runtime_error("Only contiguous irecv viewsupported"); - } - - Kokkos::Tools::popRegion(); -} - -template -KokkosComm::Req irecv(RecvView &rv, int src, int tag, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); - KokkosComm::Req req; - irecv(rv, src, tag, comm, req.mpi_req()); - return req; -} - -} // namespace KokkosComm::Impl diff --git a/src/impl/KokkosComm_isend.hpp b/src/impl/KokkosComm_isend.hpp deleted file mode 100644 index ec742fb6..00000000 --- a/src/impl/KokkosComm_isend.hpp +++ /dev/null @@ -1,96 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include - -#include -#include - -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_request.hpp" -#include "KokkosComm_traits.hpp" -#include "KokkosComm_comm_modes.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { - -template -void isend(const SendView &sv, int dest, int tag, MPI_Comm comm, MPI_Request &req) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); - using KCT = typename KokkosComm::Traits; - - if (KokkosComm::is_contiguous(sv)) { - using SendScalar = typename SendView::non_const_value_type; - MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm, &req); - } else { - throw std::runtime_error("only contiguous views supported for low-level isend"); - } - Kokkos::Tools::popRegion(); -} - -template -Req isend(const SendMode &, const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); - - KokkosComm::Req req; - - using KCT = KokkosComm::Traits; - using KCPT = KokkosComm::PackTraits; - - auto mpi_isend_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, - MPI_Comm mpi_comm, MPI_Request *mpi_req) { - if constexpr (std::is_same_v) { - MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (std::is_same_v) { - MPI_Irsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (std::is_same_v) { - MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } - }; - - if (KCPT::needs_pack(sv)) { - using Packer = typename KCPT::packer_type; - using MpiArgs = typename Packer::args_type; - - MpiArgs args = Packer::pack(space, sv); - space.fence(); - mpi_isend_fn(KokkosComm::data_handle(args.view), args.count, args.datatype, dest, tag, comm, &req.mpi_req()); - req.keep_until_wait(args.view); - } else { - using SendScalar = typename SendView::value_type; - space.fence(); // can't issue isend until work in space is complete - mpi_isend_fn(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm, - &req.mpi_req()); - if (KokkosComm::is_reference_counted()) { - req.keep_until_wait(sv); - } - } - - Kokkos::Tools::popRegion(); - return req; -} - -template -Req isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - return isend(KokkosComm::DefaultCommMode(), space, sv, dest, tag, comm); -} - -} // namespace KokkosComm::Impl diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 93ea170f..6a13c96f 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -31,27 +31,44 @@ if (NOT googletest_POPULATED) endif() # Standalone MPI smoke tests (do not use KokkosComm) -add_executable(test-mpi test_mpi.cpp) -add_test(NAME test-mpi-1 - COMMAND mpirun -np 1 ./test-mpi -) -add_test(NAME test-mpi-2 - COMMAND mpirun -np 2 ./test-mpi -) -# doesn't use KokkosComm, so explicitly link MPI -target_link_libraries(test-mpi MPI::MPI_CXX) + # Kokkos Comm tests -add_executable(test-main test_main.cpp - test_gtest_mpi.cpp - test_isendirecv.cpp - test_isendrecv.cpp +set(KOKKOSCOMM_TEST_SOURCES) +list(APPEND KOKKOSCOMM_TEST_SOURCES test_main.cpp test_sendrecv.cpp test_barrier.cpp - test_alltoall.cpp - test_reduce.cpp - test_allgather.cpp + +) +if(KOKKOSCOMM_ENABLE_MPI) + + add_executable(test-mpi mpi/test_mpi.cpp) + add_test(NAME test-mpi-1 + COMMAND mpirun -np 1 ./test-mpi + ) + add_test(NAME test-mpi-2 + COMMAND mpirun -np 2 ./test-mpi + ) + # doesn't use KokkosComm, so explicitly link MPI + target_link_libraries(test-mpi MPI::MPI_CXX) + + + list(PREPEND KOKKOSCOMM_TEST_SOURCES + mpi/test_gtest_mpi.cpp + ) + list(APPEND KOKKOSCOMM_TEST_SOURCES + mpi/test_sendrecv.cpp + mpi/test_allgather.cpp + mpi/test_alltoall.cpp + mpi/test_isendrecv.cpp + mpi/test_alltoall.cpp + mpi/test_reduce.cpp + mpi/test_allgather.cpp + ) +endif() + add_executable(test-main ${KOKKOSCOMM_TEST_SOURCES} ) + target_link_libraries(test-main KokkosComm::KokkosComm gtest) if(KOKKOSCOMM_ENABLE_TESTS) kokkoscomm_add_cxx_flags(TARGET test-main) diff --git a/unit_tests/test_allgather.cpp b/unit_tests/mpi/test_allgather.cpp similarity index 91% rename from unit_tests/test_allgather.cpp rename to unit_tests/mpi/test_allgather.cpp index 8671039c..a7798abb 100644 --- a/unit_tests/test_allgather.cpp +++ b/unit_tests/mpi/test_allgather.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { @@ -42,7 +42,7 @@ void test_allgather_0d() { Kokkos::parallel_for( sv.extent(0), KOKKOS_LAMBDA(const int) { sv() = rank; }); - KokkosComm::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); + KokkosComm::mpi::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( @@ -67,7 +67,7 @@ void test_allgather_1d_contig() { Kokkos::parallel_for( sv.extent(0), KOKKOS_LAMBDA(const int i) { sv(i) = rank + i; }); - KokkosComm::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); + KokkosComm::mpi::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( diff --git a/unit_tests/test_alltoall.cpp b/unit_tests/mpi/test_alltoall.cpp similarity index 98% rename from unit_tests/test_alltoall.cpp rename to unit_tests/mpi/test_alltoall.cpp index db0103c2..da074c6d 100644 --- a/unit_tests/test_alltoall.cpp +++ b/unit_tests/mpi/test_alltoall.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/test_gtest_mpi.cpp b/unit_tests/mpi/test_gtest_mpi.cpp similarity index 100% rename from unit_tests/test_gtest_mpi.cpp rename to unit_tests/mpi/test_gtest_mpi.cpp diff --git a/unit_tests/test_isendrecv.cpp b/unit_tests/mpi/test_isendrecv.cpp similarity index 58% rename from unit_tests/test_isendrecv.cpp rename to unit_tests/mpi/test_isendrecv.cpp index 40e6995f..c74179bb 100644 --- a/unit_tests/test_isendrecv.cpp +++ b/unit_tests/mpi/test_isendrecv.cpp @@ -17,10 +17,12 @@ #include #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { +using namespace KokkosComm::mpi; + template class IsendRecv : public testing::Test { public: @@ -31,29 +33,28 @@ using ScalarTypes = ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(IsendRecv, ScalarTypes); -template +template void isend_comm_mode_1d_contig() { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } Kokkos::View a("a", 1000); - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::isend(IsendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD).wait(); - } else if (1 == rank) { + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0, IsendMode{}); + KokkosComm::wait(req); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(h.space(), a, src, 0, h.mpi_comm()); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -61,9 +62,9 @@ void isend_comm_mode_1d_contig() { } } -template +template void isend_comm_mode_1d_noncontig() { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -71,17 +72,20 @@ void isend_comm_mode_1d_noncontig() { Kokkos::View b("a", 10, 10); auto a = Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; + } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::isend(IsendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD).wait(); - } else if (1 == rank) { + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0, IsendMode{}); + KokkosComm::wait(req); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(h.space(), a, src, 0, h.mpi_comm()); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -90,27 +94,25 @@ void isend_comm_mode_1d_noncontig() { } TYPED_TEST(IsendRecv, 1D_contig_standard) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } -TYPED_TEST(IsendRecv, 1D_contig_ready) { - isend_comm_mode_1d_contig(); -} +TYPED_TEST(IsendRecv, 1D_contig_ready) { isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_contig_synchronous) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_noncontig_standard) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_ready) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_synchronous) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } } // namespace diff --git a/unit_tests/test_mpi.cpp b/unit_tests/mpi/test_mpi.cpp similarity index 100% rename from unit_tests/test_mpi.cpp rename to unit_tests/mpi/test_mpi.cpp diff --git a/unit_tests/test_reduce.cpp b/unit_tests/mpi/test_reduce.cpp similarity index 93% rename from unit_tests/test_reduce.cpp rename to unit_tests/mpi/test_reduce.cpp index a8d33efe..82db420a 100644 --- a/unit_tests/test_reduce.cpp +++ b/unit_tests/mpi/test_reduce.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { @@ -50,7 +50,7 @@ void test_reduce_1d_contig() { Kokkos::parallel_for( sendv.extent(0), KOKKOS_LAMBDA(const int i) { sendv(i) = rank + i; }); - KokkosComm::reduce(Kokkos::DefaultExecutionSpace(), sendv, recvv, MPI_SUM, 0, MPI_COMM_WORLD); + KokkosComm::mpi::reduce(Kokkos::DefaultExecutionSpace(), sendv, recvv, MPI_SUM, 0, MPI_COMM_WORLD); if (0 == rank) { int errs; diff --git a/unit_tests/mpi/test_sendrecv.cpp b/unit_tests/mpi/test_sendrecv.cpp new file mode 100644 index 00000000..c0031bba --- /dev/null +++ b/unit_tests/mpi/test_sendrecv.cpp @@ -0,0 +1,115 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include + +#include "KokkosComm/KokkosComm.hpp" + +namespace { + +using namespace KokkosComm::mpi; + +template +class MpiSendRecv : public testing::Test { + public: + using Scalar = T; +}; + +using ScalarTypes = ::testing::Types, Kokkos::complex>; +TYPED_TEST_SUITE(MpiSendRecv, ScalarTypes); + +template +void send_comm_mode_1d_contig() { + if constexpr (std::is_same_v) { + GTEST_SKIP() << "Skipping test for ready-mode send"; + } + + Kokkos::View a("a", 1000); + + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + } + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD, SendMode{}); + } else if (1 == rank) { + int src = 0; + KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + int errs; + Kokkos::parallel_reduce( + a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); + ASSERT_EQ(errs, 0); + } +} + +template +void send_comm_mode_1d_noncontig() { + if constexpr (std::is_same_v) { + GTEST_SKIP() << "Skipping test for ready-mode send"; + } + + // this is C-style layout, i.e. b(0,0) is next to b(0,1) + Kokkos::View b("b", 10, 10); + auto a = Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) + + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + if (0 == rank) { + int dst = 1; + Kokkos::parallel_for( + a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD, SendMode{}); + } else if (1 == rank) { + int src = 0; + KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + int errs; + Kokkos::parallel_reduce( + a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); + ASSERT_EQ(errs, 0); + } +} + +TYPED_TEST(MpiSendRecv, 1D_contig_standard) { + send_comm_mode_1d_contig(); +} + +TYPED_TEST(MpiSendRecv, 1D_contig_ready) { send_comm_mode_1d_contig(); } + +TYPED_TEST(MpiSendRecv, 1D_contig_synchronous) { + send_comm_mode_1d_contig(); +} + +TYPED_TEST(MpiSendRecv, 1D_noncontig_standard) { + send_comm_mode_1d_noncontig(); +} + +TYPED_TEST(MpiSendRecv, 1D_noncontig_ready) { + send_comm_mode_1d_noncontig(); +} + +TYPED_TEST(MpiSendRecv, 1D_noncontig_synchronous) { + send_comm_mode_1d_noncontig(); +} + +} // namespace diff --git a/unit_tests/test_barrier.cpp b/unit_tests/test_barrier.cpp index c9aa95ca..c3d57f30 100644 --- a/unit_tests/test_barrier.cpp +++ b/unit_tests/test_barrier.cpp @@ -16,15 +16,10 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { -TEST(Barrier, 0) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - KokkosComm::barrier(Kokkos::DefaultExecutionSpace(), MPI_COMM_WORLD); -} +TEST(Barrier, 0) { KokkosComm::barrier(KokkosComm::Handle<>{}); } } // namespace diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp deleted file mode 100644 index 0c0ada4a..00000000 --- a/unit_tests/test_isendirecv.cpp +++ /dev/null @@ -1,110 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include - -#include "KokkosComm.hpp" -#include "impl/KokkosComm_irecv.hpp" - -#include "view_builder.hpp" - -namespace { - -template -class IsendIrecv : public testing::Test { - public: - using Scalar = T; -}; - -using ScalarTypes = - ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; -TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); - -template -void test_1d(const View1D &a) { - static_assert(View1D::rank == 1, ""); - using Scalar = typename View1D::non_const_value_type; - - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; - } - - if (0 == rank) { - int dst = 1; - Kokkos::parallel_for( - a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = - KokkosComm::isend(KokkosComm::DefaultCommMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - MPI_Request req; - KokkosComm::irecv(a, src, 0, MPI_COMM_WORLD, req); - MPI_Wait(&req, MPI_STATUS_IGNORE); - int errs; - Kokkos::parallel_reduce( - a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); - ASSERT_EQ(errs, 0); - } -} - -template -void test_2d(const View2D &a) { - static_assert(View2D::rank == 2, ""); - using Scalar = typename View2D::non_const_value_type; - - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; - } - - using Policy = Kokkos::MDRangePolicy>; - Policy policy({0, 0}, {a.extent(0), a.extent(1)}); - - if (0 == rank) { - int dst = 1; - Kokkos::parallel_for( - policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::Req req = - KokkosComm::isend(KokkosComm::DefaultCommMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(a, src, 0, MPI_COMM_WORLD); - req.wait(); - int errs; - Kokkos::parallel_reduce( - policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); - ASSERT_EQ(errs, 0); - } -} - -TYPED_TEST(IsendIrecv, 1D_contig) { - auto a = ViewBuilder::view(contig{}, "a", 1013); - test_1d(a); -} - -TYPED_TEST(IsendIrecv, 2D_contig) { - auto a = ViewBuilder::view(contig{}, "a", 137, 17); - test_2d(a); -} - -} // namespace diff --git a/unit_tests/test_main.cpp b/unit_tests/test_main.cpp index 7e9b0b44..de3bd1ac 100644 --- a/unit_tests/test_main.cpp +++ b/unit_tests/test_main.cpp @@ -23,7 +23,7 @@ #include #include -#include "impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" class MpiEnvironment : public ::testing::Environment { public: diff --git a/unit_tests/test_sendrecv.cpp b/unit_tests/test_sendrecv.cpp index 04ceabd2..dc779297 100644 --- a/unit_tests/test_sendrecv.cpp +++ b/unit_tests/test_sendrecv.cpp @@ -15,9 +15,10 @@ //@HEADER #include -#include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" + +#include "view_builder.hpp" namespace { @@ -27,89 +28,71 @@ class SendRecv : public testing::Test { using Scalar = T; }; -using ScalarTypes = ::testing::Types, Kokkos::complex>; +using ScalarTypes = + ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(SendRecv, ScalarTypes); -template -void send_comm_mode_1d_contig() { - if constexpr (std::is_same_v) { - GTEST_SKIP() << "Skipping test for ready-mode send"; - } - - Kokkos::View a("a", 1000); +template +void test_1d(const View1D &a) { + static_assert(View1D::rank == 1, ""); + using Scalar = typename View1D::non_const_value_type; - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::send(SendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - } else if (1 == rank) { + KokkosComm::wait(KokkosComm::send(h, a, dst)); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::wait(KokkosComm::recv(h, a, src)); int errs; Kokkos::parallel_reduce( - a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); + a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); ASSERT_EQ(errs, 0); } } -template -void send_comm_mode_1d_noncontig() { - if constexpr (std::is_same_v) { - GTEST_SKIP() << "Skipping test for ready-mode send"; - } +template +void test_2d(const View2D &a) { + static_assert(View2D::rank == 2, ""); + using Scalar = typename View2D::non_const_value_type; - // this is C-style layout, i.e. b(0,0) is next to b(0,1) - Kokkos::View b("b", 10, 10); - auto a = Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; + } - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + using Policy = Kokkos::MDRangePolicy>; + Policy policy({0, 0}, {a.extent(0), a.extent(1)}); - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( - a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::send(SendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - } else if (1 == rank) { + policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); + KokkosComm::wait(KokkosComm::send(h, a, dst)); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::wait(KokkosComm::recv(h, a, src)); int errs; Kokkos::parallel_reduce( - a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); + policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); ASSERT_EQ(errs, 0); } } -TYPED_TEST(SendRecv, 1D_contig_standard) { - send_comm_mode_1d_contig(); -} - -TYPED_TEST(SendRecv, 1D_contig_ready) { - send_comm_mode_1d_contig(); -} - -TYPED_TEST(SendRecv, 1D_contig_synchronous) { - send_comm_mode_1d_contig(); -} - -TYPED_TEST(SendRecv, 1D_noncontig_standard) { - send_comm_mode_1d_noncontig(); -} - -TYPED_TEST(SendRecv, 1D_noncontig_ready) { - send_comm_mode_1d_noncontig(); +TYPED_TEST(SendRecv, 1D_contig) { + auto a = ViewBuilder::view(contig{}, "a", 1013); + test_1d(a); } -TYPED_TEST(SendRecv, 1D_noncontig_synchronous) { - send_comm_mode_1d_noncontig(); +TYPED_TEST(SendRecv, 2D_contig) { + auto a = ViewBuilder::view(contig{}, "a", 137, 17); + test_2d(a); } } // namespace