Skip to content

Commit

Permalink
Updates for Offline Deployment and General Bug/QoL Fixes (#349)
Browse files Browse the repository at this point in the history
* Adding all Thirdparty packages to CPM for ease of deployment
* Updating CPM and adding as git subtree of project
* Documentation on all of the above (in progress)
* Fixes for NVTX Macros and CMake flags
* Update includes for einsum
* Fix for size checks on matvec
* Documentation on linkage/usage
  • Loading branch information
tylera-nvidia authored Jan 11, 2023
1 parent 74442fa commit 20e00a2
Show file tree
Hide file tree
Showing 155 changed files with 5,206 additions and 326 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ set(CMAKE_CXX_STANDARD 17)
set(CUDA_CXX_STANDARD 17)

# CPM is required for all package management
include(cmake/GetCPM.cmake)
include(public/cpm-cmake/cmake/CPM.cmake)
# Helper for selecting build type
include(cmake/BuildType.cmake)

Expand Down Expand Up @@ -244,6 +244,7 @@ endif()

if (MATX_NVTX_FLAGS)
add_definitions(-DMATX_NVTX_FLAGS)
target_compile_definitions(matx INTERFACE MATX_NVTX_FLAGS)
endif()
if (MATX_BUILD_32_BIT)
add_definitions(-DINDEX_32_BIT)
Expand Down
16 changes: 10 additions & 6 deletions cmake/FindcuTENSOR.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,17 @@ if(NOT cuTENSOR_FOUND)

message(STATUS "cuTENSOR not found. Downloading library. By continuing this download you accept to the license terms of cuTENSOR")

file(DOWNLOAD https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-x86_64/libcutensor-linux-x86_64-${CUTENSOR_VERSION}-archive.tar.xz
${CMAKE_BINARY_DIR}/${CUTENSOR_FILENAME}.tar.xz)
CPMAddPackage(
NAME cutensor
VERSION ${CUTENSOR_VERSION}
URL https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/linux-x86_64/libcutensor-linux-x86_64-${CUTENSOR_VERSION}-archive.tar.xz
# cuTENSOR is a prebuilt binary archive: only download and extract it, do not add it as a CMake subproject
DOWNLOAD_ONLY YES
)

set(cuTENSOR_LIBRARY ${cutensor_SOURCE_DIR}/lib/11/libcutensor.so)
set(cuTENSOR_INCLUDE_DIR ${cutensor_SOURCE_DIR}/include)

file(ARCHIVE_EXTRACT INPUT ${CMAKE_BINARY_DIR}/${CUTENSOR_FILENAME}.tar.xz DESTINATION ${CMAKE_BINARY_DIR}/cutensor/)

set(cuTENSOR_LIBRARY ${CMAKE_BINARY_DIR}/cutensor/${CUTENSOR_FILENAME}/lib/11/libcutensor.so)
set(cuTENSOR_INCLUDE_DIR ${CMAKE_BINARY_DIR}/cutensor/${CUTENSOR_FILENAME}/include)

set(cuTENSOR_FOUND TRUE)
endif()
Expand Down
17 changes: 10 additions & 7 deletions cmake/FindcuTensorNet.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,17 @@ if(NOT cuTensorNet_FOUND)

set(CUTENSORNET_VERSION 22.03.0.40)
set(CUTENSORNET_FILENAME cuquantum-linux-x86_64-${CUTENSORNET_VERSION}-archive)

file(DOWNLOAD https://developer.download.nvidia.com/compute/cuquantum/redist/cuquantum/linux-x86_64/${CUTENSORNET_FILENAME}.tar.xz
${CMAKE_BINARY_DIR}/${CUTENSORNET_FILENAME}.tar.xz)

file(ARCHIVE_EXTRACT INPUT ${CMAKE_BINARY_DIR}/${CUTENSORNET_FILENAME}.tar.xz DESTINATION ${CMAKE_BINARY_DIR}/cutensornet/)

set(cuTensorNet_LIBRARY ${CMAKE_BINARY_DIR}/cutensornet/${CUTENSORNET_FILENAME}/lib/libcutensornet.so)
set(cuTensorNet_INCLUDE_DIR ${CMAKE_BINARY_DIR}/cutensornet/${CUTENSORNET_FILENAME}/include)
CPMAddPackage(
NAME cutensornet
VERSION ${CUTENSORNET_VERSION}
URL https://developer.download.nvidia.com/compute/cuquantum/redist/cuquantum/linux-x86_64/${CUTENSORNET_FILENAME}.tar.xz
# cuQuantum (cuTensorNet) is a prebuilt binary archive: only download and extract it, do not add it as a CMake subproject
DOWNLOAD_ONLY YES
)

set(cuTensorNet_LIBRARY ${cutensornet_SOURCE_DIR}/lib/libcutensornet.so)
set(cuTensorNet_INCLUDE_DIR ${cutensornet_SOURCE_DIR}/include)

set(cuTensorNet_FOUND TRUE)
endif()
Expand Down
8 changes: 4 additions & 4 deletions docs/_sources/api/fft.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ FFT
###
The API below provides transformations for Fast Fourier Transforms (FFTs) of both 1D and 2D types with batching.

.. doxygenfunction:: fft(OutputTensor &o, const InputTensor &i, index_t fft_size = 0, cudaStream_t stream = 0)
.. doxygenfunction:: ifft(OutputTensor &o, const InputTensor &i, index_t fft_size = 0, cudaStream_t stream = 0)
.. doxygenfunction:: fft2(OutputTensor &o, const InputTensor &i, cudaStream_t stream = 0)
.. doxygenfunction:: ifft2(OutputTensor &o, const InputTensor &i, cudaStream_t stream = 0)
.. doxygenfunction:: fft(OutputTensor o, const InputTensor i, index_t fft_size = 0, cudaStream_t stream = 0)
.. doxygenfunction:: ifft(OutputTensor o, const InputTensor i, index_t fft_size = 0, cudaStream_t stream = 0)
.. doxygenfunction:: fft2(OutputTensor o, const InputTensor i, cudaStream_t stream = 0)
.. doxygenfunction:: ifft2(OutputTensor o, const InputTensor i, cudaStream_t stream = 0)
.. doxygenfunction:: dct(OutputTensor &out, const InputTensor &in, const cudaStream_t stream = 0)
1 change: 1 addition & 0 deletions docs/_sources/api/index.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ API Reference
utilities.rst
type_traits.rst
einsum.rst
nvtx.rst
3 changes: 2 additions & 1 deletion docs/_sources/api/matmul.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ is supported for any tensor with a rank higher than 2.

Cached API
----------
.. doxygenfunction:: matmul
.. doxygenfunction:: matmul(TensorTypeC C, const TensorTypeA A, const TensorTypeB B, const int32_t (&axis)[2], cudaStream_t stream = 0, float alpha = 1.0, float beta = 0.0)
.. doxygenfunction:: matmul(TensorTypeC C, const TensorTypeA A, const TensorTypeB B, cudaStream_t stream = 0, float alpha = 1.0, float beta = 0.0)

70 changes: 70 additions & 0 deletions docs/_sources/api/nvtx.rst.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
NVTX Profiling
##############

Overview
--------
MatX provides an NVTX API to enable native compile-in profiling capabilities. The MatX NVTX API enables a user to
easily profile all MatX calls using built-in NVTX ranges, while also providing a convenient API for the user to insert
custom ranges in their own code. This API provides many convenience features such as:

- A convenient compile-in/compile-out macro-based API
- Verbosity levels allowing varying levels of profiling detail
- Built-in color rotation
- Automatic scope management and range naming
- Overloaded API for manual range specification

The MatX NVTX API is implemented as a set of C++ Macros, allowing the user to compile all calls out of the project for
maximum performance when profiling is not needed.

Enabling NVTX API and Setting Log Level
---------------------------------------
To enable the NVTX Profiling API, simply compile with ``MATX_NVTX_FLAGS=ON`` set in the cmake command (e.g. ``-DMATX_NVTX_FLAGS=ON``).
Once the flags are enabled at compile time, the project defaults to logging at the API level, which will provide NVTX
ranges for all MatX API calls. If another logging level is desired, this can be changed using the ``matx::setNVTXLogLevel()`` call.
Possible log levels are defined in ``matx_nvxtLogLevels``.

Using the NVTX API
------------------
The MatX NVTX API consists of two modes: auto managed, and manual range. The auto-managed API will automatically match the NVTX range to
the scope in which it is declared, establishing the NVTX range from the call’s instantiation to the end of its parent scope. Only a single
call is needed, with optional inputs defined below. If no message is provided, the call defaults to using the calling function’s name as
the NVTX range’s message.

The Manual Range NVTX API requires the user to make a call to the NVTX API at both the beginning and end of the desired range. The Manual
Range API uses a user defined handle (int) to reference the NVTX range. A Manual NVTX Range must be fully qualified on every instantiation.

NVTX Examples
-------------

.. list-table::
:widths: 60 40
:header-rows: 1

* - Command
- Result
* - MATX_NVTX_START("")
- NVTX range scoped to this function, named the same as the enclosing function, with log level of Internal
* - MATX_NVTX_START("MY_MESSAGE")
- NVTX range scoped to this function, named “MY_MESSAGE” with log level of Internal
* - MATX_NVTX_START("MY_MESSAGE", matx::MATX_NVTX_LOG_API )
- NVTX range scoped to this function, named “MY_MESSAGE” with log level of API
* - MATX_NVTX_START_RANGE( "MY_MESSAGE", matx_nvxtLogLevels::MATX_NVTX_LOG_USER, 1 )
- NVTX range with manual scope, named “MY_MESSAGE”, log level of USER, and handle ID of 1
* - MATX_NVTX_END_RANGE(1)
- Ends the NVTX range with a handle of 1 that was started by MATX_NVTX_START_RANGE

Code examples are provided in the ``simple_radar_pipeline`` code to show user utilization of the MatX NVTX API.

MatX NVTX API
-------------
.. doxygenfunction:: matx::setNVTXLogLevel
.. doxygenfunction:: matx::registerEvent
.. doxygenfunction:: matx::endEvent

MatX NVTX Logging Levels
------------------------
.. doxygenenum:: matx::matx_nvxtLogLevels

MatX NVTX Auto Range Colors
---------------------------
.. doxygenvariable:: matx::nvtxColors
25 changes: 17 additions & 8 deletions docs/_sources/api/reduce.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,25 @@ The reductions API provides functions for reducing data from a higher rank to a
are listed in the :ref:`statistics` API guide section. Note that to avoid collisions with the C++ standard library,
``min`` and ``max`` are called ``rmin`` and ``rmax`` in MatX.

.. doxygenfunction:: reduce(OutType dest, [[maybe_unused]] TensorIndexType idest, InType in, ReduceOp op, cudaStream_t stream = 0, bool init = true)
.. doxygenfunction:: reduce(OutType dest, [[maybe_unused]] TensorIndexType idest, const InType &in, ReduceOp op, cudaStream_t stream = 0, bool init = true)
.. doxygenfunction:: reduce(OutType dest, const InType &in, ReduceOp op, cudaStream_t stream = 0, [[maybe_unused]] bool init = true)
.. doxygenfunction:: any
.. doxygenfunction:: all
.. doxygenfunction:: rmin
.. doxygenfunction:: rmax
.. doxygenfunction:: sum
.. doxygenfunction:: argmin
.. doxygenfunction:: argmax
.. doxygenfunction:: any(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: any(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: all(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: all(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: rmin(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: rmin(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: rmax(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: rmax(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: sum(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: sum(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: argmin(OutType dest, TensorIndexType &idest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: argmin(OutType dest, TensorIndexType &idest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: argmax(OutType dest, const TensorIndexType &idest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: argmax(OutType dest, TensorIndexType &idest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: trace
.. doxygenfunction:: find
.. doxygenfunction:: find_idx
.. doxygenfunction:: unique
.. doxygenfunction:: softmax(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: softmax(OutType dest, const InType &in, cudaStream_t stream = 0)
12 changes: 8 additions & 4 deletions docs/_sources/api/stats.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ The API below provides methods for statistics functions.

.. doxygenfunction:: cumsum
.. doxygenfunction:: hist
.. doxygenfunction:: mean
.. doxygenfunction:: median
.. doxygenfunction:: var
.. doxygenfunction:: stdd
.. doxygenfunction:: mean(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: mean(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: median(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: median(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: var(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: var(OutType dest, const InType &in, cudaStream_t stream = 0)
.. doxygenfunction:: stdd(OutType dest, const InType &in, const int (&dims)[D], cudaStream_t stream = 0)
.. doxygenfunction:: stdd(OutType dest, const InType &in, cudaStream_t stream = 0)
3 changes: 1 addition & 2 deletions docs/_sources/api/tensorgenerators.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ since their only purpose is to compute a single value at a particular location b
.. doxygenfunction:: matx::linspace(const index_t (&s)[RANK], T first, T last)
.. doxygenfunction:: matx::logspace(ShapeType &&s, T first, T last)
.. doxygenfunction:: matx::logspace(const index_t (&s)[RANK], T first, T last)
.. doxygenfunction:: matx::meshgrid_x(const std::array<T, 3> &x, const std::array<T, 3> &y)
.. doxygenfunction:: matx::meshgrid_y(const std::array<T, 3> &x, const std::array<T, 3> &y)
.. doxygenfunction:: matx::meshgrid(Ts&&... ts)
.. doxygenfunction:: matx::chirp(SpaceOp t, FreqType f0, typename SpaceOp::scalar_type t1, FreqType f1, ChirpMethod method )
.. doxygenfunction:: matx::chirp(index_t num, TimeType last, FreqType f0, TimeType t1, FreqType f1, ChirpMethod method)
.. doxygenfunction:: matx::cchirp(SpaceOp t, FreqType f0, typename SpaceOp::scalar_type t1, FreqType f1, ChirpMethod method)
Expand Down
25 changes: 16 additions & 9 deletions docs/_sources/api/tensorops.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Casting Operators
Advanced Operators
------------------

.. doxygenclass:: matx::IF
.. doxygenfunction:: matx::IF
.. doxygenclass:: matx::IFELSE
.. doxygenfunction:: reverse(Op t)
.. doxygenfunction:: reverse(Op_type t)
Expand All @@ -85,19 +85,26 @@ Advanced Operators
.. doxygenfunction:: repmat(T1 t, index_t reps)
.. doxygenfunction:: repmat(T1 t, const index_t(&reps)[])
.. doxygenfunction:: repmat(T1 t, const index_t *reps)
.. doxygenfunction:: cart2sph
.. doxygenfunction:: sph2cart
.. doxygenfunction:: kron
.. doxygenfunction:: legendre(T1 n, T2 m, const T3 in)
.. doxygenfunction:: legendre(T1 n, T2 m, const T3 in, int (&axis)[2])
.. doxygenfunction:: legendre(T1 n, T2 m, const T3 in, std::array<int, 2> axis)
.. doxygenfunction:: hermitianT
.. doxygenfunction:: r2cop
.. doxygenfunction:: r2c(T1 t, index_t orig)
.. doxygenfunction:: flatten
.. doxygenfunction:: remap(Op t, Ind idx)
.. doxygenfunction:: remap(Op t, Ind idx, Inds... inds)
.. doxygenfunction:: rcollapse
.. doxygenfunction:: lcollapse
.. doxygenfunction:: clone
.. doxygenfunction:: slice( const T_wStrideT op, const typename T_wStrideT::shape_type (&starts)[T_wStrideT::Rank()], const typename T_wStrideT::shape_type (&ends)[T_wStrideT::Rank()], const typename T_wStrideT::stride_type (&strides)[T_wStrideT::Rank()])
.. doxygenfunction:: slice( const T_wShapeT op, const typename T_wShapeT::shape_type (&starts)[T_wShapeT::Rank()], const typename T_wShapeT::shape_type (&ends)[T_wShapeT::Rank()])
.. doxygenfunction:: slice( const T_wStridet_2 op, const typename T_wStridet_2::shape_type (&starts)[T_wStridet_2::Rank()], const typename T_wStridet_2::shape_type (&ends)[T_wStridet_2::Rank()], const typename T_wStridet_2::stride_type (&strides)[T_wStridet_2::Rank()])
.. doxygenfunction:: slice( const T_wShapet_2 op, const typename T_wShapet_2::shape_type (&starts)[T_wShapet_2::Rank()], const typename T_wShapet_2::shape_type (&ends)[T_wShapet_2::Rank()])

.. doxygenfunction:: clone(Op t, const index_t (&shape)[Rank])
.. doxygenfunction:: clone(Op t, const std::array<index_t, Rank> &shape)
.. doxygenfunction:: stack
.. doxygenfunction:: slice(const OpType opIn, const index_t (&starts)[OpType::Rank()], const index_t (&ends)[OpType::Rank()])
.. doxygenfunction:: slice(const OpType op, const index_t (&starts)[OpType::Rank()], const index_t (&ends)[OpType::Rank()], const index_t (&strides)[OpType::Rank()])
.. doxygenfunction:: permute(detail::tensor_impl_t<T, Rank> &out, const detail::tensor_impl_t<T, Rank> &in, const std::initializer_list<uint32_t> &dims, const cudaStream_t stream)
.. doxygenfunction:: permute(const T op, const int32_t (&dims)[T::Rank()])
.. doxygenfunction:: permute(const T &op, const int32_t (&dims)[T::Rank()])
.. doxygenfunction:: permute(const T &op, const std::array<int32_t, T::Rank()> &dims)
.. doxygenfunction:: reshape(const T &op, ShapeType &&s)
.. doxygenfunction:: reshape(const T &op, const int32_t (&sizes)[RANK])
2 changes: 1 addition & 1 deletion docs/_sources/api/tensorview.rst.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.. _tensorview_api:

Tensors
########
#######

tensor_t is the main tensor class in MatX used for tensor operations. The member functions allow different ways
to manipulate how the same data region is viewed without modifying the data.
Expand Down
2 changes: 1 addition & 1 deletion docs/_sources/api/transformations.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ below provide both cached and non-cached interfaces.
filter.rst
reduce.rst
sort.rst

.. doxygenfunction:: permute(detail::tensor_impl_t<T, Rank> &out, const detail::tensor_impl_t<T, Rank> &in, const std::initializer_list<uint32_t> &dims, const cudaStream_t stream)
Loading

0 comments on commit 20e00a2

Please sign in to comment.