Skip to content

Commit

Permalink
Fix intertype gpu copy (elemental#92)
Browse files Browse the repository at this point in the history
* Fix copying between types on GPU

* Update version to 1.3.2
  • Loading branch information
benson31 authored Dec 14, 2019
1 parent aced0a7 commit 752d139
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 5 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ endif (__GIT_EXECUTABLE)
# This must be set because version tags
set(HYDROGEN_VERSION_MAJOR 1)
set(HYDROGEN_VERSION_MINOR 3)
set(HYDROGEN_VERSION_PATCH 1)
set(HYDROGEN_VERSION_PATCH 2)
set(HYDROGEN_VERSION_MAJOR_MINOR
"${HYDROGEN_VERSION_MAJOR}.${HYDROGEN_VERSION_MINOR}")
set(HYDROGEN_VERSION
Expand Down
27 changes: 27 additions & 0 deletions include/El/blas_like/level1/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
#include <El/blas_like/level1/Copy/GeneralPurpose.hpp>
#include <El/blas_like/level1/Copy/util.hpp>

#ifdef HYDROGEN_HAVE_GPU
#include <hydrogen/blas/GPU_BLAS.hpp>
#endif

namespace El {
namespace details {

Expand Down Expand Up @@ -248,6 +252,29 @@ void Copy( const Matrix<T>& A, Matrix<T>& B )
}

#ifdef HYDROGEN_HAVE_CUDA
/** @brief Copy a GPU matrix into a GPU matrix of a different element type.
 *
 *  B is resized to the dimensions of A, and the entries are then handed
 *  to gpu_blas::Copy with TransposeMode::NORMAL.
 *
 *  @tparam T Element type of the source matrix.
 *  @tparam U Element type of the destination matrix.
 *
 *  @param[in]  A The source matrix.
 *  @param[out] B The destination matrix; resized to match A.
 */
template <typename T, typename U>
void Copy(Matrix<T, Device::GPU> const& A, Matrix<U, Device::GPU>& B)
{
    EL_DEBUG_CSE;

    // Resize the destination before querying its leading dimension and
    // buffer, so both are read from the post-resize state of B.
    Int const numRows = A.Height();
    Int const numCols = A.Width();
    B.Resize(numRows, numCols);

    Int const srcLDim = A.LDim();
    Int const dstLDim = B.LDim();
    T const* srcBuffer = A.LockedBuffer();
    U* dstBuffer = B.Buffer();

    SyncInfo<Device::GPU> srcSyncInfo = SyncInfoFromMatrix(A);
    SyncInfo<Device::GPU> dstSyncInfo = SyncInfoFromMatrix(B);

    // The multi-sync object is built with B's sync info first and A's
    // second, and is kept alive until the end of this scope.
    auto multiSync = MakeMultiSync(dstSyncInfo, srcSyncInfo);

    // The copy itself is issued with B's sync info.
    gpu_blas::Copy(
        TransposeMode::NORMAL,
        numRows, numCols,
        srcBuffer, srcLDim,
        dstBuffer, dstLDim,
        dstSyncInfo);
}

template<typename T>
void Copy(const Matrix<T,Device::GPU>& A, Matrix<T,Device::GPU>& B)
{
Expand Down
4 changes: 2 additions & 2 deletions include/hydrogen/blas/GPU_BLAS_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,11 @@ void Copy(SizeT size,
*
* @ingroup device_blas
*/
template <typename T, typename SizeT>
template <typename T, typename U, typename SizeT>
void Copy(TransposeMode transpA,
SizeT num_rows, SizeT num_cols,
T const* A, SizeT lda,
T* B, SizeT ldb,
U* B, SizeT ldb,
SyncInfo<Device::GPU> const& syncinfo);

/** @brief 2-D Copy operation in GPU memory with 2 strides.
Expand Down
27 changes: 25 additions & 2 deletions include/hydrogen/blas/GPU_BLAS_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,29 @@ void Copy2DImpl(SizeT nrows, SizeT ncols,
}
}

/** @brief 2-D copy between buffers of (possibly) different element types.
 *
 *  Only TransposeMode::NORMAL is implemented. In that case the call is
 *  forwarded to Copy_GPU_impl, passing SizeT(1) and the leading
 *  dimension as the two stride arguments for each buffer, together
 *  with the stream held by @c si.
 *
 *  @tparam T     Element type of the source buffer.
 *  @tparam U     Element type of the destination buffer.
 *  @tparam SizeT Integer type used for sizes and strides.
 *
 *  @param[in]  nrows  Number of rows to copy.
 *  @param[in]  ncols  Number of columns to copy.
 *  @param[in]  transA Requested transpose mode for A.
 *  @param[in]  A      The source buffer.
 *  @param[in]  lda    Leading dimension of A.
 *  @param[out] B      The destination buffer.
 *  @param[in]  ldb    Leading dimension of B.
 *  @param[in]  si     Sync info whose stream_ member is passed on.
 *
 *  @throws std::logic_error if @c transA is TransposeMode::TRANSPOSE
 *          (not yet implemented for mixed types) or any other
 *          unsupported mode.
 */
template <typename T, typename U, typename SizeT>
void Copy2DImpl(SizeT nrows, SizeT ncols,
                TransposeMode transA,
                T const* A, SizeT lda,
                U* B, SizeT ldb,
                SyncInfo<Device::GPU> const& si)
{
    switch (transA)
    {
    case TransposeMode::NORMAL:
        Copy_GPU_impl(nrows, ncols,
                      A, SizeT(1), lda,
                      B, SizeT(1), ldb, si.stream_);
        break;
    case TransposeMode::TRANSPOSE:
        // No break needed: control never continues past the throw.
        throw std::logic_error(
            "Copy2DImpl: Need to implement multitype transpose");
    default:
        throw std::logic_error("Copy2DImpl: TransposeMode not supported!");
    }
}

template <typename T, typename SizeT,
typename=EnableUnless<IsSupportedType<T,BLAS_Op::COPY>>,
typename=void>
Expand Down Expand Up @@ -555,11 +578,11 @@ void Copy(SizeT size,
details::CopyImpl(size, X, incx, Y, incy, si);
}

template <typename T, typename SizeT>
template <typename T, typename U, typename SizeT>
void Copy(TransposeMode transA,
SizeT num_rows, SizeT num_cols,
T const* A, SizeT lda,
T* B, SizeT ldb,
U* B, SizeT ldb,
SyncInfo<Device::GPU> const& si)
{
details::Copy2DImpl(num_rows, num_cols, transA,
Expand Down

0 comments on commit 752d139

Please sign in to comment.