Skip to content

Commit

Permalink
[XPU] feat: add xpu async memory copy to enable zero cost checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
tizhou86 committed Feb 18, 2025
1 parent bb523c5 commit 033a925
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 1 deletion.
2 changes: 1 addition & 1 deletion paddle/fluid/platform/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ void InitMemoryMethod() {
memory_method->allocation_deleter =
paddle::memory::allocation::Allocator::AllocationDeleter;
#if defined(PADDLE_WITH_CUSTOM_DEVICE) || defined(PADDLE_WITH_CUDA) || \
defined(PADDLE_WITH_HIP)
defined(PADDLE_WITH_HIP) || defined(PADDLE_WITH_XPU)
memory_method->copy_with_stream =
paddle::memory::Copy<phi::Place, phi::Place>;
#endif
Expand Down
106 changes: 106 additions & 0 deletions paddle/phi/core/memory/memcpy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ limitations under the License. */

#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_header.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#endif

namespace paddle::memory {
Expand Down Expand Up @@ -248,6 +250,110 @@ void Copy<phi::Place, phi::XPUPlace>(phi::Place dst_place,
}
}

// Stream-taking host -> XPU copy.
//
// Submits a transfer of `num` bytes from host memory `src` to XPU memory `dst`
// through xpu_memcpy_async on `stream`. A zero-byte request is logged and
// ignored. The status returned by the runtime is enforced, so a failed
// submission raises instead of being silently dropped.
//
// NOTE(review): the transfer is presumably still in flight when this function
// returns; callers should synchronize on `stream` before reusing `src` or
// reading `dst` -- confirm against the XPU runtime documentation.
// NOTE(review): `dst_place` is not used to select a device before the copy --
// confirm the caller has already made the target device current.
template <>
void Copy<phi::XPUPlace, phi::CPUPlace>(phi::XPUPlace dst_place,
                                        void* dst,
                                        phi::CPUPlace src_place,
                                        const void* src,
                                        size_t num,
                                        void* stream) {
  // `num` is unsigned, so the historical "<= 0" test can only mean "== 0".
  if (num == 0) {
    VLOG(1) << "memcpy XPU_HOST_TO_DEVICE size <= 0 (" << num << ")";
    return;
  }
  PADDLE_ENFORCE_XPU_SUCCESS(
      xpu_memcpy_async(dst, src, num, XPU_HOST_TO_DEVICE, stream));
}

// Stream-taking XPU -> host copy.
//
// Submits a transfer of `num` bytes from XPU memory `src` to host memory `dst`
// through xpu_memcpy_async on `stream`. A zero-byte request is logged and
// ignored. The status returned by the runtime is enforced, so a failed
// submission raises instead of being silently dropped.
//
// NOTE(review): the transfer is presumably still in flight when this function
// returns; callers should synchronize on `stream` before reading `dst` --
// confirm against the XPU runtime documentation.
// NOTE(review): `src_place` is not used to select a device before the copy --
// confirm the caller has already made the source device current.
template <>
void Copy<phi::CPUPlace, phi::XPUPlace>(phi::CPUPlace dst_place,
                                        void* dst,
                                        phi::XPUPlace src_place,
                                        const void* src,
                                        size_t num,
                                        void* stream) {
  // `num` is unsigned, so the historical "<= 0" test can only mean "== 0".
  if (num == 0) {
    VLOG(1) << "memcpy XPU_DEVICE_TO_HOST size <= 0 (" << num << ")";
    return;
  }
  PADDLE_ENFORCE_XPU_SUCCESS(
      xpu_memcpy_async(dst, src, num, XPU_DEVICE_TO_HOST, stream));
}

// Stream-taking XPU -> XPU copy.
//
// Moves `num` bytes from `src` (on `src_place`) to `dst` (on `dst_place`) by
// delegating to platform::MemcpySyncD2D. `stream` is accepted so the
// signature matches the other stream-taking overloads, but it is not used by
// this specialization. A zero-byte request is only logged.
template <>
void Copy<phi::XPUPlace, phi::XPUPlace>(phi::XPUPlace dst_place,
                                        void* dst,
                                        phi::XPUPlace src_place,
                                        const void* src,
                                        size_t num,
                                        void* stream) {
  const bool has_payload = num > 0;
  if (has_payload) {
    platform::MemcpySyncD2D(dst, dst_place, src, src_place, num);
    return;
  }
  // Nothing to transfer: report it and leave both buffers untouched.
  VLOG(1) << "memcpy XPU_DEVICE_TO_DEVICE size <= 0 (" << num << ")";
}

// NOTE: only for (CPUPlace and XPUPlace) -> (XPUPlace).
//
// Stream-taking dispatcher for copies whose destination is an XPU and whose
// source place is only known at run time. The source's allocation type
// selects the concrete stream-taking specialization:
//   - CPU source -> Copy<XPUPlace, CPUPlace>
//   - XPU source -> Copy<XPUPlace, XPUPlace>
// Any other source type matches no branch and nothing is copied.
template <>
void Copy<phi::XPUPlace, phi::Place>(phi::XPUPlace dst_place,
                                     void* dst,
                                     phi::Place src_place,
                                     const void* src,
                                     size_t num,
                                     void* stream) {
  // `stream` must be forwarded: omitting it resolves to the five-argument
  // overload and the caller's stream would be silently ignored.
  if (src_place.GetType() == phi::AllocationType::CPU) {
    phi::CPUPlace place_src;
    return Copy(dst_place, dst, place_src, src, num, stream);
  } else if (src_place.GetType() == phi::AllocationType::XPU) {
    phi::XPUPlace place_src(src_place.GetDeviceId());
    return Copy(dst_place, dst, place_src, src, num, stream);
  }
}

// NOTE: only for (XPUPlace) -> (CPUPlace and XPUPlace).
//
// Stream-taking dispatcher for copies whose source is an XPU and whose
// destination place is only known at run time. The destination's allocation
// type selects the concrete stream-taking specialization:
//   - CPU destination -> Copy<CPUPlace, XPUPlace>
//   - XPU destination -> Copy<XPUPlace, XPUPlace>
// Any other destination type matches no branch and nothing is copied.
template <>
void Copy<phi::Place, phi::XPUPlace>(phi::Place dst_place,
                                     void* dst,
                                     phi::XPUPlace src_place,
                                     const void* src,
                                     size_t num,
                                     void* stream) {
  // `stream` must be forwarded: omitting it resolves to the five-argument
  // overload and the caller's stream would be silently ignored.
  if (dst_place.GetType() == phi::AllocationType::CPU) {
    phi::CPUPlace place_dst;
    return Copy(place_dst, dst, src_place, src, num, stream);
  } else if (dst_place.GetType() == phi::AllocationType::XPU) {
    phi::XPUPlace place_dst(dst_place.GetDeviceId());
    return Copy(place_dst, dst, src_place, src, num, stream);
  }
}

// Stream-taking dispatcher for copies where both places are only known at run
// time. Supported combinations are forwarded, together with `stream`, to the
// matching stream-taking specialization:
//   - XPU -> CPU : Copy<CPUPlace, XPUPlace>
//   - CPU -> XPU : Copy<XPUPlace, CPUPlace>
//   - XPU -> XPU : Copy<XPUPlace, XPUPlace>
// When the destination is CPU or XPU but the source type is not supported for
// it, a VLOG(4) message is emitted and nothing is copied. Any other
// destination type is ignored without logging.
template <>
void Copy<phi::Place, phi::Place>(phi::Place dst_place,
                                  void* dst,
                                  phi::Place src_place,
                                  const void* src,
                                  size_t num,
                                  void* stream) {
  const auto src_type = src_place.GetType();
  if (dst_place.GetType() == phi::AllocationType::CPU) {
    phi::CPUPlace place_dst;
    if (src_type == phi::AllocationType::XPU) {
      phi::XPUPlace place_src(src_place.GetDeviceId());
      // Forward `stream`; without it the five-argument overload is chosen and
      // the caller's stream is ignored.
      return Copy(place_dst, dst, place_src, src, num, stream);
    }
    VLOG(4) << "cannot fit into a copy stereotype, might be an error";
  } else if (dst_place.GetType() == phi::AllocationType::XPU) {
    phi::XPUPlace place_dst(dst_place.GetDeviceId());
    if (src_type == phi::AllocationType::CPU) {
      phi::CPUPlace place_src;
      return Copy(place_dst, dst, place_src, src, num, stream);
    }
    if (src_type == phi::AllocationType::XPU) {
      // Device-to-device requests used to fall through to the log line below
      // and were never performed; route them to the XPU -> XPU specialization.
      phi::XPUPlace place_src(src_place.GetDeviceId());
      return Copy(place_dst, dst, place_src, src, num, stream);
    }
    VLOG(4) << "cannot fit into a copy stereotype, might be an error";
  }
}

#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Expand Down

0 comments on commit 033a925

Please sign in to comment.