@@ -552,26 +552,26 @@ inline void hwc2chw_rocm(
552
552
const size_t input_size = pixel_size * sizeof (uint8_t );
553
553
const size_t output_size = pixel_size * sizeof (float );
554
554
555
- void * rocm_input_memory = nullptr ;
556
- void * rocm_output_memory = nullptr ;
555
+ hipDeviceptr_t rocm_input_memory = 0 ;
556
+ hipDeviceptr_t rocm_output_memory = 0 ;
557
557
558
558
// Allocate device memory (stream-ordered on rocmstream)
559
- hipError_t hipRes0 = hipHostMalloc (&rocm_input_memory, input_size, 0 );
560
- hipError_t hipRes1 = hipHostMalloc (&rocm_output_memory, output_size, 0 );
559
+ hipError_t hipRes0 = hipMallocAsync (&rocm_input_memory, input_size, rocmstream );
560
+ hipError_t hipRes1 = hipMallocAsync (&rocm_output_memory, output_size, rocmstream );
561
561
562
562
if (hipRes0 != 0 || hipRes1 != 0 ) {
563
- hipHostFree (rocm_input_memory);
564
- hipHostFree (rocm_output_memory);
563
+ hipFreeAsync (rocm_input_memory, rocmstream );
564
+ hipFreeAsync (rocm_output_memory, rocmstream );
565
565
hwc2chw<uint8_t , float >(h, w, c, src, dst, alpha);
566
566
return ;
567
567
}
568
568
569
569
// Copy host memory to device memory
570
- hipError_t hipRes2 = hipMemcpyAsync (rocm_input_memory, src, input_size, hipMemcpyKind::hipMemcpyHostToDevice , rocmstream);
570
+ hipError_t hipRes2 = hipMemcpyHtoDAsync (rocm_input_memory, src, input_size, rocmstream);
571
571
572
572
if (hipRes2 != 0 ) {
573
- hipHostFree (rocm_input_memory);
574
- hipHostFree (rocm_output_memory);
573
+ hipFreeAsync (rocm_input_memory, rocmstream );
574
+ hipFreeAsync (rocm_output_memory, rocmstream );
575
575
hwc2chw<uint8_t , float >(h, w, c, src, dst, alpha);
576
576
return ;
577
577
}
@@ -593,25 +593,25 @@ inline void hwc2chw_rocm(
593
593
0 , rocmstream, args, nullptr );
594
594
595
595
if (hipRes3 != 0 ) {
596
- hipHostFree (rocm_input_memory);
597
- hipHostFree (rocm_output_memory);
596
+ hipFreeAsync (rocm_input_memory, rocmstream );
597
+ hipFreeAsync (rocm_output_memory, rocmstream );
598
598
hwc2chw<uint8_t , float >(h, w, c, src, dst, alpha);
599
599
return ;
600
600
}
601
601
602
602
// Copy device memory to host memory
603
- hipError_t hipRes5 = hipMemcpyAsync (dst, rocm_output_memory, output_size, hipMemcpyKind::hipMemcpyDeviceToHost , rocmstream);
603
+ hipError_t hipRes5 = hipMemcpyDtoHAsync (dst, rocm_output_memory, output_size, rocmstream);
604
604
605
605
if (hipRes5 != 0 ) {
606
- hipHostFree (rocm_input_memory);
607
- hipHostFree (rocm_output_memory);
606
+ hipFreeAsync (rocm_input_memory, rocmstream );
607
+ hipFreeAsync (rocm_output_memory, rocmstream );
608
608
hwc2chw<uint8_t , float >(h, w, c, src, dst, alpha);
609
609
return ;
610
610
}
611
611
612
612
// Free memory
613
- hipHostFree (rocm_input_memory);
614
- hipHostFree (rocm_output_memory);
613
+ hipFreeAsync (rocm_input_memory, rocmstream );
614
+ hipFreeAsync (rocm_output_memory, rocmstream );
615
615
616
616
// Stream synchronization
617
617
hipError_t hipRes4 = hipStreamSynchronize (rocmstream);
@@ -645,25 +645,25 @@ inline void chw2hwc_rocm(
645
645
const size_t pixel_size = h * w * c;
646
646
size_t input_size = pixel_size * sizeof (float );
647
647
size_t output_size = pixel_size * sizeof (uint8_t );
648
- void * rocm_input_memory = nullptr ;
649
- void * rocm_output_memory = nullptr ;
648
+ hipDeviceptr_t rocm_input_memory = 0 ;
649
+ hipDeviceptr_t rocm_output_memory = 0 ;
650
650
651
651
// Allocate device memory
652
- hipError_t hipRes0 = hipHostMalloc (&rocm_input_memory, input_size, 0 );
653
- hipError_t hipRes1 = hipHostMalloc (&rocm_output_memory, output_size, 0 );
652
+ hipError_t hipRes0 = hipMallocAsync (&rocm_input_memory, input_size, rocmstream );
653
+ hipError_t hipRes1 = hipMallocAsync (&rocm_output_memory, output_size, rocmstream );
654
654
655
655
if (hipRes0 != 0 || hipRes1 != 0 ) {
656
- hipHostFree (rocm_input_memory);
657
- hipHostFree (rocm_output_memory);
656
+ hipFreeAsync (rocm_input_memory, rocmstream );
657
+ hipFreeAsync (rocm_output_memory, rocmstream );
658
658
chw2hwc<float , uint8_t >(h, w, c, src, dst, alpha); return ;
659
659
}
660
660
661
661
// Copy host memory to device memory
662
- hipError_t hipRes2 = hipMemcpyAsync (rocm_input_memory, src, input_size, hipMemcpyKind::hipMemcpyHostToDevice , rocmstream);
662
+ hipError_t hipRes2 = hipMemcpyHtoDAsync (rocm_input_memory, src, input_size, rocmstream);
663
663
664
664
if (hipRes2 != 0 ) {
665
- hipHostFree (rocm_input_memory);
666
- hipHostFree (rocm_output_memory);
665
+ hipFreeAsync (rocm_input_memory, rocmstream );
666
+ hipFreeAsync (rocm_output_memory, rocmstream );
667
667
chw2hwc<float , uint8_t >(h, w, c, src, dst, alpha); return ;
668
668
}
669
669
@@ -686,23 +686,23 @@ inline void chw2hwc_rocm(
686
686
0 , rocmstream, args, nullptr );
687
687
688
688
if (hipRes3 != 0 ) {
689
- hipHostFree (rocm_input_memory);
690
- hipHostFree (rocm_output_memory);
689
+ hipFreeAsync (rocm_input_memory, rocmstream );
690
+ hipFreeAsync (rocm_output_memory, rocmstream );
691
691
chw2hwc<float , uint8_t >(h, w, c, src, dst, alpha); return ;
692
692
}
693
693
694
694
// Copy device memory to host memory
695
- hipError_t hipRes5 = hipMemcpyAsync (dst, rocm_output_memory, output_size, hipMemcpyKind::hipMemcpyDeviceToHost , rocmstream);
695
+ hipError_t hipRes5 = hipMemcpyDtoHAsync (dst, rocm_output_memory, output_size, rocmstream);
696
696
697
697
if (hipRes5 != 0 ) {
698
- hipHostFree (rocm_input_memory);
699
- hipHostFree (rocm_output_memory);
698
+ hipFreeAsync (rocm_input_memory, rocmstream );
699
+ hipFreeAsync (rocm_output_memory, rocmstream );
700
700
chw2hwc<float , uint8_t >(h, w, c, src, dst, alpha); return ;
701
701
}
702
702
703
703
// Free memory
704
- hipHostFree (rocm_input_memory);
705
- hipHostFree (rocm_output_memory);
704
+ hipFreeAsync (rocm_input_memory, rocmstream );
705
+ hipFreeAsync (rocm_output_memory, rocmstream );
706
706
707
707
hipError_t hipRes4 = hipStreamSynchronize (rocmstream);
708
708
@@ -725,7 +725,7 @@ inline void chw2hwc_rocm(
725
725
*/
726
726
inline void hwc2chw_rocm (
727
727
const size_t h, const size_t w, const size_t c,
728
- void * src, void * dst,
728
+ hipDeviceptr_t src, hipDeviceptr_t dst,
729
729
const float alpha = 1 .f / 255 .f) {
730
730
731
731
const size_t pixel_size = h * w * c;
@@ -769,7 +769,7 @@ inline void hwc2chw_rocm(
769
769
*/
770
770
inline void chw2hwc_rocm (
771
771
const size_t c, const size_t h, const size_t w,
772
- void * src, void * dst,
772
+ hipDeviceptr_t src, hipDeviceptr_t dst,
773
773
const uint8_t alpha = 255 .0f ) {
774
774
775
775
const unsigned int blockDimX = 32 , blockDimY = 32 , blockDimZ = 1 ;
0 commit comments