@@ -529,12 +529,23 @@ gather_rgba(const Tx *p, simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
529
529
return __esimd_svm_gather4_scaled<T, N, Mask>(addrs.data (), mask.data ());
530
530
}
531
531
532
+ namespace detail {
533
+ template <rgba_channel_mask M> static void validate_rgba_write_channel_mask () {
534
+ using CM = rgba_channel_mask;
535
+ static_assert (
536
+ (M == CM::ABGR || M == CM::BGR || M == CM::GR || M == CM::R) &&
537
+ " Only ABGR, BGR, GR, R channel masks are valid in write operations" );
538
+ }
539
+ } // namespace detail
540
+
532
541
// / @anchor usm_scatter_rgba
533
542
// / Transpose and scatter pixels to given memory locations defined by the base
534
543
// / pointer \c p and \c offsets. Up to 4 32-bit data elements may be accessed at
535
544
// / each address depending on the channel mask \c Mask template parameter. Each
536
545
// / pixel's address must be 4 byte aligned. This is basically an inverse
537
- // / operation for gather_rgba.
546
+ // / operation for gather_rgba. Unlike \c gather_rgba, this function imposes
547
+ // / restrictions on possible \c Mask template argument values. It can only be
548
+ // / one of the following: \c ABGR, \c BGR, \c GR, \c R.
538
549
// /
539
550
// / @tparam Tx Element type of the \c vals vector. Must be 4 bytes in size.
540
551
// / @tparam N Number of pixels to access (matches the size of the \c offsets
@@ -553,6 +564,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
553
564
scatter_rgba (Tx *p, simd<uint32_t , N> offsets,
554
565
simd<Tx, N * get_num_channels_enabled (Mask)> vals,
555
566
simd_mask<N> mask = 1) {
567
+ detail::validate_rgba_write_channel_mask<Mask>();
556
568
simd<uint64_t , N> offsets_i = convert<uint64_t >(offsets);
557
569
simd<uint64_t , N> addrs (reinterpret_cast <uint64_t >(p));
558
570
addrs = addrs + offsets_i;
@@ -875,7 +887,7 @@ slm_gather_rgba(simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
875
887
}
876
888
877
889
// / Scatter data to the Shared Local Memory at specified \c offsets, taking
878
- // / it as simd vector. See @ref usm_gather_rgba for information about the
890
+ // / it from the \c vals vector. See @ref usm_scatter_rgba for information about the
879
891
// / operation semantics and parameter restrictions/interdependencies.
880
892
// / @tparam T The element type of the \c vals vector.
881
893
// / @tparam N The number of elements to access.
@@ -889,6 +901,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
889
901
slm_scatter_rgba (simd<uint32_t , N> offsets,
890
902
simd<T, N * get_num_channels_enabled (Mask)> vals,
891
903
simd_mask<N> mask = 1) {
904
+ detail::validate_rgba_write_channel_mask<Mask>();
892
905
const auto si = __ESIMD_GET_SURF_HANDLE (detail::LocalAccessorMarker ());
893
906
constexpr int16_t Scale = 0 ;
894
907
constexpr int global_offset = 0 ;
0 commit comments