Skip to content

Commit 1811162

Browse files
authored
[ESIMD] Implement stateless memory accesses enforcement (#6287)
The driver option -f[no-]sycl-esimd-force-stateless-mem is added. -fsycl-esimd-force-stateless-mem enables the automatic conversion of stateful memory accesses via SYCL accessors or surface-index to stateless within ESIMD kernels. It also disables those ESIMD intrinsics that use stateful accesses that cannot be converted to stateless. -fsycl-esimd-force-stateless-mem defines the macro __ESIMD_FORCE_STATELESS_MEM to map the calls of ESIMD API using accessors to calls of API using pointers. It also passes a switch to sycl-post-link to signal it that it should ignore the buffer_t attribute and use svmptr_t. -fno-sycl-esimd-force-stateless-mem is used to tell the compiler not to convert stateful memory accesses to stateless. Default behavior. Draft of the design document/proposal for this change-set: #6187
1 parent 7a076bd commit 1811162

File tree

15 files changed

+203
-14
lines changed

15 files changed

+203
-14
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
275275
LANGOPT(SYCLAllowFuncPtr , 1, 0, "Allow function pointers in SYCL device code")
276276
LANGOPT(SYCLStdLayoutKernelParams, 1, 0, "Enable standard layout requirement for SYCL kernel parameters")
277277
LANGOPT(SYCLUnnamedLambda , 1, 0, "Allow unnamed lambda SYCL kernels")
278+
LANGOPT(SYCLESIMDForceStatelessMem, 1, 0, "Make accessors use USM memory in ESIMD kernels")
278279
ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")
279280
LANGOPT(DeclareSPIRVBuiltins, 1, 0, "Declare SPIR-V builtin functions")
280281
LANGOPT(SYCLExplicitSIMD , 1, 0, "SYCL compilation with explicit SIMD extension")

clang/include/clang/Driver/Options.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2792,6 +2792,14 @@ def fintelfpga : Flag<["-"], "fintelfpga">, Group<f_Group>,
27922792
Flags<[CC1Option, CoreOption]>, HelpText<"Perform ahead of time compilation for FPGA">;
27932793
def fsycl_device_only : Flag<["-"], "fsycl-device-only">, Flags<[CoreOption]>,
27942794
HelpText<"Compile SYCL kernels for device">;
2795+
defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-mem",
2796+
LangOpts<"SYCLESIMDForceStatelessMem">, DefaultFalse,
2797+
PosFlag<SetTrue, [], "Enforce using stateless memory accesses. "
2798+
"Convert stateful accesses via SYCL accessors to stateless within ESIMD kernels. "
2799+
"Disabled by default. (experimental)">,
2800+
NegFlag<SetFalse, [], "Do not enforce using stateless memory accesses. (experimental)">,
2801+
BothFlags<[CC1Option, CoreOption], "">>;
2802+
27952803
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[NoXarchOption, CC1Option, CoreOption]>,
27962804
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;
27972805
def fsycl_add_targets_EQ : CommaJoined<["-"], "fsycl-add-targets=">,

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,8 +830,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
830830

831831
if (LangOpts.SYCLIsDevice)
832832
PB.registerPipelineStartEPCallback(
833-
[](ModulePassManager &MPM, OptimizationLevel Level) {
834-
MPM.addPass(ESIMDVerifierPass());
833+
[&](ModulePassManager &MPM, OptimizationLevel Level) {
834+
MPM.addPass(ESIMDVerifierPass(LangOpts.SYCLESIMDForceStatelessMem));
835835
});
836836

837837
bool IsThinLTO = CodeGenOpts.PrepareForThinLTO;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4912,6 +4912,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
49124912
CmdArgs.push_back("-fsycl-allow-func-ptr");
49134913
}
49144914

4915+
if (Args.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
4916+
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
4917+
CmdArgs.push_back("-fsycl-esimd-force-stateless-mem");
4918+
49154919
// Forward -fsycl-instrument-device-code option to cc1. This option will
49164920
// only be used for SPIR-V-based targets.
49174921
if (Triple.isSPIR())
@@ -9486,6 +9490,11 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA,
94869490
else
94879491
addArgs(CmdArgs, TCArgs, {"-spec-const=default"});
94889492

9493+
// Make ESIMD accessors use stateless memory accesses.
9494+
if (TCArgs.hasFlag(options::OPT_fsycl_esimd_force_stateless_mem,
9495+
options::OPT_fno_sycl_esimd_force_stateless_mem, false))
9496+
addArgs(CmdArgs, TCArgs, {"-lower-esimd-force-stateless-mem"});
9497+
94899498
// Add output file table file option
94909499
assert(Output.isFilename() && "output must be a filename");
94919500
addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()});

clang/lib/Frontend/InitPreprocessor.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,6 +1303,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
13031303
Builder.defineMacro("__ENABLE_USM_ADDR_SPACE__");
13041304
Builder.defineMacro("SYCL_DISABLE_FALLBACK_ASSERT");
13051305
}
1306+
1307+
if (LangOpts.SYCLESIMDForceStatelessMem)
1308+
Builder.defineMacro("__ESIMD_FORCE_STATELESS_MEM");
13061309
}
13071310
if (LangOpts.SYCLUnnamedLambda)
13081311
Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__");
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
/// Verify that the driver option is translated to corresponding options
3+
/// to device compilation and sycl-post-link.
4+
// RUN: %clang -### -fsycl -fsycl-esimd-force-stateless-mem \
5+
// RUN: %s 2>&1 | FileCheck -check-prefix=CHECK-PASS-TO-COMPS %s
6+
// CHECK-PASS-TO-COMPS: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
7+
// CHECK-PASS-TO-COMPS: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
8+
// CHECK-PASS-TO-COMPS-NOT: clang{{.*}} "-fsycl-is-host" {{.*}}"-fsycl-esimd-force-stateless-mem"
9+
// CHECK-PASS-TO-COMPS-NOT: clang{{.*}} "-fsycl-esimd-force-stateless-mem" {{.*}}"-fsycl-is-host"
10+
11+
/// Verify that stateless memory accesses mapping is not enforced by default
12+
// RUN: %clang -### -fsycl %s 2>&1 | FileCheck -check-prefix=CHECK-DEFAULT %s
13+
// CHECK-DEFAULT-NOT: clang{{.*}} "-fsycl-esimd-force-stateless-mem"
14+
// CHECK-DEFAULT-NOT: sycl-post-link{{.*}} "-lower-esimd-force-stateless-mem"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/// This test checks that the macro __ESIMD_FORCE_STATELESS_MEM is automatically
2+
/// defined only if the option -fsycl-esimd-force-stateless-mem is used.
3+
4+
// RUN: %clang_cc1 %s -fsycl-is-device -fsycl-esimd-force-stateless-mem -E -dM | FileCheck --check-prefix=CHECK-OPT %s
5+
6+
// RUN: %clang_cc1 %s -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
7+
// RUN: %clang_cc1 %s -fsycl-is-device -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
8+
// RUN: %clang_cc1 %s -fsycl-is-host -E -dM | FileCheck --check-prefix=CHECK-NOOPT %s
9+
10+
// CHECK-OPT:#define __ESIMD_FORCE_STATELESS_MEM 1
11+
// CHECK-NOOPT-NOT:#define __ESIMD_FORCE_STATELESS_MEM 1

llvm/include/llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,15 @@ namespace llvm {
2020
class ModulePass;
2121

2222
struct ESIMDVerifierPass : public PassInfoMixin<ESIMDVerifierPass> {
23-
ESIMDVerifierPass() {}
23+
ESIMDVerifierPass() : ForceStatelessMem(false) {}
24+
ESIMDVerifierPass(bool ForceStatelessMem)
25+
: ForceStatelessMem(ForceStatelessMem) {}
2426
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
2527
static bool isRequired() { return true; }
28+
29+
// The verifier pass allows more SYCL classes/methods when
30+
// stateless memory accesses are enforced.
31+
bool ForceStatelessMem;
2632
};
2733

2834
ModulePass *createESIMDVerifierPass();

llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/IR/Module.h"
2020
#include "llvm/InitializePasses.h"
2121
#include "llvm/Pass.h"
22+
#include "llvm/Support/CommandLine.h"
2223
#include "llvm/Support/Debug.h"
2324
#include "llvm/Support/Regex.h"
2425

@@ -49,6 +50,12 @@ static const char *LegalSYCLFunctions[] = {
4950
"^cl::sycl::ext::oneapi::experimental::spec_constant<.+>::.+",
5051
"^cl::sycl::ext::oneapi::experimental::this_sub_group"};
5152

53+
static const char *LegalSYCLFunctionsInStatelessMode[] = {
54+
"^cl::sycl::multi_ptr<.+>::get", "^cl::sycl::multi_ptr<.+>::multi_ptr",
55+
"^cl::sycl::accessor<.+>::get_pointer.+",
56+
"^cl::sycl::accessor<.+>::getPointerAdjusted",
57+
"^cl::sycl::accessor<.+>::getQualifiedPtr"};
58+
5259
namespace {
5360

5461
// Simplest possible implementation of an allocator for the Itanium demangler
@@ -83,9 +90,11 @@ class SimpleAllocator {
8390

8491
class ESIMDVerifierImpl {
8592
const Module &M;
93+
bool ForceStatelessMem;
8694

8795
public:
88-
ESIMDVerifierImpl(const Module &M) : M(M) {}
96+
ESIMDVerifierImpl(const Module &M, bool ForceStatelessMem)
97+
: M(M), ForceStatelessMem(ForceStatelessMem) {}
8998

9099
void verify() {
91100
SmallPtrSet<const Function *, 8u> Visited;
@@ -142,11 +151,14 @@ class ESIMDVerifierImpl {
142151
continue;
143152

144153
// Check if function name matches any allowed SYCL function name.
145-
if (any_of(LegalSYCLFunctions, [Name](const char *LegalName) {
146-
Regex LegalNameRE(LegalName);
147-
assert(LegalNameRE.isValid() && "invalid function name regex");
148-
return LegalNameRE.match(Name);
149-
}))
154+
auto checkLegalFunc = [Name](const char *LegalName) {
155+
Regex LegalNameRE(LegalName);
156+
assert(LegalNameRE.isValid() && "invalid function name regex");
157+
return LegalNameRE.match(Name);
158+
};
159+
if (any_of(LegalSYCLFunctions, checkLegalFunc) ||
160+
(ForceStatelessMem &&
161+
any_of(LegalSYCLFunctionsInStatelessMode, checkLegalFunc)))
150162
continue;
151163

152164
// If not, report an error.
@@ -163,14 +175,15 @@ class ESIMDVerifierImpl {
163175
} // end anonymous namespace
164176

165177
PreservedAnalyses ESIMDVerifierPass::run(Module &M, ModuleAnalysisManager &AM) {
166-
ESIMDVerifierImpl(M).verify();
178+
ESIMDVerifierImpl(M, ForceStatelessMem).verify();
167179
return PreservedAnalyses::all();
168180
}
169181

170182
namespace {
171183

172184
struct ESIMDVerifier : public ModulePass {
173185
static char ID;
186+
bool ForceStatelessMem;
174187

175188
ESIMDVerifier() : ModulePass(ID) {
176189
initializeESIMDVerifierPass(*PassRegistry::getPassRegistry());
@@ -181,7 +194,7 @@ struct ESIMDVerifier : public ModulePass {
181194
}
182195

183196
bool runOnModule(Module &M) override {
184-
ESIMDVerifierImpl(M).verify();
197+
ESIMDVerifierImpl(M, ForceStatelessMem).verify();
185198
return false;
186199
}
187200
};

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ namespace id = itanium_demangle;
4747

4848
#define MAX_DIMS 3
4949

50+
cl::opt<bool> ForceStatelessMem(
51+
"lower-esimd-force-stateless-mem", llvm::cl::Optional, llvm::cl::Hidden,
52+
llvm::cl::desc("Use stateless API for accessor based API."),
53+
llvm::cl::init(false));
54+
5055
namespace {
5156
SmallPtrSet<Type *, 4> collectGenXVolatileTypes(Module &);
5257
void generateKernelMetadata(Module &);
@@ -1573,7 +1578,7 @@ void generateKernelMetadata(Module &M) {
15731578
->getValue()
15741579
.getZExtValue())
15751580
: 0;
1576-
if (IsAcc) {
1581+
if (IsAcc && !ForceStatelessMem) {
15771582
ArgDesc = "buffer_t";
15781583
Kind = AK_SURFACE;
15791584
} else

sycl/doc/UsersManual.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,31 @@ and not recommended to use in production environment.
290290

291291
NOTE: This flag is currently only supported with the CUDA and HIP targets.
292292

293+
294+
**`-f[no-]sycl-esimd-force-stateless-mem`** [EXPERIMENTAL]
295+
296+
Enforces stateless memory access and enables the automatic conversion of
297+
"stateful" memory access via SYCL accessors to "stateless" within ESIMD
298+
(Explicit SIMD) kernels.
299+
300+
-fsycl-esimd-force-stateless-mem disables the intrinsics and methods
301+
accepting SYCL accessors or "surface-index" which cannot be automatically
302+
converted to their "stateless" equivalents.
303+
304+
-fno-sycl-esimd-force-stateless-mem is used to tell compiler not to
305+
enforce usage of stateless memory accesses. This is the default behavior.
306+
307+
NOTE: "Stateful" access is the one that uses SYCL accessor or a pair
308+
of "surface-index" + 32-bit byte-offset and uses specific memory access
309+
data port messages to read/write/fetch.
310+
"Stateless" memory access uses memory location represented with virtual
311+
memory address pointer such as USM pointer.
312+
313+
The "stateless" memory may be beneficial as it does not have the limit
314+
of 4Gb per surface.
315+
Also, some of Intel GPUs or GPU run-time/drivers may support only
316+
"stateless" memory accesses.
317+
293318
# Example: SYCL device code compilation
294319

295320
To invoke SYCL device compiler set `-fsycl-device-only` flag.

sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,10 @@ __ESIMD_INTRIN void __esimd_media_st(TACC handle, unsigned x, unsigned y,
10991099
}
11001100
#endif // __SYCL_DEVICE_ONLY__
11011101

1102+
// getter methods returning surface index are not available when stateless
1103+
// memory accesses are enforced.
1104+
#ifndef __ESIMD_FORCE_STATELESS_MEM
1105+
11021106
// \brief Converts given value to a surface index.
11031107
// The input must always be a result of
11041108
// detail::AccessorPrivateProxy::getNativeImageObj(acc)
@@ -1131,4 +1135,6 @@ __ESIMD_INTRIN __ESIMD_NS::SurfaceIndex __esimd_get_surface_index(MemObjTy obj)
11311135
}
11321136
#endif // __SYCL_DEVICE_ONLY__
11331137

1138+
#endif // !__ESIMD_FORCE_STATELESS_MEM
1139+
11341140
/// @endcond ESIMD_DETAIL

sycl/include/sycl/ext/intel/esimd/detail/util.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,16 @@ template <unsigned N> class ForHelper {
187187
}
188188
};
189189

190+
#ifdef __ESIMD_FORCE_STATELESS_MEM
191+
/// Returns the address referenced by the accessor \p Acc and
192+
/// the byte offset \p Offset.
193+
template <typename T, typename AccessorTy>
194+
T *accessorToPointer(AccessorTy Acc, uint32_t Offset = 0) {
195+
auto BytePtr = reinterpret_cast<char *>(Acc.get_pointer().get()) + Offset;
196+
return reinterpret_cast<T *>(BytePtr);
197+
}
198+
#endif // __ESIMD_FORCE_STATELESS_MEM
199+
190200
} // namespace __ESIMD_DNS
191201
} // __SYCL_INLINE_NAMESPACE(cl)
192202

sycl/include/sycl/ext/intel/esimd/memory.hpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,9 @@ template <typename Tx, int N, typename AccessorTy,
244244
class T = detail::__raw_t<Tx>>
245245
__ESIMD_API simd<Tx, N> block_load(AccessorTy acc, uint32_t offset,
246246
Flags = {}) {
247+
#ifdef __ESIMD_FORCE_STATELESS_MEM
248+
return block_load<Tx, N>(__ESIMD_DNS::accessorToPointer<Tx>(acc, offset));
249+
#else
247250
constexpr unsigned Sz = sizeof(T) * N;
248251
static_assert(Sz >= detail::OperandSize::OWORD,
249252
"block size must be at least 1 oword");
@@ -263,6 +266,7 @@ __ESIMD_API simd<Tx, N> block_load(AccessorTy acc, uint32_t offset,
263266
} else {
264267
return __esimd_oword_ld_unaligned<T, N>(surf_ind, offset);
265268
}
269+
#endif
266270
}
267271

268272
/// Stores elements of a vector to a contiguous block of memory at given
@@ -304,6 +308,9 @@ template <typename Tx, int N, typename AccessorTy,
304308
class T = detail::__raw_t<Tx>>
305309
__ESIMD_API void block_store(AccessorTy acc, uint32_t offset,
306310
simd<Tx, N> vals) {
311+
#ifdef __ESIMD_FORCE_STATELESS_MEM
312+
block_store<Tx, N>(__ESIMD_DNS::accessorToPointer<Tx>(acc, offset), vals);
313+
#else
307314
constexpr unsigned Sz = sizeof(T) * N;
308315
static_assert(Sz >= detail::OperandSize::OWORD,
309316
"block size must be at least 1 oword");
@@ -317,6 +324,7 @@ __ESIMD_API void block_store(AccessorTy acc, uint32_t offset,
317324
auto surf_ind = __esimd_get_surface_index(
318325
detail::AccessorPrivateProxy::getNativeImageObj(acc));
319326
__esimd_oword_st<T, N>(surf_ind, offset >> 4, vals.data());
327+
#endif
320328
}
321329

322330
/// @} sycl_esimd_memory
@@ -426,8 +434,12 @@ __ESIMD_API std::enable_if_t<(sizeof(T) <= 4) &&
426434
simd<T, N>>
427435
gather(AccessorTy acc, simd<uint32_t, N> offsets, uint32_t glob_offset = 0,
428436
simd_mask<N> mask = 1) {
429-
437+
#ifdef __ESIMD_FORCE_STATELESS_MEM
438+
return gather<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset),
439+
offsets, mask);
440+
#else
430441
return detail::gather_impl<T, N, AccessorTy>(acc, offsets, glob_offset, mask);
442+
#endif
431443
}
432444

433445
/// @anchor accessor_scatter
@@ -455,8 +467,12 @@ __ESIMD_API std::enable_if_t<(sizeof(T) <= 4) &&
455467
!std::is_pointer<AccessorTy>::value>
456468
scatter(AccessorTy acc, simd<uint32_t, N> offsets, simd<T, N> vals,
457469
uint32_t glob_offset = 0, simd_mask<N> mask = 1) {
458-
470+
#ifdef __ESIMD_FORCE_STATELESS_MEM
471+
scatter<T, N>(__ESIMD_DNS::accessorToPointer<T>(acc, glob_offset), offsets,
472+
vals, mask);
473+
#else
459474
detail::scatter_impl<T, N, AccessorTy>(acc, vals, offsets, glob_offset, mask);
475+
#endif
460476
}
461477

462478
/// Load a scalar value from an accessor.
@@ -623,12 +639,17 @@ __ESIMD_API std::enable_if_t<((N == 8 || N == 16 || N == 32) &&
623639
simd<T, N * get_num_channels_enabled(RGBAMask)>>
624640
gather_rgba(AccessorT acc, simd<uint32_t, N> offsets,
625641
uint32_t global_offset = 0, simd_mask<N> mask = 1) {
642+
#ifdef __ESIMD_FORCE_STATELESS_MEM
643+
return gather_rgba<RGBAMask>(
644+
__ESIMD_DNS::accessorToPointer<T>(acc, global_offset), offsets, mask);
645+
#else
626646
// TODO (performance) use hardware-supported scale once BE supports it
627647
constexpr uint32_t Scale = 0;
628648
const auto SI = get_surface_index(acc);
629649
return __esimd_gather4_masked_scaled2<detail::__raw_t<T>, N, RGBAMask,
630650
decltype(SI), Scale>(
631651
SI, global_offset, offsets.data(), mask.data());
652+
#endif
632653
}
633654

634655
/// Gather data from the memory addressed by accessor \c acc, offset common
@@ -654,11 +675,16 @@ scatter_rgba(AccessorT acc, simd<uint32_t, N> offsets,
654675
simd<T, N * get_num_channels_enabled(RGBAMask)> vals,
655676
uint32_t global_offset = 0, simd_mask<N> mask = 1) {
656677
detail::validate_rgba_write_channel_mask<RGBAMask>();
678+
#ifdef __ESIMD_FORCE_STATELESS_MEM
679+
scatter_rgba<RGBAMask>(__ESIMD_DNS::accessorToPointer<T>(acc, global_offset),
680+
offsets, vals, mask);
681+
#else
657682
// TODO (performance) use hardware-supported scale once BE supports it
658683
constexpr uint32_t Scale = 0;
659684
const auto SI = get_surface_index(acc);
660685
__esimd_scatter4_scaled<T, N, decltype(SI), RGBAMask, Scale>(
661686
mask.data(), SI, global_offset, offsets.data(), vals.data());
687+
#endif
662688
}
663689

664690
/// @} sycl_esimd_memory
@@ -1082,6 +1108,7 @@ slm_atomic_update(simd<uint32_t, N> offsets, simd<Tx, N> src0, simd<Tx, N> src1,
10821108

10831109
/// @} sycl_esimd_memory_slm
10841110

1111+
#ifndef __ESIMD_FORCE_STATELESS_MEM
10851112
/// @addtogroup sycl_esimd_memory
10861113
/// @{
10871114

@@ -1167,6 +1194,7 @@ __ESIMD_API void media_block_store(AccessorTy acc, unsigned x, unsigned y,
11671194
vals.data());
11681195
}
11691196
}
1197+
#endif // !__ESIMD_FORCE_STATELESS_MEM
11701198

11711199
/// @} sycl_esimd_memory
11721200

0 commit comments

Comments
 (0)