Skip to content

Commit 9a55da5

Browse files
authored
[ESIMD] Add set_kernel_properties API and use_double_grf property. (#6182)
This patch: 1) Adds the esimd::set_kernel_properties API with the single supported property esimd::kernel_properties::use_double_grf, which lets the compiler know that the calling kernel needs to run in "double GRF" mode - more registers per thread at the expense of fewer H/W threads. This is a temporary API until generic SYCL support for kernel properties is implemented: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc 2) Provides "lowering" of this API by the new LowerESIMDKernelProps pass, which marks such kernels with the "esimd-double-grf" function attribute, and invokes it from sycl-post-link as a part of ESIMD lowering. 3) Implements a new "dimension" of device code splitting in sycl-post-link: functions with and without the "esimd-double-grf" attribute go to different modules. Device binary images resulting from "double-grf" modules are assigned the "isDoubleGRFEsimdImage" property. 4) Updates the runtime to add the "-doubleGRF" option when JITting SPIRV binaries with the "isDoubleGRFEsimdImage" property. 5) Fixes a sycl-post-link bug in ModuleSplitter.cpp:extractSubModule, where Function objects in the entry point list were not replaced with new Function objects in the cloned Module. This led to a corrupted symbol file in some cases. 6) Misc refactoring: - factor out call graph traversal from LowerESIMD into ESIMDUtils to use from multiple sources - fix entry group and module properties handling in the light of multi-dimensional splitting - improve internal interfaces to pass a single ModuleDesc instead of Module + entry points + properties - limit entry points of a ModuleDesc to the ModuleDesc it was split from. AOT compilation support is TBD. Signed-off-by: Konstantin S Bobrovsky <[email protected]>
1 parent a61ac7a commit 9a55da5

File tree

17 files changed

+919
-312
lines changed

17 files changed

+919
-312
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===------------- ESIMDUtils.h - ESIMD t-forms-related utility functions ===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Utility functions for processing ESIMD code.
9+
//===----------------------------------------------------------------------===//
10+
11+
#include "llvm/IR/Function.h"
12+
13+
#include <functional>
14+
15+
namespace llvm {
16+
namespace esimd {
17+
18+
// Name of the function attribute used to mark ESIMD kernels which requested
// the "double GRF" mode.
constexpr char ATTR_DOUBLE_GRF[] = "esimd-double-grf";
19+
20+
// Type-erased action applied by traverseCallgraphUp to each function it
// reaches.
using CallGraphNodeAction = std::function<void(Function *)>;
21+
// Non-template overload of the call graph traversal; it is only declared
// here. It takes the action as a CallGraphNodeAction and has no default for
// \c ErrorOnNonCallUse.
void traverseCallgraphUp(llvm::Function *F, CallGraphNodeAction NodeF,
22+
bool ErrorOnNonCallUse);
23+
24+
// Traverses call graph starting from given function up the call chain applying
25+
// given action to each function met on the way. If \c ErrorOnNonCallUse
26+
// parameter is true, then no functions' uses are allowed except calls.
27+
// Otherwise, any function where use of the current one happened is added to the
28+
// call graph as if the use was a call.
29+
template <class CallGraphNodeActionF>
30+
void traverseCallgraphUp(Function *F, CallGraphNodeActionF ActionF,
31+
bool ErrorOnNonCallUse = true) {
32+
traverseCallgraphUp(F, CallGraphNodeAction{ActionF}, ErrorOnNonCallUse);
33+
}
34+
35+
// Tells whether the given function is an ESIMD kernel.
36+
bool isESIMDKernel(const Function &F);
37+
38+
} // namespace esimd
39+
} // namespace llvm

llvm/include/llvm/SYCLLowerIR/ESIMD/LowerESIMD.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ class ESIMDLowerVecArgPass : public PassInfoMixin<ESIMDLowerVecArgPass> {
6969
ModulePass *createESIMDLowerVecArgPass();
7070
void initializeESIMDLowerVecArgLegacyPassPass(PassRegistry &);
7171

72+
// Module pass which lowers calls to __esimd_set_kernel_properties.
// The lowering marks the affected kernels with the "esimd-double-grf"
// function attribute. Only the pass interface is declared here.
73+
class SYCLLowerESIMDKernelPropsPass
74+
: public PassInfoMixin<SYCLLowerESIMDKernelPropsPass> {
75+
public:
76+
// Runs the lowering on module \c M; the analysis manager argument is unnamed
// in this declaration.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
77+
};
78+
7279
} // namespace llvm
7380

7481
#endif // LLVM_SYCLLOWERIR_LOWERESIMD_H

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ MODULE_PASS("memprof-module", ModuleMemProfilerPass())
127127
MODULE_PASS("poison-checking", PoisonCheckingPass())
128128
MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
129129
MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass())
130+
MODULE_PASS("lower-esimd-kernel-props", SYCLLowerESIMDKernelPropsPass())
130131
MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass())
131132
MODULE_PASS("esimd-verifier", ESIMDVerifierPass())
132133
MODULE_PASS("lower-invoke-simd", SYCLLowerInvokeSimdPass())

llvm/lib/SYCLLowerIR/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,10 @@ set_property(GLOBAL PROPERTY LLVMGenXIntrinsics_BINARY_PROP ${LLVMGenXIntrinsics
4848

4949
add_llvm_component_library(LLVMSYCLLowerIR
5050
ESIMD/LowerESIMD.cpp
51+
ESIMD/LowerESIMDKernelProps.cpp
5152
ESIMD/LowerESIMDVLoadVStore.cpp
5253
ESIMD/LowerESIMDVecArg.cpp
54+
ESIMD/ESIMDUtils.cpp
5355
ESIMD/ESIMDVerifier.cpp
5456
LowerInvokeSimd.cpp
5557
LowerWGScope.cpp
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
2+
3+
#include "llvm/ADT/SmallPtrSet.h"
4+
#include "llvm/ADT/SmallVector.h"
5+
#include "llvm/IR/Instructions.h"
6+
#include "llvm/Support/Casting.h"
7+
8+
namespace llvm {
9+
namespace esimd {
10+
11+
// Traverses the call graph starting from function \p F up the chain of its
// users and applies \p ActionF to every function reached, \p F included.
//
// Each function is processed exactly once, even when it uses the callee from
// several places or when the call graph contains cycles.
//
// \param F                 The function to start the traversal from.
// \param ActionF           The action to apply to each reached function.
// \param ErrorOnNonCallUse If true, any use of a traversed function other than
//                          being the callee of a call instruction is reported
//                          as a fatal error. If false, such uses are tolerated:
//                          when the user is an instruction, its parent function
//                          is traversed as if the use was a call; users which
//                          are not instructions are ignored.
void traverseCallgraphUp(llvm::Function *F, CallGraphNodeAction ActionF,
                         bool ErrorOnNonCallUse) {
  // Kept as a plain C string and converted to a Twine only at the point of
  // use: a Twine is not meant to be stored in a local variable.
  const char *const ErrMsg =
      __FILE__ " "
      "Function use other than call detected while traversing call\n"
      "graph up to a kernel";

  SmallPtrSet<Function *, 32> FunctionsVisited;
  SmallVector<Function *, 32> Worklist;

  // Functions are marked as visited at the moment they are queued (not when
  // they are popped), so a function with multiple uses of the same callee is
  // queued - and hence gets the action applied - only once.
  auto Enqueue = [&FunctionsVisited, &Worklist](Function *Fn) {
    if (FunctionsVisited.insert(Fn).second)
      Worklist.push_back(Fn);
  };
  Enqueue(F);

  while (!Worklist.empty()) {
    Function *CurF = Worklist.pop_back_val();
    // Apply the action function.
    ActionF(CurF);

    // Continue the traversal with all users of the current function.
    for (const Use &U : CurF->uses()) {
      User *FUser = U.getUser();
      auto *CI = dyn_cast<CallInst>(FUser);
      // A "proper" use is a call instruction where CurF is the callee, as
      // opposed to e.g. CurF being passed as a call argument.
      const bool IsDirectCall = CI && (CI->getCalledFunction() == CurF);

      if (!IsDirectCall && ErrorOnNonCallUse)
        llvm::report_fatal_error(llvm::Twine(ErrMsg));

      // Either a direct call, or a tolerated non-call use: the function
      // containing the using instruction becomes the next node to visit.
      if (auto *I = dyn_cast<Instruction>(FUser))
        Enqueue(I->getFunction());
    }
  }
}
61+
62+
bool isESIMDKernel(const Function &F) {
63+
return (F.getCallingConv() == CallingConv::SPIR_KERNEL) &&
64+
(F.getMetadata("sycl_explicit_simd") != nullptr);
65+
}
66+
67+
} // namespace esimd
68+
} // namespace llvm

llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp

Lines changed: 61 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h"
17+
#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h"
1718

1819
#include "llvm/ADT/DenseMap.h"
1920
#include "llvm/ADT/DenseSet.h"
@@ -71,7 +72,8 @@ class SYCLLowerESIMDLegacyPass : public ModulePass {
7172

7273
char SYCLLowerESIMDLegacyPass::ID = 0;
7374
INITIALIZE_PASS(SYCLLowerESIMDLegacyPass, "LowerESIMD",
74-
"Lower constructs specific to Close To Metal", false, false)
75+
"Lower constructs specific to the 'explicit SIMD' extension",
76+
false, false)
7577

7678
// Public interface to the SYCLLowerESIMDPass.
7779
ModulePass *llvm::createSYCLLowerESIMDPass() {
@@ -899,59 +901,63 @@ static inline llvm::Metadata *getMD(llvm::Value *V) {
899901
return llvm::ValueAsMetadata::get(V);
900902
}
901903

902-
/// Updates genx.kernels metadata attribute \p MD for the given function \p F.
903-
/// The value of the attribute is updated only if the new value \p NewVal is
904-
/// bigger than what is already stored in the attribute.
905-
// TODO: 1) In general this function is supposed to handle intrinsics
906-
// translated into kernel's metadata. So, the primary/intended usage model is
907-
// when such intrinsics are called from kernels.
908-
// 2) For now such intrinsics are also handled in functions directly called
909-
// from kernels and being translate into those caller-kernel meeven though such
910-
// behaviour is not fully specified/documented.
911-
// 3) This code (or the code in FE) must verify that slm_init or other such
912-
// intrinsic is not called from another module because kernels in that other
913-
// module would not get updated meta data attributes.
914-
static void updateGenXMDNodes(llvm::Function *F, genx::KernelMDOp MD,
915-
uint64_t NewVal) {
916-
llvm::NamedMDNode *GenXKernelMD =
917-
F->getParent()->getNamedMetadata(GENX_KERNEL_METADATA);
918-
assert(GenXKernelMD && "invalid genx.kernels metadata");
919-
920-
SmallPtrSet<Function *, 32> FunctionsVisited;
921-
SmallVector<Function *, 32> Worklist{F};
922-
while (!Worklist.empty()) {
923-
Function *CurF = Worklist.pop_back_val();
924-
FunctionsVisited.insert(CurF);
925-
926-
// Update the meta data attribute for the current function.
904+
// A functor which updates ESIMD kernel's uint64_t metadata in case it is less
905+
// than the given one. Used in callgraph traversal to update nbarriers or SLM
906+
// size metadata. Update is performed by the '()' operator and happens only
907+
// when given function matches one of the kernels - thus, only reachable kernels
908+
// are updated.
909+
struct UpdateUint64MetaDataToMaxValue {
910+
Module &M;
911+
// The uint64_t metadata key to update.
912+
genx::KernelMDOp Key;
913+
// The new metadata value. Must be greater than the old for update to happen.
914+
uint64_t NewVal;
915+
// Pre-selected nodes from GENX_KERNEL_METADATA which can only potentially be
916+
// updated.
917+
SmallVector<MDNode *, 4> CandidatesToUpdate;
918+
919+
UpdateUint64MetaDataToMaxValue(Module &M, genx::KernelMDOp Key,
920+
uint64_t NewVal)
921+
: M(M), Key(Key), NewVal(NewVal) {
922+
// Pre-select nodes for update to do less work in the '()' operator.
923+
llvm::NamedMDNode *GenXKernelMD = M.getNamedMetadata(GENX_KERNEL_METADATA);
924+
assert(GenXKernelMD && "invalid genx.kernels metadata");
927925
for (auto Node : GenXKernelMD->operands()) {
928-
if (Node->getNumOperands() <= MD ||
929-
getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != CurF)
926+
if (Node->getNumOperands() <= (unsigned)Key) {
930927
continue;
931-
932-
llvm::Value *Old = getVal(Node->getOperand(MD));
928+
}
929+
llvm::Value *Old = getVal(Node->getOperand(Key));
933930
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
931+
934932
if (OldVal < NewVal) {
935-
llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
936-
Node->replaceOperandWith(MD, getMD(New));
933+
CandidatesToUpdate.push_back(Node);
937934
}
938935
}
936+
}
937+
938+
void operator()(Function *F) {
939+
// Update the meta data attribute for the current function.
940+
for (auto Node : CandidatesToUpdate) {
941+
assert(Node->getNumOperands() > (unsigned)Key);
939942

940-
// Update all callers as well.
941-
for (auto It = CurF->use_begin(); It != CurF->use_end(); It++) {
942-
auto FCall = It->getUser();
943-
if (!isa<CallInst>(FCall))
944-
llvm::report_fatal_error(
945-
llvm::Twine(__FILE__ " ") +
946-
"Found an intrinsic violating assumption on usage from a kernel or "
947-
"a func directly called from a kernel");
948-
949-
auto FCaller = cast<CallInst>(FCall)->getFunction();
950-
if (!FunctionsVisited.count(FCaller))
951-
Worklist.push_back(FCaller);
943+
if (getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != F) {
944+
continue;
945+
}
946+
llvm::Value *Old = getVal(Node->getOperand(Key));
947+
#ifndef NDEBUG
948+
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
949+
assert(OldVal < NewVal);
950+
#endif // NDEBUG
951+
llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
952+
Node->replaceOperandWith(Key, getMD(New));
952953
}
953954
}
954-
}
955+
};
956+
957+
// TODO Specify and document the behavior of slm_init and nbarrier_init when:
958+
// 1) they are called not from kernels
959+
// 2) there are multiple such calls reachable from a kernel
960+
// 3) a call is located in an external function linked by the Back-End
955961

956962
// This function sets/updates VCSLMSize attribute to the kernels
957963
// calling this intrinsic initializing SLM memory.
@@ -964,7 +970,9 @@ static void translateSLMInit(CallInst &CI) {
964970

965971
uint64_t NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
966972
assert(NewVal != 0 && "zero slm bytes being requested");
967-
updateGenXMDNodes(F, genx::KernelMDOp::SLMSize, NewVal);
973+
UpdateUint64MetaDataToMaxValue SetMaxSLMSize{
974+
*F->getParent(), genx::KernelMDOp::SLMSize, NewVal};
975+
esimd::traverseCallgraphUp(F, SetMaxSLMSize);
968976
}
969977

970978
// This function sets/updates VCNamedBarrierCount attribute to the kernels
@@ -979,7 +987,9 @@ static void translateNbarrierInit(CallInst &CI) {
979987

980988
auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
981989
assert(NewVal != 0 && "zero named barrier count being requested");
982-
updateGenXMDNodes(F, genx::KernelMDOp::NBarrierCnt, NewVal);
990+
UpdateUint64MetaDataToMaxValue SetMaxNBarrierCnt{
991+
*F->getParent(), genx::KernelMDOp::NBarrierCnt, NewVal};
992+
esimd::traverseCallgraphUp(F, SetMaxNBarrierCnt);
983993
}
984994

985995
static void translatePackMask(CallInst &CI) {
@@ -1514,8 +1524,7 @@ void generateKernelMetadata(Module &M) {
15141524

15151525
for (auto &F : M.functions()) {
15161526
// Skip non-SIMD kernels.
1517-
if (F.getCallingConv() != CallingConv::SPIR_KERNEL ||
1518-
F.getMetadata("sycl_explicit_simd") == nullptr)
1527+
if (!esimd::isESIMDKernel(F))
15191528
continue;
15201529

15211530
// Metadata node containing N i32s, where N is the number of kernel
@@ -1708,15 +1717,14 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
17081717

17091718
// process ESIMD builtins that go through special handling instead of
17101719
// the translation procedure
1711-
// TODO FIXME slm_init should be made top-level __esimd_slm_init
1720+
17121721
if (Name.startswith("__esimd_slm_init") &&
17131722
isa<ConstantInt>(CI->getArgOperand(0))) {
17141723
// tag the kernel with meta-data SLMSize, and remove this builtin
17151724
translateSLMInit(*CI);
17161725
ToErase.push_back(CI);
17171726
continue;
17181727
}
1719-
17201728
if (Name.startswith("__esimd_nbarrier_init")) {
17211729
translateNbarrierInit(*CI);
17221730
ToErase.push_back(CI);
@@ -1748,12 +1756,13 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
17481756
continue;
17491757
}
17501758
}
1751-
17521759
if (Name.startswith("__esimd_get_surface_index")) {
17531760
translateGetSurfaceIndex(*CI);
17541761
ToErase.push_back(CI);
17551762
continue;
17561763
}
1764+
assert(!Name.startswith("__esimd_set_kernel_properties") &&
1765+
"__esimd_set_kernel_properties must have been lowered");
17571766

17581767
if (Name.empty() || !Name.startswith(ESIMD_INTRIN_PREF1))
17591768
continue;

0 commit comments

Comments
 (0)