Skip to content

Commit 032e4ad

Browse files
dlei6gigcbot
authored and committed
[Autobackout][FuncReg]Revert of change: f1bc713
Create FunctionCloningThreshold flag to limit the number of times functions can be cloned. Functions are cloned for each function group they belong to. This can cause a function to be compiled N times for N function groups that all call this function. When N is large, this will greatly increase the compile time and memory usage. This flag limits the number of times a function can be cloned, such that if it's exceeded, we convert it to an indirectly called function. It will only be compiled once, and runtime relocation is used to patch the function address to each function group which it belongs to. The default value is 0, which means there is no limit.
1 parent ecd7f1b commit 032e4ad

File tree

4 files changed

+23
-51
lines changed

4 files changed

+23
-51
lines changed

IGC/AdaptorCommon/ProcessFuncAttributes.cpp

+22-5
Original file line number | Diff line number | Diff line change
@@ -885,10 +885,27 @@ bool InsertDummyKernelForSymbolTable::runOnModule(Module& M)
885885
ModuleMetaData* modMD = mduw.getModuleMetaData();
886886
CodeGenContext* pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
887887

888-
// Conservatively create a dummy kernel as long as there are function calls
889-
// Later (in EmitVISAPass.cpp) we can skip compiling this kernel if no
890-
// symbol table is created for it.
891-
if (pCtx->enableFunctionCall())
888+
bool needDummyKernel = false;
889+
890+
// Creates an empty dummy kernel.
891+
// This kernel will only be used for creating the symbol table.
892+
// All indirectly called functions will also be attached to this kernel's binary.
893+
if (IGC_IS_FLAG_ENABLED(EnableFunctionPointer) &&
894+
pCtx->type == ShaderType::OPENCL_SHADER)
895+
{
896+
if (pCtx->m_enableFunctionPointer)
897+
{
898+
// Symbols are needed for external functions and function pointers
899+
needDummyKernel = true;
900+
}
901+
else if (!modMD->inlineProgramScopeOffsets.empty())
902+
{
903+
// Create one also if global variables are present and require symbols
904+
needDummyKernel = true;
905+
}
906+
}
907+
908+
if (needDummyKernel)
892909
{
893910
// Create empty kernel function
894911
IGC_ASSERT(IGC::getIntelSymbolTableVoidProgram(&M) == nullptr);
@@ -908,7 +925,7 @@ bool InsertDummyKernelForSymbolTable::runOnModule(Module& M)
908925

909926
// Promote SIMD size information from kernels, which has indirectly called
910927
// functions. All such functions will be connected to the default kernel in
911-
// GenCodeGenModule.cpp
928+
// GenCodeGenModule.cpp (addIndirectFuncsToKernelGroup)
912929
for (auto I = M.begin(), E = M.end(); I != E; ++I)
913930
{
914931
Function* F = &(*I);

IGC/Compiler/CISACodeGen/GenCodeGenModule.cpp

+1-38
Original file line number | Diff line number | Diff line change
@@ -149,25 +149,6 @@ void GenXCodeGenModule::processFunction(Function& F)
149149
}
150150

151151
IGC_ASSERT(CallerFGs.size() >= 1);
152-
153-
// Get the cloning threshold. If the number of function groups a function belongs to
154-
// exceeds the threshold, instead of cloning the function N times, make it an indirect call
155-
// and use relocation instead. The function will only be compiled once and runtime must relocate
156-
// its address for each caller. This greatly saves on compile time when there are many function
157-
// groups that all call the same function.
158-
auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
159-
if (cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
160-
{
161-
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
162-
auto IFG = FGA->getIndirectCallGroup();
163-
IGC_ASSERT(IFG);
164-
F.addFnAttr("referenced-indirectly");
165-
F.addFnAttr("visaStackCall");
166-
pCtx->m_enableFunctionPointer = true;
167-
FGA->addToFunctionGroup(&F, IFG, &F);
168-
return;
169-
}
170-
171152
bool FirstPair = true;
172153
for (auto FGPair : CallerFGs)
173154
{
@@ -228,25 +209,6 @@ void GenXCodeGenModule::processSCC(std::vector<llvm::CallGraphNode*>* SCCNodes)
228209
}
229210
}
230211
IGC_ASSERT(CallerFGs.size() >= 1);
231-
232-
// Use the same cloning threshold for single function SCCs, but making every stack function
233-
// in the SCC indirect calls to prevent cloning the entire SCC N times.
234-
auto cloneTheshold = IGC_GET_FLAG_VALUE(FunctionCloningThreshold);
235-
if (cloneTheshold > 0 && CallerFGs.size() > cloneTheshold)
236-
{
237-
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
238-
for (CallGraphNode* Node : (*SCCNodes))
239-
{
240-
Function* F = Node->getFunction();
241-
auto IFG = FGA->getIndirectCallGroup();
242-
IGC_ASSERT(IFG && F->hasFnAttribute("visaStackCall"));
243-
F->addFnAttr("referenced-indirectly");
244-
pCtx->m_enableFunctionPointer = true;
245-
FGA->addToFunctionGroup(F, IFG, F);
246-
}
247-
return;
248-
}
249-
250212
bool FirstPair = true;
251213
for (auto FG : CallerFGs)
252214
{
@@ -640,6 +602,7 @@ void GenXFunctionGroupAnalysis::addIndirectFuncsToKernelGroup(llvm::Module* pMod
640602
Function* F = &(*I);
641603
if (F->isDeclaration() || isEntryFunc(pMdUtils, F)) continue;
642604

605+
// Add non-used function to default group
643606
if (F->hasFnAttribute("referenced-indirectly") || F->getNumUses() == 0)
644607
{
645608
IGC_ASSERT(getGroup(F) == nullptr);

IGC/Compiler/CISACodeGen/GenCodeGenModule.h

-4
Original file line number | Diff line number | Diff line change
@@ -241,10 +241,6 @@ namespace IGC {
241241
return FG != nullptr && FG == IndirectCallGroup;
242242
}
243243

244-
FunctionGroup* getIndirectCallGroup() {
245-
return IndirectCallGroup;
246-
}
247-
248244
/// \brief Check whether this is a group header.
249245
bool isGroupHead(llvm::Function* F) {
250246
return getGroupForHead(F) != nullptr;

IGC/common/igc_flags.def

-4
Original file line number | Diff line number | Diff line change
@@ -360,10 +360,6 @@ DECLARE_IGC_REGKEY(bool, EnableLTODebug, false, "Enable debug inf
360360
DECLARE_IGC_REGKEY(DWORD, FunctionControl, 0, "Control function inlining/subroutine/stackcall. See value defs in igc_flags.hpp.", true)
361361
DECLARE_IGC_REGKEY(bool, EnableStackCallFuncCall, false, "If enabled, the default function call mode will be set to stack call. Otherwise, subroutine call is used.", false)
362362
DECLARE_IGC_REGKEY(bool, ForceInlineStackCallWithImplArg, false, "If enabled, stack calls that uses implicit args will be force inlined.", true)
363-
DECLARE_IGC_REGKEY(DWORD, FunctionCloningThreshold, 0,
364-
"Limits how many times functions can be cloned when called from multiple function groups." \
365-
"If exceeding the cloning threshold, compile the function only once and use address relocation instead." \
366-
"A value of '0' means no limit on times it can be cloned", true)
367363
DECLARE_IGC_REGKEY(DWORD, OCLInlineThreshold, 512, "Setting OCL inline thershold", true)
368364
DECLARE_IGC_REGKEY(bool, DisableAddingAlwaysAttribute, false, "Disable adding always attribute", true)
369365
DECLARE_IGC_REGKEY(bool, EnableForceGroupSize, false, "Enable forcing thread Group Size ForceGroupSizeX and ForceGroupSizeY", false)

0 commit comments

Comments
 (0)