Skip to content

Commit e2936cf

Browse files
Graban, Danielsys_zuul
Graban, Daniel
authored and
sys_zuul
committed
Merge between components.
Change-Id: If967c8f14d5d9c63fa9f118d7f6c6deecc598d36
1 parent 4837bfc commit e2936cf

9 files changed

+187
-22
lines changed

IGC/Compiler/CISACodeGen/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
7878
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.cpp"
7979
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.cpp"
8080
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.cpp"
81+
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.cpp"
8182
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.cpp"
8283
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.cpp"
8384
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.cpp"
@@ -166,6 +167,7 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
166167
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.hpp"
167168
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderCodeGen.hpp"
168169
"${CMAKE_CURRENT_SOURCE_DIR}/VertexShaderLowering.hpp"
170+
"${CMAKE_CURRENT_SOURCE_DIR}/ComputeShaderLowering.hpp"
169171
"${CMAKE_CURRENT_SOURCE_DIR}/WIAnalysis.hpp"
170172
"${CMAKE_CURRENT_SOURCE_DIR}/SLMConstProp.hpp"
171173
"${CMAKE_CURRENT_SOURCE_DIR}/POSH_RemoveNonPositionOutput.h"

IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.cpp

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -125,27 +125,31 @@ void CComputeShader::ParseShaderSpecificOpcode(llvm::Instruction* inst)
125125
}
126126
}
127127

128-
void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize)
128+
void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength)
129129
{
130+
typedef uint16_t ThreadPayloadEntry;
131+
130132
// Find the max thread group dimension
131133
const OctEltUnit SIZE_OF_DQWORD = OctEltUnit(2);
134+
const OctEltUnit SIZE_OF_OWORD = OctEltUnit(1);
132135
uint numberOfId = GetNumberOfId();
133136
uint dimX = numLanes(m_dispatchSize);
134-
uint dimY = (iSTD::Align(m_threadGroupSize, dimX)/dimX) * numberOfId;
135-
136-
typedef uint ThreadPayloadEntry;
137-
138-
uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(DWORD); // Oct Element is 8 DWORDS
137+
// dimX must align to alignment_X bytes (one GRF)
138+
uint alignment_X = EltUnit(SIZE_OF_OWORD).Count() * sizeof(DWORD);
139+
uint dimX_aligned = iSTD::Align(dimX * sizeof(ThreadPayloadEntry), alignment_X) / sizeof(ThreadPayloadEntry);
140+
uint dimY = (iSTD::Align(m_threadGroupSize, dimX) / dimX) * numberOfId;
141+
curbeReadLength = dimX_aligned * numberOfId * sizeof(ThreadPayloadEntry) / alignment_X;
139142

143+
uint alignedVal = EltUnit(SIZE_OF_DQWORD).Count() * sizeof(ThreadPayloadEntry); // Oct Element is 8 Entries
140144
// m_NOSBufferSize is the additional space for cross-thread constant data (constants set by driver).
141-
threadPayloadSize = iSTD::Align( dimX * dimY * sizeof( ThreadPayloadEntry ) + m_NOSBufferSize, alignedVal );
145+
curbeTotalDataLength = iSTD::Align(dimX_aligned * dimY * sizeof(ThreadPayloadEntry) + m_NOSBufferSize, alignedVal);
142146

143147
assert(pThreadPayload == nullptr && "Thread payload should be a null variable");
144148

145-
unsigned threadPayloadEntries = threadPayloadSize / sizeof(ThreadPayloadEntry);
149+
unsigned threadPayloadEntries = curbeTotalDataLength / sizeof(ThreadPayloadEntry);
146150

147151
ThreadPayloadEntry* pThreadPayloadMem =
148-
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries* sizeof(ThreadPayloadEntry), 16);
152+
(ThreadPayloadEntry*)IGC::aligned_malloc(threadPayloadEntries * sizeof(ThreadPayloadEntry), 16);
149153
std::fill(pThreadPayloadMem, pThreadPayloadMem + threadPayloadEntries, 0);
150154

151155
pThreadPayload = pThreadPayloadMem;
@@ -169,17 +173,17 @@ void CComputeShader::CreateThreadPayloadData(void* & pThreadPayload, uint& threa
169173
uint lane = 0;
170174
if(m_pThread_ID_in_Group_X)
171175
{
172-
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadX;
176+
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadX;
173177
lane++;
174178
}
175179
if(m_pThread_ID_in_Group_Y)
176180
{
177-
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadY;
181+
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadY;
178182
lane++;
179183
}
180184
if(m_pThread_ID_in_Group_Z)
181185
{
182-
pThreadPayloadMem[(y + lane) * dimX + x] = currThreadZ;
186+
pThreadPayloadMem[(y + lane) * dimX_aligned + x] = currThreadZ;
183187
lane++;
184188
}
185189

@@ -259,19 +263,19 @@ CVariable* CComputeShader::CreateThreadIDinGroup(uint channelNum)
259263
case 0:
260264
if(m_pThread_ID_in_Group_X == nullptr)
261265
{
262-
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
266+
m_pThread_ID_in_Group_X = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
263267
}
264268
return m_pThread_ID_in_Group_X;
265269
case 1:
266270
if(m_pThread_ID_in_Group_Y == nullptr)
267271
{
268-
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
272+
m_pThread_ID_in_Group_Y = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
269273
}
270274
return m_pThread_ID_in_Group_Y;
271275
case 2:
272276
if(m_pThread_ID_in_Group_Z == nullptr)
273277
{
274-
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_D, EALIGN_GRF, false, m_numberInstance);
278+
m_pThread_ID_in_Group_Z = GetNewVariable(numLanes(m_SIMDSize), ISA_TYPE_W, EALIGN_GRF, false, m_numberInstance);
275279
}
276280
return m_pThread_ID_in_Group_Z;
277281
default:
@@ -335,6 +339,7 @@ void CComputeShader::AllocatePayload()
335339
{
336340
AllocateInput(m_pThread_ID_in_Group_X, offset, i);
337341
offset += m_pThread_ID_in_Group_X->GetSize();
342+
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_X->GetAlign()]);
338343
}
339344
}
340345

@@ -344,6 +349,7 @@ void CComputeShader::AllocatePayload()
344349
{
345350
AllocateInput(m_pThread_ID_in_Group_Y, offset, i);
346351
offset += m_pThread_ID_in_Group_Y->GetSize();
352+
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Y->GetAlign()]);
347353
}
348354
}
349355

@@ -353,6 +359,7 @@ void CComputeShader::AllocatePayload()
353359
{
354360
AllocateInput(m_pThread_ID_in_Group_Z, offset, i);
355361
offset += m_pThread_ID_in_Group_Z->GetSize();
362+
offset = iSTD::Round(offset, alignmentSize[m_pThread_ID_in_Group_Z->GetAlign()]);
356363
}
357364
}
358365

@@ -466,8 +473,6 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
466473
pKernelProgram->FloatingPointMode = USC::GFX3DSTATE_FLOATING_POINT_IEEE_754;
467474
pKernelProgram->SingleProgramFlow = USC::GFX3DSTATE_PROGRAM_FLOW_MULTIPLE;
468475
pKernelProgram->CurbeReadOffset = 0;
469-
pKernelProgram->CurbeReadLength = GetNumberOfId() * (numLanes(m_dispatchSize) / numLanes(SIMDMode::SIMD8));
470-
471476
pKernelProgram->PhysicalThreadsInGroup = static_cast<int>(
472477
std::ceil((static_cast<float>(m_threadGroupSize) /
473478
static_cast<float>((numLanes(m_dispatchSize))))));
@@ -487,7 +492,8 @@ void CComputeShader::FillProgram(SComputeShaderKernelProgram* pKernelProgram)
487492
pKernelProgram->ThreadPayloadData = nullptr;
488493
CreateThreadPayloadData(
489494
pKernelProgram->ThreadPayloadData,
490-
pKernelProgram->CurbeTotalDataLength);
495+
pKernelProgram->CurbeTotalDataLength,
496+
pKernelProgram->CurbeReadLength);
491497

492498
pKernelProgram->ThreadGroupSize = m_threadGroupSize;
493499

IGC/Compiler/CISACodeGen/ComputeShaderCodeGen.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class CComputeShader : public CShader
4343
void FillProgram(SComputeShaderKernelProgram* pKernelProgram);
4444
void PreCompile() override;
4545
void ExtractGlobalVariables() override;
46-
void CreateThreadPayloadData(void* & pThreadPayload, uint& threadPayloadSize);
46+
void CreateThreadPayloadData(void* & pThreadPayload, uint& curbeTotalDataLength, uint& curbeReadLength);
4747
uint GetNumberOfId();
4848
void ParseShaderSpecificOpcode(llvm::Instruction* inst) override;
4949

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
#include "ComputeShaderLowering.hpp"
27+
#include "IGCPassSupport.h"
28+
#include "GenISAIntrinsics/GenIntrinsicInst.h"
29+
#include "AdaptorCommon/ImplicitArgs.hpp"
30+
#include "common/LLVMWarningsPush.hpp"
31+
#include "llvm/IR/Function.h"
32+
#include "common/LLVMWarningsPop.hpp"
33+
34+
using namespace llvm;
35+
using namespace IGC;
36+
using namespace IGC::IGCMD;
37+
38+
class ComputeShaderLowering : public FunctionPass
39+
{
40+
public:
41+
ComputeShaderLowering() : FunctionPass(ID) {}
42+
virtual bool runOnFunction(Function &F) override;
43+
virtual llvm::StringRef getPassName() const override
44+
{
45+
return "ComputeShaderLowering";
46+
}
47+
virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override
48+
{
49+
AU.setPreservesCFG();
50+
AU.addRequired<CodeGenContextWrapper>();
51+
}
52+
static char ID;
53+
protected:
54+
Function* m_function = nullptr;
55+
void shortenThreadID(GenIntrinsicInst& inst, Function &F);
56+
};
57+
58+
char ComputeShaderLowering::ID = 0;
59+
60+
bool ComputeShaderLowering::runOnFunction(Function &F)
61+
{
62+
for(auto BI = F.begin(), BE = F.end(); BI != BE; BI++)
63+
{
64+
for(auto II = BI->begin(), IE = BI->end(); II != IE; II++)
65+
{
66+
if(GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(II))
67+
{
68+
if(inst->getIntrinsicID() == GenISAIntrinsic::GenISA_DCL_SystemValue)
69+
{
70+
shortenThreadID(*inst, F);
71+
}
72+
}
73+
}
74+
}
75+
76+
return true;
77+
}
78+
79+
void ComputeShaderLowering::shortenThreadID(GenIntrinsicInst& inst, Function &F)
80+
{
81+
SGVUsage usage =
82+
static_cast<SGVUsage>(llvm::cast<llvm::ConstantInt>(inst.getOperand(0))->getZExtValue());
83+
if (THREAD_ID_IN_GROUP_X != usage &&
84+
THREAD_ID_IN_GROUP_Y != usage &&
85+
THREAD_ID_IN_GROUP_Z != usage
86+
)
87+
{
88+
return;
89+
}
90+
91+
llvm::Module* module = F.getParent();
92+
IRBuilder<> builder(&inst);
93+
llvm::Value* vSGV = builder.getInt32(usage);
94+
llvm::Function* funcSGV = llvm::GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_DCL_SystemValue, builder.getInt16Ty());
95+
llvm::Value* vSGVCreate = builder.CreateCall(funcSGV, vSGV);
96+
vSGVCreate = builder.CreateZExtOrTrunc(vSGVCreate, builder.getInt32Ty());
97+
vSGVCreate = builder.CreateBitCast(vSGVCreate, inst.getType());
98+
inst.replaceAllUsesWith(vSGVCreate);
99+
100+
return;
101+
}
102+
103+
// Pass registration for ComputeShaderLowering plus the factory used to add
// the pass to a pass manager.
namespace IGC {
104+
// Arguments forwarded to the IGC_INITIALIZE_PASS_* macros below.
#define PASS_FLAG "igc-compute-shader-lowering"
105+
#define PASS_DESCRIPTION "This is the compute shader lowering pass "
106+
#define PASS_CFG_ONLY false
107+
// NOTE(review): registered with the analysis flag set to true although the
// pass rewrites instructions -- confirm this is intended.
#define PASS_ANALYSIS true
108+
IGC_INITIALIZE_PASS_BEGIN(ComputeShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
109+
// Matches the addRequired<CodeGenContextWrapper>() in getAnalysisUsage().
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
110+
IGC_INITIALIZE_PASS_END(ComputeShaderLowering, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
111+
112+
// Returns a newly allocated ComputeShaderLowering pass.
// Presumably the caller (a pass manager) takes ownership -- verify at call sites.
FunctionPass* CreateComputeShaderLowering()
113+
{
114+
return new ComputeShaderLowering();
115+
}
116+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
#pragma once
27+
#include "common/LLVMWarningsPush.hpp"
28+
#include <llvm/Pass.h>
29+
#include "common/LLVMWarningsPop.hpp"
30+
31+
namespace IGC
32+
{
33+
// Factory for the compute shader lowering function pass; returns a pointer
// to a new pass object.
llvm::FunctionPass* CreateComputeShaderLowering();
34+
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6779,18 +6779,21 @@ void EmitPass::emitCSSGV(GenIntrinsicInst* inst)
67796779
}
67806780
case THREAD_ID_IN_GROUP_X:
67816781
{
6782+
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
67826783
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(0);
67836784
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
67846785
break;
67856786
}
67866787
case THREAD_ID_IN_GROUP_Y:
67876788
{
6789+
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
67886790
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(1);
67896791
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
67906792
break;
67916793
}
67926794
case THREAD_ID_IN_GROUP_Z:
67936795
{
6796+
assert(inst->getType() == Type::getInt16Ty(inst->getContext()) && "only 16bit ThreadID is supported now.");
67946797
pThreadIdInGroup = csProgram->CreateThreadIDinGroup(2);
67956798
m_currShader->CopyVariable(m_destination, pThreadIdInGroup);
67966799
break;

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7272
#include "Compiler/CISACodeGen/LowerGEPForPrivMem.hpp"
7373
#include "Compiler/CISACodeGen/POSH_RemoveNonPositionOutput.h"
7474
#include "Compiler/CISACodeGen/RegisterEstimator.hpp"
75+
#include "Compiler/CISACodeGen/ComputeShaderLowering.hpp"
7576

7677
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
7778
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
@@ -650,6 +651,9 @@ inline void AddLegalizationPasses(CodeGenContext &ctx, IGCPassManager& mpm)
650651
case ShaderType::DOMAIN_SHADER:
651652
mpm.add(createDomainShaderLoweringPass());
652653
break;
654+
case ShaderType::COMPUTE_SHADER:
655+
mpm.add(CreateComputeShaderLowering());
656+
break;
653657
default:
654658
break;
655659
}

IGC/GenISAIntrinsics/Intrinsic_definitions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@
143143
# (dwordAttributeOrSetupIndex, e_interpolation_PSOnly)->anyvector
144144
"GenISA_DCL_ShaderInputVec": ["anyvector",["int","int"],"NoMem"],
145145
"GenISA_DCL_GSinputVec": ["float4",["int","int"],"NoMem"],
146-
"GenISA_DCL_SystemValue": ["anyfloat",["int"],"NoMem"],
146+
"GenISA_DCL_SystemValue": ["any:float",["int"],"NoMem"],
147147
"GenISA_SampleOffsetX": ["float",["int"],"NoMem"],
148148
"GenISA_SampleOffsetY": ["float",["int"],"NoMem"],
149149
"GenISA_PixelPositionX": ["short",[],"NoMem"],

visa/Common_ISA.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class G4_Declare;
9494
#define COMMON_ISA_GRF_REG_SIZE (getGRFSize()) /// # of bytes in a CISA GRF register
9595

9696
#define COMMON_ISA_MAX_ADDRREG_WIDTH 8
97-
#define COMMON_ISA_MAX_FILENAME_LENGTH 255
97+
#define COMMON_ISA_MAX_FILENAME_LENGTH 1023
9898

9999
#define COMMON_ISA_MAX_KERNEL_NAME_LEN 255
100100
#define COMMON_ISA_MAX_ADDRESS_OFFSET 4096
@@ -693,7 +693,7 @@ typedef struct _CISA_INST
693693
dst = *((type *) &buf[byte_pos]); \
694694
byte_pos += sizeof(type);
695695

696-
#define STRING_LEN 512
696+
#define STRING_LEN 1024
697697

698698
struct Common_ISA_Attribute{
699699
char* name;

0 commit comments

Comments
 (0)