forked from CRobeck/instrument-amdgpu-kernels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
InjectAMDGCNSharedMemTtrace.cpp
136 lines (128 loc) · 6.41 KB
/
InjectAMDGCNSharedMemTtrace.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#include "InjectAMDGCNSharedMemTtrace.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <iostream>
using namespace llvm;
/// Command-line option `-instrument-amdgpu-function`: name of the single
/// kernel to instrument. Defaults to the empty string, which runOnModule
/// treats as "instrument every AMDGPU kernel in the module".
static cl::opt<std::string> InstrumentAMDGPUFunction(
    "instrument-amdgpu-function",
    cl::desc("AMDGPU function to instrument"),
    cl::init(""));
bool InjectAMDGCNSharedMemTtrace::runOnModule(Module &M) {
bool ModifiedCodeGen = false;
auto &CTX = M.getContext();
bool DebugInfoWarningPrinted = false;
IRBuilder<> ModuleBuilder(CTX);
// This is the actual variable value that gets inserted in the Inline ASM
Value *TtraceCounter = ModuleBuilder.getInt32(0);
// This is the internal counter in the compiler pass. These two will not match
// currently b/c unrolled loops will copy/inline the InlineASM version not the
// internal compiler counter.
unsigned CounterInt = 0;
for (auto &F : M) {
if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
if(F.getName() == InstrumentAMDGPUFunction || InstrumentAMDGPUFunction.empty()){
for (Function::iterator BB = F.begin(); BB != F.end(); BB++) {
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); I++) {
// Shared memory reads
if (auto LI = dyn_cast<LoadInst>(I)) {
Value *Op = LI->getPointerOperand()->stripPointerCasts();
unsigned AddrSpace =
cast<PointerType>(Op->getType())->getAddressSpace();
if (AddrSpace == 3) {
if (DILocation *DL = dyn_cast<Instruction>(I)->getDebugLoc()) {
std::string SourceInfo =
(F.getName() + "\t" + DL->getFilename() + ":" +
Twine(DL->getLine()) + ":" + Twine(DL->getColumn()))
.str();
errs() << CounterInt << "\t" << SourceInfo << "\n";
} else {
if (!DebugInfoWarningPrinted) {
errs() << "warning: no debug info found, did you forget to "
"add -ggdb?\n";
DebugInfoWarningPrinted = true;
}
}
IRBuilder<> Builder(dyn_cast<Instruction>(I));
Builder.SetInsertPoint(dyn_cast<Instruction>(std::next(I,-1)));
FunctionType *FTy =
FunctionType::get(Type::getInt32Ty(CTX), true);
std::string AsmString = "s_mov_b32 $0 m0\n"
"s_mov_b32 m0 $1\n"
"s_nop 0\n";
InlineAsm *InlineAsmFunc =
InlineAsm::get(FTy, AsmString, "=s,s", true);
Builder.CreateCall(InlineAsmFunc, {TtraceCounter});
Builder.SetInsertPoint(dyn_cast<Instruction>(std::next(I,1)));
Builder.CreateCall(InlineAsm::get(FTy,"s_ttracedata\n""s_mov_b32 m0 $0\n""s_add_i32 $1 $1 1\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n","=s,s", true),{TtraceCounter});
CounterInt++;
}
}
// Shared memory writes
if (auto SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getPointerOperand()->stripPointerCasts();
unsigned AddrSpace =
cast<PointerType>(Op->getType())->getAddressSpace();
if (AddrSpace == 3) {
if (DILocation *DL = dyn_cast<Instruction>(I)->getDebugLoc()) {
std::string SourceInfo =
(F.getName() + "\t" + DL->getFilename() + ":" +
Twine(DL->getLine()) + ":" + Twine(DL->getColumn()))
.str();
errs() << CounterInt << "\t" << SourceInfo << "\n";
} else {
if (!DebugInfoWarningPrinted) {
errs() << "warning: no debug info found, did you forget to "
"add -ggdb?\n";
DebugInfoWarningPrinted = true;
}
}
IRBuilder<> Builder(dyn_cast<Instruction>(I));
Builder.SetInsertPoint(dyn_cast<Instruction>(std::next(I,-1)));
FunctionType *FTy =
FunctionType::get(Type::getInt32Ty(CTX), true);
std::string AsmString = "s_mov_b32 $0 m0\n"
"s_mov_b32 m0 $1\n"
"s_nop 0\n";
InlineAsm *InlineAsmFunc =
InlineAsm::get(FTy, AsmString, "=s,s", true);
Builder.CreateCall(InlineAsmFunc, {TtraceCounter});
Builder.SetInsertPoint(dyn_cast<Instruction>(std::next(I,1)));
Builder.CreateCall(InlineAsm::get(FTy,"s_ttracedata\n""s_mov_b32 m0 $0\n""s_add_i32 $1 $1 1\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n""s_nop 15\n"
"s_nop 15\n","=s,s", true),{TtraceCounter});
CounterInt++;
}
}
}
} // End of instructions in AMDGCN kernel loop
errs() << "Injected LDS Load/Store s_ttrace instructions at "
<< CounterInt << " source locations\n";
ModifiedCodeGen = true;
}
} // End of if AMDGCN Kernel
} // End of functions in module loop
return ModifiedCodeGen;
}
/// Builds the plugin descriptor for this pass: API version, the plugin name
/// "inject-amdgcn-lds-ttrace", the LLVM version string, and a callback that
/// schedules InjectAMDGCNSharedMemTtrace at the optimizer-last extension
/// point of the module pipeline.
PassPluginLibraryInfo getPassPluginInfo() {
  const auto RegisterCallbacks = [](PassBuilder &PB) {
    // The lambda is stored by the PassBuilder and captures nothing. The
    // trailing pack swallows whatever extra arguments the extension point
    // supplies, so the lambda remains callable if the callback signature of
    // the LLVM version in use has more than one trailing parameter
    // (NOTE(review): confirm against the targeted LLVM release).
    PB.registerOptimizerLastEPCallback(
        [](ModulePassManager &MPM, auto...) {
          MPM.addPass(InjectAMDGCNSharedMemTtrace());
        });
  };
  return {LLVM_PLUGIN_API_VERSION, "inject-amdgcn-lds-ttrace",
          LLVM_VERSION_STRING, RegisterCallbacks};
}
/// Plugin entry point with C linkage and weak attribute; it forwards to
/// getPassPluginInfo() and returns that descriptor unchanged.
extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() {
  PassPluginLibraryInfo PluginInfo = getPassPluginInfo();
  return PluginInfo;
}