Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Pipeliner] Enable automatic loop fusion #5726

Merged
merged 39 commits into from
Feb 7, 2025
Merged
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
7998b9d
tmp
Mogball Jan 23, 2025
89db13c
almost
Mogball Jan 24, 2025
0365583
axisinfo for poison op
Mogball Jan 24, 2025
90930f0
persistent with loop fusion
Mogball Jan 24, 2025
d6fb02d
I cry
Mogball Jan 25, 2025
8d3f108
omg it works
Mogball Jan 25, 2025
bf1fce8
remove mlir files
Mogball Jan 25, 2025
ef11332
check persistent matmul perf
Mogball Jan 28, 2025
e422788
remove unused include
Mogball Jan 28, 2025
daca556
remove invalid bench
Mogball Jan 28, 2025
5864048
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Jan 28, 2025
262383d
fix conflict
Mogball Jan 28, 2025
b423b45
fix lit tests
Mogball Jan 28, 2025
d4e9c70
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Jan 28, 2025
89a1ab9
fix pipeline
Mogball Jan 28, 2025
94cef6a
xd
Mogball Jan 28, 2025
62eaecb
it actually works
Mogball Jan 30, 2025
00bf302
fix tests
Mogball Jan 30, 2025
ff1578b
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Feb 4, 2025
49061e0
fmt
Mogball Feb 4, 2025
93abd41
add unit test
Mogball Feb 4, 2025
6f9626f
fix crash in pipeliner
Mogball Feb 4, 2025
e60a8fa
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Feb 4, 2025
d352ba6
fix
Mogball Feb 4, 2025
5eb5481
test poison axisinfo
Mogball Feb 5, 2025
cdeacc0
add tests for everything, switch to flag
Mogball Feb 5, 2025
55b7088
add licm for earlier archs
Mogball Feb 5, 2025
e69df10
fmt
Mogball Feb 5, 2025
67fd006
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Feb 5, 2025
d63f3da
update
Mogball Feb 5, 2025
c86718b
skip test for AMD
Mogball Feb 5, 2025
f4ee2fb
extract fix
Mogball Feb 6, 2025
f19c78c
fix crash in tma pipeline
Mogball Feb 6, 2025
119fc50
rename fuse to flatten
Mogball Feb 6, 2025
da567b0
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Feb 6, 2025
7aa0302
remove unused var
Mogball Feb 6, 2025
2a263fd
add a regression test
Mogball Feb 6, 2025
db7ddbc
fmt
Mogball Feb 6, 2025
643a3ad
Merge remote-tracking branch 'origin/main' into mogball/automatic_fuse
Mogball Feb 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
persistent with loop fusion
Mogball committed Jan 24, 2025
commit 90930f0b1d25ce915a2cd3a992aeb11d0460f091
172 changes: 95 additions & 77 deletions lib/Dialect/TritonGPU/Transforms/FuseNestedLoops.cpp

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -125,11 +125,9 @@ loadOpsToIndirectionLevel(scf::ForOp forOp, bool pipelineWithoutDot,
[&](Operation *op, Operation *finalUser, int distance) {
if (!seen.insert(op).second || excluded.count(op))
return;
op->dump();
if (isa<tt::LoadOp, tt::ExperimentalDescriptorLoadOp>(op)) {
if (!isPipeliningBeneficial(op, finalUser, axisInfoAnalysis))
return;
op->dump();
if (loadOpToIndLevel.count(op)) {
int level = loadOpToIndLevel[op];
if (level != distance) {
@@ -170,7 +168,6 @@ loadOpsToIndirectionLevel(scf::ForOp forOp, bool pipelineWithoutDot,
continue;
seenDot = true;
seen.clear();
op.dump();
dfs(&op, &op, 0);
}

@@ -232,7 +229,6 @@ DenseMap<Operation *, int> assignLatencies(ModuleOp moduleOp,

DenseMap<Operation *, int> opLatency;
for (auto forOp : loops) {
forOp.dump();
if (hasLatenciesAssigned(forOp)) {
assignUserProvidedLatencies(forOp, opLatency);
continue;
Original file line number Diff line number Diff line change
@@ -248,7 +248,11 @@ bool LoopPipelinerInternal::verifySchedule() {
continue;
int64_t producerCycle = it->second;
if (consumerCycle < producerCycle - numCylesPerIter * distance) {
consumer->emitError("operation scheduled before its operands");
InFlightDiagnostic diag =
consumer->emitError("operation scheduled before its operands");
diag.attachNote(producer->getLoc())
.append("operand defined here: ")
.appendOp(*producer, OpPrintingFlags().printGenericOpForm());
return false;
}
}
381 changes: 0 additions & 381 deletions orig.mlir

This file was deleted.

1 change: 1 addition & 0 deletions python/src/passes.cc
Original file line number Diff line number Diff line change
@@ -71,6 +71,7 @@ void init_triton_passes_ttgpuir(py::module &&m) {
createTritonGPUCombineTensorSelectAndIf);
ADD_PASS_WRAPPER_0("add_optimize_accumulator_init",
createTritonGPUOptimizeAccumulatorInit);
ADD_PASS_WRAPPER_0("add_fuse_nested_loops", createTritonGPUFuseNestedLoops);
ADD_PASS_OPTION_WRAPPER_1("add_loop_scheduling",
createTritonGPULoopScheduling, int);
ADD_PASS_WRAPPER_0("add_coalesce_async_copy",
11 changes: 5 additions & 6 deletions python/tutorials/09-persistent-matmul.py
Original file line number Diff line number Diff line change
@@ -259,7 +259,6 @@ def matmul_persistent(a, b):
num_warps=configs[dtype]["num_warps"], #
grid=grid
)
print(kernel.asm["ttir"])
return c


@@ -610,8 +609,8 @@ def show_profile(precision, profile_name):
validate(32, 32, 32, dtype)
validate(8192, 8192, 512, dtype)

#proton.start("matmul", hook="triton")
#for K in range(args.K_range[0], args.K_range[1] + 1, args.K_step):
# bench(K, dtype)
#proton.finalize()
#show_profile(args.prec, "matmul")
proton.start("matmul", hook="triton")
for K in range(args.K_range[0], args.K_range[1] + 1, args.K_step):
bench(K, dtype)
proton.finalize()
show_profile(args.prec, "matmul")
177 changes: 0 additions & 177 deletions test.mlir

This file was deleted.

Loading