diff --git a/lib/Analysis/Allocation.cpp b/lib/Analysis/Allocation.cpp
index 14563810e261..0d408e10c484 100644
--- a/lib/Analysis/Allocation.cpp
+++ b/lib/Analysis/Allocation.cpp
@@ -95,8 +95,8 @@ getScratchCvtInOutVecLengths(RankedTensorType srcTy, RankedTensorType dstTy) {
                           : srcContigPerThread;
   unsigned outVec = outOrd[0] != innerDim ? 1 : dstContigPerThread;
 
-  if (mlir::isa(srcLayout) &&
-      mlir::isa(dstLayout)) {
+  if (isa(srcLayout) &&
+      isa(dstLayout)) {
     // when storing from mma layout and loading in blocked layout vectorizing
     // the load back gives better performance even if there is a
     // transposition.
@@ -159,8 +159,8 @@ unsigned defaultAllocationAnalysisScratchSizeFn(Operation *op) {
     auto dstTy = cvtLayout.getType();
     auto srcEncoding = srcTy.getEncoding();
     auto dstEncoding = dstTy.getEncoding();
-    if (mlir::isa(srcEncoding) ||
-        mlir::isa(dstEncoding)) {
+    if (isa(srcEncoding) ||
+        isa(dstEncoding)) {
       // Conversions from/to shared memory do not need scratch memory.
       return 0;
     }
@@ -187,7 +187,7 @@ unsigned defaultAllocationAnalysisScratchSizeFn(Operation *op) {
     assert(!isa(elemTy) && "unexpected pointer type");
     return elems * std::max(8, elemTy.getIntOrFloatBitWidth()) / 8;
   }
-  if (auto createTensormap = dyn_cast(op)) {
+  if (isa(op)) {
     constexpr int32_t kTMASize = 128;
     return kTMASize;
   }
@@ -423,6 +423,22 @@ class AllocationAnalysis {
     }
   }
 
+  void dumpAllocationSize() {
+    LDBG("Dump shared memory allocation size -----------");
+    auto liveBuffers = allocation->getLiveBuffers();
+    auto analyzedSize = 0;
+    for (auto [op, bufferIds] : liveBuffers) {
+      auto size = 0;
+      for (auto bufferId : bufferIds) {
+        auto bufferSize = allocation->getAllocatedSize(bufferId);
+        size += bufferSize;
+      }
+      analyzedSize = std::max(analyzedSize, size);
+    }
+    llvm::dbgs() << "Allocated: " << allocation->sharedMemorySize
+                 << ", analyzed: " << analyzedSize << "\n";
+  }
+
   void dumpInterferenceGraph(const GraphT &interference) {
     LDBG("\n");
     LDBG("Dump interference graph: \n");
@@ -467,6 +483,8 @@ class AllocationAnalysis {
       allocate(buffers, interference);
       buildInterferenceGraph(buffers, interference);
     } while (!interference.empty());
+
+    LLVM_DEBUG(dumpAllocationSize());
   }
 
   /// Computes the initial shared memory offsets.
@@ -628,7 +646,7 @@ Allocation::getLiveBuffers() {
   std::map> liveBuffers;
 
   Operation *rootOperation = getOperation();
-  mlir::Liveness liveness(rootOperation);
+  Liveness liveness(rootOperation);
   auto analyzeOperation = [&](Operation *op) -> void {
     auto scratchBuffer = getBufferId(op);
     if (scratchBuffer != InvalidBufferId)
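
The substantive addition above is dumpAllocationSize(): under LLVM_DEBUG it prints the size the allocator actually reserved (allocation->sharedMemorySize) next to an "analyzed" size, computed as the maximum over all operations of the summed sizes of the buffers live at that operation. Below is a small standalone sketch of that reduction, using hypothetical stand-in types (OpId instead of Operation *, plain std::map/std::vector instead of the Triton data structures); it mirrors only the arithmetic of the debug dump, not the real Allocation API.

// Standalone sketch (hypothetical types, not the Triton API): compute the
// liveness-based peak that dumpAllocationSize() reports as "analyzed".
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

using OpId = int;     // stand-in for Operation *
using BufferId = int; // stand-in for Allocation::BufferId

// For every operation, sum the sizes of the buffers live there, then take
// the maximum across all operations.
int64_t peakLiveSize(const std::map<OpId, std::vector<BufferId>> &liveBuffers,
                     const std::map<BufferId, int64_t> &allocatedSize) {
  int64_t peak = 0;
  for (const auto &[op, ids] : liveBuffers) {
    (void)op; // the operation itself is not needed for the size computation
    int64_t size = 0;
    for (BufferId id : ids)
      size += allocatedSize.at(id); // size of one buffer live at this op
    peak = std::max(peak, size);    // track the worst-case operation
  }
  return peak;
}

int main() {
  // Op 0 has buffers 0 and 1 live; op 1 has only buffer 1 live.
  std::map<OpId, std::vector<BufferId>> live = {{0, {0, 1}}, {1, {1}}};
  std::map<BufferId, int64_t> sizes = {{0, 1024}, {1, 512}};
  std::cout << "analyzed: " << peakLiveSize(live, sizes) << "\n"; // prints 1536
}

If the two numbers in the debug output differ, the gap presumably comes from alignment padding or fragmentation introduced when buffer offsets are packed, which is what the new dump makes easy to spot.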