diff --git a/lib/Analysis/Allocation.cpp b/lib/Analysis/Allocation.cpp
index 14563810e261..0d408e10c484 100644
--- a/lib/Analysis/Allocation.cpp
+++ b/lib/Analysis/Allocation.cpp
@@ -95,8 +95,8 @@ getScratchCvtInOutVecLengths(RankedTensorType srcTy, RankedTensorType dstTy) {
                           : srcContigPerThread;
   unsigned outVec = outOrd[0] != innerDim ? 1 : dstContigPerThread;
 
-  if (mlir::isa(srcLayout) &&
-      mlir::isa(dstLayout)) {
+  if (isa(srcLayout) &&
+      isa(dstLayout)) {
     // when storing from mma layout and loading in blocked layout vectorizing
     // the load back gives better performance even if there is a
     // transposition.
@@ -159,8 +159,8 @@ unsigned defaultAllocationAnalysisScratchSizeFn(Operation *op) {
     auto dstTy = cvtLayout.getType();
     auto srcEncoding = srcTy.getEncoding();
     auto dstEncoding = dstTy.getEncoding();
-    if (mlir::isa(srcEncoding) ||
-        mlir::isa(dstEncoding)) {
+    if (isa(srcEncoding) ||
+        isa(dstEncoding)) {
       // Conversions from/to shared memory do not need scratch memory.
       return 0;
     }
@@ -187,7 +187,7 @@ unsigned defaultAllocationAnalysisScratchSizeFn(Operation *op) {
     assert(!isa(elemTy) && "unexpected pointer type");
     return elems * std::max(8, elemTy.getIntOrFloatBitWidth()) / 8;
   }
-  if (auto createTensormap = dyn_cast(op)) {
+  if (isa(op)) {
     constexpr int32_t kTMASize = 128;
     return kTMASize;
   }
@@ -423,6 +423,22 @@ class AllocationAnalysis {
     }
   }
 
+  void dumpAllocationSize() {
+    LDBG("Dump shared memory allocation size -----------");
+    auto liveBuffers = allocation->getLiveBuffers();
+    auto analyzedSize = 0;
+    for (auto [op, bufferIds] : liveBuffers) {
+      auto size = 0;
+      for (auto bufferId : bufferIds) {
+        auto bufferSize = allocation->getAllocatedSize(bufferId);
+        size += bufferSize;
+      }
+      analyzedSize = std::max(analyzedSize, size);
+    }
+    llvm::dbgs() << "Allocated: " << allocation->sharedMemorySize
+                 << ", analyzed: " << analyzedSize << "\n";
+  }
+
   void dumpInterferenceGraph(const GraphT &interference) {
     LDBG("\n");
     LDBG("Dump interference graph: \n");
@@ -467,6 +483,8 @@ class AllocationAnalysis {
       allocate(buffers, interference);
       buildInterferenceGraph(buffers, interference);
     } while (!interference.empty());
+
+    LLVM_DEBUG(dumpAllocationSize());
   }
 
   /// Computes the initial shared memory offsets.
@@ -628,7 +646,7 @@ Allocation::getLiveBuffers() {
   std::map> liveBuffers;
 
   Operation *rootOperation = getOperation();
-  mlir::Liveness liveness(rootOperation);
+  Liveness liveness(rootOperation);
   auto analyzeOperation = [&](Operation *op) -> void {
     auto scratchBuffer = getBufferId(op);
     if (scratchBuffer != InvalidBufferId)
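
The substantive addition above is dumpAllocationSize(): under LLVM_DEBUG it prints the size the allocator actually reserved (allocation->sharedMemorySize) next to an "analyzed" size, computed as the maximum over all operations of the summed sizes of the buffers live at that operation. Below is a small standalone sketch of that reduction, using hypothetical stand-in types (OpId instead of Operation *, plain std::map/std::vector instead of the Triton data structures); it mirrors only the arithmetic of the debug dump, not the real Allocation API.

// Standalone sketch (hypothetical types, not the Triton API): compute the
// liveness-based peak that dumpAllocationSize() reports as "analyzed".
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

using OpId = int;     // stand-in for Operation *
using BufferId = int; // stand-in for Allocation::BufferId

// For every operation, sum the sizes of the buffers live there, then take
// the maximum across all operations.
int64_t peakLiveSize(const std::map<OpId, std::vector<BufferId>> &liveBuffers,
                     const std::map<BufferId, int64_t> &allocatedSize) {
  int64_t peak = 0;
  for (const auto &[op, ids] : liveBuffers) {
    (void)op; // the operation itself is not needed for the size computation
    int64_t size = 0;
    for (BufferId id : ids)
      size += allocatedSize.at(id); // size of one buffer live at this op
    peak = std::max(peak, size);    // track the worst-case operation
  }
  return peak;
}

int main() {
  // Op 0 has buffers 0 and 1 live; op 1 has only buffer 1 live.
  std::map<OpId, std::vector<BufferId>> live = {{0, {0, 1}}, {1, {1}}};
  std::map<BufferId, int64_t> sizes = {{0, 1024}, {1, 512}};
  std::cout << "analyzed: " << peakLiveSize(live, sizes) << "\n"; // prints 1536
}

If the two numbers in the debug output differ, the gap presumably comes from alignment padding or fragmentation introduced when buffer offsets are packed, which is what the new dump makes easy to spot.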