EnzymeAD · mofeing · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025 · Aug 14, 2025
diff --git a/src/enzyme_ad/jax/Passes/LowerEnzymeXLALapack.cpp b/src/enzyme_ad/jax/Passes/LowerEnzymeXLALapack.cpp
@@ -137,7 +137,7 @@ struct GeqrfOpLowering : public OpRewritePattern<enzymexla::GeqrfOp> {
       // `101` for row-major, `102` for col-major
       auto layout = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
-          rewriter.getIntegerAttr(type_lapack_int, 101));
+          rewriter.getIntegerAttr(type_lapack_int, 102));
       auto m = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
           rewriter.getIntegerAttr(type_lapack_int, inputShape[0]));
@@ -183,7 +183,8 @@ struct GeqrfOpLowering : public OpRewritePattern<enzymexla::GeqrfOp> {
 
     SmallVector<Attribute> aliases;
     for (int i = 0; i < 3; ++i) {
-      aliases.push_back(stablehlo::OutputOperandAliasAttr::get(ctx, {}, i, {}));
+      aliases.push_back(
+          stablehlo::OutputOperandAliasAttr::get(ctx, {i}, i, {}));
     }
 
     auto jit_call_op = rewriter.create<enzymexla::JITCallOp>(
@@ -411,7 +412,7 @@ struct GeqrtOpLowering : public OpRewritePattern<enzymexla::GeqrtOp> {
       // `101` for row-major, `102` for col-major
       auto layout = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
-          rewriter.getIntegerAttr(type_lapack_int, 101));
+          rewriter.getIntegerAttr(type_lapack_int, 102));
       auto m = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
           rewriter.getIntegerAttr(type_lapack_int, inputShape[0]));
@@ -472,8 +473,8 @@ struct GeqrtOpLowering : public OpRewritePattern<enzymexla::GeqrtOp> {
         op.getLoc(), type_T, cast<ElementsAttr>(makeAttr(type_T, 0)));
 
     SmallVector<bool> isColMajorArr = {true, true, true};
-    SmallVector<int64_t> operandRanks = {2, 1, 0};
-    SmallVector<int64_t> outputRanks = {2, 1, 0};
+    SmallVector<int64_t> operandRanks = {2, 2, 0};
+    SmallVector<int64_t> outputRanks = {2, 2, 0};
     auto operandLayouts =
         getSHLOLayout(rewriter, operandRanks, isColMajorArr, 2);
     auto resultLayouts = getSHLOLayout(rewriter, outputRanks, isColMajorArr, 2);
@@ -622,7 +623,7 @@ struct OrgqrOpLowering : public OpRewritePattern<enzymexla::OrgqrOp> {
       // `101` for row-major, `102` for col-major
       auto layout = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
-          rewriter.getIntegerAttr(type_lapack_int, 101));
+          rewriter.getIntegerAttr(type_lapack_int, 102));
       auto mC = inputShape[0];
       auto m = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
@@ -663,7 +664,7 @@ struct OrgqrOpLowering : public OpRewritePattern<enzymexla::OrgqrOp> {
     auto resultLayouts = getSHLOLayout(rewriter, outputRanks, isColMajorArr, 2);
 
     SmallVector<Attribute> aliases;
-    aliases.push_back(stablehlo::OutputOperandAliasAttr::get(ctx, {0}, 0, {}));
+    aliases.push_back(stablehlo::OutputOperandAliasAttr::get(ctx, {}, 0, {}));
 
     auto jit_call_op = rewriter.create<enzymexla::JITCallOp>(
         op.getLoc(), TypeRange{inputType},
@@ -933,7 +934,7 @@ struct OrmqrOpLowering : public OpRewritePattern<enzymexla::OrmqrOp> {
       // `101` for row-major, `102` for col-major
       auto layout = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
-          rewriter.getIntegerAttr(type_lapack_int, 101));
+          rewriter.getIntegerAttr(type_lapack_int, 102));
 
       auto side = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_char,
@@ -1200,7 +1201,7 @@ struct GemqrtOpLowering : public OpRewritePattern<enzymexla::GemqrtOp> {
       // `101` for row-major, `102` for col-major
       auto layout = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_lapack_int,
-          rewriter.getIntegerAttr(type_lapack_int, 101));
+          rewriter.getIntegerAttr(type_lapack_int, 102));
 
       auto side = rewriter.create<LLVM::ConstantOp>(
           op.getLoc(), type_llvm_char,

diff --git a/test/lit_tests/linalg/gemqrt_square.mlir b/test/lit_tests/linalg/gemqrt_square.mlir
@@ -8,7 +8,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(76 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(78 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(64 : i64) : i64
@@ -30,7 +30,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(76 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(84 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(64 : i64) : i64
@@ -52,7 +52,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(82 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(78 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(48 : i64) : i64
@@ -74,7 +74,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(82 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(84 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(48 : i64) : i64

diff --git a/test/lit_tests/linalg/gemqrt_tall.mlir b/test/lit_tests/linalg/gemqrt_tall.mlir
@@ -8,7 +8,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(76 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(78 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(64 : i64) : i64
@@ -31,7 +31,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(76 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(84 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(64 : i64) : i64
@@ -54,7 +54,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(82 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(78 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(48 : i64) : i64
@@ -77,7 +77,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgemqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(82 : i8) : i8
 // CPU-NEXT:    %2 = llvm.mlir.constant(84 : i8) : i8
 // CPU-NEXT:    %3 = llvm.mlir.constant(48 : i64) : i64

diff --git a/test/lit_tests/linalg/geqrf_square.mlir b/test/lit_tests/linalg/geqrf_square.mlir
@@ -10,7 +10,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(64 : i64) : i64
 // CPU-NEXT:    %2 = llvm.call @enzymexla_lapacke_sgeqrf_(%0, %1, %1, %arg0, %1, %arg1) : (i64, i64, i64, !llvm.ptr, i64, !llvm.ptr) -> i64
 // CPU-NEXT:    llvm.store %2, %arg2 : i64, !llvm.ptr
@@ -20,7 +20,7 @@ module {
 // CPU-NEXT:  func.func @main(%arg0: tensor<64x64xf32>) -> (tensor<64x64xf32>, tensor<64xf32>, tensor<i64>) {
 // CPU-NEXT:    %c = stablehlo.constant dense<-1> : tensor<i64>
 // CPU-NEXT:    %cst = stablehlo.constant dense<0.000000e+00> : tensor<64xf32>
-// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x64xf32>, tensor<64xf32>, tensor<i64>) -> (tensor<64x64xf32>, tensor<64xf32>, tensor<i64>)
+// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x64xf32>, tensor<64xf32>, tensor<i64>) -> (tensor<64x64xf32>, tensor<64xf32>, tensor<i64>)
 // CPU-NEXT:    return %0#0, %0#1, %0#2 : tensor<64x64xf32>, tensor<64xf32>, tensor<i64>
 // CPU-NEXT:  }
 

diff --git a/test/lit_tests/linalg/geqrf_tall_thin.mlir b/test/lit_tests/linalg/geqrf_tall_thin.mlir
@@ -10,7 +10,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(64 : i64) : i64
 // CPU-NEXT:    %2 = llvm.mlir.constant(32 : i64) : i64
 // CPU-NEXT:    %3 = llvm.call @enzymexla_lapacke_sgeqrf_(%0, %1, %2, %arg0, %1, %arg1) : (i64, i64, i64, !llvm.ptr, i64, !llvm.ptr) -> i64
@@ -21,7 +21,7 @@ module {
 // CPU-NEXT:  func.func @main(%arg0: tensor<64x32xf32>) -> (tensor<64x32xf32>, tensor<32xf32>, tensor<i64>) {
 // CPU-NEXT:    %c = stablehlo.constant dense<-1> : tensor<i64>
 // CPU-NEXT:    %cst = stablehlo.constant dense<0.000000e+00> : tensor<32xf32>
-// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x32xf32>, tensor<32xf32>, tensor<i64>) -> (tensor<64x32xf32>, tensor<32xf32>, tensor<i64>)
+// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x32xf32>, tensor<32xf32>, tensor<i64>) -> (tensor<64x32xf32>, tensor<32xf32>, tensor<i64>)
 // CPU-NEXT:    return %0#0, %0#1, %0#2 : tensor<64x32xf32>, tensor<32xf32>, tensor<i64>
 // CPU-NEXT:  }
 

diff --git a/test/lit_tests/linalg/geqrf_wide_thin.mlir b/test/lit_tests/linalg/geqrf_wide_thin.mlir
@@ -10,7 +10,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(32 : i64) : i64
 // CPU-NEXT:    %2 = llvm.mlir.constant(64 : i64) : i64
 // CPU-NEXT:    %3 = llvm.call @enzymexla_lapacke_sgeqrf_(%0, %1, %2, %arg0, %1, %arg1) : (i64, i64, i64, !llvm.ptr, i64, !llvm.ptr) -> i64
@@ -21,7 +21,7 @@ module {
 // CPU-NEXT:  func.func @main(%arg0: tensor<32x64xf32>) -> (tensor<32x64xf32>, tensor<32xf32>, tensor<i64>) {
 // CPU-NEXT:    %c = stablehlo.constant dense<-1> : tensor<i64>
 // CPU-NEXT:    %cst = stablehlo.constant dense<0.000000e+00> : tensor<32xf32>
-// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<32x64xf32>, tensor<32xf32>, tensor<i64>) -> (tensor<32x64xf32>, tensor<32xf32>, tensor<i64>)
+// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrf_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<32x64xf32>, tensor<32xf32>, tensor<i64>) -> (tensor<32x64xf32>, tensor<32xf32>, tensor<i64>)
 // CPU-NEXT:    return %0#0, %0#1, %0#2 : tensor<32x64xf32>, tensor<32xf32>, tensor<i64>
 // CPU-NEXT:  }
 

diff --git a/test/lit_tests/linalg/geqrt_square.mlir b/test/lit_tests/linalg/geqrt_square.mlir
@@ -8,7 +8,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(64 : i64) : i64
 // CPU-NEXT:    %2 = llvm.call @enzymexla_lapacke_sgeqrt_(%0, %1, %1, %1, %arg0, %1, %arg1, %1) : (i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64) -> i64
 // CPU-NEXT:    llvm.store %2, %arg2 : i64, !llvm.ptr
@@ -18,7 +18,7 @@ module {
 // CPU-NEXT:  func.func @main(%arg0: tensor<64x64xf32>) -> (tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>) {
 // CPU-NEXT:    %c = stablehlo.constant dense<-1> : tensor<i64>
 // CPU-NEXT:    %cst = stablehlo.constant dense<0.000000e+00> : tensor<64x64xf32>
-// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>) -> (tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>)
+// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<[0, 1]> : tensor<2xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<[0, 1]> : tensor<2xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>) -> (tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>)
 // CPU-NEXT:    return %0#0, %0#1, %0#2 : tensor<64x64xf32>, tensor<64x64xf32>, tensor<i64>
 // CPU-NEXT:  }
 

diff --git a/test/lit_tests/linalg/geqrt_tall_thin.mlir b/test/lit_tests/linalg/geqrt_tall_thin.mlir
@@ -8,7 +8,7 @@ module {
 }
 
 // CPU:  llvm.func @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID:[0-9]+]](%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
-// CPU-NEXT:    %0 = llvm.mlir.constant(101 : i64) : i64
+// CPU-NEXT:    %0 = llvm.mlir.constant(102 : i64) : i64
 // CPU-NEXT:    %1 = llvm.mlir.constant(64 : i64) : i64
 // CPU-NEXT:    %2 = llvm.mlir.constant(32 : i64) : i64
 // CPU-NEXT:    %3 = llvm.call @enzymexla_lapacke_sgeqrt_(%0, %1, %2, %2, %arg0, %1, %arg1, %2) : (i64, i64, i64, i64, !llvm.ptr, i64, !llvm.ptr, i64) -> i64
@@ -19,7 +19,7 @@ module {
 // CPU-NEXT:  func.func @main(%arg0: tensor<64x32xf32>) -> (tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>) {
 // CPU-NEXT:    %c = stablehlo.constant dense<-1> : tensor<i64>
 // CPU-NEXT:    %cst = stablehlo.constant dense<0.000000e+00> : tensor<32x32xf32>
-// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<0> : tensor<1xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>) -> (tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>)
+// CPU-NEXT:    %0:3 = enzymexla.jit_call @enzymexla_wrapper_lapacke_sgeqrt_[[WRAPPER_ID]] (%arg0, %cst, %c) {operand_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<[0, 1]> : tensor<2xindex>, dense<> : tensor<0xindex>], output_operand_aliases = [#stablehlo.output_operand_alias<output_tuple_indices = [0], operand_index = 0, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [1], operand_index = 1, operand_tuple_indices = []>, #stablehlo.output_operand_alias<output_tuple_indices = [2], operand_index = 2, operand_tuple_indices = []>], result_layouts = [dense<[0, 1]> : tensor<2xindex>, dense<[0, 1]> : tensor<2xindex>, dense<> : tensor<0xindex>], xla_side_effect_free} : (tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>) -> (tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>)
 // CPU-NEXT:    return %0#0, %0#1, %0#2 : tensor<64x32xf32>, tensor<32x32xf32>, tensor<i64>
 // CPU-NEXT:  }