Skip to content

Commit 03ead06

Browse files
committed
[flang][fir] Add affine optimization pass pipeline.
1 parent cd6c4b6 commit 03ead06

File tree

6 files changed

+74
-2
lines changed

6 files changed

+74
-2
lines changed

flang/include/flang/Optimizer/Passes/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ extern llvm::cl::opt<bool> disableCfgConversion;
4242
extern llvm::cl::opt<bool> disableFirAvc;
4343
extern llvm::cl::opt<bool> disableFirMao;
4444

45+
extern llvm::cl::opt<bool> enableAffineOpt;
4546
extern llvm::cl::opt<bool> disableFirAliasTags;
4647
extern llvm::cl::opt<bool> useOldAliasTags;
4748

flang/include/flang/Optimizer/Passes/Pipelines.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
#include "flang/Optimizer/Passes/CommandLineOpts.h"
1919
#include "flang/Optimizer/Transforms/Passes.h"
2020
#include "flang/Tools/CrossToolHelpers.h"
21-
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
22-
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
21+
#include "mlir/Conversion/Passes.h"
22+
#include "mlir/Dialect/Affine/Passes.h"
2323
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
2424
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
2525
#include "mlir/Pass/PassManager.h"

flang/lib/Optimizer/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_flang_library(flangPasses
2121
MLIRPass
2222
MLIRReconcileUnrealizedCasts
2323
MLIRSCFToControlFlow
24+
MLIRSCFToOpenMP
2425
MLIRSupport
2526
MLIRTransforms
2627
)

flang/lib/Optimizer/Passes/CommandLineOpts.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ cl::opt<bool> useOldAliasTags(
5555
cl::desc("Use a single TBAA tree for all functions and do not use "
5656
"the FIR alias tags pass"),
5757
cl::init(false), cl::Hidden);
58+
EnableOption(AffineOpt, "affine-opt", "affine optimization");
5859

5960
/// CodeGen Passes
6061
DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,23 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
211211

212212
addNestedPassToAllTopLevelOperations<PassConstructor>(
213213
pm, fir::createStackReclaim);
214+
215+
if (enableAffineOpt && pc.OptLevel.isOptimizingForSpeed()) {
216+
pm.addPass(fir::createPromoteToAffinePass());
217+
pm.addPass(mlir::createCSEPass());
218+
pm.addPass(mlir::affine::createAffineLoopInvariantCodeMotionPass());
219+
pm.addPass(mlir::affine::createAffineLoopNormalizePass());
220+
pm.addPass(mlir::affine::createSimplifyAffineStructuresPass());
221+
pm.addPass(mlir::affine::createAffineParallelize(
222+
mlir::affine::AffineParallelizeOptions{1, false}));
223+
pm.addPass(fir::createAffineDemotionPass());
224+
pm.addPass(mlir::createLowerAffinePass());
225+
if (pc.EnableOpenMP) {
226+
pm.addPass(mlir::createConvertSCFToOpenMPPass());
227+
pm.addPass(mlir::createCanonicalizerPass());
228+
}
229+
}
230+
214231
// convert control flow to CFG form
215232
fir::addCfgConversionPass(pm, pc);
216233
pm.addPass(mlir::createSCFToControlFlowPass());

flang/test/Lower/OpenMP/auto-omp.f90

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
2+
! RUN: | FileCheck %s
3+
4+
subroutine foo(a)
5+
integer, dimension(100, 100), intent(out) :: a
6+
a = 1
7+
end subroutine foo
8+
9+
!CHECK-LABEL: entry:
10+
!CHECK: %[[VAL_0:.*]] = alloca { ptr }, align 8
11+
!CHECK: %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
12+
!CHECK: store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
13+
!CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
14+
!CHECK: ret void
15+
!CHECK: omp.par.entry:
16+
!CHECK: %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
17+
!CHECK: %[[VAL_5:.*]] = alloca i32, align 4
18+
!CHECK: %[[VAL_6:.*]] = alloca i64, align 8
19+
!CHECK: %[[VAL_7:.*]] = alloca i64, align 8
20+
!CHECK: %[[VAL_8:.*]] = alloca i64, align 8
21+
!CHECK: store i64 0, ptr %[[VAL_6]], align 8
22+
!CHECK: store i64 99, ptr %[[VAL_7]], align 8
23+
!CHECK: store i64 1, ptr %[[VAL_8]], align 8
24+
!CHECK: %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
25+
!CHECK: call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
26+
!CHECK: %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
27+
!CHECK: %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
28+
!CHECK: %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
29+
!CHECK: %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
30+
!CHECK: br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
31+
!CHECK: omp_loop.exit: ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
32+
!CHECK: call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
33+
!CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
34+
!CHECK: call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
35+
!CHECK: ret void
36+
!CHECK: omp_loop.body: ; preds = %[[VAL_17]], %[[VAL_16]]
37+
!CHECK: %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
38+
!CHECK: %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
39+
!CHECK: %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
40+
!CHECK: %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
41+
!CHECK: br label %[[VAL_24:.*]]
42+
!CHECK: omp_loop.inc: ; preds = %[[VAL_24]]
43+
!CHECK: %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
44+
!CHECK: %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
45+
!CHECK: br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
46+
!CHECK: omp.loop_nest.region6: ; preds = %[[VAL_15]], %[[VAL_24]]
47+
!CHECK: %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
48+
!CHECK: %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
49+
!CHECK: store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
50+
!CHECK: %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
51+
!CHECK: %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
52+
!CHECK: br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]

0 commit comments

Comments
 (0)