Skip to content

Commit ff88172

Browse files
committed
add -floop-fuse to clang and flang
1 parent 148c69d commit ff88172

File tree

15 files changed

+62
-2
lines changed

15 files changed

+62
-2
lines changed

clang/include/clang/Basic/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ CODEGENOPT(TimeTrace , 1, 0) ///< Set when -ftime-trace is enabled.
326326
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (in microseconds),
327327
///< traced by time profiler
328328
CODEGENOPT(InterchangeLoops , 1, 0) ///< Run loop-interchange.
329+
CODEGENOPT(FuseLoops , 1, 0) ///< Run loop-fuse.
329330
CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled.
330331
CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled.
331332
CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled.

clang/include/clang/Driver/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4189,6 +4189,10 @@ def floop_interchange : Flag<["-"], "floop-interchange">, Group<f_Group>,
41894189
HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
41904190
def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group<f_Group>,
41914191
HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
4192+
def floop_fuse : Flag<["-"], "floop-fuse">, Group<f_Group>,
4193+
HelpText<"Enable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
4194+
def fno_loop_fuse: Flag<["-"], "fno-loop-fuse">, Group<f_Group>,
4195+
HelpText<"Disable the loop fuse pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
41924196
def funroll_loops : Flag<["-"], "funroll-loops">, Group<f_Group>,
41934197
HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
41944198
def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group<f_Group>,

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
898898
PipelineTuningOptions PTO;
899899
PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;
900900
PTO.LoopInterchange = CodeGenOpts.InterchangeLoops;
901+
PTO.LoopFuse = CodeGenOpts.FuseLoops;
901902
// For historical reasons, loop interleaving is set to mirror setting for loop
902903
// unrolling.
903904
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
@@ -1339,6 +1340,7 @@ runThinLTOBackend(CompilerInstance &CI, ModuleSummaryIndex *CombinedIndex,
13391340
Conf.SampleProfile = std::move(SampleProfile);
13401341
Conf.PTO.LoopUnrolling = CGOpts.UnrollLoops;
13411342
Conf.PTO.LoopInterchange = CGOpts.InterchangeLoops;
1343+
Conf.PTO.LoopFuse = CGOpts.FuseLoops;
13421344
// For historical reasons, loop interleaving is set to mirror setting for loop
13431345
// unrolling.
13441346
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7030,6 +7030,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
70307030
options::OPT_fno_unroll_loops);
70317031
Args.AddLastArg(CmdArgs, options::OPT_floop_interchange,
70327032
options::OPT_fno_loop_interchange);
7033+
Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse);
70337034

70347035
Args.AddLastArg(CmdArgs, options::OPT_fstrict_flex_arrays_EQ);
70357036

clang/lib/Driver/ToolChains/CommonArgs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3157,7 +3157,7 @@ void tools::handleVectorizeSLPArgs(const ArgList &Args,
31573157

31583158
void tools::handleInterchangeLoopsArgs(const ArgList &Args,
31593159
ArgStringList &CmdArgs) {
3160-
// FIXME: instead of relying on shouldEnableVectorizerAtOLevel, we may want to
3160+
// FIXME: Instead of relying on shouldEnableVectorizerAtOLevel, we may want to
31613161
// implement a separate function to infer loop interchange from opt level.
31623162
// For now, enable loop-interchange at the same opt levels as loop-vectorize.
31633163
bool EnableInterchange = shouldEnableVectorizerAtOLevel(Args, false);

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ void Flang::addCodegenOptions(const ArgList &Args,
151151
!stackArrays->getOption().matches(options::OPT_fno_stack_arrays))
152152
CmdArgs.push_back("-fstack-arrays");
153153

154+
Args.AddLastArg(CmdArgs, options::OPT_floop_fuse, options::OPT_fno_loop_fuse);
155+
154156
handleInterchangeLoopsArgs(Args, CmdArgs);
155157
handleVectorizeLoopsArgs(Args, CmdArgs);
156158
handleVectorizeSLPArgs(Args, CmdArgs);

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,11 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
16481648
else
16491649
GenerateArg(Consumer, OPT_fno_loop_interchange);
16501650

1651+
if (Opts.FuseLoops)
1652+
GenerateArg(Consumer, OPT_floop_fuse);
1653+
else
1654+
GenerateArg(Consumer, OPT_fno_loop_fuse);
1655+
16511656
if (!Opts.BinutilsVersion.empty())
16521657
GenerateArg(Consumer, OPT_fbinutils_version_EQ, Opts.BinutilsVersion);
16531658

@@ -1963,6 +1968,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
19631968
(Opts.OptimizationLevel > 1));
19641969
Opts.InterchangeLoops =
19651970
Args.hasFlag(OPT_floop_interchange, OPT_fno_loop_interchange, false);
1971+
Opts.FuseLoops = Args.hasFlag(OPT_floop_fuse, OPT_fno_loop_fuse, false);
19661972
Opts.BinutilsVersion =
19671973
std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ));
19681974

clang/test/Driver/clang_f_opts.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@
5252
// CHECK-INTERCHANGE-LOOPS: "-floop-interchange"
5353
// CHECK-NO-INTERCHANGE-LOOPS: "-fno-loop-interchange"
5454

55+
// RUN: %clang -### -S -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
56+
// RUN: %clang -### -S -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s
57+
// RUN: %clang -### -S -fno-loop-fuse -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-FUSE-LOOPS %s
58+
// RUN: %clang -### -S -floop-fuse -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-FUSE-LOOPS %s
59+
// CHECK-FUSE-LOOPS: "-floop-fuse"
60+
// CHECK-NO-FUSE-LOOPS: "-fno-loop-fuse"
61+
5562
// RUN: %clang -### -S -fprofile-sample-accurate %s 2>&1 | FileCheck -check-prefix=CHECK-PROFILE-SAMPLE-ACCURATE %s
5663
// CHECK-PROFILE-SAMPLE-ACCURATE: "-fprofile-sample-accurate"
5764

flang/docs/ReleaseNotes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ page](https://llvm.org/releases/).
3434

3535
* -floop-interchange is now recognized by flang.
3636
* -floop-interchange is enabled by default at -O2 and above.
37+
* -floop-fuse is now recognized by flang. It is not enabled by default at any optimization level.
3738

3839
## Windows Support
3940

flang/include/flang/Frontend/CodeGenOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
3636
CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization.
3737
CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization.
3838
CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange.
39+
CODEGENOPT(FuseLoops, 1, 0) ///< Enable loop fuse.
3940
CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
4041
CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling
4142
CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
273273
if (args.getLastArg(clang::driver::options::OPT_floop_interchange))
274274
opts.InterchangeLoops = 1;
275275

276+
if (args.getLastArg(clang::driver::options::OPT_floop_fuse))
277+
opts.FuseLoops = 1;
278+
276279
if (args.getLastArg(clang::driver::options::OPT_vectorize_loops))
277280
opts.VectorizeLoop = 1;
278281

flang/lib/Frontend/FrontendActions.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
925925
si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
926926
pto.LoopUnrolling = opts.UnrollLoops;
927927
pto.LoopInterchange = opts.InterchangeLoops;
928+
pto.LoopFuse = opts.FuseLoops;
928929
pto.LoopInterleaving = opts.UnrollLoops;
929930
pto.LoopVectorization = opts.VectorizeLoop;
930931
pto.SLPVectorization = opts.VectorizeSLP;

flang/test/Driver/loop-fuse.f90

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
! RUN: %flang -### -S -floop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE %s
2+
! RUN: %flang -### -S -fno-loop-fuse %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
3+
! RUN: %flang -### -S -O0 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
4+
! RUN: %flang -### -S -O1 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
5+
! RUN: %flang -### -S -O2 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
6+
! RUN: %flang -### -S -O3 %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
7+
! RUN: %flang -### -S -Os %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
8+
! RUN: %flang -### -S -Oz %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE %s
9+
! CHECK-LOOP-FUSE: "-floop-fuse"
10+
! CHECK-NO-LOOP-FUSE-NOT: "-floop-fuse"
11+
! RUN: %flang_fc1 -emit-llvm -O2 -floop-fuse -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-LOOP-FUSE-PASS %s
12+
! RUN: %flang_fc1 -emit-llvm -O2 -fno-loop-fuse -mllvm -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck -check-prefix=CHECK-NO-LOOP-FUSE-PASS %s
13+
! CHECK-LOOP-FUSE-PASS: loop-fusion
14+
! CHECK-NO-LOOP-FUSE-PASS-NOT: loop-fusion
15+
16+
program test
17+
end program

llvm/include/llvm/Passes/PassBuilder.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class PipelineTuningOptions {
6464
/// false.
6565
bool LoopInterchange;
6666

67+
/// Tuning option to enable/disable loop fusion. Its default value is that of the flag: `-enable-loopfuse` (false unless set).
68+
bool LoopFuse;
69+
6770
/// Tuning option to forget all SCEV loops in LoopUnroll. Its default value
6871
/// is that of the flag: `-forget-scev-loop-unroll`.
6972
bool ForgetAllSCEVInLoopUnroll;

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
#include "llvm/Transforms/Scalar/LoopDeletion.h"
105105
#include "llvm/Transforms/Scalar/LoopDistribute.h"
106106
#include "llvm/Transforms/Scalar/LoopFlatten.h"
107+
#include "llvm/Transforms/Scalar/LoopFuse.h"
107108
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
108109
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
109110
#include "llvm/Transforms/Scalar/LoopInterchange.h"
@@ -205,6 +206,10 @@ static cl::opt<bool>
205206
EnableLoopInterchange("enable-loopinterchange", cl::init(false), cl::Hidden,
206207
cl::desc("Enable the LoopInterchange Pass"));
207208

209+
static cl::opt<bool> EnableLoopFuse("enable-loopfuse", cl::init(false),
210+
cl::Hidden,
211+
cl::desc("Enable the LoopFuse Pass"));
212+
208213
static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
209214
cl::init(false), cl::Hidden,
210215
cl::desc("Enable Unroll And Jam Pass"));
@@ -314,6 +319,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
314319
SLPVectorization = false;
315320
LoopUnrolling = true;
316321
LoopInterchange = EnableLoopInterchange;
322+
LoopFuse = EnableLoopFuse;
317323
ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
318324
LicmMssaOptCap = SetLicmMssaOptCap;
319325
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
@@ -518,6 +524,9 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
518524

519525
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
520526

527+
if (PTO.LoopFuse)
528+
FPM.addPass(LoopFusePass());
529+
521530
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
522531
/*UseMemorySSA=*/true,
523532
/*UseBlockFrequencyInfo=*/true));
@@ -709,6 +718,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
709718

710719
invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
711720

721+
if (PTO.LoopFuse)
722+
FPM.addPass(LoopFusePass());
723+
712724
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
713725
/*UseMemorySSA=*/true,
714726
/*UseBlockFrequencyInfo=*/true));
@@ -2112,7 +2124,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
21122124
LPM.addPass(LoopFlattenPass());
21132125
LPM.addPass(IndVarSimplifyPass());
21142126
LPM.addPass(LoopDeletionPass());
2115-
// FIXME: Add loop interchange.
21162127

21172128
// Unroll small loops and perform peeling.
21182129
LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),

0 commit comments

Comments
 (0)