Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,7 @@ LANGOPT(OpenMPOptimisticCollapse , 1, 0, NotCompatible, "Use at most 32 bits to
LANGOPT(OpenMPThreadSubscription , 1, 0, NotCompatible, "Assume work-shared loops do not have more iterations than participating threads.")
LANGOPT(OpenMPTeamSubscription , 1, 0, NotCompatible, "Assume distributed loops do not have more iterations than participating teams.")
LANGOPT(OpenMPNoThreadState , 1, 0, NotCompatible, "Assume that no thread in a parallel region will modify an ICV.")
LANGOPT(OpenMPNoNestedParallelism , 1, 0, NotCompatible, "Assume that no thread in a parallel region will encounter a parallel region")
LANGOPT(OpenMPOffloadMandatory , 1, 0, NotCompatible, "Assert that offloading is mandatory and do not create a host fallback.")
LANGOPT(OpenMPNoNestedParallelism , 1, 0, NotCompatible, "Assume that no thread in a parallel region will encounter a parallel region")
LANGOPT(OpenMPOffloadMandatory , 1, 0, NotCompatible, "Assert that offloading is mandatory and do not create a host fallback.")
LANGOPT(OpenMPForceUSM , 1, 0, NotCompatible, "Enable OpenMP unified shared memory mode via compiler.")
LANGOPT(OpenMPKernelIO , 1, 1, NotCompatible, "Enable OpenMP host-exec Device IO.")

Expand Down
37 changes: 19 additions & 18 deletions clang/include/clang/Options/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -3942,16 +3942,6 @@ def fopenmp_gpu_threads_per_team_EQ : Joined<["-"], "fopenmp-gpu-threads-per-tea
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fopenmp_target_xteam_reduction_blocksize_EQ : Joined<["-"], "fopenmp-target-xteam-reduction-blocksize=">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>;
def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>;
def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">, Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>;
def fopenmp_target_ignore_env_vars : Flag<["-"], "fopenmp-target-ignore-env-vars">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>,
HelpText<"Assert that device related environment variables can be ignored while generating code">,
MarshallingInfoFlag<LangOpts<"OpenMPTargetIgnoreEnvVars">>;
def fno_openmp_target_ignore_env_vars : Flag<["-"], "fno-openmp-target-ignore-env-vars">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>,
HelpText<"Assert that device related environment variables cannot be ignored while generating code">,
MarshallingInfoFlag<LangOpts<"OpenMPTargetIgnoreEnvVars">>;
def fopenmp_target_big_jump_loop : Flag<["-"], "fopenmp-target-big-jump-loop">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option, FlangOption]>,
HelpText<"Use the big-jump-loop code generation technique if possible">,
Expand Down Expand Up @@ -4080,20 +4070,16 @@ def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-te
HelpText<"Do not assume teams oversubscription.">;
def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">,
HelpText<"Do not assume threads oversubscription.">;
def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Assert that a thread in a parallel region may modify an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
def fopenmp_assume_no_thread_state : Flag<["-"], "fopenmp-assume-no-thread-state">,
HelpText<"Assert no thread in a parallel region modifies an ICV">,
MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
def fno_openmp_assume_no_thread_state : Flag<["-"], "fno-openmp-assume-no-thread-state">,
HelpText<"Assert that a thread in a parallel region may modify an ICV">;
def fopenmp_assume_no_nested_parallelism : Flag<["-"], "fopenmp-assume-no-nested-parallelism">,
HelpText<"Assert no nested parallel regions in the GPU">,
MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">, Group<f_Group>,
Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Assert that a nested parallel region may be used in the GPU">,
MarshallingInfoFlag<LangOpts<"OpenMPNoNestedParallelism">>;
def fno_openmp_assume_no_nested_parallelism : Flag<["-"], "fno-openmp-assume-no-nested-parallelism">,
HelpText<"Assert that a nested parallel region may be used in the GPU">;

} // let Group = f_Group
} // let Visibility = [ClangOption, CC1Option, FC1Option]
Expand All @@ -4117,6 +4103,21 @@ def fopenmp_target_new_runtime : Flag<["-"], "fopenmp-target-new-runtime">,
Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
def fno_openmp_target_new_runtime : Flag<["-"], "fno-openmp-target-new-runtime">,
Group<f_Group>, Flags<[HelpHidden]>, Visibility<[ClangOption, CC1Option]>;
// -fopenmp-target-ignore-env-vars: hidden from help output; the positive form
// is marshalled directly into LangOpts.OpenMPTargetIgnoreEnvVars.
def fopenmp_target_ignore_env_vars : Flag<["-"], "fopenmp-target-ignore-env-vars">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
Visibility<[ClangOption, CC1Option, FlangOption]>,
HelpText<"Assume that the OpenMP runtime can ignore environment variables during code generation for GPU offload">,
MarshallingInfoFlag<LangOpts<"OpenMPTargetIgnoreEnvVars">>;
// Negative form: declared without HelpText or MarshallingInfoFlag, so this
// record by itself does not write any LangOpts field.
def fno_openmp_target_ignore_env_vars : Flag<["-"], "fno-openmp-target-ignore-env-vars">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
Visibility<[ClangOption, CC1Option, FlangOption]>;
// -fopenmp-target-fast: convenience flag; this record carries no
// MarshallingInfoFlag, so it is not bound to a LangOpts field here.
def fopenmp_target_fast : Flag<["-"], "fopenmp-target-fast">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
Visibility<[ClangOption, CC1Option, FlangOption]>,
HelpText<"Convenience flag to enable aggressive OpenMP GPU optimizations">;
// Negative counterpart of -fopenmp-target-fast; no HelpText or marshalling.
def fno_openmp_target_fast : Flag<["-"], "fno-openmp-target-fast">,
Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>,
Visibility<[ClangOption, CC1Option, FlangOption]>;
defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option]>,
Expand Down
59 changes: 49 additions & 10 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6763,6 +6763,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fno_offload_via_llvm, false) &&
(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_OpenMP))) {

// Determine if target-fast optimizations should be enabled
bool TargetFastUsed =
Args.hasFlag(options::OPT_fopenmp_target_fast,
options::OPT_fno_openmp_target_fast, OFastEnabled);
switch (D.getOpenMPRuntime(Args)) {
case Driver::OMPRT_OMP:
case Driver::OMPRT_IOMP5:
Expand Down Expand Up @@ -6879,19 +6884,53 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
/*Default=*/false))
CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");

if (Args.hasFlag(options::OPT_fopenmp_assume_no_thread_state,
options::OPT_fno_openmp_assume_no_thread_state,
isTargetFastUsed(Args)))
// Handle -fopenmp-target-fast
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_target_fast,
options::OPT_fno_openmp_target_fast)) {
if (A->getOption().matches(options::OPT_fopenmp_target_fast))
CmdArgs.push_back("-fopenmp-target-fast");
else
CmdArgs.push_back("-fno-openmp-target-fast");
} else if (OFastEnabled) {
CmdArgs.push_back("-fopenmp-target-fast");
}

// Handle -fopenmp-target-ignore-env-vars (implied by target-fast)
if (Arg *A =
Args.getLastArg(options::OPT_fopenmp_target_ignore_env_vars,
options::OPT_fno_openmp_target_ignore_env_vars)) {
if (A->getOption().matches(options::OPT_fopenmp_target_ignore_env_vars))
CmdArgs.push_back("-fopenmp-target-ignore-env-vars");
else
CmdArgs.push_back("-fno-openmp-target-ignore-env-vars");
} else if (TargetFastUsed) {
CmdArgs.push_back("-fopenmp-target-ignore-env-vars");
}

// Handle -fopenmp-assume-no-thread-state (implied by target-fast)
if (Arg *A =
Args.getLastArg(options::OPT_fopenmp_assume_no_thread_state,
options::OPT_fno_openmp_assume_no_thread_state)) {
if (A->getOption().matches(options::OPT_fopenmp_assume_no_thread_state))
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
else
CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
} else if (TargetFastUsed) {
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
else
CmdArgs.push_back("-fno-openmp-assume-no-thread-state");
}

if (Args.hasFlag(options::OPT_fopenmp_assume_no_nested_parallelism,
options::OPT_fno_openmp_assume_no_nested_parallelism,
isTargetFastUsed(Args)))
// Handle -fopenmp-assume-no-nested-parallelism (implied by target-fast)
if (Arg *A = Args.getLastArg(
options::OPT_fopenmp_assume_no_nested_parallelism,
options::OPT_fno_openmp_assume_no_nested_parallelism)) {
if (A->getOption().matches(
options::OPT_fopenmp_assume_no_nested_parallelism))
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
else
CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
} else if (TargetFastUsed) {
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
else
CmdArgs.push_back("-fno-openmp-assume-no-nested-parallelism");
}

if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
CmdArgs.push_back("-fopenmp-offload-mandatory");
Expand Down
Loading