Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][NATIVECPU] Fix local scope module variables for native cpu #15280

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
#include "compiler/utils/builtin_info.h"
#include "compiler/utils/define_mux_builtins_pass.h"
#include "compiler/utils/device_info.h"
#include "compiler/utils/encode_kernel_metadata_pass.h"
#include "compiler/utils/prepare_barriers_pass.h"
#include "compiler/utils/replace_local_module_scope_variables_pass.h"
#include "compiler/utils/sub_group_analysis.h"
#include "compiler/utils/work_item_loops_pass.h"
#include "vecz/pass.h"
Expand Down Expand Up @@ -60,6 +62,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
OptimizationLevel OptLevel) {
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
#ifdef NATIVECPU_USE_OCK
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
// Always enable vectorizer, unless explictly disabled or -O0 is set.
if (OptLevel != OptimizationLevel::O0 && !SYCLNativeCPUNoVecz) {
MAM.registerPass([] { return vecz::TargetInfoAnalysis(); });
Expand Down Expand Up @@ -87,6 +90,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
MAM.registerPass([] { return compiler::utils::SubgroupAnalysis(); });
MPM.addPass(compiler::utils::PrepareBarriersPass());
MPM.addPass(compiler::utils::WorkItemLoopsPass(Opts));
MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass());
MPM.addPass(AlwaysInlinerPass());
#endif
MPM.addPass(PrepareSYCLNativeCPUPass());
Expand Down
48 changes: 11 additions & 37 deletions llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,47 +338,21 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
SmallSet<Function *, 5> RemovableFuncs;
SmallVector<Function *, 5> WrapperFuncs;

// Retrieve the wrapper functions created by the WorkItemLoop pass.
for (auto &OldF : OldKernels) {
std::optional<compiler::utils::LinkMetadataResult> VeczR =
compiler::utils::parseVeczToOrigFnLinkMetadata(*OldF);
if (VeczR && VeczR.value().first) {
WrapperFuncs.push_back(OldF);
} else {
auto Name = compiler::utils::getBaseFnNameOrFnName(*OldF);
if (Name != OldF->getName()) {
WrapperFuncs.push_back(OldF);
}
}
}

for (auto &OldF : WrapperFuncs) {
// If vectorization occurred, at this point we have a wrapper function
// that runs the vectorized kernel and peels using the scalar kernel. We
// make it so this wrapper steals the original kernel name.
std::optional<compiler::utils::LinkMetadataResult> VeczR =
compiler::utils::parseVeczToOrigFnLinkMetadata(*OldF);
if (VeczR && VeczR.value().first) {
auto ScalarF = VeczR.value().first;
OldF->takeName(ScalarF);
if (ScalarF->use_empty())
RemovableFuncs.insert(ScalarF);
} else {
// The WorkItemLoops pass created a wrapper function for the original
// kernel. If we have a kernel named foo(), the wrapper will be called
// foo-wrapper(), and will have the original kernel name retrieved by
// getBaseFnNameOrFnName. We set the name of the wrapper function
// to the original kernel name and add the original kernel to the
// list of functions that can be removed from the module.
auto Name = compiler::utils::getBaseFnNameOrFnName(*OldF);
Function *OrigF = M.getFunction(Name);
// that runs the vectorized kernel and peels using the scalar kernel.
// There may also be a wrapper for local variables replacement. We make it
// so this wrapper steals the original kernel name. Otherwise we will have
// a wrapper function from the work item loops. In this case we also steal
// the original kernel name.
auto Name = compiler::utils::getOrigFnName(*OldF);
Function *OrigF = M.getFunction(Name);
if (Name != OldF->getName()) {
if (OrigF != nullptr) {
// The original kernel is inlined by the WorkItemLoops
// pass if it contained barriers or group collectives, otherwise
// we don't want to (and can't) remove it.
if (OrigF->use_empty())
RemovableFuncs.insert(OrigF);
OldF->takeName(OrigF);
if (OrigF->use_empty()) {
RemovableFuncs.insert(OrigF);
}
} else {
OldF->setName(Name);
}
Expand Down
39 changes: 39 additions & 0 deletions sycl/test/check_device_code/native_cpu/local_module_scope.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// REQUIRES: native_cpu_ock

// RUN: %clangxx -fsycl -fsycl-targets=native_cpu -Xclang -sycl-std=2020 -mllvm -sycl-opt -mllvm -inline-threshold=500 -mllvm -sycl-native-cpu-no-vecz -mllvm -sycl-native-dump-device-ir %s | FileCheck %s

// Check that local types structure is created and placed on the stack
// We also check that the attribute mux-orig-fn is created as this is needed to
// find the original function after this pass is run

// CHECK: %localVarTypes = type { ptr addrspace(1) }
// CHECK: define void @_ZTS4TestILi1ELi4EiE.NativeCPUKernel{{.*}} #[[ATTR:[0-9]*]]
// CHECK: alloca %localVarTypes
// CHECK: attributes #[[ATTR]] = {{.*}} "mux-orig-fn"="_ZTS4TestILi1ELi4EiE"

#include "sycl.hpp"

template <int dims, int size, typename T = int> struct Test;

int main() {
sycl::queue queue;

constexpr int dims = 1;
constexpr int size = 4;

std::array<int, size> data;

const auto range = sycl::range<dims>(size);
const auto range_wg = sycl::range<dims>(1);
{
sycl::buffer<int, dims> buf(data.data(), range);

queue.submit([&](sycl::handler &cgh) {
auto acc = sycl::accessor(buf, cgh, sycl::write_only);
cgh.parallel_for_work_group<Test<dims, size>>(
range, range_wg, [=](auto group) { acc[group.get_group_id()] = 42; });
});
queue.wait_and_throw();
}
return 0;
}
Loading