diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0bf73f191a8d0..4b417f2e6d13c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -340,9 +340,9 @@ void context_impl::addDeviceGlobalInitializer( } } -std::vector -context_impl::initializeDeviceGlobals(ur_program_handle_t NativePrg, - queue_impl &QueueImpl) { +std::vector context_impl::initializeDeviceGlobals( + ur_program_handle_t NativePrg, queue_impl &QueueImpl, + detail::kernel_bundle_impl *KernelBundleImplPtr) { if (!MDeviceGlobalNotInitializedCnt.load(std::memory_order_acquire)) return {}; @@ -396,6 +396,12 @@ context_impl::initializeDeviceGlobals(ur_program_handle_t NativePrg, detail::ProgramManager::getInstance().getDeviceGlobalEntries( DeviceGlobalIds, /*ExcludeDeviceImageScopeDecorated=*/true); + // Kernel bundles may have isolated device globals. They need to be + // initialized too. + if (KernelBundleImplPtr && KernelBundleImplPtr->getDeviceGlobalMap().size()) + KernelBundleImplPtr->getDeviceGlobalMap().getEntries( + DeviceGlobalIds, /*ExcludeDeviceImageScopeDecorated=*/true, + DeviceGlobalEntries); // If there were no device globals without device_image_scope the device // globals are trivially fully initialized and we can end early. diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 24a19f0a9c674..a38950948fdf5 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -223,7 +223,8 @@ class context_impl : public std::enable_shared_from_this { /// Initializes device globals for a program on the associated queue. std::vector - initializeDeviceGlobals(ur_program_handle_t NativePrg, queue_impl &QueueImpl); + initializeDeviceGlobals(ur_program_handle_t NativePrg, queue_impl &QueueImpl, + detail::kernel_bundle_impl *KernelBundleImplPtr); void memcpyToHostOnlyDeviceGlobal(device_impl &DeviceImpl, const void *DeviceGlobalPtr, diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 42b63fe3abb56..256c48066ec87 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace sycl { inline namespace _V1 { @@ -23,9 +24,22 @@ namespace detail { class DeviceGlobalMap { public: + DeviceGlobalMap(bool OwnerControlledCleanup) + : MOwnerControlledCleanup{OwnerControlledCleanup} {} + + ~DeviceGlobalMap() { + if (!MOwnerControlledCleanup) + for (auto &DeviceGlobalIt : MDeviceGlobals) + DeviceGlobalIt.second->cleanup(); + } + void initializeEntries(const RTDeviceBinaryImage *Img) { - const auto &DeviceGlobals = Img->getDeviceGlobals(); std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + initializeEntriesLockless(Img); + } + + void initializeEntriesLockless(const RTDeviceBinaryImage *Img) { + const auto &DeviceGlobals = Img->getDeviceGlobals(); for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { ByteArray DeviceGlobalInfo = DeviceBinaryProperty(DeviceGlobal).asByteArray(); @@ -102,9 +116,16 @@ class DeviceGlobalMap { return Entry->second; } - DeviceGlobalMapEntry *tryGetEntry(const std::string &UniqueId, - bool ExcludeDeviceImageScopeDecorated) { + DeviceGlobalMapEntry * + tryGetEntry(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated = false) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + return tryGetEntryLockless(UniqueId, ExcludeDeviceImageScopeDecorated); + } + + DeviceGlobalMapEntry * + tryGetEntryLockless(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated = false) const { auto DeviceGlobalEntry = MDeviceGlobals.find(UniqueId); if (DeviceGlobalEntry != MDeviceGlobals.end() && (!ExcludeDeviceImageScopeDecorated || @@ -113,22 +134,17 @@ class DeviceGlobalMap { return nullptr; } - std::vector - getEntries(const std::vector &UniqueIds, - bool ExcludeDeviceImageScopeDecorated) { - std::vector FoundEntries; - FoundEntries.reserve(UniqueIds.size()); - + void getEntries(const std::vector &UniqueIds, + bool ExcludeDeviceImageScopeDecorated, + std::vector &OutVec) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); for (const std::string &UniqueId : UniqueIds) { auto DeviceGlobalEntry = MDeviceGlobals.find(UniqueId); - assert(DeviceGlobalEntry != MDeviceGlobals.end() && - "Device global not found in map."); - if (!ExcludeDeviceImageScopeDecorated || - !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated) - FoundEntries.push_back(DeviceGlobalEntry->second.get()); + if (DeviceGlobalEntry != MDeviceGlobals.end() && + (!ExcludeDeviceImageScopeDecorated || + !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)) + OutVec.push_back(DeviceGlobalEntry->second.get()); } - return FoundEntries; } const std::unordered_map @@ -143,6 +159,12 @@ class DeviceGlobalMap { } private: + // Indicates whether the owner will explicitly cleanup the entries. If false + // the dtor of DeviceGlobalMap will cleanup the entries. + // Note: This lets the global device global map avoid overhead at shutdown and + // instead let the contexts own the associated entries. + bool MOwnerControlledCleanup = true; + // Maps between device_global identifiers and associated information. std::unordered_map> MDeviceGlobals; diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 78bd59a9ef795..2cb9e570a5c92 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -68,22 +68,33 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { { std::lock_guard Lock(NewAlloc.MInitEventMutex); ur_event_handle_t InitEvent; - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was not - // given a value on construction. - - MemoryManager::copy_usm(reinterpret_cast( - reinterpret_cast(MDeviceGlobalPtr) + - sizeof(MDeviceGlobalPtr)), - QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, - std::vector{}, &InitEvent); + if (MDeviceGlobalPtr) { + // C++ guarantees members appear in memory in the order they are declared, + // so since the member variable that contains the initial contents of the + // device_global is right after the usm_ptr member variable we can do + // some pointer arithmetic to memcopy over this value to the usm_ptr. This + // value inside of the device_global will be zero-initialized if it was + // not given a value on construction. + MemoryManager::copy_usm( + reinterpret_cast( + reinterpret_cast(MDeviceGlobalPtr) + + sizeof(MDeviceGlobalPtr)), + QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, + std::vector{}, &InitEvent); + } else { + // For SYCLBIN device globals we do not have a host pointer to copy from, + // so instead we fill the USM memory with 0's. + MemoryManager::fill_usm(NewAlloc.MPtr, QueueImpl, MDeviceGlobalTSize, + {static_cast(0)}, {}, &InitEvent); + } NewAlloc.MInitEvent = InitEvent; } - CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); + // Only device globals with host variables need to be registered with the + // context. The rest will be managed by their kernel bundles and cleaned up + // accordingly. + if (MDeviceGlobalPtr) + CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; } @@ -111,19 +122,32 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { "USM allocation for device and context already happened."); DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second; - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was not - // given a value on construction. - MemoryManager::context_copy_usm( - reinterpret_cast( - reinterpret_cast(MDeviceGlobalPtr) + - sizeof(MDeviceGlobalPtr)), - &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr); - - CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); + if (MDeviceGlobalPtr) { + // C++ guarantees members appear in memory in the order they are declared, + // so since the member variable that contains the initial contents of the + // device_global is right after the usm_ptr member variable we can do + // some pointer arithmetic to memcopy over this value to the usm_ptr. This + // value inside of the device_global will be zero-initialized if it was not + // given a value on construction. + MemoryManager::context_copy_usm( + reinterpret_cast( + reinterpret_cast(MDeviceGlobalPtr) + + sizeof(MDeviceGlobalPtr)), + &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr); + } else { + // For SYCLBIN device globals we do not have a host pointer to copy from, + // so instead we fill the USM memory with 0's. + std::vector ImmBuff(MDeviceGlobalTSize, + static_cast(0)); + MemoryManager::context_copy_usm(ImmBuff.data(), &CtxImpl, + MDeviceGlobalTSize, NewAlloc.MPtr); + } + + // Only device globals with host variables need to be registered with the + // context. The rest will be managed by their kernel bundles and cleaned up + // accordingly. + if (MDeviceGlobalPtr) + CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; } @@ -150,6 +174,30 @@ void DeviceGlobalMapEntry::removeAssociatedResources( } } +void DeviceGlobalMapEntry::cleanup() { + std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; + assert(MDeviceGlobalPtr == nullptr && + "Entry has host variable, so it should be associated with a context " + "and should be cleaned up by its dtor."); + for (auto &USMPtrIt : MDeviceToUSMPtrMap) { + // The context should be alive through the kernel_bundle owning these + // device_global entries. + const context_impl *CtxImpl = USMPtrIt.first.second; + DeviceGlobalUSMMem &USMMem = USMPtrIt.second; + detail::usm::freeInternal(USMMem.MPtr, CtxImpl); + if (USMMem.MInitEvent.has_value()) + CtxImpl->getAdapter()->call( + *USMMem.MInitEvent); +#ifndef NDEBUG + // For debugging we set the event and memory to some recognizable values + // to allow us to check that this cleanup happens before erasure. + USMMem.MPtr = nullptr; + USMMem.MInitEvent = {}; +#endif + } + MDeviceToUSMPtrMap.clear(); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index 5f020f1358e8b..3623e315ed9df 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -120,6 +120,13 @@ struct DeviceGlobalMapEntry { // Removes resources for device_globals associated with the context. void removeAssociatedResources(const context_impl *CtxImpl); + // Cleans up the USM memory and intialization events associated with this + // entry. This should only be called when the device global entry is not + // owned by the program manager, as otherwise it will be bound to the lifetime + // of the owner context and will be cleaned up through + // removeAssociatedResources. + void cleanup(); + private: // Map from a device and a context to the associated USM allocation for the // device_global. This should always be empty if MIsDeviceImageScopeDecorated diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f0050e7aa0918..f265d01c91a93 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -84,20 +84,15 @@ class ManagedDeviceGlobalsRegistry { bool hasDeviceGlobalName(const std::string &Name) const noexcept { return !MDeviceGlobalNames.empty() && std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), - mangleDeviceGlobalName(Name)) != MDeviceGlobalNames.end(); + Name) != MDeviceGlobalNames.end(); } DeviceGlobalMapEntry *tryGetDeviceGlobalEntry(const std::string &Name) const { auto &PM = detail::ProgramManager::getInstance(); - return PM.tryGetDeviceGlobalEntry(MPrefix + mangleDeviceGlobalName(Name)); + return PM.tryGetDeviceGlobalEntry(MPrefix + Name); } private: - static std::string mangleDeviceGlobalName(const std::string &Name) { - // TODO: Support device globals declared in namespaces. - return "_Z" + std::to_string(Name.length()) + Name; - } - void unregisterDeviceGlobalsFromContext() { if (MDeviceGlobalNames.empty()) return; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 59b7132b8db23..d50fc33060499 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -189,6 +190,7 @@ class kernel_bundle_impl MDevices, PropList); MDeviceImages.emplace_back(BuiltImg); MUniqueDeviceImages.emplace_back(BuiltImg); + populateDeviceGlobalsForSYCLBIN(); break; } case bundle_state::input: @@ -411,6 +413,8 @@ class kernel_bundle_impl removeDuplicateImages(); + populateDeviceGlobalsForSYCLBIN(); + for (const kernel_bundle &Bundle : ObjectBundles) { const KernelBundleImplPtr &BundlePtr = getSyclObjImpl(Bundle); for (const std::pair> @@ -489,6 +493,9 @@ class kernel_bundle_impl fillUniqueDeviceImages(); + if (get_bundle_state() == bundle_state::executable) + populateDeviceGlobalsForSYCLBIN(); + if (get_bundle_state() == bundle_state::input) { // Copy spec constants values from the device images. auto MergeSpecConstants = [this](const device_image_plain &Img) { @@ -589,6 +596,8 @@ class kernel_bundle_impl ProgramManager::getInstance().bringSYCLDeviceImagesToState(MDeviceImages, State); fillUniqueDeviceImages(); + if (State == bundle_state::executable) + populateDeviceGlobalsForSYCLBIN(); } template @@ -697,10 +706,14 @@ class kernel_bundle_impl } bool ext_oneapi_has_device_global(const std::string &Name) const { - return std::any_of( - begin(), end(), [&Name](const device_image_plain &DeviceImage) { - return getSyclObjImpl(DeviceImage)->hasDeviceGlobalName(Name); - }); + std::string MangledName = mangleDeviceGlobalName(Name); + return (MDeviceGlobals.size() && + MDeviceGlobals.tryGetEntryLockless(MangledName)) || + std::any_of(begin(), end(), + [&MangledName](const device_image_plain &DeviceImage) { + return getSyclObjImpl(DeviceImage) + ->hasDeviceGlobalName(MangledName); + }); } void *ext_oneapi_get_device_global_address(const std::string &Name, @@ -1023,28 +1036,51 @@ class kernel_bundle_impl return const_cast(this)->Base::shared_from_this(); } + DeviceGlobalMap &getDeviceGlobalMap() { return MDeviceGlobals; } + private: DeviceGlobalMapEntry *getDeviceGlobalEntry(const std::string &Name) const { - if (!hasSourceBasedImages()) { + if (!hasSourceBasedImages() && !hasSYCLBINImages()) { throw sycl::exception(make_error_code(errc::invalid), "Querying device globals by name is only available " - "in kernel_bundles successfully built from " + "in kernel_bundles created from SYCLBIN files and " + "kernel_bundles successfully built from " "kernel_bundle::ext_oneapi_source> " "with 'sycl' source language."); } - if (!ext_oneapi_has_device_global(Name)) { - throw sycl::exception(make_error_code(errc::invalid), - "device global '" + Name + - "' not found in kernel_bundle"); - } + std::string MangledName = mangleDeviceGlobalName(Name); + + if (MDeviceGlobals.size()) + if (DeviceGlobalMapEntry *Entry = + MDeviceGlobals.tryGetEntryLockless(MangledName)) + return Entry; for (const device_image_plain &DevImg : MUniqueDeviceImages) if (DeviceGlobalMapEntry *Entry = - getSyclObjImpl(DevImg)->tryGetDeviceGlobalEntry(Name)) + getSyclObjImpl(DevImg)->tryGetDeviceGlobalEntry(MangledName)) return Entry; - assert(false && "Device global should have been found."); - return nullptr; + + throw sycl::exception(make_error_code(errc::invalid), + "device global '" + Name + + "' not found in kernel_bundle"); + } + + static std::string mangleDeviceGlobalName(const std::string &Name) { + // TODO: Support device globals declared in namespaces. + return "_Z" + std::to_string(Name.length()) + Name; + } + + void populateDeviceGlobalsForSYCLBIN() { + // This should only be called from ctors, so lockless initialization is + // safe. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const auto &DevImgImpl = getSyclObjImpl(DevImg); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) + if (const RTDeviceBinaryImage *DevBinImg = + DevImgImpl->get_bin_image_ref()) + MDeviceGlobals.initializeEntriesLockless(DevBinImg); + } } void fillUniqueDeviceImages() { @@ -1082,6 +1118,12 @@ class kernel_bundle_impl // from any device image. SpecConstMapT MSpecConstValues; bundle_state MState; + + // Map for isolating device_global variables owned by the SYCLBINs in the + // kernel_bundle. This map will ensure the cleanup of its entries, unlike the + // map in program_manager which has its entry cleanup managed by the + // corresponding owner contexts. + DeviceGlobalMap MDeviceGlobals{/*OwnerControlledCleanup=*/false}; }; } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3ecb904513719..922808835812a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2456,8 +2456,11 @@ ProgramManager::tryGetDeviceGlobalEntry(const std::string &UniqueId, std::vector ProgramManager::getDeviceGlobalEntries( const std::vector &UniqueIds, bool ExcludeDeviceImageScopeDecorated) { - return m_DeviceGlobals.getEntries(UniqueIds, - ExcludeDeviceImageScopeDecorated); + std::vector FoundEntries; + FoundEntries.reserve(UniqueIds.size()); + m_DeviceGlobals.getEntries(UniqueIds, ExcludeDeviceImageScopeDecorated, + FoundEntries); + return FoundEntries; } void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr, @@ -3041,6 +3044,10 @@ ProgramManager::link(const std::vector &Imgs, const RTDeviceBinaryImage *NewBinImg = mergeImageData( Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap, MergedImageStorage); + // With both the new program and the merged image data, initialize associated + // device_global variables. + ContextImpl.addDeviceGlobalInitializer(LinkedProg, Devs, NewBinImg); + { std::lock_guard Lock(MNativeProgramsMutex); // NativePrograms map does not intend to keep reference to program handle, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index f9386e30a3635..427ca01a23245 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -533,7 +533,9 @@ class ProgramManager { SanitizerType m_SanitizerFoundInImage; // Maps between device_global identifiers and associated information. - DeviceGlobalMap m_DeviceGlobals; + // The ownership of entry resources is taken to allow contexts to cleanup + // their associated entry resources when they die. + DeviceGlobalMap m_DeviceGlobals{/*OwnerControlledCleanup=*/true}; // Maps between host_pipe identifiers and associated information. std::unordered_map> diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2d9b651077ab8..a1843848b2d29 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2731,7 +2731,7 @@ void enqueueImpKernel( // Initialize device globals associated with this. std::vector DeviceGlobalInitEvents = - ContextImpl.initializeDeviceGlobals(Program, Queue); + ContextImpl.initializeDeviceGlobals(Program, Queue, KernelBundleImplPtr); if (!DeviceGlobalInitEvents.empty()) { std::vector EventsWithDeviceGlobalInits; EventsWithDeviceGlobalInits.reserve(RawEvents.size() + diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp b/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp new file mode 100644 index 0000000000000..747f6a9d4186e --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp @@ -0,0 +1,104 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 1024; +static constexpr size_t WGSIZE = 16; +static constexpr float EPS = 0.001; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + sycl::device Dev = Q.get_device(); + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe1 = sycl::build(KBInput); + auto KBExe2 = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe1 = sycl::link(KBObj); + auto KBExe2 = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe1 = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe2 = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#endif + + sycl::kernel AddK = KBExe1.ext_oneapi_get_kernel("ff_dg_adder"); + + // Check presence of device globals. + assert(KBExe1.ext_oneapi_has_device_global("DG")); + // Querying a non-existing device global shall not crash. + assert(!KBExe1.ext_oneapi_has_device_global("bogus_DG")); + + void *DGAddr = KBExe1.ext_oneapi_get_device_global_address("DG", Dev); + size_t DGSize = KBExe1.ext_oneapi_get_device_global_size("DG"); + assert(DGSize == 4); + + int32_t Val = -1; + auto CheckVal = [&](int32_t Expected) { + Val = -1; + Q.memcpy(&Val, DGAddr, DGSize).wait(); + if (Val != Expected) { + std::cout << "Val: " << Val << " != " << Expected << '\n'; + ++Failed; + } + }; + + // Device globals are zero-initialized. + CheckVal(0); + + // Set the DG. + Val = 123; + Q.memcpy(DGAddr, &Val, DGSize).wait(); + CheckVal(123); + + // Run a kernel using it. + Val = -17; + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Val); + CGH.single_task(AddK); + }).wait(); + CheckVal(123 - 17); + + // Test that each bundle has its distinct set of globals. + DGAddr = KBExe2.ext_oneapi_get_device_global_address("DG", Dev); + CheckVal(0); + + DGAddr = KBExe1.ext_oneapi_get_device_global_address("DG", Dev); + CheckVal(123 - 17); + + // Test global with `device_image_scope`. We currently cannot read/write these + // from the host, but they should work device-only. + auto SwapK = KBExe2.ext_oneapi_get_kernel("ff_swap"); + int64_t *ValBuf = sycl::malloc_shared(1, Q); + *ValBuf = -1; + auto DoSwap = [&]() { + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(ValBuf); + CGH.single_task(SwapK); + }).wait(); + }; + + DoSwap(); + if (*ValBuf != 0) { + std::cout << "ValBuf: " << *ValBuf << " != 0"; + ++Failed; + } + DoSwap(); + if (*ValBuf != -1) { + std::cout << "ValBuf: " << *ValBuf << " != -1"; + ++Failed; + } + + sycl::free(ValBuf, Q); + + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp new file mode 100644 index 0000000000000..fa66cf29b8d7d --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp @@ -0,0 +1,21 @@ +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +syclex::device_global DG; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_adder(int val) { + DG += val; +} + +syclex::device_global + DG_DIS; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_swap(int64_t *val) { + int64_t tmp = DG_DIS; + DG_DIS = *val; + *val = tmp; +} diff --git a/sycl/test-e2e/SYCLBIN/dg_executable.cpp b/sycl/test-e2e/SYCLBIN/dg_executable.cpp new file mode 100644 index 0000000000000..c2ec644eedc6c --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_executable.cpp @@ -0,0 +1,26 @@ +//==---------- dg_executable.cpp --- SYCLBIN extension tests ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp new file mode 100644 index 0000000000000..9e535e87fe71f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -0,0 +1,26 @@ +//==----------- dg_input.cpp --- SYCLBIN extension tests -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp new file mode 100644 index 0000000000000..faa2c87070df6 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -0,0 +1,26 @@ +//==----------- dg_object.cpp --- SYCLBIN extension tests ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index de2b939756ea0..5b97ae7a0cac9 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 25 +// CHECK-NUM-MATCHES: 26 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see