From 4fd187227ff5dd6cf0f3c1d66606c14e69f036cb Mon Sep 17 00:00:00 2001 From: CrabeExtra Date: Tue, 26 May 2026 21:04:00 +0330 Subject: [PATCH] copyMemoryToImage func implemented MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add hostImageCopy as MOVE_TO_LIMIT in device_features.json. - Query and enable VkPhysicalDeviceHostImageCopyFeaturesEXT through limits.hostImageCopy. - Add EUF_HOST_TRANSFER_BIT and map it to VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT. - Wire VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT into hostImageTransfer format usage. - Add host image copy flags and SMemoryToImageCopy. - Add ILogicalDevice::copyMemoryToImage validation and CVulkanLogicalDevice backend. - Validate host-transfer image creation and EHICF_MEMCPY_BIT full-subresource rules. --- include/nbl/asset/IImage.h | 39 ++++++- include/nbl/video/CVulkanCommon.h | 2 + include/nbl/video/ILogicalDevice.h | 10 +- include/nbl/video/IPhysicalDevice.h | 4 +- src/nbl/video/CVulkanLogicalDevice.cpp | 24 ++++ src/nbl/video/CVulkanLogicalDevice.h | 2 + src/nbl/video/CVulkanPhysicalDevice.cpp | 18 ++- src/nbl/video/ILogicalDevice.cpp | 103 ++++++++++++++++++ .../device_capabilities/device_features.json | 7 +- src/nbl/video/utilities/CAssetConverter.cpp | 2 + 10 files changed, 203 insertions(+), 8 deletions(-) diff --git a/include/nbl/asset/IImage.h b/include/nbl/asset/IImage.h index 748a609bbe..0189a7c8d4 100644 --- a/include/nbl/asset/IImage.h +++ b/include/nbl/asset/IImage.h @@ -132,10 +132,12 @@ class IImage : public virtual core::IReferenceCounted EUF_TRANSIENT_ATTACHMENT_BIT = 0x0040, EUF_INPUT_ATTACHMENT_BIT = 0x0080, EUF_SHADING_RATE_ATTACHMENT_BIT = 0x0100, - EUF_FRAGMENT_DENSITY_MAP_BIT = 0x0200 + EUF_FRAGMENT_DENSITY_MAP_BIT = 0x0200, + EUF_HOST_TRANSFER_BIT = 0x0400 }; + struct SSubresourceRange - { + { core::bitflag aspectMask = 
E_ASPECT_FLAGS::EAF_NONE; uint32_t baseMipLevel = 0u; uint32_t levelCount = 0u; @@ -151,6 +153,11 @@ class IImage : public virtual core::IReferenceCounted auto operator<=>(const SSubresourceLayers&) const = default; }; + enum E_HOST_IMAGE_COPY_FLAGS : uint8_t + { + EHICF_NONE = 0x00, + EHICF_MEMCPY_BIT = 0x01 + }; struct SBufferCopy { inline bool isValid() const @@ -209,6 +216,33 @@ class IImage : public virtual core::IReferenceCounted auto operator<=>(const SBufferCopy&) const = default; }; + struct SMemoryToImageCopy + { + inline bool isValid() const + { + if (!hostPointer) + return false; + if (imageSubresource.layerCount==0u) + return false; + if (imageExtent.width==0u || imageExtent.height==0u || imageExtent.depth==0u) + return false; + + return true; + } + + inline const auto& getDstSubresource() const {return imageSubresource;} + inline const VkOffset3D& getDstOffset() const {return imageOffset;} + inline const VkExtent3D& getExtent() const {return imageExtent;} + + const void* hostPointer = nullptr; + uint32_t memoryRowLength = 0u; + uint32_t memoryImageHeight = 0u; + SSubresourceLayers imageSubresource = {}; + VkOffset3D imageOffset = {0u,0u,0u}; + VkExtent3D imageExtent = {0u,0u,0u}; + + auto operator<=>(const SMemoryToImageCopy&) const = default; + }; struct SImageCopy { inline bool isValid() const @@ -846,6 +880,7 @@ class IImage : public virtual core::IReferenceCounted }; static_assert(sizeof(IImage)-sizeof(IDescriptor)!=3u*sizeof(uint32_t)+sizeof(VkExtent3D)+sizeof(uint32_t)*3u,"BaW File Format won't work"); +NBL_ENUM_ADD_BITWISE_OPERATORS(IImage::E_HOST_IMAGE_COPY_FLAGS) NBL_ENUM_ADD_BITWISE_OPERATORS(IImage::E_USAGE_FLAGS) } // end namespace nbl::asset diff --git a/include/nbl/video/CVulkanCommon.h b/include/nbl/video/CVulkanCommon.h index e4dfb7e3e9..143a3b00e0 100644 --- a/include/nbl/video/CVulkanCommon.h +++ b/include/nbl/video/CVulkanCommon.h @@ -791,6 +791,7 @@ inline VkImageUsageFlags getVkImageUsageFlagsFromImageUsageFlags(const core::bit if 
(in.hasFlags(IGPUImage::EUF_INPUT_ATTACHMENT_BIT)) ret |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; if (in.hasFlags(IGPUImage::EUF_SHADING_RATE_ATTACHMENT_BIT)) ret |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; if (in.hasFlags(IGPUImage::EUF_FRAGMENT_DENSITY_MAP_BIT)) ret |= VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT; + if (in.hasFlags(IGPUImage::EUF_HOST_TRANSFER_BIT)) ret |= VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT; return ret; } @@ -806,6 +807,7 @@ inline core::bitflag getImageUsageFlagsFromVkImageUsag if (in&VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) ret |= IGPUImage::EUF_INPUT_ATTACHMENT_BIT; if (in&VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) ret |= IGPUImage::EUF_SHADING_RATE_ATTACHMENT_BIT; if (in&VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) ret |= IGPUImage::EUF_FRAGMENT_DENSITY_MAP_BIT; + if (in&VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) ret |= IGPUImage::EUF_HOST_TRANSFER_BIT; return ret; } diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 742cb506c6..1aa28586ab 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -361,7 +361,8 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe m_logger.log("Failed to create Image, queue family count %d for concurrent sharing larger than our max %d!",system::ILogger::ELL_ERROR,creationParams.queueFamilyIndexCount,MaxQueueFamilies); return nullptr; } - // TODO: validation of creationParams against the device's limits (sample counts, etc.) 
see vkCreateImage docs + if (!validateImageCreationAgainstDevice(creationParams)) + return nullptr; return createImage_impl(std::move(creationParams)); } // Create an ImageView that can actually be used by shaders (@see ICPUImageView) @@ -822,6 +823,8 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe return result!=DEFERRABLE_RESULT::SOME_ERROR; } + // https://docs.vulkan.org/refpages/latest/refpages/source/vkCopyMemoryToImage.html + bool copyMemoryToImage(IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const core::bitflag flags, const std::span regions); //! Shaders struct SShaderCreationParameters @@ -1134,6 +1137,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual core::smart_refctd_ptr createBuffer_impl(IGPUBuffer::SCreationParams&& creationParams) = 0; virtual core::smart_refctd_ptr createBufferView_impl(const asset::SBufferRange& underlying, const asset::E_FORMAT _fmt) = 0; + bool validateImageCreationAgainstDevice(const IGPUImage::SCreationParams& creationParams); virtual core::smart_refctd_ptr createImage_impl(IGPUImage::SCreationParams&& params) = 0; virtual core::smart_refctd_ptr createImageView_impl(IGPUImageView::SCreationParams&& params) = 0; virtual core::smart_refctd_ptr createBottomLevelAccelerationStructure_impl(IGPUAccelerationStructure::SCreationParams&& params) = 0; @@ -1179,6 +1183,8 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe virtual DEFERRABLE_RESULT copyAccelerationStructureToMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst) = 0; virtual DEFERRABLE_RESULT copyAccelerationStructureFromMemory_impl(IDeferredOperation* const deferredOperation, const asset::SBufferBinding& src, IGPUAccelerationStructure* dst) = 0; + virtual bool copyMemoryToImage_impl(IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const core::bitflag 
flags, const std::span regions) = 0; + constexpr static inline auto MaxStagesPerPipeline = 6u; virtual core::smart_refctd_ptr createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) = 0; virtual core::smart_refctd_ptr createPipelineLayout_impl( @@ -1624,4 +1630,4 @@ inline bool ILogicalDevice::validateMemoryBarrier(const uint32_t queueFamilyInde } // namespace nbl::video #include "nbl/undef_logging_macros.h" -#endif //_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ \ No newline at end of file +#endif //_NBL_VIDEO_I_LOGICAL_DEVICE_H_INCLUDED_ diff --git a/include/nbl/video/IPhysicalDevice.h b/include/nbl/video/IPhysicalDevice.h index 4222a22153..69de22f9e3 100644 --- a/include/nbl/video/IPhysicalDevice.h +++ b/include/nbl/video/IPhysicalDevice.h @@ -443,7 +443,7 @@ class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable storageImageLoadWithoutFormat(0), storageImageStoreWithoutFormat(0), depthCompareSampledImage(0), - hostImageTransfer(0), + hostImageTransfer(usages.hasFlags(IGPUImage::EUF_HOST_TRANSFER_BIT)), log2MaxSamples(0) {} @@ -455,6 +455,8 @@ class NBL_API2 IPhysicalDevice : public core::Interface, public core::Unmovable retval |= usage_flags_t::EUF_SAMPLED_BIT; if (storageImage) retval |= usage_flags_t::EUF_STORAGE_BIT; + if (hostImageTransfer) + retval |= usage_flags_t::EUF_HOST_TRANSFER_BIT; if (attachment || blitDst) // does also src imply? 
retval |= usage_flags_t::EUF_RENDER_ATTACHMENT_BIT; if (blitSrc || transferSrc) diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 40e60d489f..5bdd962245 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -527,7 +527,31 @@ auto CVulkanLogicalDevice::copyAccelerationStructureFromMemory_impl(IDeferredOpe return getDeferrableResultFrom(m_devf.vk.vkCopyMemoryToAccelerationStructureKHR(m_vkdev,static_cast(deferredOperation)->getInternalObject(),&info)); } +bool CVulkanLogicalDevice::copyMemoryToImage_impl(IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const core::bitflag flags, const std::span regions) +{ + core::vector vk_regions(regions.size(),{VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY,nullptr}); + for (size_t i=0u; i(regions[i].imageSubresource.aspectMask.value); + vk_regions[i].imageSubresource.mipLevel = regions[i].imageSubresource.mipLevel; + vk_regions[i].imageSubresource.baseArrayLayer = regions[i].imageSubresource.baseArrayLayer; + vk_regions[i].imageSubresource.layerCount = regions[i].imageSubresource.layerCount; + vk_regions[i].imageOffset = { static_cast(regions[i].imageOffset.x), static_cast(regions[i].imageOffset.y), static_cast(regions[i].imageOffset.z) }; + vk_regions[i].imageExtent = { regions[i].imageExtent.width, regions[i].imageExtent.height, regions[i].imageExtent.depth }; + } + VkCopyMemoryToImageInfoEXT info = { VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,nullptr }; + info.flags = static_cast(flags.value); + info.dstImage = static_cast(dstImage)->getInternalObject(); + info.dstImageLayout = getVkImageLayoutFromImageLayout(dstImageLayout); + info.regionCount = static_cast(vk_regions.size()); + info.pRegions = vk_regions.data(); + + return m_devf.vk.vkCopyMemoryToImageEXT(m_vkdev,&info) == VK_SUCCESS; +} core::smart_refctd_ptr CVulkanLogicalDevice::createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) 
{ diff --git a/src/nbl/video/CVulkanLogicalDevice.h b/src/nbl/video/CVulkanLogicalDevice.h index e77386cb34..6fb08dfb55 100644 --- a/src/nbl/video/CVulkanLogicalDevice.h +++ b/src/nbl/video/CVulkanLogicalDevice.h @@ -270,6 +270,8 @@ class CVulkanLogicalDevice final : public ILogicalDevice DEFERRABLE_RESULT copyAccelerationStructureToMemory_impl(IDeferredOperation* const deferredOperation, const IGPUAccelerationStructure* src, const asset::SBufferBinding& dst) override; DEFERRABLE_RESULT copyAccelerationStructureFromMemory_impl(IDeferredOperation* const deferredOperation, const asset::SBufferBinding& src, IGPUAccelerationStructure* dst) override; + virtual bool copyMemoryToImage_impl(IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const core::bitflag flags, const std::span regions) override; + // layouts core::smart_refctd_ptr createDescriptorSetLayout_impl(const std::span bindings, const uint32_t maxSamplersCount) override; core::smart_refctd_ptr createPipelineLayout_impl( diff --git a/src/nbl/video/CVulkanPhysicalDevice.cpp b/src/nbl/video/CVulkanPhysicalDevice.cpp index 65a0c358cc..04ca6eae16 100644 --- a/src/nbl/video/CVulkanPhysicalDevice.cpp +++ b/src/nbl/video/CVulkanPhysicalDevice.cpp @@ -324,6 +324,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart VkPhysicalDeviceCooperativeMatrixPropertiesKHR cooperativeMatrixProperties = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR }; VkPhysicalDeviceShaderSMBuiltinsPropertiesNV shaderSMBuiltinsPropertiesNV = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_PROPERTIES_NV }; VkPhysicalDeviceShaderCoreProperties2AMD shaderCoreProperties2AMD = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD }; + VkPhysicalDeviceHostImageCopyPropertiesEXT hostImageCopyProperties = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT }; VkPhysicalDeviceMaintenance5PropertiesKHR maintenance5Properties = { 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_PROPERTIES_KHR }; VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT graphicsPipelineLibraryProperties = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT }; VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT rayTracingInvocationReorderProperties = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_EXT }; @@ -357,6 +358,8 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart addToPNextChain(&shaderSMBuiltinsPropertiesNV); if (isExtensionSupported(VK_AMD_SHADER_CORE_PROPERTIES_2_EXTENSION_NAME)) addToPNextChain(&shaderCoreProperties2AMD); + if (isExtensionSupported(VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME)) + addToPNextChain(&hostImageCopyProperties); if (isExtensionSupported(VK_KHR_MAINTENANCE_5_EXTENSION_NAME)) addToPNextChain(&maintenance5Properties); if (isExtensionSupported(VK_EXT_GRAPHICS_PIPELINE_LIBRARY_EXTENSION_NAME)) @@ -726,6 +729,7 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT shaderImageAtomicInt64Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT }; VkPhysicalDeviceIndexTypeUint8FeaturesEXT indexTypeUint8Features = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT }; VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pipelineExecutablePropertiesFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR }; + VkPhysicalDeviceHostImageCopyFeaturesEXT hostImageCopyFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT }; VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV deviceGeneratedCommandsFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV }; VkPhysicalDeviceDeviceMemoryReportFeaturesEXT deviceMemoryReportFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_MEMORY_REPORT_FEATURES_EXT }; 
VkPhysicalDeviceShaderEarlyAndLateFragmentTestsFeaturesAMD shaderEarlyAndLateFragmentTestsFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EARLY_AND_LATE_FRAGMENT_TESTS_FEATURES_AMD }; @@ -785,6 +789,8 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart addToPNextChain(&indexTypeUint8Features); if (isExtensionSupported(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME)) addToPNextChain(&pipelineExecutablePropertiesFeatures); + if (isExtensionSupported(VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME)) + addToPNextChain(&hostImageCopyFeatures); if (isExtensionSupported(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME)) addToPNextChain(&deviceGeneratedCommandsFeatures); if (isExtensionSupported(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME)) @@ -1157,7 +1163,6 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart if (isExtensionSupported(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME)) features.pipelineExecutableInfo = pipelineExecutablePropertiesFeatures.pipelineExecutableInfo; - if (isExtensionSupported(VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME)) features.deviceGeneratedCommands = deviceGeneratedCommandsFeatures.deviceGeneratedCommands; @@ -1221,6 +1226,8 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart properties.limits.shaderImageFloat32AtomicMinMax = shaderAtomicFloat2Features.shaderImageFloat32AtomicMinMax; properties.limits.sparseImageFloat32AtomicMinMax = shaderAtomicFloat2Features.sparseImageFloat32AtomicMinMax; } + if (isExtensionSupported(VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME)) + properties.limits.hostImageCopy = hostImageCopyFeatures.hostImageCopy; if (isExtensionSupported(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME)) properties.limits.deviceMemoryReport = deviceMemoryReportFeatures.deviceMemoryReport; @@ -1332,7 +1339,9 @@ std::unique_ptr CVulkanPhysicalDevice::create(core::smart retval.storageImageLoadWithoutFormat = anyFlag(features, VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT); 
retval.storageImageStoreWithoutFormat = anyFlag(features, VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT); retval.depthCompareSampledImage = anyFlag(features, VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT); -// retval.hostImageTransfer = anyFlag(features, VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT); + //Not sure to remove this? + // retval.hostImageTransfer = anyFlag(features, VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT); + retval.hostImageTransfer = anyFlag(features, VK_FORMAT_FEATURE_2_HOST_IMAGE_TRANSFER_BIT_EXT); //retval.log2MaxSmples = ; // Todo(Erfan) return retval; }; @@ -1588,6 +1597,9 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pipelineExecutablePropertiesFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR,nullptr }; REQUIRE_EXTENSION_IF(enabledFeatures.pipelineExecutableInfo,VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME,&pipelineExecutablePropertiesFeatures); + VkPhysicalDeviceHostImageCopyFeaturesEXT hostImageCopyFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT,nullptr }; + REQUIRE_EXTENSION_IF(limits.hostImageCopy,VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME,&hostImageCopyFeatures); + VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV deviceGeneratedCommandsFeatures = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV,nullptr }; REQUIRE_EXTENSION_IF(enabledFeatures.deviceGeneratedCommands,VK_NV_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME,&deviceGeneratedCommandsFeatures); @@ -1875,6 +1887,8 @@ core::smart_refctd_ptr CVulkanPhysicalDevice::createLogicalDevic pipelineExecutablePropertiesFeatures.pipelineExecutableInfo = enabledFeatures.pipelineExecutableInfo; + hostImageCopyFeatures.hostImageCopy = limits.hostImageCopy; + deviceGeneratedCommandsFeatures.deviceGeneratedCommands = enabledFeatures.deviceGeneratedCommands; //deviceMemoryReportFeatures [LIMIT SO ENABLE EVERYTHING 
BY DEFAULT] diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index bee6381f7a..c8ca8f4055 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -630,6 +630,109 @@ bool ILogicalDevice::nullifyDescriptors(const std::spangetLimits().hostImageCopy) + { + NBL_LOG_ERROR("Failed to create Image, `EUF_HOST_TRANSFER_BIT` requires `hostImageCopy` support!"); + return false; + } + if (!physDev->getImageFormatUsages(creationParams.tiling)[creationParams.format].hostImageTransfer) + { + NBL_LOG_ERROR("Failed to create Image, `EUF_HOST_TRANSFER_BIT` requires `hostImageTransfer` format support!"); + return false; + } + } + return true; +} + +bool ILogicalDevice::copyMemoryToImage(IGPUImage* const dstImage, const IGPUImage::LAYOUT dstImageLayout, const core::bitflag flags, const std::span regions) +{ + if (!getPhysicalDevice()->getLimits().hostImageCopy) + { + NBL_LOG_ERROR("`hostImageCopy` feature is not enabled"); + return false; + } + if (!dstImage || !dstImage->wasCreatedBy(this)) + { + NBL_LOG_ERROR("`dstImage` was not created by this device"); + return false; + } + const auto& params = dstImage->getCreationParameters(); + const auto imageUsages = params.usage | params.stencilUsage; + if (!imageUsages.hasFlags(IGPUImage::EUF_HOST_TRANSFER_BIT)) + { + NBL_LOG_ERROR("`dstImage` was not created with `EUF_HOST_TRANSFER_BIT` usage"); + return false; + } + if (regions.empty()) + { + NBL_LOG_ERROR("`regions` must not be empty"); + return false; + } + + const bool memcpy = flags.hasFlags(IGPUImage::EHICF_MEMCPY_BIT); + for (size_t i=0u; i= params.mipLevels) + { + NBL_LOG_ERROR("`imageSubresource.mipLevel` out of bounds (regions[%zu])", i); + return false; + } + if (r.imageSubresource.baseArrayLayer + r.imageSubresource.layerCount > params.arrayLayers) + { + NBL_LOG_ERROR("`imageSubresource.baseArrayLayer + layerCount` out of bounds (regions[%zu])", i); + return false; + } + const auto mipSize = 
dstImage->getMipSize(r.imageSubresource.mipLevel); + if (r.imageOffset.x + r.imageExtent.width > mipSize.x || + r.imageOffset.y + r.imageExtent.height > mipSize.y || + r.imageOffset.z + r.imageExtent.depth > mipSize.z) + { + NBL_LOG_ERROR("`imageOffset + imageExtent` exceeds mip extent (regions[%zu])", i); + return false; + } + // MEMCPY_BIT VUIDs: offset must be zero, extent must cover the full selected subresource, + // and memoryRowLength/memoryImageHeight must both be zero. + if (memcpy) + { + if (r.imageOffset.x!=0u || r.imageOffset.y!=0u || r.imageOffset.z!=0u) + { + NBL_LOG_ERROR("`EHICF_MEMCPY_BIT` requires `imageOffset` to be zero (regions[%zu])", i); + return false; + } + if (r.imageExtent.width!=mipSize.x || r.imageExtent.height!=mipSize.y || r.imageExtent.depth!=mipSize.z) + { + NBL_LOG_ERROR("`EHICF_MEMCPY_BIT` requires `imageExtent` to match the full mip extent (regions[%zu])", i); + return false; + } + if (r.memoryRowLength != 0u) + { + NBL_LOG_ERROR("`EHICF_MEMCPY_BIT` requires `memoryRowLength` to be zero (regions[%zu])", i); + return false; + } + + if (r.memoryImageHeight != 0u) + { + NBL_LOG_ERROR("`EHICF_MEMCPY_BIT` requires `memoryImageHeight` to be zero (regions[%zu])", i); + return false; + } + } + } + + return copyMemoryToImage_impl(dstImage, dstImageLayout, flags, regions); +} + core::smart_refctd_ptr ILogicalDevice::createRenderpass(const IGPURenderpass::SCreationParams& params) { IGPURenderpass::SCreationParamValidationResult validation = IGPURenderpass::validateCreationParams(params); diff --git a/src/nbl/video/device_capabilities/device_features.json b/src/nbl/video/device_capabilities/device_features.json index bca5ab18de..1446d2231a 100644 --- a/src/nbl/video/device_capabilities/device_features.json +++ b/src/nbl/video/device_capabilities/device_features.json @@ -2927,8 +2927,13 @@ ] }, { + "type": "bool", + "name": "hostImageCopy", + "value": false, + "expose": "MOVE_TO_LIMIT", "comment": [ - "[DO NOT EXPOSE] We don't support yet, 
but should when ubiquitous", + "[EXPOSE AS A LIMIT] Nabla enables host image copy automatically when supported", + "HostImageCopyFeaturesEXT", "VK_EXT_host_image_copy" ] }, diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 5bb8be8274..633d40b0bb 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -262,6 +262,8 @@ bool CAssetConverter::patch_impl_t::valid(const ILogicalDevice* devic const auto* physDev = device->getPhysicalDevice(); if (storageImageLoadWithoutFormat && !physDev->getLimits().shaderStorageImageReadWithoutFormat) return false; + if (usageFlags.hasFlags(usage_flags_t::EUF_HOST_TRANSFER_BIT) && !physDev->getLimits().hostImageCopy) + return false; // We're not going to check if the format is creatable for a given usage nad metausages, because we possibly haven't collected all the usages yet. // So the Image format promotion happens in another pass, just after the DFS descent. return true;