diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b54a270f8..2c870240e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,9 +2,9 @@ name: Build on: push: - branches: [ main ] + branches: [ main, 'release/**' ] pull_request: - branches: [ main ] + branches: [ main, 'release/**' ] jobs: build-windows: diff --git a/.github/workflows/test-build-windows.yml b/.github/workflows/test-build-windows.yml new file mode 100644 index 000000000..67c3bfd6b --- /dev/null +++ b/.github/workflows/test-build-windows.yml @@ -0,0 +1,52 @@ +name: Test Builds on Windows + +on: [push, pull_request, workflow_dispatch] + +jobs: + build-set-windows: + runs-on: windows-2022 + + steps: + - name: Checkout code + id: checkout-code + uses: actions/checkout@v2 + with: + submodules: recursive + + - name: Setup glslangValidator + shell: pwsh + run: | + choco install vulkan-sdk -y + Write-Output "$([System.Environment]::GetEnvironmentVariable('VULKAN_SDK', 'Machine'))\Bin" ` + | Out-File -FilePath "${Env:GITHUB_PATH}" -Append + + - name: Setup Meson + shell: pwsh + run: pip install meson + + - name: Find Visual Studio + shell: pwsh + run: | + $installationPath = Get-VSSetupInstance ` + | Select-VSSetupInstance -Require Microsoft.VisualStudio.Workload.NativeDesktop -Latest ` + | Select-Object -ExpandProperty InstallationPath + Write-Output "VSDEVCMD=${installationPath}\Common7\Tools\VsDevCmd.bat" ` + | Out-File -FilePath "${Env:GITHUB_ENV}" -Append + + - name: Build MSVC x86 + shell: pwsh + run: | + & "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x86 -host_arch=x64 -no_logo && set" ` + | % { , ($_ -Split '=', 2) } ` + | % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) } + meson --buildtype release --backend vs2022 build-msvc-x86 + msbuild -m build-msvc-x86/dxvk.sln + + - name: Build MSVC x64 + shell: pwsh + run: | + & "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x64 -host_arch=x64 -no_logo && set" ` + | % { , ($_ -Split 
'=', 2) } ` + | % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) } + meson --buildtype release --backend vs2022 build-msvc-x64 + msbuild -m build-msvc-x64/dxvk.sln diff --git a/.gitignore b/.gitignore index 2b7c63153..003ac7fd3 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ .vscode/ /external **/_batch_output/** +/public/bin/ diff --git a/README.md b/README.md index 791711c39..d468bd246 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,8 @@ While dxvk-remix is a fork of DXVK, please report bugs encountered with dxvk-rem ## Project Documentation - [Rtx Options](/RtxOptions.md) -- [Terrain System](/documentation/TerrainSystem.md) - [Anti-Culling System](/documentation/AntiCullingSystem.md) - [Foliage System](/documentation/FoliageSystem.md) +- [Opacity Micromap](/documentation/OpacityMicromap.md) +- [Terrain System](/documentation/TerrainSystem.md) - [Unit Test](/documentation/UnitTest.md) diff --git a/RELEASE b/RELEASE index 9442d91d0..45fb4f951 100644 --- a/RELEASE +++ b/RELEASE @@ -20,4 +20,4 @@ # DEALINGS IN THE SOFTWARE. ############################################################################# -1.9.3 \ No newline at end of file +1.9.4 diff --git a/RtxOptions.md b/RtxOptions.md index be2ac49f9..a63f24fa4 100644 --- a/RtxOptions.md +++ b/RtxOptions.md @@ -117,6 +117,7 @@ Tables below enumerate all the options and their defaults set by RTX Remix. Note |rtx.debugView.composite.compositeViewIdx|int|0|Index of a composite view to show when Composite Debug View is enabled\. The index must be a a valid value from CompositeDebugView enumeration\. Value of 0 disables Composite Debug View\.| |rtx.debugView.debugViewIdx|int|0|Index of a debug view to show when Debug View is enabled\. The index must be a valid value from DEBUG\_VIEW\_\* macro defined indices\. 
Value of 0 disables Debug View\.| |rtx.debugView.displayType|int|0|| +|rtx.debugView.enableGammaCorrection|bool|False|Enables gamma correction of a debug view value\.| |rtx.debugView.enablePseudoColor|bool|False|Enables RGB color coding of a scalar debug view value\.| |rtx.debugView.evMaxValue|int|4|| |rtx.debugView.evMinValue|int|-4|| @@ -167,7 +168,11 @@ Tables below enumerate all the options and their defaults set by RTX Remix. Note |rtx.di.stealBoundaryPixelSamplesWhenOutsideOfScreen|bool|True|Steal screen boundary samples when a hit point is outside the screen\.| |rtx.displacement.displacementFactor|float|1|Scaling factor for all displacement maps| |rtx.displacement.enableDirectLighting|bool|True|Whether direct lighting accounts for displacement mapping| +|rtx.displacement.enableIndirectHit|bool|False|Whether indirect ray hits account for displacement mapping \(Enabling this is expensive\. Without it, non\-perfect reflections of displaced objects will not show displacement\.\)| |rtx.displacement.enableIndirectLighting|bool|True|Whether indirect lighting accounts for displacement mapping| +|rtx.displacement.enableNEECache|bool|True|Whether the NEE cache accounts for displacement mapping| +|rtx.displacement.enablePSR|bool|False|Enable PSR \(perfect reflections\) for materials with displacement\. 
Rays that have been perfectly reflected off a POM surface will not collide correctly with other parts of that same surface\.| +|rtx.displacement.enableReSTIRGI|bool|True|Whether ReSTIR GI accounts for displacement mapping| |rtx.dlfg.enable|bool|True|Enables DLSS 3\.0 frame generation which generates interpolated frames to increase framerate at the cost of slightly more latency\.| |rtx.dlssEnhancementDirectLightMaxValue|float|10|The maximum strength of direct lighting enhancement\.| |rtx.dlssEnhancementDirectLightPower|float|0.7|The overall strength of direct lighting enhancement\.| @@ -312,6 +317,7 @@ Tables below enumerate all the options and their defaults set by RTX Remix. Note |rtx.lightConversionEqualityDirectionThreshold|float|0.99|The lower cosine angle threshold between two directions used to determine if two directional lights as the same light when uniquely identifying legacy lights for conversion\.| |rtx.lightConversionEqualityDistanceThreshold|float|0.05|The upper distance threshold between two positions used to determine if two positional lights as the same light when uniquely identifying legacy lights for conversion\.| |rtx.lightConversionSphereLightFixedRadius|float|4|The fixed radius in world units to use for legacy lights converted to sphere lights \(currently point and spot lights will convert to sphere lights\)\. Use caution with large light radii as many legacy lights will be placed close to geometry and intersect it, causing suboptimal light sampling performance or other visual artifacts \(lights clipping through walls, etc\)\.| +|rtx.lights.enableDebugMode|bool|False|Enables light debug visualization\.| |rtx.localtonemap.boostLocalContrast|bool|False|Boosts contrast on local features\.| |rtx.localtonemap.displayMip|int|0|Bottom mip level of tone map pyramid\.| |rtx.localtonemap.exposure|float|0.75|Exposure factor applied on average exposure\.| @@ -535,7 +541,8 @@ Tables below enumerate all the options and their defaults set by RTX Remix. 
Note |rtx.stochasticAlphaBlendShareNeighbors|bool|True|Share result with other pixels to accelerate search\.| |rtx.stochasticAlphaBlendUseNeighborSearch|bool|True|Get radiance from neighbor opaque pixels\.| |rtx.stochasticAlphaBlendUseRadianceVolume|bool|True|Get radiance from radiance volume\.| -|rtx.subsurface.enableThinOpaque|bool|True|Enable thin opaque material\. The materials with th in opaque properties will fallback to normal opaque material\.| +|rtx.subsurface.enableThinOpaque|bool|True|Enable thin opaque material\. The materials with thin opaque properties will fall back to normal opaque material\.| +|rtx.subsurface.enableTextureMaps|bool|True|Enable texture maps such as thickness map or scattering albedo map\. The corresponding subsurface properties will fall back to per\-material constants if this is disabled\.| |rtx.subsurface.surfaceThicknessScale|float|1|Scalar of the subsurface thickness\.| |rtx.taauPreset|int|1|Adjusts TAA\-U scaling factor, trades quality for performance\.| |rtx.temporalAA.colorClampingFactor|float|1|A scalar factor to apply to the standard deviation of the neighborhood of pixels in the color signal used for clamping\. Should be in the range 0\-infinity\.
This value essentially represents how many standard deviations of tolerance from the current frame's colors around each pixel pixel the temporally accumulated color signal may have\.
Higher values will cause more ghosting whereas lower values may reduce ghosting but will impact image quality \(less ability to upscale effectively\) and reduce stability \(more jittering\)\.| diff --git a/documentation/FoliageSystem.md b/documentation/FoliageSystem.md index 3a67c7a0f..223c41504 100644 --- a/documentation/FoliageSystem.md +++ b/documentation/FoliageSystem.md @@ -10,6 +10,10 @@ User Instructions: b. [Subsurface Measurement Distance]: The thickness of the foliage surface in the range [0, 16]. Incidence radiance is attenuated proportionally to the thickness. The unit is [mm]. c. [Subsurface Single Scattering Albedo]: The coefficient determines how much energy is scattered when trace through subsurface materials. d. [Subsurface Volumetric Anisotropy]: The anisotropy of the scattering phase function (-1 being backscattering, 0 being isotropic, 1 being forward scattering). +4. Optional: Setup Foliage Texture Maps: + a. [Subsurface Transmittance Color Map] + b. [Subsurface Measurement Distance Map] + c. [Subsurface Single Scattering Albedo Map] Real-time debug interface: 1. Debugging View: Enable [Is Thin Opaque](../RtxOptions.md) or [rtx.debugView.debugViewIdx = 800] to verify if thin opaque materials are correctly set up. diff --git a/documentation/OpacityMicromap.md b/documentation/OpacityMicromap.md new file mode 100644 index 000000000..e64534aa1 --- /dev/null +++ b/documentation/OpacityMicromap.md @@ -0,0 +1,14 @@ +# Opacity Micromap + +Opacity Micromap (OMM) optimizes ray tracing by skipping transparent surface ray interactions due to opacity texture cutouts. OMMs encode opacity at a microtriangle level in a triangle and there can be thousands of microtriangles generated per a triangle. This is similar to many texels from a texture being applied to a triangle. OMMs need to be pre-built for a triangle and are encoded during a bottom level acceleration structure (BLAS) construction for a GPU driver to use during ray tracing. 
Any transparent parts of the surface due to opacity texture and/or texture and vertex blending will be skipped by the driver. Without OMMs such hits would be returned as opaque hits to the ray tracing shader code and depend on the ray tracer to resolve the opacity and skip the hit, returning the execution to continue ray tracing to the driver. OMMs avoid this unnecessary roundtrip altogether and, thus, provide a performance speedup. In cases where there are a lot of triangles with opacity cutouts and, especially, with multiple layers of such geometry (e.g. for fences, particles, foliage, etc.) the performance uplift in Remix can be in the order of 10% or higher. While older NVIDIA architectures benefit from this feature as well, the speedup due to OMMs is higher on Ada (40**) GPUs. + +OMMs are enabled by default in Remix and generated at runtime. This can be controlled via GUI or rtx.conf parameter [rtx.opacityMicromap.enable](../RtxOptions.md). OMMs are generated at runtime at a throttled pace so as to limit the amount of GPU resources being used for them. It can take several seconds to generate OMMs for a scene. OMMs require enough free VRAM to get generated and, thus, their generation is dependent on enough VRAM being available. Disabling OMMs also releases the generated OMMs. Therefore, if you want to double check performance impact ON/OFF, it is advised to simply disable OMMs being bound via [rtx.opacityMicromap.enableBinding](../RtxOptions.md) so that they are not released in the process. + +To take the best advantage of OMMs, author your assets considering the following: +- Make transparent regions of assets resolve to opacity of 0 (i.e. fully transparent). +- Reuse UVs across triangles so that the same OMM can apply to multiple triangles in a geometry. Repeating textures are your friend. This reduces OMM build times and runtime memory cost. +- Avoid thin triangles. 
+- Avoid generating triangles with very high frequency of opaque and transparent regions changing back and forth across a triangle (e.g. a mesh in a high density fence, but represented with only a few triangles). In such cases, tessellate your geometry further to lower the opacity frequency within a triangle. OMMs in Remix are generated roughly at up to 256x256 microtriangles per triangle. It is important that a good portion of such microtriangles correspond to fully transparent regions. If the underlying opacity for a triangle has a higher frequency of opaque and transparent features than the 256x256 and microtriangles cover areas that are both opaque and transparent, such microtriangles will be marked as non-transparent. In that case, there will be no speedup since the geometry is essentially treated as opaque by the driver. Therefore, make sure your geometry is tessellated enough such that opaque/transparent regions appear at a frequency lower than that and, preferably, end up with multiple neighboring microtriangles falling into the same opaque/transparent category. + +Caveats: +- Similar to other instance tracking cases in Remix, the runtime hashes OMM signatures and uses the hashes to differentiate among the OMMs. However, although it is rarely the case, should the hashes of two OMMs collide, they will be treated as equal. This will result in an incorrect OMM being bound for a triangle and, thus, having wrong opacity cutouts being applied. In that case, a triangle can appear transparent in areas where it should be opaque. If that happens, tag such textures as [rtx.opacityMicromapIgnoreTextures](../RtxOptions.md) to avoid generating OMMs for them and report this as an issue on GitHub. 
\ No newline at end of file diff --git a/dxvk.conf b/dxvk.conf index 3fcdd67e7..f50dfd991 100644 --- a/dxvk.conf +++ b/dxvk.conf @@ -365,7 +365,7 @@ d3d9.adapterOverride = 0 # Supported values: # - True/False -# d3d9.floatEmulation = +# d3d9.floatEmulation = Auto # Enable dialog box mode diff --git a/meson.build b/meson.build index 42c0efd28..ef4ec6f6c 100644 --- a/meson.build +++ b/meson.build @@ -155,6 +155,9 @@ if not dxvk_is_msvc endif endif +remix_api_include_path = include_directories('./public/include') +add_global_arguments('/DREMIX_LIBRARY_EXPORTS=1', language : 'cpp') + dxvk_include_dirs = [ './include', './include/vulkan/include' @@ -391,6 +394,10 @@ boost_include_path = include_directories('external/nv_usd/include/boost-1_78/') usd_include_paths = [nv_usd_include_path, boost_include_path] +vk_include_path = include_directories('./include/vulkan/include') + +lssusd_include_paths = [nv_usd_include_path, boost_include_path, vk_include_path] + nvapi_include_paths = include_directories(join_paths(meson.global_source_root(), 'external/nvapi')) nvapi_lib_path = join_paths(meson.global_source_root(), 'external/nvapi/amd64') nvapi_lib = dxvk_compiler.find_library('nvapi64', dirs : nvapi_lib_path) diff --git a/packman-external.xml b/packman-external.xml index 197cac5c6..4e5106a12 100644 --- a/packman-external.xml +++ b/packman-external.xml @@ -20,7 +20,7 @@ - + @@ -41,6 +41,6 @@ - + diff --git a/public/include/remix/remix.h b/public/include/remix/remix.h new file mode 100644 index 000000000..0c0458f05 --- /dev/null +++ b/public/include/remix/remix.h @@ -0,0 +1,734 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once + +#include "remix_c.h" + +#include +#include +#include +#include + +#ifndef REMIXAPI_ASSERT +#include +#define REMIXAPI_ASSERT(mustBeTrue) assert(mustBeTrue) +#endif + +namespace remix { + namespace detail { + template< typename T > + struct Result { + Result(T&& value) + : m_value { std::forward< T >(value) } + , m_status { REMIXAPI_ERROR_CODE_SUCCESS } { + } + + Result(remixapi_ErrorCode error) + : m_value {} + , m_status { error } { + REMIXAPI_ASSERT(error != REMIXAPI_ERROR_CODE_SUCCESS); + } + + Result(const Result&) = delete; + Result(Result&&) = delete; + Result& operator=(const Result&) = delete; + Result& operator=(Result&&) = delete; + + operator bool() const { + return m_status == REMIXAPI_ERROR_CODE_SUCCESS; + } + + T& value() { + REMIXAPI_ASSERT(bool { *this }); + return m_value; + } + + const T& value() const { + REMIXAPI_ASSERT(bool { *this }); + return m_value; + } + + remixapi_ErrorCode status() const { + return m_status; + } + + const T& operator*() const { + return value(); + } + T& operator*() { + return value(); + } + const T* operator->() const { + return &value(); + } + T* operator->() { + return &value(); + } + + private: + const remixapi_ErrorCode m_status; + T m_value; + }; + + template<> + struct Result< void > { + Result(remixapi_ErrorCode error) : m_status { error } { } + + Result(const Result&) = delete; + Result(Result&&) = delete; + Result& operator=(const Result&) = delete; + Result& operator=(Result&&) = delete; + + operator bool() const { + return m_status == REMIXAPI_ERROR_CODE_SUCCESS; + } + + remixapi_ErrorCode status() const { + return m_status; + } + + private: + const remixapi_ErrorCode m_status; + }; + + template< typename T > + void assign_if(remixapi_Bool& hasvalue, T& value, const std::optional< T >& src) { + if (src) { + hasvalue = true; + value = src.value(); + } else { + hasvalue = false; + } + } + } + + template< typename T > + using Result = detail::Result< T >; + + using StructType = 
remixapi_StructType; + using Float2D = remixapi_Float2D; + using Float3D = remixapi_Float3D; + using Float4D = remixapi_Float4D; + using Transform = remixapi_Transform; + + + + struct MaterialInfo; + struct MeshInfo; + struct CameraInfo; + struct InstanceInfo; + struct LightInfo; + namespace detail { + struct dxvk_ExternalSwapchain; + struct dxvk_VkImage; + } + + struct Interface { + HMODULE m_RemixDLL { nullptr }; + remixapi_Interface m_CInterface {}; + + // Functions + Result< void > Shutdown(); + Result< remixapi_MaterialHandle > CreateMaterial(const MaterialInfo& info); + Result< void > DestroyMaterial(remixapi_MaterialHandle handle); + Result< remixapi_MeshHandle > CreateMesh(const MeshInfo& info); + Result< void > DestroyMesh(remixapi_MeshHandle handle); + Result< void > SetupCamera(const CameraInfo& info); + Result< void > DrawInstance(const InstanceInfo& info); + Result< remixapi_LightHandle > CreateLight(const LightInfo& info); + Result< void > DestroyLight(remixapi_LightHandle handle); + Result< void > DrawLightInstance(remixapi_LightHandle handle); + Result< void > SetConfigVariable(const char* key, const char* value); + + // DXVK interoperability + Result< IDirect3D9Ex* > dxvk_CreateD3D9(bool disableSrgbConversionForOutput); + Result< void > dxvk_RegisterD3D9Device(IDirect3DDevice9Ex* d3d9Device); + Result< detail::dxvk_ExternalSwapchain > dxvk_GetExternalSwapchain(); + Result< detail::dxvk_VkImage > dxvk_GetVkImage(IDirect3DSurface9* source); + Result< void > dxvk_CopyRenderingOutput(IDirect3DSurface9* destination, + remixapi_dxvk_CopyRenderingOutputType type); + }; + + namespace lib { + // Helper function to load a .dll of Remix, and initialize it. + // pRemixD3D9DllPath is a path to .dll file, e.g. 
"C:\dxvk-remix-nv\public\bin\d3d9.dll" + // TODO: wchar_t / char + [[nodiscard]] inline Result< Interface > loadRemixDllAndInitialize(const char* pRemixD3D9DllPath) { + { + auto lastSlash = std::string_view { pRemixD3D9DllPath }.find_last_of("/\\"); + if (lastSlash != std::string::npos) { + SetDllDirectoryA( + std::string { std::string_view{ pRemixD3D9DllPath }.substr(0, lastSlash) } + .c_str()); + } + } + + if (HMODULE remixDll = LoadLibraryA(pRemixD3D9DllPath)) { + auto pfn_InitializeLibrary = reinterpret_cast( + GetProcAddress(remixDll, "remixapi_InitializeLibrary")); + + if (pfn_InitializeLibrary) { + remixapi_InitializeLibraryInfo info = {}; + { + info.sType = REMIXAPI_STRUCT_TYPE_INITIALIZE_LIBRARY_INFO; + info.version = REMIXAPI_VERSION_MAKE(REMIXAPI_VERSION_MAJOR, + REMIXAPI_VERSION_MINOR, + REMIXAPI_VERSION_PATCH); + } + + remixapi_Interface interfaceInC = {}; + remixapi_ErrorCode status = pfn_InitializeLibrary(&info, &interfaceInC); + + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + + remix::Interface interfaceInCpp = {}; + { + interfaceInCpp.m_RemixDLL = remixDll; + interfaceInCpp.m_CInterface = interfaceInC; + } + return interfaceInCpp; + } + + return REMIXAPI_ERROR_CODE_GET_PROC_ADDRESS_FAILURE; + } + + return REMIXAPI_ERROR_CODE_LOAD_LIBRARY_FAILURE; + } + + inline void shutdownAndUnloadRemixDll(Interface& interfaceInCpp) { + interfaceInCpp.Shutdown(); + if (interfaceInCpp.m_RemixDLL) { + FreeLibrary(interfaceInCpp.m_RemixDLL); + } + interfaceInCpp = {}; + } + } + + + + inline Result< void > Interface::Shutdown() { + if (m_CInterface.Shutdown) { + return m_CInterface.Shutdown(); + } + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + inline Result< void > Interface::SetConfigVariable(const char* key, const char* value) { + return m_CInterface.SetConfigVariable(key, value); + } + + + + struct MaterialInfoOpaqueEXT : remixapi_MaterialInfoOpaqueEXT { + MaterialInfoOpaqueEXT() { + sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_EXT; + 
pNext = nullptr; + roughnessTexture = {}; + metallicTexture = {}; + anisotropy = 0.0f; + albedoConstant = { 1.0f, 1.0f, 1.0f }; + opacityConstant = 1.0f; + roughnessConstant = 1.0f; + metallicConstant = 0.0f; + thinFilmThickness_hasvalue = false; + thinFilmThickness_value = 200.f; + alphaIsThinFilmThickness = false; + heightTexture = {}; + heightTextureStrength = 0.0f; + useDrawCallAlphaState = true; + blendType_hasvalue = false; + blendType_value = 0; + invertedBlend = false; + alphaTestType = 7; + alphaReferenceValue = 0; + static_assert(sizeof remixapi_MaterialInfoOpaqueEXT == 112); + } + + void set_roughnessTexture(std::filesystem::path v) { + cpp_roughnessTexture = std::move(v); + roughnessTexture = cpp_roughnessTexture.c_str(); + } + void set_metallicTexture(std::filesystem::path v) { + cpp_metallicTexture = std::move(v); + metallicTexture = cpp_metallicTexture.c_str(); + } + void set_heightTexture(std::filesystem::path v) { + cpp_heightTexture = std::move(v); + heightTexture = cpp_heightTexture.c_str(); + } + void set_thinFilmThickness(const std::optional< float >& v) { + detail::assign_if(thinFilmThickness_hasvalue, thinFilmThickness_value, v); + } + void set_blendType(const std::optional< int >& v) { + detail::assign_if(blendType_hasvalue, blendType_value, v); + } + + private: + std::filesystem::path cpp_roughnessTexture {}; + std::filesystem::path cpp_metallicTexture {}; + std::filesystem::path cpp_heightTexture {}; + }; + + // Can be linked to MaterialInfoOpaqueEXT + struct MaterialInfoOpaqueSubsurfaceEXT : remixapi_MaterialInfoOpaqueSubsurfaceEXT { + MaterialInfoOpaqueSubsurfaceEXT() { + sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_SUBSURFACE_EXT; + pNext = nullptr; + subsurfaceTransmittanceTexture = {}; + subsurfaceThicknessTexture = {}; + subsurfaceSingleScatteringAlbedoTexture = {}; + subsurfaceTransmittanceColor = { 0.5f, 0.5f, 0.5f }; + subsurfaceMeasurementDistance = 0.0f; + subsurfaceSingleScatteringAlbedo = { 0.5f, 0.5f, 0.5f };; + 
subsurfaceVolumetricAnisotropy = 0.0f; + static_assert(sizeof remixapi_MaterialInfoOpaqueSubsurfaceEXT == 72); + } + + void set_subsurfaceTransmittanceTexture(std::filesystem::path v) { + cpp_subsurfaceTransmittanceTexture = std::move(v); + subsurfaceTransmittanceTexture = cpp_subsurfaceTransmittanceTexture.c_str(); + } + void set_subsurfaceThicknessTexture(std::filesystem::path v) { + cpp_subsurfaceThicknessTexture = std::move(v); + subsurfaceThicknessTexture = cpp_subsurfaceThicknessTexture.c_str(); + } + void set_subsurfaceSingleScatteringAlbedoTexture(std::filesystem::path v) { + cpp_subsurfaceSingleScatteringAlbedoTexture = std::move(v); + subsurfaceSingleScatteringAlbedoTexture = cpp_subsurfaceSingleScatteringAlbedoTexture.c_str(); + } + + private: + std::filesystem::path cpp_subsurfaceTransmittanceTexture {}; + std::filesystem::path cpp_subsurfaceThicknessTexture {}; + std::filesystem::path cpp_subsurfaceSingleScatteringAlbedoTexture {}; + }; + + struct MaterialInfoTranslucentEXT : remixapi_MaterialInfoTranslucentEXT { + MaterialInfoTranslucentEXT() { + sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_TRANSLUCENT_EXT; + pNext = nullptr; + transmittanceTexture = {}; + refractiveIndex = 1.3f; + transmittanceColor = { 0.97f, 0.97f, 0.97f }; + transmittanceMeasurementDistance = 1.0f; + thinWallThickness_hasvalue = false; + thinWallThickness_value = 0.001f; + useDiffuseLayer = false; + static_assert(sizeof remixapi_MaterialInfoTranslucentEXT == 56); + } + + void set_transmittanceTexture(std::filesystem::path v) { + cpp_transmittanceTexture = std::move(v); + transmittanceTexture = cpp_transmittanceTexture.c_str(); + } + void set_thinWallThickness(const std::optional< float >& v) { + detail::assign_if(thinWallThickness_hasvalue, thinWallThickness_value, v); + } + + private: + std::filesystem::path cpp_transmittanceTexture {}; + }; + + struct MaterialInfoPortalEXT : remixapi_MaterialInfoPortalEXT { + MaterialInfoPortalEXT() { + sType = 
REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_PORTAL_EXT; + pNext = nullptr; + rayPortalIndex = 0; + rotationSpeed = 0.0f; + static_assert(sizeof remixapi_MaterialInfoPortalEXT == 24); + } + }; + + struct MaterialInfo : remixapi_MaterialInfo { + MaterialInfo() { + sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO; + pNext = nullptr; + hash = 0; + albedoTexture = {}; + normalTexture = {}; + tangentTexture = {}; + emissiveTexture = {}; + emissiveIntensity = 0.0f; + emissiveColorConstant = { 0.0f, 0.0f, 0.0f }; + spriteSheetRow = 1; + spriteSheetCol = 1; + spriteSheetFps = 0; + filterMode = 1; // Linear + wrapModeU = 0; // Repeat + wrapModeV = 0; // Repeat + static_assert(sizeof remixapi_MaterialInfo == 80); + } + + void set_albedoTexture(std::filesystem::path v) { + cpp_albedoTexture = std::move(v); + albedoTexture = cpp_albedoTexture.c_str(); + } + void set_normalTexture(std::filesystem::path v) { + cpp_normalTexture = std::move(v); + normalTexture = cpp_normalTexture.c_str(); + } + void set_tangentTexture(std::filesystem::path v) { + cpp_tangentTexture = std::move(v); + tangentTexture = cpp_tangentTexture.c_str(); + } + void set_emissiveTexture(std::filesystem::path v) { + cpp_emissiveTexture = std::move(v); + emissiveTexture = cpp_emissiveTexture.c_str(); + } + + private: + std::filesystem::path cpp_albedoTexture {}; + std::filesystem::path cpp_normalTexture {}; + std::filesystem::path cpp_tangentTexture {}; + std::filesystem::path cpp_emissiveTexture {}; + }; + + inline Result< remixapi_MaterialHandle > Interface::CreateMaterial(const MaterialInfo& info) { + remixapi_MaterialHandle handle = nullptr; + remixapi_ErrorCode status = m_CInterface.CreateMaterial(&info, &handle); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return handle; + } + + inline Result< void > Interface::DestroyMaterial(remixapi_MaterialHandle handle) { + return m_CInterface.DestroyMaterial(handle); + } + + + + struct MeshInfo : remixapi_MeshInfo { + MeshInfo() { + sType = 
REMIXAPI_STRUCT_TYPE_MESH_INFO; + pNext = nullptr; + hash = 0; + surfaces_values = {}; + surfaces_count = 0; + static_assert(sizeof remixapi_MeshInfo == 40); + } + }; + + inline Result< remixapi_MeshHandle > Interface::CreateMesh(const MeshInfo& info) { + remixapi_MeshHandle handle = nullptr; + remixapi_ErrorCode status = m_CInterface.CreateMesh(&info, &handle); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return handle; + } + + inline Result< void > Interface::DestroyMesh(remixapi_MeshHandle handle) { + return m_CInterface.DestroyMesh(handle); + } + + + + using CameraType = remixapi_CameraType; + + // Ignores view / projection matrices from CameraInfo + // by recalculating them from the given arguments in this struct. + struct CameraInfoParameterizedEXT : remixapi_CameraInfoParameterizedEXT { + CameraInfoParameterizedEXT() { + sType = { REMIXAPI_STRUCT_TYPE_CAMERA_INFO_PARAMETERIZED_EXT }; + pNext = { nullptr }; + position = { 0, 0, 0 }; + forward = { 0, 0, 1 }; + up = { 0, 1, 0 }; + right = { 1, 0, 0 }; + fovYInDegrees = 75.f; + aspect = 16.f / 9.f; + nearPlane = 0.1f; + farPlane = 1000.f; + static_assert(sizeof remixapi_CameraInfoParameterizedEXT == 80); + } + }; + + struct CameraInfo : remixapi_CameraInfo { + CameraInfo() { + sType = { REMIXAPI_STRUCT_TYPE_CAMERA_INFO }; + pNext = { nullptr }; + type = { REMIXAPI_CAMERA_TYPE_WORLD }; + view[0][0] = view[1][1] = view[2][2] = view[3][3] = 1.f; + projection[0][0] = projection[1][1] = projection[2][2] = projection[3][3] = 1.f; + static_assert(sizeof remixapi_CameraInfo == 152); + } + }; + + inline Result< void > Interface::SetupCamera(const CameraInfo& info) { + return m_CInterface.SetupCamera(&info); + } + + + + struct InstanceInfoBoneTransformsEXT : remixapi_InstanceInfoBoneTransformsEXT { + InstanceInfoBoneTransformsEXT() { + sType = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BONE_TRANSFORMS_EXT; + pNext = nullptr; + boneTransforms_count = 0; + boneTransforms_values = {}; + static_assert(sizeof 
remixapi_InstanceInfoBoneTransformsEXT == 32); + } + }; + + struct InstanceInfoBlendEXT : remixapi_InstanceInfoBlendEXT { + InstanceInfoBlendEXT() { + sType = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BLEND_EXT; + pNext = nullptr; + alphaTestEnabled = false; + alphaTestReferenceValue = 0; + alphaTestCompareOp = 7 /* VK_COMPARE_OP_ALWAYS */; + alphaBlendEnabled = false; + srcColorBlendFactor = 1 /* VK_BLEND_FACTOR_ONE */; + dstColorBlendFactor = 0 /* VK_BLEND_FACTOR_ZERO */; + colorBlendOp = 0 /* VK_BLEND_OP_ADD */; + textureColorArg1Source = 1 /* RtTextureArgSource::Texture */; + textureColorArg2Source = 0 /* RtTextureArgSource::None */; + textureColorOperation = 3 /* DxvkRtTextureOperation::Modulate */; + textureAlphaArg1Source = 1 /* RtTextureArgSource::Texture */; + textureAlphaArg2Source = 0 /* RtTextureArgSource::None */; + textureAlphaOperation = 1 /* DxvkRtTextureOperation::SelectArg1 */; + tFactor = 0XFFFFFFFF; + isTextureFactorBlend = false; + static_assert(sizeof remixapi_InstanceInfoBlendEXT == 80); + } + }; + + struct InstanceInfoObjectPickingEXT : remixapi_InstanceInfoObjectPickingEXT { + InstanceInfoObjectPickingEXT() { + sType = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_OBJECT_PICKING_EXT; + pNext = nullptr; + objectPickingValue = 0; + } + }; + + using InstanceCategoryBit = remixapi_InstanceCategoryBit; + using InstanceCategoryFlags = remixapi_InstanceCategoryFlags; + + struct InstanceInfo : remixapi_InstanceInfo { + InstanceInfo() { + sType = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO; + pNext = nullptr; + categoryFlags = 0; + mesh = 0; + transform = {}; + doubleSided = false; + static_assert(sizeof remixapi_InstanceInfo == 88); + } + }; + + inline Result< void > Interface::DrawInstance(const InstanceInfo& info) { + return m_CInterface.DrawInstance(&info); + } + + + + namespace detail { + inline remixapi_LightInfoLightShaping defaultLightShaping() { + remixapi_LightInfoLightShaping shaping {}; + { + shaping.primaryAxis = { 0.0f, 0.0f, 1.0f }; + shaping.coneAngleDegrees = 
180.0f; + shaping.coneSoftness = 0.0f; + shaping.focusExponent = 0.0f; + } + return shaping; + }; + } + + using LightInfoLightShaping = remixapi_LightInfoLightShaping; + + struct LightInfoSphereEXT : remixapi_LightInfoSphereEXT { + LightInfoSphereEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_SPHERE_EXT; + pNext = nullptr; + position = { 0.0f, 0.0f, 0.0f }; + radius = 0.05f; + shaping_hasvalue = false; + shaping_value = detail::defaultLightShaping(); + static_assert(sizeof remixapi_LightInfoSphereEXT == 64); + } + + void set_shaping(const std::optional< remixapi_LightInfoLightShaping >& v) { + detail::assign_if(shaping_hasvalue, shaping_value, v); + } + }; + + struct LightInfoRectEXT : remixapi_LightInfoRectEXT { + LightInfoRectEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_RECT_EXT; + pNext = nullptr; + position = { 0.0f, 0.0f, 0.0f }; + xAxis = { 1.0f, 0.0f, 0.0f }; + xSize = 1.0f; + yAxis = { 0.0f, 1.0f, 0.0f }; + ySize = 1.0f; + shaping_hasvalue = false; + shaping_value = detail::defaultLightShaping(); + static_assert(sizeof remixapi_LightInfoRectEXT == 88); + } + + void set_shaping(const std::optional< remixapi_LightInfoLightShaping >& v) { + detail::assign_if(shaping_hasvalue, shaping_value, v); + } + }; + + struct LightInfoDiskEXT : remixapi_LightInfoDiskEXT { + LightInfoDiskEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISK_EXT; + pNext = nullptr; + position = { 0.0f, 0.0f, 0.0f }; + xAxis = { 1.0f, 0.0f, 0.0f }; + xRadius = 1.0f; + yAxis = { 0.0f, 1.0f, 0.0f }; + yRadius = 1.0f; + shaping_hasvalue = false; + shaping_value = detail::defaultLightShaping(); + static_assert(sizeof remixapi_LightInfoDiskEXT == 88); + } + + void set_shaping(const std::optional< remixapi_LightInfoLightShaping >& v) { + detail::assign_if(shaping_hasvalue, shaping_value, v); + } + }; + + struct LightInfoCylinderEXT : remixapi_LightInfoCylinderEXT { + LightInfoCylinderEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_CYLINDER_EXT; + pNext = nullptr; + position = { 0.0f, 
0.0f, 0.0f }; + radius = 1.0f; + axis = { 1.0f, 0.0f, 0.0f }; + axisLength = 1.0f; + static_assert(sizeof remixapi_LightInfoCylinderEXT == 48); + } + }; + + struct LightInfoDistantEXT : remixapi_LightInfoDistantEXT { + LightInfoDistantEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISTANT_EXT; + pNext = nullptr; + direction = { 0.0f, -1.0f, 0.0f }; + angularDiameterDegrees = 0.5f; + static_assert(sizeof remixapi_LightInfoDistantEXT == 32); + } + }; + + struct LightInfoDomeEXT : remixapi_LightInfoDomeEXT { + LightInfoDomeEXT() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DOME_EXT; + pNext = nullptr; + transform = {}; + colorTexture = {}; + static_assert(sizeof remixapi_LightInfoDomeEXT == 72); + } + + void set_colorTexture(std::filesystem::path v) { + cpp_colorTexture = std::move(v); + colorTexture = cpp_colorTexture.c_str(); + } + + private: + std::filesystem::path cpp_colorTexture {}; + }; + + struct LightInfo : remixapi_LightInfo { + LightInfo() { + sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO; + pNext = nullptr; + hash = 0; + radiance = { 1.0f, 1.0f, 1.0f }; + static_assert(sizeof remixapi_LightInfo == 40); + } + }; + + inline Result< remixapi_LightHandle > Interface::CreateLight(const LightInfo& info) { + remixapi_LightHandle handle = nullptr; + remixapi_ErrorCode status = m_CInterface.CreateLight(&info, &handle); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return handle; + } + + inline Result< void > Interface::DestroyLight(remixapi_LightHandle handle) { + return m_CInterface.DestroyLight(handle); + } + + inline Result< void > Interface::DrawLightInstance(remixapi_LightHandle handle) { + return m_CInterface.DrawLightInstance(handle); + } + + namespace detail { + struct dxvk_ExternalSwapchain { + uint64_t vkImage; + uint64_t vkSemaphoreRenderingDone; + uint64_t vkSemaphoreResumeSemaphore; + }; + + struct dxvk_VkImage { + uint64_t vkImage; + }; + } + + inline Result< IDirect3D9Ex* > Interface::dxvk_CreateD3D9(bool 
disableSrgbConversionForOutput) { + IDirect3D9Ex* d3d9 { nullptr }; + remixapi_ErrorCode status = m_CInterface.dxvk_CreateD3D9(disableSrgbConversionForOutput, &d3d9); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return d3d9; + } + + inline Result< void > Interface::dxvk_RegisterD3D9Device(IDirect3DDevice9Ex* d3d9Device) { + return m_CInterface.dxvk_RegisterD3D9Device(d3d9Device); + } + + inline Result< detail::dxvk_ExternalSwapchain > Interface::dxvk_GetExternalSwapchain() { + detail::dxvk_ExternalSwapchain externalSwapchain {}; + remixapi_ErrorCode status = m_CInterface.dxvk_GetExternalSwapchain( + &externalSwapchain.vkImage, + &externalSwapchain.vkSemaphoreRenderingDone, + &externalSwapchain.vkSemaphoreResumeSemaphore); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return externalSwapchain; + } + + inline Result< detail::dxvk_VkImage > Interface::dxvk_GetVkImage(IDirect3DSurface9* source) { + detail::dxvk_VkImage externalImage {}; + remixapi_ErrorCode status = m_CInterface.dxvk_GetVkImage(source, &externalImage.vkImage); + if (status != REMIXAPI_ERROR_CODE_SUCCESS) { + return status; + } + return externalImage; + } + + inline Result< void > Interface::dxvk_CopyRenderingOutput( + IDirect3DSurface9* destination, remixapi_dxvk_CopyRenderingOutputType type) { + return m_CInterface.dxvk_CopyRenderingOutput(destination, type); + } +} diff --git a/public/include/remix/remix_c.h b/public/include/remix/remix_c.h new file mode 100644 index 000000000..f5b586203 --- /dev/null +++ b/public/include/remix/remix_c.h @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef REMIX_C_H_ +#define REMIX_C_H_ + +#include +#include + + +// __stdcall convention +#define REMIXAPI_CALL __stdcall +#define REMIXAPI_PTR REMIXAPI_CALL + +#ifdef REMIX_LIBRARY_EXPORTS + #define REMIXAPI __declspec(dllexport) +#else + #define REMIXAPI __declspec(dllimport) +#endif // REMIX_LIBRARY_EXPORTS + + +#define REMIXAPI_VERSION_MAKE(major, minor, patch) ( \ + (((uint64_t)(major)) << 48) | \ + (((uint64_t)(minor)) << 16) | \ + (((uint64_t)(patch)) ) ) +#define REMIXAPI_VERSION_GET_MAJOR(version) (((uint64_t)(version) >> 48) & (uint64_t)0xFFFF) +#define REMIXAPI_VERSION_GET_MINOR(version) (((uint64_t)(version) >> 16) & (uint64_t)0xFFFFFFFF) +#define REMIXAPI_VERSION_GET_PATCH(version) (((uint64_t)(version) ) & (uint64_t)0xFFFF) + +#define REMIXAPI_VERSION_MAJOR 0 +#define REMIXAPI_VERSION_MINOR 2 +#define REMIXAPI_VERSION_PATCH 0 + + +// External +typedef struct IDirect3D9Ex IDirect3D9Ex; +typedef struct IDirect3DDevice9Ex IDirect3DDevice9Ex; +typedef struct IDirect3DSurface9 IDirect3DSurface9; + + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + typedef enum remixapi_StructType { + REMIXAPI_STRUCT_TYPE_NONE = 0, + REMIXAPI_STRUCT_TYPE_INITIALIZE_LIBRARY_INFO = 1, + REMIXAPI_STRUCT_TYPE_MATERIAL_INFO = 2, + REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_PORTAL_EXT = 3, + REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_TRANSLUCENT_EXT = 4, + REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_EXT = 5, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO = 6, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISTANT_EXT = 7, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_CYLINDER_EXT = 8, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISK_EXT = 9, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_RECT_EXT = 10, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_SPHERE_EXT = 11, + REMIXAPI_STRUCT_TYPE_MESH_INFO = 12, + REMIXAPI_STRUCT_TYPE_INSTANCE_INFO = 13, + REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BONE_TRANSFORMS_EXT = 14, + REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BLEND_EXT = 15, + REMIXAPI_STRUCT_TYPE_CAMERA_INFO = 16, + 
REMIXAPI_STRUCT_TYPE_CAMERA_INFO_PARAMETERIZED_EXT = 17, + REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_SUBSURFACE_EXT = 18, + REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_OBJECT_PICKING_EXT = 19, + REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DOME_EXT = 20, + } remixapi_StructType; + + typedef enum remixapi_ErrorCode { + REMIXAPI_ERROR_CODE_SUCCESS = 0, + REMIXAPI_ERROR_CODE_GENERAL_FAILURE = 1, + // WinAPI's LoadLibrary has failed + REMIXAPI_ERROR_CODE_LOAD_LIBRARY_FAILURE = 2, + REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS = 3, + // Couldn't find 'remixInitialize' function in the .dll + REMIXAPI_ERROR_CODE_GET_PROC_ADDRESS_FAILURE = 4, + // CreateD3D9 / RegisterD3D9Device can be called only once + REMIXAPI_ERROR_CODE_ALREADY_EXISTS = 5, + // RegisterD3D9Device requires the device that was created with IDirect3DDevice9Ex, returned by CreateD3D9 + REMIXAPI_ERROR_CODE_REGISTERING_NON_REMIX_D3D9_DEVICE = 6, + // RegisterD3D9Device was not called + REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED = 7, + REMIXAPI_ERROR_CODE_INCOMPATIBLE_VERSION = 8, + } remixapi_ErrorCode; + + typedef uint32_t remixapi_Bool; + + // Two-component vector: exactly x and y (8 bytes), unlike remixapi_Float3D + typedef struct remixapi_Float2D { + float x; + float y; + } remixapi_Float2D; + + typedef struct remixapi_Float3D { + float x; + float y; + float z; + } remixapi_Float3D; + + typedef struct remixapi_Float4D { + float x; + float y; + float z; + float w; + } remixapi_Float4D; + + typedef struct remixapi_Transform { + float matrix[3][4]; + } remixapi_Transform; + + typedef struct remixapi_MaterialHandle_T* remixapi_MaterialHandle; + typedef struct remixapi_MeshHandle_T* remixapi_MeshHandle; + typedef struct remixapi_LightHandle_T* remixapi_LightHandle; + + typedef const wchar_t* remixapi_Path; + + + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_Shutdown)(); + + + + typedef struct remixapi_MaterialInfoOpaqueEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Path roughnessTexture; + remixapi_Path metallicTexture; + float anisotropy; + remixapi_Float3D 
albedoConstant; + float opacityConstant; + float roughnessConstant; + float metallicConstant; + remixapi_Bool thinFilmThickness_hasvalue; + float thinFilmThickness_value; + remixapi_Bool alphaIsThinFilmThickness; + remixapi_Path heightTexture; + float heightTextureStrength; + // If true, InstanceInfoBlendEXT is used as a source for alpha state + remixapi_Bool useDrawCallAlphaState; + remixapi_Bool blendType_hasvalue; + int blendType_value; + remixapi_Bool invertedBlend; + int alphaTestType; + uint8_t alphaReferenceValue; + } remixapi_MaterialInfoOpaqueEXT; + + // Valid only if remixapi_MaterialInfo contains remixapi_MaterialInfoOpaqueEXT in pNext chain + typedef struct remixapi_MaterialInfoOpaqueSubsurfaceEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Path subsurfaceTransmittanceTexture; + remixapi_Path subsurfaceThicknessTexture; + remixapi_Path subsurfaceSingleScatteringAlbedoTexture; + remixapi_Float3D subsurfaceTransmittanceColor; + float subsurfaceMeasurementDistance; + remixapi_Float3D subsurfaceSingleScatteringAlbedo; + float subsurfaceVolumetricAnisotropy; + } remixapi_MaterialInfoOpaqueSubsurfaceEXT; + + typedef struct remixapi_MaterialInfoTranslucentEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Path transmittanceTexture; + float refractiveIndex; + remixapi_Float3D transmittanceColor; + float transmittanceMeasurementDistance; + remixapi_Bool thinWallThickness_hasvalue; + float thinWallThickness_value; + remixapi_Bool useDiffuseLayer; + } remixapi_MaterialInfoTranslucentEXT; + + typedef struct remixapi_MaterialInfoPortalEXT { + remixapi_StructType sType; + void* pNext; + uint8_t rayPortalIndex; + float rotationSpeed; + } remixapi_MaterialInfoPortalEXT; + + typedef struct remixapi_MaterialInfo { + remixapi_StructType sType; + void* pNext; + uint64_t hash; + remixapi_Path albedoTexture; + remixapi_Path normalTexture; + remixapi_Path tangentTexture; + remixapi_Path emissiveTexture; + float emissiveIntensity; + remixapi_Float3D 
emissiveColorConstant; + uint8_t spriteSheetRow; + uint8_t spriteSheetCol; + uint8_t spriteSheetFps; + uint8_t filterMode; + uint8_t wrapModeU; + uint8_t wrapModeV; + } remixapi_MaterialInfo; + + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_CreateMaterial)( + const remixapi_MaterialInfo* info, + remixapi_MaterialHandle* out_handle); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_DestroyMaterial)( + remixapi_MaterialHandle handle); + + typedef struct remixapi_HardcodedVertex { + float position[3]; + float normal[3]; + float texcoord[2]; + uint32_t color; + uint32_t _pad0; + uint32_t _pad1; + uint32_t _pad2; + uint32_t _pad3; + uint32_t _pad4; + uint32_t _pad5; + uint32_t _pad6; + } remixapi_HardcodedVertex; + + typedef struct remixapi_MeshInfoSkinning { + uint32_t bonesPerVertex; + // Each tuple of 'bonesPerVertex' float-s defines a vertex. + // I.e. the size must be (bonesPerVertex * vertexCount). + const float* blendWeights_values; + uint32_t blendWeights_count; + // Each tuple of 'bonesPerVertex' uint32_t-s defines a vertex. + // I.e. the size must be (bonesPerVertex * vertexCount). 
+ const uint32_t* blendIndices_values; + uint32_t blendIndices_count; + } remixapi_MeshInfoSkinning; + + typedef struct remixapi_MeshInfoSurfaceTriangles { + const remixapi_HardcodedVertex* vertices_values; + uint64_t vertices_count; + const uint32_t* indices_values; + uint64_t indices_count; + remixapi_Bool skinning_hasvalue; + remixapi_MeshInfoSkinning skinning_value; + remixapi_MaterialHandle material; + } remixapi_MeshInfoSurfaceTriangles; + + typedef struct remixapi_MeshInfo { + remixapi_StructType sType; + void* pNext; + uint64_t hash; + const remixapi_MeshInfoSurfaceTriangles* surfaces_values; + uint32_t surfaces_count; + } remixapi_MeshInfo; + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_CreateMesh)( + const remixapi_MeshInfo* info, + remixapi_MeshHandle* out_handle); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_DestroyMesh)( + remixapi_MeshHandle handle); + + + + typedef enum remixapi_CameraType { + REMIXAPI_CAMERA_TYPE_WORLD, + REMIXAPI_CAMERA_TYPE_SKY, + REMIXAPI_CAMERA_TYPE_VIEW_MODEL, + } remixapi_CameraType; + + typedef struct remixapi_CameraInfoParameterizedEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D position; + remixapi_Float3D forward; + remixapi_Float3D up; + remixapi_Float3D right; + float fovYInDegrees; + float aspect; + float nearPlane; + float farPlane; + } remixapi_CameraInfoParameterizedEXT; + + typedef struct remixapi_CameraInfo { + remixapi_StructType sType; + void* pNext; + remixapi_CameraType type; + float view[4][4]; + float projection[4][4]; + } remixapi_CameraInfo; + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_SetupCamera)( + const remixapi_CameraInfo* info); + + + +#define REMIXAPI_INSTANCE_INFO_MAX_BONES_COUNT 256 + + typedef struct remixapi_InstanceInfoBoneTransformsEXT { + remixapi_StructType sType; + void* pNext; + const remixapi_Transform* boneTransforms_values; + uint32_t boneTransforms_count; + } remixapi_InstanceInfoBoneTransformsEXT; + + typedef struct 
remixapi_InstanceInfoBlendEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Bool alphaTestEnabled; + uint8_t alphaTestReferenceValue; + uint32_t alphaTestCompareOp; + remixapi_Bool alphaBlendEnabled; + uint32_t srcColorBlendFactor; + uint32_t dstColorBlendFactor; + uint32_t colorBlendOp; + uint32_t textureColorArg1Source; + uint32_t textureColorArg2Source; + uint32_t textureColorOperation; + uint32_t textureAlphaArg1Source; + uint32_t textureAlphaArg2Source; + uint32_t textureAlphaOperation; + uint32_t tFactor; + remixapi_Bool isTextureFactorBlend; + } remixapi_InstanceInfoBlendEXT; + + typedef struct remixapi_InstanceInfoObjectPickingEXT { + remixapi_StructType sType; + void* pNext; + // A value to write into REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_OBJECT_PICKING + uint32_t objectPickingValue; + } remixapi_InstanceInfoObjectPickingEXT; + + typedef enum remixapi_InstanceCategoryBit { + REMIXAPI_INSTANCE_CATEGORY_BIT_WORLD_UI = 1 << 0, + REMIXAPI_INSTANCE_CATEGORY_BIT_WORLD_MATTE = 1 << 1, + REMIXAPI_INSTANCE_CATEGORY_BIT_SKY = 1 << 2, + REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE = 1 << 3, + REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_LIGHTS = 1 << 4, + REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_ANTI_CULLING = 1 << 5, + REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_MOTION_BLUR = 1 << 6, + REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_OPACITY_MICROMAP = 1 << 7, + REMIXAPI_INSTANCE_CATEGORY_BIT_HIDDEN = 1 << 8, + REMIXAPI_INSTANCE_CATEGORY_BIT_PARTICLE = 1 << 9, + REMIXAPI_INSTANCE_CATEGORY_BIT_BEAM = 1 << 10, + REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_STATIC = 1 << 11, + REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_DYNAMIC = 1 << 12, + REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_SINGLE_OFFSET = 1 << 13, + REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_NO_OFFSET = 1 << 14, + REMIXAPI_INSTANCE_CATEGORY_BIT_ALPHA_BLEND_TO_CUTOUT = 1 << 15, + REMIXAPI_INSTANCE_CATEGORY_BIT_TERRAIN = 1 << 16, + REMIXAPI_INSTANCE_CATEGORY_BIT_ANIMATED_WATER = 1 << 17, + REMIXAPI_INSTANCE_CATEGORY_BIT_THIRD_PERSON_PLAYER_MODEL = 1 << 18, 
+ REMIXAPI_INSTANCE_CATEGORY_BIT_THIRD_PERSON_PLAYER_BODY = 1 << 19, + } remixapi_InstanceCategoryBit; + + typedef uint32_t remixapi_InstanceCategoryFlags; + + typedef struct remixapi_InstanceInfo { + remixapi_StructType sType; + void* pNext; + remixapi_InstanceCategoryFlags categoryFlags; + remixapi_MeshHandle mesh; + remixapi_Transform transform; + remixapi_Bool doubleSided; + } remixapi_InstanceInfo; + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_DrawInstance)( + const remixapi_InstanceInfo* info); + + + + typedef struct remixapi_LightInfoLightShaping { + remixapi_Float3D primaryAxis; + float coneAngleDegrees; + float coneSoftness; + float focusExponent; + } remixapi_LightInfoLightShaping; + + typedef struct remixapi_LightInfoSphereEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D position; + float radius; + remixapi_Bool shaping_hasvalue; + remixapi_LightInfoLightShaping shaping_value; + } remixapi_LightInfoSphereEXT; + + typedef struct remixapi_LightInfoRectEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D position; + remixapi_Float3D xAxis; + float xSize; + remixapi_Float3D yAxis; + float ySize; + remixapi_Bool shaping_hasvalue; + remixapi_LightInfoLightShaping shaping_value; + } remixapi_LightInfoRectEXT; + + typedef struct remixapi_LightInfoDiskEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D position; + remixapi_Float3D xAxis; + float xRadius; + remixapi_Float3D yAxis; + float yRadius; + remixapi_Bool shaping_hasvalue; + remixapi_LightInfoLightShaping shaping_value; + } remixapi_LightInfoDiskEXT; + + typedef struct remixapi_LightInfoCylinderEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D position; + float radius; + remixapi_Float3D axis; + float axisLength; + } remixapi_LightInfoCylinderEXT; + + typedef struct remixapi_LightInfoDistantEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Float3D direction; + float angularDiameterDegrees; + } 
remixapi_LightInfoDistantEXT; + + typedef struct remixapi_LightInfoDomeEXT { + remixapi_StructType sType; + void* pNext; + remixapi_Transform transform; + remixapi_Path colorTexture; + } remixapi_LightInfoDomeEXT; + + typedef struct remixapi_LightInfo { + remixapi_StructType sType; + void* pNext; + uint64_t hash; + remixapi_Float3D radiance; + } remixapi_LightInfo; + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_CreateLight)( + const remixapi_LightInfo* info, + remixapi_LightHandle* out_handle); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_DestroyLight)( + remixapi_LightHandle handle); + + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_DrawLightInstance)( + remixapi_LightHandle lightHandle); + + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_SetConfigVariable)( + const char* key, + const char* value); + + + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_dxvk_CreateD3D9)( + remixapi_Bool disableSrgbConversionForOutput, + IDirect3D9Ex** out_pD3D9); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_dxvk_RegisterD3D9Device)( + IDirect3DDevice9Ex* d3d9Device); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_dxvk_GetExternalSwapchain)( + uint64_t* out_vkImage, + uint64_t* out_vkSemaphoreRenderingDone, + uint64_t* out_vkSemaphoreResumeSemaphore); + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_dxvk_GetVkImage)( + IDirect3DSurface9* source, + uint64_t* out_vkImage); + + typedef enum remixapi_dxvk_CopyRenderingOutputType { + REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_FINAL_COLOR = 0, + REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_DEPTH = 1, + REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_NORMALS = 2, + REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_OBJECT_PICKING = 3, + } remixapi_dxvk_CopyRenderingOutputType; + + typedef remixapi_ErrorCode(REMIXAPI_PTR* PFN_remixapi_dxvk_CopyRenderingOutput)( + IDirect3DSurface9* destination, + remixapi_dxvk_CopyRenderingOutputType type); + + + + typedef struct 
remixapi_InitializeLibraryInfo { + remixapi_StructType sType; + void* pNext; + uint64_t version; + } remixapi_InitializeLibraryInfo; + + typedef struct remixapi_Interface { + PFN_remixapi_Shutdown Shutdown; + PFN_remixapi_CreateMaterial CreateMaterial; + PFN_remixapi_DestroyMaterial DestroyMaterial; + PFN_remixapi_CreateMesh CreateMesh; + PFN_remixapi_DestroyMesh DestroyMesh; + PFN_remixapi_SetupCamera SetupCamera; + PFN_remixapi_DrawInstance DrawInstance; + PFN_remixapi_CreateLight CreateLight; + PFN_remixapi_DestroyLight DestroyLight; + PFN_remixapi_DrawLightInstance DrawLightInstance; + PFN_remixapi_SetConfigVariable SetConfigVariable; + // DXVK interoperability + PFN_remixapi_dxvk_CreateD3D9 dxvk_CreateD3D9; + PFN_remixapi_dxvk_RegisterD3D9Device dxvk_RegisterD3D9Device; + PFN_remixapi_dxvk_GetExternalSwapchain dxvk_GetExternalSwapchain; + PFN_remixapi_dxvk_GetVkImage dxvk_GetVkImage; + PFN_remixapi_dxvk_CopyRenderingOutput dxvk_CopyRenderingOutput; + } remixapi_Interface; + + REMIXAPI remixapi_ErrorCode REMIXAPI_CALL remixapi_InitializeLibrary( + const remixapi_InitializeLibraryInfo* info, + remixapi_Interface* out_result); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // REMIX_C_H_ diff --git a/src/d3d11/d3d11.def b/src/d3d11/d3d11.def index 8b65655a0..d406ba0b3 100644 --- a/src/d3d11/d3d11.def +++ b/src/d3d11/d3d11.def @@ -3,3 +3,4 @@ EXPORTS D3D11CoreCreateDevice @18 D3D11CreateDevice @22 D3D11CreateDeviceAndSwapChain @23 + D3D11On12CreateDevice @24 diff --git a/src/d3d11/d3d11_buffer.cpp b/src/d3d11/d3d11_buffer.cpp index 687be0a51..9b1ee35aa 100644 --- a/src/d3d11/d3d11_buffer.cpp +++ b/src/d3d11/d3d11_buffer.cpp @@ -74,6 +74,8 @@ namespace dxvk { m_buffer = m_parent->GetDXVKDevice()->createBuffer(info, GetMemoryFlags()); m_mapped = m_buffer->getSliceHandle(); + m_mapMode = DetermineMapMode(); + // For Stream Output buffers we need a counter if (pDesc->BindFlags & D3D11_BIND_STREAM_OUTPUT) m_soCounter = CreateSoCounterBuffer(); @@ -273,6 
+275,13 @@ namespace dxvk { | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; return device->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } + + + D3D11_COMMON_BUFFER_MAP_MODE D3D11Buffer::DetermineMapMode() { + return (m_buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + ? D3D11_COMMON_BUFFER_MAP_MODE_DIRECT + : D3D11_COMMON_BUFFER_MAP_MODE_NONE; + } D3D11Buffer* GetCommonBuffer(ID3D11Resource* pResource) { diff --git a/src/d3d11/d3d11_buffer.h b/src/d3d11/d3d11_buffer.h index 3ddd61a0a..c0472fd65 100644 --- a/src/d3d11/d3d11_buffer.h +++ b/src/d3d11/d3d11_buffer.h @@ -1,5 +1,6 @@ #pragma once +#include "../dxvk/dxvk_cs.h" #include "../dxvk/dxvk_device.h" #include "../d3d10/d3d10_buffer.h" @@ -67,9 +68,7 @@ namespace dxvk { } D3D11_COMMON_BUFFER_MAP_MODE GetMapMode() const { - return (m_buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - ? D3D11_COMMON_BUFFER_MAP_MODE_DIRECT - : D3D11_COMMON_BUFFER_MAP_MODE_NONE; + return m_mapMode; } Rc GetBuffer() const { @@ -119,6 +118,21 @@ namespace dxvk { return &m_d3d10; } + bool HasSequenceNumber() const { + return m_mapMode != D3D11_COMMON_BUFFER_MAP_MODE_NONE + && !(m_desc.MiscFlags & D3D11_RESOURCE_MISC_DRAWINDIRECT_ARGS) + && !(m_desc.BindFlags); + } + + void TrackSequenceNumber(uint64_t Seq) { + m_seq = Seq; + } + + uint64_t GetSequenceNumber() { + return HasSequenceNumber() ? 
m_seq + : DxvkCsThread::SynchronizeAll; + } + /** * \brief Normalizes buffer description * @@ -130,14 +144,16 @@ namespace dxvk { private: - const D3D11_BUFFER_DESC m_desc; + D3D11_BUFFER_DESC m_desc; + D3D11_COMMON_BUFFER_MAP_MODE m_mapMode; - Rc m_buffer; - Rc m_soCounter; - DxvkBufferSliceHandle m_mapped; + Rc m_buffer; + Rc m_soCounter; + DxvkBufferSliceHandle m_mapped; + uint64_t m_seq = 0ull; - D3D11DXGIResource m_resource; - D3D10Buffer m_d3d10; + D3D11DXGIResource m_resource; + D3D10Buffer m_d3d10; BOOL CheckFormatFeatureSupport( VkFormat Format, @@ -147,6 +163,8 @@ namespace dxvk { Rc CreateSoCounterBuffer(); + D3D11_COMMON_BUFFER_MAP_MODE DetermineMapMode(); + }; diff --git a/src/d3d11/d3d11_cmdlist.cpp b/src/d3d11/d3d11_cmdlist.cpp index 0ce56211a..aeb1cd5a2 100644 --- a/src/d3d11/d3d11_cmdlist.cpp +++ b/src/d3d11/d3d11_cmdlist.cpp @@ -1,5 +1,7 @@ #include "d3d11_cmdlist.h" #include "d3d11_device.h" +#include "d3d11_buffer.h" +#include "d3d11_texture.h" namespace dxvk { @@ -58,21 +60,71 @@ namespace dxvk { for (const auto& query : m_queries) cmdList->m_queries.push_back(query); + for (const auto& resource : m_resources) + cmdList->m_resources.push_back(resource); + MarkSubmitted(); } - void D3D11CommandList::EmitToCsThread(DxvkCsThread* CsThread) { + uint64_t D3D11CommandList::EmitToCsThread(DxvkCsThread* CsThread) { + uint64_t seq = 0; + for (const auto& query : m_queries) query->DoDeferredEnd(); for (const auto& chunk : m_chunks) - CsThread->dispatchChunk(DxvkCsChunkRef(chunk)); + seq = CsThread->dispatchChunk(DxvkCsChunkRef(chunk)); + for (const auto& resource : m_resources) + TrackResourceSequenceNumber(resource, seq); + MarkSubmitted(); + return seq; } + void D3D11CommandList::TrackResourceUsage( + ID3D11Resource* pResource, + D3D11_RESOURCE_DIMENSION ResourceType, + UINT Subresource) { + m_resources.emplace_back(pResource, Subresource, ResourceType); + } + + + void D3D11CommandList::TrackResourceSequenceNumber( + const D3D11ResourceRef& Resource, + 
uint64_t Seq) { + ID3D11Resource* iface = Resource.Get(); + UINT subresource = Resource.GetSubresource(); + + switch (Resource.GetType()) { + case D3D11_RESOURCE_DIMENSION_UNKNOWN: + break; + + case D3D11_RESOURCE_DIMENSION_BUFFER: { + auto impl = static_cast(iface); + impl->TrackSequenceNumber(Seq); + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE1D: { + auto impl = static_cast(iface)->GetCommonTexture(); + impl->TrackSequenceNumber(subresource, Seq); + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: { + auto impl = static_cast(iface)->GetCommonTexture(); + impl->TrackSequenceNumber(subresource, Seq); + } break; + + case D3D11_RESOURCE_DIMENSION_TEXTURE3D: { + auto impl = static_cast(iface)->GetCommonTexture(); + impl->TrackSequenceNumber(subresource, Seq); + } break; + } + } + + void D3D11CommandList::MarkSubmitted() { if (m_submitted.exchange(true) && !m_warned.exchange(true) && m_parent->GetOptions()->dcSingleUseMode) { diff --git a/src/d3d11/d3d11_cmdlist.h b/src/d3d11/d3d11_cmdlist.h index ce207aaf4..8be313ad6 100644 --- a/src/d3d11/d3d11_cmdlist.h +++ b/src/d3d11/d3d11_cmdlist.h @@ -29,19 +29,29 @@ namespace dxvk { void EmitToCommandList( ID3D11CommandList* pCommandList); - void EmitToCsThread( + uint64_t EmitToCsThread( DxvkCsThread* CsThread); - + + void TrackResourceUsage( + ID3D11Resource* pResource, + D3D11_RESOURCE_DIMENSION ResourceType, + UINT Subresource); + private: - + UINT const m_contextFlags; std::vector m_chunks; std::vector> m_queries; + std::vector m_resources; std::atomic m_submitted = { false }; std::atomic m_warned = { false }; + void TrackResourceSequenceNumber( + const D3D11ResourceRef& Resource, + uint64_t Seq); + void MarkSubmitted(); }; diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 45af2f7bd..cc470eee2 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -473,6 +473,9 @@ namespace dxvk { cSrcSlice.offset(), sizeof(uint32_t)); }); + + if (buf->HasSequenceNumber()) + 
TrackBufferSequenceNumber(buf); } @@ -903,134 +906,6 @@ namespace dxvk { }); } - - void STDMETHODCALLTYPE D3D11DeviceContext::UpdateSubresource( - ID3D11Resource* pDstResource, - UINT DstSubresource, - const D3D11_BOX* pDstBox, - const void* pSrcData, - UINT SrcRowPitch, - UINT SrcDepthPitch) { - UpdateSubresource1(pDstResource, - DstSubresource, pDstBox, pSrcData, - SrcRowPitch, SrcDepthPitch, 0); - } - - - void STDMETHODCALLTYPE D3D11DeviceContext::UpdateSubresource1( - ID3D11Resource* pDstResource, - UINT DstSubresource, - const D3D11_BOX* pDstBox, - const void* pSrcData, - UINT SrcRowPitch, - UINT SrcDepthPitch, - UINT CopyFlags) { - D3D10DeviceLock lock = LockContext(); - - if (!pDstResource) - return; - - // Filter out invalid copy flags - CopyFlags &= D3D11_COPY_NO_OVERWRITE | D3D11_COPY_DISCARD; - - // We need a different code path for buffers - D3D11_RESOURCE_DIMENSION resourceType; - pDstResource->GetType(&resourceType); - - if (resourceType == D3D11_RESOURCE_DIMENSION_BUFFER) { - const auto bufferResource = static_cast(pDstResource); - const auto bufferSlice = bufferResource->GetBufferSlice(); - - VkDeviceSize offset = bufferSlice.offset(); - VkDeviceSize size = bufferSlice.length(); - - if (pDstBox != nullptr) { - offset = pDstBox->left; - size = pDstBox->right - pDstBox->left; - } - - if (!size || offset + size > bufferSlice.length()) - return; - - bool useMap = (bufferSlice.buffer()->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - && (size == bufferSlice.length() || CopyFlags); - - if (useMap) { - D3D11_MAP mapType = (CopyFlags & D3D11_COPY_NO_OVERWRITE) - ? 
D3D11_MAP_WRITE_NO_OVERWRITE - : D3D11_MAP_WRITE_DISCARD; - - D3D11_MAPPED_SUBRESOURCE mappedSr; - if (likely(useMap = SUCCEEDED(Map(pDstResource, 0, mapType, 0, &mappedSr)))) { - std::memcpy(reinterpret_cast(mappedSr.pData) + offset, pSrcData, size); - Unmap(pDstResource, 0); - } - } - - if (!useMap) { - DxvkDataSlice dataSlice = AllocUpdateBufferSlice(size); - std::memcpy(dataSlice.ptr(), pSrcData, size); - - EmitCs([ - cDataBuffer = std::move(dataSlice), - cBufferSlice = bufferSlice.subSlice(offset, size) - ] (DxvkContext* ctx) { - ctx->updateBuffer( - cBufferSlice.buffer(), - cBufferSlice.offset(), - cBufferSlice.length(), - cDataBuffer.ptr()); - }); - } - } else { - D3D11CommonTexture* dstTexture = GetCommonTexture(pDstResource); - - if (DstSubresource >= dstTexture->CountSubresources()) - return; - - VkFormat packedFormat = dstTexture->GetPackedFormat(); - - auto formatInfo = imageFormatInfo(packedFormat); - auto subresource = dstTexture->GetSubresourceFromIndex( - formatInfo->aspectMask, DstSubresource); - - VkExtent3D mipExtent = dstTexture->MipLevelExtent(subresource.mipLevel); - - VkOffset3D offset = { 0, 0, 0 }; - VkExtent3D extent = mipExtent; - - if (pDstBox != nullptr) { - if (pDstBox->left >= pDstBox->right - || pDstBox->top >= pDstBox->bottom - || pDstBox->front >= pDstBox->back) - return; // no-op, but legal - - offset.x = pDstBox->left; - offset.y = pDstBox->top; - offset.z = pDstBox->front; - - extent.width = pDstBox->right - pDstBox->left; - extent.height = pDstBox->bottom - pDstBox->top; - extent.depth = pDstBox->back - pDstBox->front; - } - - if (!util::isBlockAligned(offset, extent, formatInfo->blockSize, mipExtent)) { - Logger::err("D3D11: UpdateSubresource1: Unaligned region"); - return; - } - - auto stagingSlice = AllocStagingBuffer(util::computeImageDataSize(packedFormat, extent)); - - util::packImageData(stagingSlice.mapPtr(0), - pSrcData, SrcRowPitch, SrcDepthPitch, 0, 0, - dstTexture->GetVkImageType(), extent, 1, - formatInfo, 
formatInfo->aspectMask); - - UpdateImage(dstTexture, &subresource, - offset, extent, std::move(stagingSlice)); - } - } - HRESULT STDMETHODCALLTYPE D3D11DeviceContext::UpdateTileMappings( ID3D11Resource* pTiledResource, @@ -1131,8 +1006,8 @@ namespace dxvk { return; } - const D3D11CommonTexture* dstTextureInfo = GetCommonTexture(pDstResource); - const D3D11CommonTexture* srcTextureInfo = GetCommonTexture(pSrcResource); + D3D11CommonTexture* dstTextureInfo = GetCommonTexture(pDstResource); + D3D11CommonTexture* srcTextureInfo = GetCommonTexture(pSrcResource); const DXGI_VK_FORMAT_INFO dstFormatInfo = m_parent->LookupFormat(dstDesc.Format, DXGI_VK_FORMAT_MODE_ANY); const DXGI_VK_FORMAT_INFO srcFormatInfo = m_parent->LookupFormat(srcDesc.Format, DXGI_VK_FORMAT_MODE_ANY); @@ -1195,6 +1070,9 @@ namespace dxvk { ctx->resolveImage(cDstImage, cSrcImage, region, cFormat); }); } + + if (dstTextureInfo->HasSequenceNumber()) + TrackTextureSequenceNumber(dstTextureInfo, DstSubresource); } @@ -3404,6 +3282,11 @@ namespace dxvk { cSrcBuffer.length()); } }); + + if (pDstBuffer->HasSequenceNumber()) + TrackBufferSequenceNumber(pDstBuffer); + if (pSrcBuffer->HasSequenceNumber()) + TrackBufferSequenceNumber(pSrcBuffer); } @@ -3638,6 +3521,20 @@ namespace dxvk { } } } + + if (pDstTexture->HasSequenceNumber()) { + for (uint32_t i = 0; i < pDstLayers->layerCount; i++) { + TrackTextureSequenceNumber(pDstTexture, D3D11CalcSubresource( + pDstLayers->mipLevel, pDstLayers->baseArrayLayer + i, pDstTexture->Desc()->MipLevels)); + } + } + + if (pSrcTexture->HasSequenceNumber()) { + for (uint32_t i = 0; i < pSrcLayers->layerCount; i++) { + TrackTextureSequenceNumber(pSrcTexture, D3D11CalcSubresource( + pSrcLayers->mipLevel, pSrcLayers->baseArrayLayer + i, pSrcTexture->Desc()->MipLevels)); + } + } } @@ -3668,6 +3565,106 @@ namespace dxvk { } + void D3D11DeviceContext::UpdateBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData) { + DxvkBufferSlice bufferSlice = 
pDstBuffer->GetBufferSlice(Offset, Length); + + if (Length <= 1024 && !(Offset & 0x3) && !(Length & 0x3)) { + // The backend has special code paths for small buffer updates, + // however both offset and size must be aligned to four bytes. + DxvkDataSlice dataSlice = AllocUpdateBufferSlice(Length); + std::memcpy(dataSlice.ptr(), pSrcData, Length); + + EmitCs([ + cDataBuffer = std::move(dataSlice), + cBufferSlice = std::move(bufferSlice) + ] (DxvkContext* ctx) { + ctx->updateBuffer( + cBufferSlice.buffer(), + cBufferSlice.offset(), + cBufferSlice.length(), + cDataBuffer.ptr()); + }); + } else { + // Otherwise, to avoid large data copies on the CS thread, + // write directly to a staging buffer and dispatch a copy + DxvkBufferSlice stagingSlice = AllocStagingBuffer(Length); + std::memcpy(stagingSlice.mapPtr(0), pSrcData, Length); + + EmitCs([ + cStagingSlice = std::move(stagingSlice), + cBufferSlice = std::move(bufferSlice) + ] (DxvkContext* ctx) { + ctx->copyBuffer( + cBufferSlice.buffer(), + cBufferSlice.offset(), + cStagingSlice.buffer(), + cStagingSlice.offset(), + cBufferSlice.length()); + }); + } + + if (pDstBuffer->HasSequenceNumber()) + TrackBufferSequenceNumber(pDstBuffer); + } + + + void D3D11DeviceContext::UpdateTexture( + D3D11CommonTexture* pDstTexture, + UINT DstSubresource, + const D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch) { + if (DstSubresource >= pDstTexture->CountSubresources()) + return; + + VkFormat packedFormat = pDstTexture->GetPackedFormat(); + + auto formatInfo = imageFormatInfo(packedFormat); + auto subresource = pDstTexture->GetSubresourceFromIndex( + formatInfo->aspectMask, DstSubresource); + + VkExtent3D mipExtent = pDstTexture->MipLevelExtent(subresource.mipLevel); + + VkOffset3D offset = { 0, 0, 0 }; + VkExtent3D extent = mipExtent; + + if (pDstBox != nullptr) { + if (pDstBox->left >= pDstBox->right + || pDstBox->top >= pDstBox->bottom + || pDstBox->front >= pDstBox->back) + return; // no-op, 
but legal + + offset.x = pDstBox->left; + offset.y = pDstBox->top; + offset.z = pDstBox->front; + + extent.width = pDstBox->right - pDstBox->left; + extent.height = pDstBox->bottom - pDstBox->top; + extent.depth = pDstBox->back - pDstBox->front; + } + + if (!util::isBlockAligned(offset, extent, formatInfo->blockSize, mipExtent)) { + Logger::err("D3D11: UpdateSubresource1: Unaligned region"); + return; + } + + auto stagingSlice = AllocStagingBuffer(util::computeImageDataSize(packedFormat, extent)); + + util::packImageData(stagingSlice.mapPtr(0), + pSrcData, SrcRowPitch, SrcDepthPitch, 0, 0, + pDstTexture->GetVkImageType(), extent, 1, + formatInfo, formatInfo->aspectMask); + + UpdateImage(pDstTexture, &subresource, + offset, extent, std::move(stagingSlice)); + } + + void D3D11DeviceContext::UpdateImage( D3D11CommonTexture* pDstTexture, const VkImageSubresource* pDstSubresource, @@ -3676,6 +3673,9 @@ namespace dxvk { DxvkBufferSlice StagingBuffer) { bool dstIsImage = pDstTexture->GetMapMode() != D3D11_COMMON_TEXTURE_MAP_MODE_STAGING; + uint32_t dstSubresource = D3D11CalcSubresource(pDstSubresource->mipLevel, + pDstSubresource->arrayLayer, pDstTexture->Desc()->MipLevels); + if (dstIsImage) { EmitCs([ cDstImage = pDstTexture->GetImage(), @@ -3707,9 +3707,6 @@ namespace dxvk { // format metadata, so deal with it manually here. 
VkExtent3D dstMipExtent = pDstTexture->MipLevelExtent(pDstSubresource->mipLevel); - uint32_t dstSubresource = D3D11CalcSubresource(pDstSubresource->mipLevel, - pDstSubresource->arrayLayer, pDstTexture->Desc()->MipLevels); - auto dstFormat = pDstTexture->GetPackedFormat(); auto dstFormatInfo = imageFormatInfo(dstFormat); @@ -3757,6 +3754,9 @@ namespace dxvk { srcPlaneOffset += util::flattenImageExtent(blockCount) * elementSize; } } + + if (pDstTexture->HasSequenceNumber()) + TrackTextureSequenceNumber(pDstTexture, dstSubresource); } @@ -4398,7 +4398,7 @@ namespace dxvk { DxvkDataSlice D3D11DeviceContext::AllocUpdateBufferSlice(size_t Size) { - constexpr size_t UpdateBufferSize = 16 * 1024 * 1024; + constexpr size_t UpdateBufferSize = 1 * 1024 * 1024; if (Size >= UpdateBufferSize) { Rc buffer = new DxvkDataBuffer(Size); @@ -4421,6 +4421,8 @@ namespace dxvk { DxvkBufferSlice D3D11DeviceContext::AllocStagingBuffer( VkDeviceSize Size) { + constexpr VkDeviceSize StagingBufferSize = 4 * 1024 * 1024; + DxvkBufferCreateInfo info; info.size = Size; info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT @@ -4432,8 +4434,32 @@ namespace dxvk { info.access = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT; - return DxvkBufferSlice(m_device->createBuffer(info, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); + // Create a dedicated buffer for large allocations + VkDeviceSize alignedSize = align(Size, 256); + + if (alignedSize >= StagingBufferSize) { + return DxvkBufferSlice(m_device->createBuffer(info, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)); + } + + // Otherwise, try to suballocate from an existing buffer + if (m_stagingOffset + alignedSize > StagingBufferSize || m_stagingBuffer == nullptr) { + info.size = StagingBufferSize; + + m_stagingBuffer = m_device->createBuffer(info, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + m_stagingOffset = 0; + } + + DxvkBufferSlice 
slice(m_stagingBuffer, m_stagingOffset, Size); + m_stagingOffset += alignedSize; + return slice; + } + + + void D3D11DeviceContext::ResetStagingBuffer() { + m_stagingBuffer = nullptr; + m_stagingOffset = 0; } @@ -4504,4 +4530,25 @@ namespace dxvk { pMsState->enableAlphaToCoverage = VK_FALSE; } + + void D3D11DeviceContext::TrackResourceSequenceNumber( + ID3D11Resource* pResource) { + if (!pResource) + return; + + D3D11CommonTexture* texture = GetCommonTexture(pResource); + + if (texture) { + if (texture->HasSequenceNumber()) { + for (uint32_t i = 0; i < texture->CountSubresources(); i++) + TrackTextureSequenceNumber(texture, i); + } + } else { + D3D11Buffer* buffer = static_cast(pResource); + + if (buffer->HasSequenceNumber()) + TrackBufferSequenceNumber(buffer); + } + } + } diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 483b04157..48b84427d 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -133,23 +133,6 @@ namespace dxvk { void STDMETHODCALLTYPE GenerateMips( ID3D11ShaderResourceView* pShaderResourceView); - void STDMETHODCALLTYPE UpdateSubresource( - ID3D11Resource* pDstResource, - UINT DstSubresource, - const D3D11_BOX* pDstBox, - const void* pSrcData, - UINT SrcRowPitch, - UINT SrcDepthPitch); - - void STDMETHODCALLTYPE UpdateSubresource1( - ID3D11Resource* pDstResource, - UINT DstSubresource, - const D3D11_BOX* pDstBox, - const void* pSrcData, - UINT SrcRowPitch, - UINT SrcDepthPitch, - UINT CopyFlags); - HRESULT STDMETHODCALLTYPE UpdateTileMappings( ID3D11Resource* pTiledResource, UINT NumTiledResourceRegions, @@ -712,7 +695,10 @@ namespace dxvk { Rc m_device; Rc m_updateBuffer; - + + Rc m_stagingBuffer; + VkDeviceSize m_stagingOffset = 0ull; + DxvkCsChunkFlags m_csFlags; DxvkCsChunkRef m_csChunk; @@ -804,6 +790,82 @@ namespace dxvk { ID3D11Resource* pResource, UINT Subresource); + template + static void UpdateResource( + ContextType* pContext, + ID3D11Resource* pDstResource, + UINT DstSubresource, + const 
D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch, + UINT CopyFlags) { + D3D10DeviceLock lock = pContext->LockContext(); + + if (!pDstResource) + return; + + // We need a different code path for buffers + D3D11_RESOURCE_DIMENSION resourceType; + pDstResource->GetType(&resourceType); + + if (likely(resourceType == D3D11_RESOURCE_DIMENSION_BUFFER)) { + const auto bufferResource = static_cast(pDstResource); + uint64_t bufferSize = bufferResource->Desc()->ByteWidth; + + // Provide a fast path for mapped buffer updates since some + // games use UpdateSubresource to update constant buffers. + if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT) && likely(!pDstBox)) { + pContext->UpdateMappedBuffer(bufferResource, 0, bufferSize, pSrcData, 0); + return; + } + + // Validate buffer range to update + uint64_t offset = 0; + uint64_t length = bufferSize; + + if (pDstBox) { + offset = pDstBox->left; + length = pDstBox->right - offset; + } + + if (unlikely(offset + length > bufferSize)) + return; + + // Still try to be fast if a box is provided but we update the full buffer + if (likely(bufferResource->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_DIRECT)) { + CopyFlags &= D3D11_COPY_DISCARD | D3D11_COPY_NO_OVERWRITE; + + if (likely(length == bufferSize) || unlikely(CopyFlags != 0)) { + pContext->UpdateMappedBuffer(bufferResource, offset, length, pSrcData, CopyFlags); + return; + } + } + + // Otherwise we can't really do anything fancy, so just do a GPU copy + pContext->UpdateBuffer(bufferResource, offset, length, pSrcData); + } else { + D3D11CommonTexture* textureResource = GetCommonTexture(pDstResource); + + pContext->UpdateTexture(textureResource, + DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch); + } + } + + void UpdateBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData); + + void UpdateTexture( + D3D11CommonTexture* pDstTexture, + UINT DstSubresource, + const 
D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch); + void UpdateImage( D3D11CommonTexture* pDstTexture, const VkImageSubresource* pDstSubresource, @@ -918,7 +980,7 @@ namespace dxvk { UINT NumViews, ID3D11RenderTargetView* const* ppRenderTargetViews, ID3D11DepthStencilView* pDepthStencilView); - + VkClearValue ConvertColorValue( const FLOAT Color[4], const DxvkFormatInfo* pFormatInfo); @@ -927,7 +989,9 @@ namespace dxvk { DxvkBufferSlice AllocStagingBuffer( VkDeviceSize Size); - + + void ResetStagingBuffer(); + DxvkCsChunkRef AllocCsChunk(); static void InitDefaultPrimitiveTopology( @@ -1004,8 +1068,18 @@ namespace dxvk { } } + void TrackResourceSequenceNumber( + ID3D11Resource* pResource); + virtual void EmitCsChunk(DxvkCsChunkRef&& chunk) = 0; + virtual void TrackTextureSequenceNumber( + D3D11CommonTexture* pResource, + UINT Subresource) = 0; + + virtual void TrackBufferSequenceNumber( + D3D11Buffer* pResource) = 0; + }; } diff --git a/src/d3d11/d3d11_context_def.cpp b/src/d3d11/d3d11_context_def.cpp index 1ac44b061..552cfcc81 100644 --- a/src/d3d11/d3d11_context_def.cpp +++ b/src/d3d11/d3d11_context_def.cpp @@ -175,6 +175,7 @@ namespace dxvk { ClearState(); m_mappedResources.clear(); + ResetStagingBuffer(); return S_OK; } @@ -190,47 +191,35 @@ namespace dxvk { if (unlikely(!pResource || !pMappedResource)) return E_INVALIDARG; - D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN; - pResource->GetType(&resourceDim); - if (MapType == D3D11_MAP_WRITE_DISCARD) { - D3D11DeferredContextMapEntry entry; - + D3D11_RESOURCE_DIMENSION resourceDim; + pResource->GetType(&resourceDim); + + D3D11_MAPPED_SUBRESOURCE mapInfo; HRESULT status = resourceDim == D3D11_RESOURCE_DIMENSION_BUFFER - ? MapBuffer(pResource, MapType, MapFlags, &entry) - : MapImage (pResource, Subresource, MapType, MapFlags, &entry); + ? 
MapBuffer(pResource, &mapInfo) + : MapImage (pResource, Subresource, &mapInfo); if (unlikely(FAILED(status))) { *pMappedResource = D3D11_MAPPED_SUBRESOURCE(); return status; } - // Adding a new map entry actually overrides the - // old one in practice because the lookup function - // scans the array in reverse order - m_mappedResources.push_back(std::move(entry)); - - // Fill mapped resource structure - pMappedResource->pData = entry.MapPointer; - pMappedResource->RowPitch = entry.RowPitch; - pMappedResource->DepthPitch = entry.DepthPitch; + AddMapEntry(pResource, Subresource, resourceDim, mapInfo); + *pMappedResource = mapInfo; return S_OK; } else if (MapType == D3D11_MAP_WRITE_NO_OVERWRITE) { // The resource must be mapped with D3D11_MAP_WRITE_DISCARD // before it can be mapped with D3D11_MAP_WRITE_NO_OVERWRITE. auto entry = FindMapEntry(pResource, Subresource); - if (unlikely(entry == m_mappedResources.rend())) { + if (unlikely(!entry)) { *pMappedResource = D3D11_MAPPED_SUBRESOURCE(); return E_INVALIDARG; } // Return same memory region as earlier - entry->MapType = D3D11_MAP_WRITE_NO_OVERWRITE; - - pMappedResource->pData = entry->MapPointer; - pMappedResource->RowPitch = entry->RowPitch; - pMappedResource->DepthPitch = entry->DepthPitch; + *pMappedResource = entry->MapInfo; return S_OK; } else { // Not allowed on deferred contexts @@ -247,6 +236,31 @@ namespace dxvk { } + void STDMETHODCALLTYPE D3D11DeferredContext::UpdateSubresource( + ID3D11Resource* pDstResource, + UINT DstSubresource, + const D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch) { + UpdateResource(this, pDstResource, + DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch, 0); + } + + + void STDMETHODCALLTYPE D3D11DeferredContext::UpdateSubresource1( + ID3D11Resource* pDstResource, + UINT DstSubresource, + const D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch, + UINT CopyFlags) { + UpdateResource(this, pDstResource, 
+ DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch, CopyFlags); + } + + void STDMETHODCALLTYPE D3D11DeferredContext::SwapDeviceContextState( ID3DDeviceContextState* pState, ID3DDeviceContextState** ppPreviousState) { @@ -259,9 +273,7 @@ namespace dxvk { HRESULT D3D11DeferredContext::MapBuffer( ID3D11Resource* pResource, - D3D11_MAP MapType, - UINT MapFlags, - D3D11DeferredContextMapEntry* pMapEntry) { + D3D11_MAPPED_SUBRESOURCE* pMappedResource) { D3D11Buffer* pBuffer = static_cast(pResource); if (unlikely(pBuffer->GetMapMode() == D3D11_COMMON_BUFFER_MAP_MODE_NONE)) { @@ -269,18 +281,15 @@ namespace dxvk { return E_INVALIDARG; } - pMapEntry->pResource = pResource; - pMapEntry->Subresource = 0; - pMapEntry->MapType = D3D11_MAP_WRITE_DISCARD; - pMapEntry->RowPitch = pBuffer->Desc()->ByteWidth; - pMapEntry->DepthPitch = pBuffer->Desc()->ByteWidth; + pMappedResource->RowPitch = pBuffer->Desc()->ByteWidth; + pMappedResource->DepthPitch = pBuffer->Desc()->ByteWidth; if (likely(m_csFlags.test(DxvkCsChunkFlag::SingleUse))) { // For resources that cannot be written by the GPU, // we may write to the buffer resource directly and // just swap in the buffer slice as needed. auto bufferSlice = pBuffer->AllocSlice(); - pMapEntry->MapPointer = bufferSlice.mapPtr; + pMappedResource->pData = bufferSlice.mapPtr; EmitCs([ cDstBuffer = pBuffer->GetBuffer(), @@ -292,7 +301,7 @@ namespace dxvk { // For GPU-writable resources, we need a data slice // to perform the update operation at execution time. 
auto dataSlice = AllocUpdateBufferSlice(pBuffer->Desc()->ByteWidth); - pMapEntry->MapPointer = dataSlice.ptr(); + pMappedResource->pData = dataSlice.ptr(); EmitCs([ cDstBuffer = pBuffer->GetBuffer(), @@ -311,9 +320,7 @@ namespace dxvk { HRESULT D3D11DeferredContext::MapImage( ID3D11Resource* pResource, UINT Subresource, - D3D11_MAP MapType, - UINT MapFlags, - D3D11DeferredContextMapEntry* pMapEntry) { + D3D11_MAPPED_SUBRESOURCE* pMappedResource) { D3D11CommonTexture* pTexture = GetCommonTexture(pResource); if (unlikely(pTexture->GetMapMode() == D3D11_COMMON_TEXTURE_MAP_MODE_NONE)) { @@ -335,12 +342,9 @@ namespace dxvk { auto layout = pTexture->GetSubresourceLayout(formatInfo->aspectMask, Subresource); auto dataSlice = AllocStagingBuffer(util::computeImageDataSize(packedFormat, levelExtent)); - pMapEntry->pResource = pResource; - pMapEntry->Subresource = Subresource; - pMapEntry->MapType = D3D11_MAP_WRITE_DISCARD; - pMapEntry->RowPitch = layout.RowPitch; - pMapEntry->DepthPitch = layout.DepthPitch; - pMapEntry->MapPointer = dataSlice.mapPtr(0); + pMappedResource->RowPitch = layout.RowPitch; + pMappedResource->DepthPitch = layout.DepthPitch; + pMappedResource->pData = dataSlice.mapPtr(0); UpdateImage(pTexture, &subresource, VkOffset3D { 0, 0, 0 }, levelExtent, @@ -349,6 +353,34 @@ namespace dxvk { } + void D3D11DeferredContext::UpdateMappedBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData, + UINT CopyFlags) { + void* mapPtr = nullptr; + + if (unlikely(CopyFlags == D3D11_COPY_NO_OVERWRITE)) { + auto entry = FindMapEntry(pDstBuffer, 0); + + if (entry) + mapPtr = entry->MapInfo.pData; + } + + if (likely(!mapPtr)) { + // The caller validates the map mode, so we can + // safely ignore the MapBuffer return value here + D3D11_MAPPED_SUBRESOURCE mapInfo; + MapBuffer(pDstBuffer, &mapInfo); + AddMapEntry(pDstBuffer, 0, D3D11_RESOURCE_DIMENSION_BUFFER, mapInfo); + mapPtr = mapInfo.pData; + } + + std::memcpy(reinterpret_cast(mapPtr) + 
Offset, pSrcData, Length); + } + + void D3D11DeferredContext::FinalizeQueries() { for (auto& query : m_queriesBegun) { m_commandList->AddQuery(query.ptr()); @@ -373,6 +405,52 @@ namespace dxvk { } + void D3D11DeferredContext::TrackTextureSequenceNumber( + D3D11CommonTexture* pResource, + UINT Subresource) { + m_commandList->TrackResourceUsage( + pResource->GetInterface(), + pResource->GetDimension(), + Subresource); + } + + + void D3D11DeferredContext::TrackBufferSequenceNumber( + D3D11Buffer* pResource) { + m_commandList->TrackResourceUsage( + pResource, D3D11_RESOURCE_DIMENSION_BUFFER, 0); + } + + + D3D11DeferredContextMapEntry* D3D11DeferredContext::FindMapEntry( + ID3D11Resource* pResource, + UINT Subresource) { + // Recently mapped resources as well as entries with + // up-to-date map infos will be located at the end + // of the resource array, so scan in reverse order. + size_t size = m_mappedResources.size(); + + for (size_t i = 1; i <= size; i++) { + auto entry = &m_mappedResources[size - i]; + + if (entry->Resource.Get() == pResource + && entry->Resource.GetSubresource() == Subresource) + return entry; + } + + return nullptr; + } + + void D3D11DeferredContext::AddMapEntry( + ID3D11Resource* pResource, + UINT Subresource, + D3D11_RESOURCE_DIMENSION ResourceType, + const D3D11_MAPPED_SUBRESOURCE& MapInfo) { + m_mappedResources.emplace_back(pResource, + Subresource, ResourceType, MapInfo); + } + + DxvkCsChunkFlags D3D11DeferredContext::GetCsChunkFlags( D3D11Device* pDevice) { return pDevice->GetOptions()->dcSingleUseMode diff --git a/src/d3d11/d3d11_context_def.h b/src/d3d11/d3d11_context_def.h index bdfcf4412..c5fe49c9d 100644 --- a/src/d3d11/d3d11_context_def.h +++ b/src/d3d11/d3d11_context_def.h @@ -5,22 +5,26 @@ #include "d3d11_context.h" #include "d3d11_texture.h" -#include #include namespace dxvk { struct D3D11DeferredContextMapEntry { - Com pResource; - UINT Subresource; - D3D11_MAP MapType; - UINT RowPitch; - UINT DepthPitch; - void* MapPointer; + 
D3D11DeferredContextMapEntry() { } + D3D11DeferredContextMapEntry( + ID3D11Resource* pResource, + UINT Subresource, + D3D11_RESOURCE_DIMENSION ResourceType, + const D3D11_MAPPED_SUBRESOURCE& MappedResource) + : Resource(pResource, Subresource, ResourceType), + MapInfo(MappedResource) { } + + D3D11ResourceRef Resource; + D3D11_MAPPED_SUBRESOURCE MapInfo; }; class D3D11DeferredContext : public D3D11DeviceContext { - + friend class D3D11DeviceContext; public: D3D11DeferredContext( @@ -77,6 +81,23 @@ namespace dxvk { ID3D11Resource* pResource, UINT Subresource); + void STDMETHODCALLTYPE UpdateSubresource( + ID3D11Resource* pDstResource, + UINT DstSubresource, + const D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch); + + void STDMETHODCALLTYPE UpdateSubresource1( + ID3D11Resource* pDstResource, + UINT DstSubresource, + const D3D11_BOX* pDstBox, + const void* pSrcData, + UINT SrcRowPitch, + UINT SrcDepthPitch, + UINT CopyFlags); + void STDMETHODCALLTYPE SwapDeviceContextState( ID3DDeviceContextState* pState, ID3DDeviceContextState** ppPreviousState); @@ -98,34 +119,46 @@ namespace dxvk { HRESULT MapBuffer( ID3D11Resource* pResource, - D3D11_MAP MapType, - UINT MapFlags, - D3D11DeferredContextMapEntry* pMapEntry); + D3D11_MAPPED_SUBRESOURCE* pMappedResource); HRESULT MapImage( ID3D11Resource* pResource, UINT Subresource, - D3D11_MAP MapType, - UINT MapFlags, - D3D11DeferredContextMapEntry* pMapEntry); - + D3D11_MAPPED_SUBRESOURCE* pMappedResource); + + void UpdateMappedBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData, + UINT CopyFlags); + void FinalizeQueries(); Com CreateCommandList(); void EmitCsChunk(DxvkCsChunkRef&& chunk); + void TrackTextureSequenceNumber( + D3D11CommonTexture* pResource, + UINT Subresource); + + void TrackBufferSequenceNumber( + D3D11Buffer* pResource); + + D3D11DeferredContextMapEntry* FindMapEntry( + ID3D11Resource* pResource, + UINT Subresource); + + void AddMapEntry( + 
ID3D11Resource* pResource, + UINT Subresource, + D3D11_RESOURCE_DIMENSION ResourceType, + const D3D11_MAPPED_SUBRESOURCE& MapInfo); + static DxvkCsChunkFlags GetCsChunkFlags( D3D11Device* pDevice); - auto FindMapEntry(ID3D11Resource* pResource, UINT Subresource) { - return std::find_if(m_mappedResources.rbegin(), m_mappedResources.rend(), - [pResource, Subresource] (const D3D11DeferredContextMapEntry& entry) { - return entry.pResource == pResource - && entry.Subresource == Subresource; - }); - } - }; } diff --git a/src/d3d11/d3d11_context_ext.cpp b/src/d3d11/d3d11_context_ext.cpp index c892f7efd..ecd41e832 100644 --- a/src/d3d11/d3d11_context_ext.cpp +++ b/src/d3d11/d3d11_context_ext.cpp @@ -193,6 +193,13 @@ namespace dxvk { ctx->launchCuKernelNVX(cLaunchInfo.nvxLaunchInfo, cLaunchInfo.buffers, cLaunchInfo.images); }); + // Track resource usage as necessary + for (uint32_t i = 0; i < NumReadResources; i++) + m_ctx->TrackResourceSequenceNumber(static_cast(pReadResources[i])); + + for (uint32_t i = 0; i < NumWriteResources; i++) + m_ctx->TrackResourceSequenceNumber(static_cast(pWriteResources[i])); + return true; } } diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index adaf28105..0523cc5f1 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -7,13 +7,15 @@ constexpr static uint32_t MinFlushIntervalUs = 750; constexpr static uint32_t IncFlushIntervalUs = 250; constexpr static uint32_t MaxPendingSubmits = 6; +constexpr static VkDeviceSize MaxImplicitDiscardSize = 256ull << 10; + namespace dxvk { D3D11ImmediateContext::D3D11ImmediateContext( D3D11Device* pParent, const Rc& Device) : D3D11DeviceContext(pParent, Device, DxvkCsChunkFlag::SingleUse), - m_csThread(Device->createContext()), + m_csThread(Device, Device->createContext()), m_videoContext(this, Device) { EmitCs([ cDevice = m_device, @@ -39,7 +41,7 @@ namespace dxvk { D3D11ImmediateContext::~D3D11ImmediateContext() { Flush(); - SynchronizeCsThread(); 
+ SynchronizeCsThread(DxvkCsThread::SynchronizeAll); SynchronizeDevice(); } @@ -212,7 +214,8 @@ namespace dxvk { // Dispatch command list to the CS thread and // restore the immediate context's state - commandList->EmitToCsThread(&m_csThread); + uint64_t csSeqNum = commandList->EmitToCsThread(&m_csThread); + m_csSeqNum = std::max(m_csSeqNum, csSeqNum); if (RestoreContextState) RestoreState(); @@ -272,12 +275,17 @@ namespace dxvk { void STDMETHODCALLTYPE D3D11ImmediateContext::Unmap( ID3D11Resource* pResource, UINT Subresource) { - D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN; - pResource->GetType(&resourceDim); - - if (unlikely(resourceDim != D3D11_RESOURCE_DIMENSION_BUFFER)) { - D3D10DeviceLock lock = LockContext(); - UnmapImage(GetCommonTexture(pResource), Subresource); + // Since it is very uncommon for images to be mapped compared + // to buffers, we count the currently mapped images in order + // to avoid a virtual method call in the common case. + if (unlikely(m_mappedImageCount > 0)) { + D3D11_RESOURCE_DIMENSION resourceDim = D3D11_RESOURCE_DIMENSION_UNKNOWN; + pResource->GetType(&resourceDim); + + if (resourceDim != D3D11_RESOURCE_DIMENSION_BUFFER) { + D3D10DeviceLock lock = LockContext(); + UnmapImage(GetCommonTexture(pResource), Subresource); + } } } @@ -288,11 +296,8 @@ namespace dxvk { const void* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch) { - FlushImplicit(FALSE); - - D3D11DeviceContext::UpdateSubresource( - pDstResource, DstSubresource, pDstBox, - pSrcData, SrcRowPitch, SrcDepthPitch); + UpdateResource(this, pDstResource, + DstSubresource, pDstBox, pSrcData, SrcRowPitch, SrcDepthPitch, 0); } @@ -304,12 +309,8 @@ namespace dxvk { UINT SrcRowPitch, UINT SrcDepthPitch, UINT CopyFlags) { - FlushImplicit(FALSE); - - D3D11DeviceContext::UpdateSubresource1( - pDstResource, DstSubresource, pDstBox, - pSrcData, SrcRowPitch, SrcDepthPitch, - CopyFlags); + UpdateResource(this, pDstResource, + DstSubresource, pDstBox, pSrcData, 
SrcRowPitch, SrcDepthPitch, CopyFlags); } @@ -353,15 +354,17 @@ namespace dxvk { Logger::err("D3D11: Cannot map a device-local buffer"); return E_INVALIDARG; } - - if (MapType == D3D11_MAP_WRITE_DISCARD) { + + VkDeviceSize bufferSize = pResource->Desc()->ByteWidth; + + if (likely(MapType == D3D11_MAP_WRITE_DISCARD)) { // Allocate a new backing slice for the buffer and set // it as the 'new' mapped slice. This assumes that the // only way to invalidate a buffer is by mapping it. auto physSlice = pResource->DiscardSlice(); pMappedResource->pData = physSlice.mapPtr; - pMappedResource->RowPitch = pResource->Desc()->ByteWidth; - pMappedResource->DepthPitch = pResource->Desc()->ByteWidth; + pMappedResource->RowPitch = bufferSize; + pMappedResource->DepthPitch = bufferSize; EmitCs([ cBuffer = pResource->GetBuffer(), @@ -371,22 +374,65 @@ namespace dxvk { }); return S_OK; - } else { - // Wait until the resource is no longer in use - if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) { - if (!WaitForResource(pResource->GetBuffer(), MapType, MapFlags)) - return DXGI_ERROR_WAS_STILL_DRAWING; - } - - // Use map pointer from previous map operation. This - // way we don't have to synchronize with the CS thread - // if the map mode is D3D11_MAP_WRITE_NO_OVERWRITE. + } else if (likely(MapType == D3D11_MAP_WRITE_NO_OVERWRITE)) { + // Put this on a fast path without any extra checks since it's + // a somewhat desired method to partially update large buffers DxvkBufferSliceHandle physSlice = pResource->GetMappedSlice(); - pMappedResource->pData = physSlice.mapPtr; - pMappedResource->RowPitch = pResource->Desc()->ByteWidth; - pMappedResource->DepthPitch = pResource->Desc()->ByteWidth; + pMappedResource->RowPitch = bufferSize; + pMappedResource->DepthPitch = bufferSize; return S_OK; + } else { + // Quantum Break likes using MAP_WRITE on resources which would force + // us to synchronize with the GPU multiple times per frame. 
In those + // situations, if there are no pending GPU writes to the resource, we + // can promote it to MAP_WRITE_DISCARD, but preserve the data by doing + // a CPU copy from the previous buffer slice, to avoid the sync point. + bool doInvalidatePreserve = false; + + auto buffer = pResource->GetBuffer(); + auto sequenceNumber = pResource->GetSequenceNumber(); + + if (MapType != D3D11_MAP_READ && !MapFlags && bufferSize <= MaxImplicitDiscardSize) { + SynchronizeCsThread(sequenceNumber); + + bool hasWoAccess = buffer->isInUse(DxvkAccess::Write); + bool hasRwAccess = buffer->isInUse(DxvkAccess::Read); + + if (hasRwAccess && !hasWoAccess) { + // Uncached reads can be so slow that a GPU sync may actually be faster + doInvalidatePreserve = buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + } + + if (doInvalidatePreserve) { + FlushImplicit(TRUE); + + auto prevSlice = pResource->GetMappedSlice(); + auto physSlice = pResource->DiscardSlice(); + + EmitCs([ + cBuffer = std::move(buffer), + cBufferSlice = physSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cBufferSlice); + }); + + std::memcpy(physSlice.mapPtr, prevSlice.mapPtr, physSlice.length); + pMappedResource->pData = physSlice.mapPtr; + pMappedResource->RowPitch = bufferSize; + pMappedResource->DepthPitch = bufferSize; + return S_OK; + } else { + if (!WaitForResource(buffer, sequenceNumber, MapType, MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; + + DxvkBufferSliceHandle physSlice = pResource->GetMappedSlice(); + pMappedResource->pData = physSlice.mapPtr; + pMappedResource->RowPitch = bufferSize; + pMappedResource->DepthPitch = bufferSize; + return S_OK; + } } } @@ -423,23 +469,83 @@ namespace dxvk { VkFormat packedFormat = m_parent->LookupPackedFormat( pResource->Desc()->Format, pResource->GetFormatMode()).Format; + uint64_t sequenceNumber = pResource->GetSequenceNumber(Subresource); + auto formatInfo = imageFormatInfo(packedFormat); void* mapPtr; if (mapMode == 
D3D11_COMMON_TEXTURE_MAP_MODE_DIRECT) { - // Wait for the resource to become available - if (!WaitForResource(mappedImage, MapType, MapFlags)) - return DXGI_ERROR_WAS_STILL_DRAWING; + // Wait for the resource to become available. We do not + // support image renaming, so stall on DISCARD instead. + if (MapType == D3D11_MAP_WRITE_DISCARD) + MapFlags &= ~D3D11_MAP_FLAG_DO_NOT_WAIT; + + if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) { + if (!WaitForResource(mappedImage, sequenceNumber, MapType, MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; + } // Query the subresource's memory layout and hope that // the application respects the returned pitch values. mapPtr = mappedImage->mapPtr(0); } else { - if (MapType == D3D11_MAP_WRITE_DISCARD) { - // We do not have to preserve the contents of the - // buffer if the entire image gets discarded. + constexpr uint32_t DoInvalidate = (1u << 0); + constexpr uint32_t DoPreserve = (1u << 1); + constexpr uint32_t DoWait = (1u << 2); + uint32_t doFlags; + + if (MapType == D3D11_MAP_READ) { + // Reads will not change the image content, so we only need + // to wait for the GPU to finish writing to the mapped buffer. + doFlags = DoWait; + } else if (MapType == D3D11_MAP_WRITE_DISCARD) { + doFlags = DoInvalidate; + + // If we know for sure that the mapped buffer is currently not + // in use by the GPU, we don't have to allocate a new slice. 
+ if (m_csThread.lastSequenceNumber() >= sequenceNumber && !mappedBuffer->isInUse(DxvkAccess::Read)) + doFlags = 0; + } else if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_STAGING && (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT)) { + // Always respect DO_NOT_WAIT for mapped staging images + doFlags = DoWait; + } else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE || mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) { + // Need to synchronize thread to determine pending GPU accesses + SynchronizeCsThread(sequenceNumber); + + // Don't implicitly discard large buffers or buffers of images with + // multiple subresources, as that is likely to cause memory issues. + VkDeviceSize bufferSize = pResource->CountSubresources() == 1 + ? pResource->GetMappedSlice(Subresource).length + : MaxImplicitDiscardSize; + + if (bufferSize >= MaxImplicitDiscardSize) { + // Don't check access flags, WaitForResource will return + // early anyway if the resource is currently in use + doFlags = DoWait; + } else if (mappedBuffer->isInUse(DxvkAccess::Write)) { + // There are pending GPU writes, need to wait for those + doFlags = DoWait; + } else if (mappedBuffer->isInUse(DxvkAccess::Read)) { + // All pending GPU accesses are reads, so the buffer data + // is still current, and we can prevent GPU synchronization + // by creating a new slice with an exact copy of the data. + doFlags = DoInvalidate | DoPreserve; + } else { + // There are no pending accesses, so we don't need to wait + doFlags = 0; + } + } else { + // No need to synchronize staging resources with NO_OVERWRITE + // since the buffer will be used directly. 
+ doFlags = 0; + } + + if (doFlags & DoInvalidate) { + FlushImplicit(TRUE); + + DxvkBufferSliceHandle prevSlice = pResource->GetMappedSlice(Subresource); DxvkBufferSliceHandle physSlice = pResource->DiscardSlice(Subresource); - + EmitCs([ cImageBuffer = mappedBuffer, cBufferSlice = physSlice @@ -447,15 +553,22 @@ namespace dxvk { ctx->invalidateBuffer(cImageBuffer, cBufferSlice); }); + if (doFlags & DoPreserve) + std::memcpy(physSlice.mapPtr, prevSlice.mapPtr, physSlice.length); + mapPtr = physSlice.mapPtr; } else { - bool wait = MapType != D3D11_MAP_WRITE_NO_OVERWRITE - || mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER; - - // Wait for mapped buffer to become available - if (wait && !WaitForResource(mappedBuffer, MapType, MapFlags)) - return DXGI_ERROR_WAS_STILL_DRAWING; - + if (doFlags & DoWait) { + // We cannot respect DO_NOT_WAIT for buffer-mapped resources since + // our internal copies need to be transparent to the application. + if (mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) + MapFlags &= ~D3D11_MAP_FLAG_DO_NOT_WAIT; + + // Wait for mapped buffer to become available + if (!WaitForResource(mappedBuffer, sequenceNumber, MapType, MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; + } + mapPtr = pResource->GetMappedSlice(Subresource).mapPtr; } } @@ -470,6 +583,7 @@ namespace dxvk { pMappedResource->DepthPitch = layout.DepthPitch; } + m_mappedImageCount += 1; return S_OK; } @@ -480,11 +594,15 @@ namespace dxvk { D3D11_MAP mapType = pResource->GetMapType(Subresource); pResource->SetMapType(Subresource, D3D11_MAP(~0u)); - if (mapType == D3D11_MAP(~0u) - || mapType == D3D11_MAP_READ) + if (mapType == D3D11_MAP(~0u)) return; - - if (pResource->GetMapMode() == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) { + + // Decrement mapped image counter only after making sure + // the given subresource is actually mapped right now + m_mappedImageCount -= 1; + + if ((mapType != D3D11_MAP_READ) && + (pResource->GetMapMode() == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER)) { // Now that 
data has been written into the buffer, // we need to copy its contents into the image VkImageAspectFlags aspectMask = imageFormatInfo(pResource->GetPackedFormat())->aspectMask; @@ -497,6 +615,31 @@ namespace dxvk { } + void D3D11ImmediateContext::UpdateMappedBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData, + UINT CopyFlags) { + DxvkBufferSliceHandle slice; + + if (likely(CopyFlags != D3D11_COPY_NO_OVERWRITE)) { + slice = pDstBuffer->DiscardSlice(); + + EmitCs([ + cBuffer = pDstBuffer->GetBuffer(), + cBufferSlice = slice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cBuffer, cBufferSlice); + }); + } else { + slice = pDstBuffer->GetMappedSlice(); + } + + std::memcpy(reinterpret_cast(slice.mapPtr) + Offset, pSrcData, Length); + } + + void STDMETHODCALLTYPE D3D11ImmediateContext::SwapDeviceContextState( ID3DDeviceContextState* pState, ID3DDeviceContextState** ppPreviousState) { @@ -523,15 +666,15 @@ namespace dxvk { } - void D3D11ImmediateContext::SynchronizeCsThread() { + void D3D11ImmediateContext::SynchronizeCsThread(uint64_t SequenceNumber) { D3D10DeviceLock lock = LockContext(); // Dispatch current chunk so that all commands // recorded prior to this function will be run - FlushCsChunk(); + if (SequenceNumber > m_csSeqNum) + FlushCsChunk(); - if (m_csThread.isBusy()) - m_csThread.synchronize(); + m_csThread.synchronize(SequenceNumber); } @@ -542,6 +685,7 @@ namespace dxvk { bool D3D11ImmediateContext::WaitForResource( const Rc& Resource, + uint64_t SequenceNumber, D3D11_MAP MapType, UINT MapFlags) { // Determine access type to wait for based on map mode @@ -549,39 +693,58 @@ namespace dxvk { ? DxvkAccess::Write : DxvkAccess::Read; - // Wait for the any pending D3D11 command to be executed - // on the CS thread so that we can determine whether the - // resource is currently in use or not. 
- if (!Resource->isInUse(access)) - SynchronizeCsThread(); - - if (Resource->isInUse(access)) { - if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT) { + // Wait for any CS chunk using the resource to execute, since + // otherwise we cannot accurately determine if the resource is + // actually being used by the GPU right now. + bool isInUse = Resource->isInUse(access); + + if (!isInUse) { + SynchronizeCsThread(SequenceNumber); + isInUse = Resource->isInUse(access); + } + + if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT) { + if (isInUse) { // We don't have to wait, but misbehaving games may // still try to spin on `Map` until the resource is // idle, so we should flush pending commands FlushImplicit(FALSE); return false; - } else { + } + } else { + if (isInUse) { // Make sure pending commands using the resource get // executed on the the GPU if we have to wait for it Flush(); - SynchronizeCsThread(); - + SynchronizeCsThread(SequenceNumber); + Resource->waitIdle(access); } } - + return true; } void D3D11ImmediateContext::EmitCsChunk(DxvkCsChunkRef&& chunk) { - m_csThread.dispatchChunk(std::move(chunk)); + m_csSeqNum = m_csThread.dispatchChunk(std::move(chunk)); m_csIsBusy = true; } + void D3D11ImmediateContext::TrackTextureSequenceNumber( + D3D11CommonTexture* pResource, + UINT Subresource) { + pResource->TrackSequenceNumber(Subresource, m_csSeqNum + 1); + } + + + void D3D11ImmediateContext::TrackBufferSequenceNumber( + D3D11Buffer* pResource) { + pResource->TrackSequenceNumber(m_csSeqNum + 1); + } + + void D3D11ImmediateContext::FlushImplicit(BOOL StrongHint) { // Flush only if the GPU is about to go idle, in // order to keep the number of submissions low. 
diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h index ae57cfb13..9e9f73295 100644 --- a/src/d3d11/d3d11_context_imm.h +++ b/src/d3d11/d3d11_context_imm.h @@ -16,6 +16,7 @@ namespace dxvk { class D3D11ImmediateContext : public D3D11DeviceContext { friend class D3D11SwapChain; friend class D3D11VideoContext; + friend class D3D11DeviceContext; public: D3D11ImmediateContext( @@ -108,18 +109,22 @@ namespace dxvk { const UINT* pUAVInitialCounts); void STDMETHODCALLTYPE SwapDeviceContextState( - ID3DDeviceContextState* pState, - ID3DDeviceContextState** ppPreviousState); + ID3DDeviceContextState* pState, + ID3DDeviceContextState** ppPreviousState); - void SynchronizeCsThread(); + void SynchronizeCsThread( + uint64_t SequenceNumber); private: - DxvkCsThread m_csThread; - bool m_csIsBusy = false; + DxvkCsThread m_csThread; + uint64_t m_csSeqNum = 0ull; + bool m_csIsBusy = false; Rc m_eventSignal; - uint64_t m_eventCount = 0; + uint64_t m_eventCount = 0ull; + uint32_t m_mappedImageCount = 0u; + dxvk::high_resolution_clock::time_point m_lastFlush = dxvk::high_resolution_clock::now(); @@ -144,15 +149,30 @@ namespace dxvk { D3D11CommonTexture* pResource, UINT Subresource); + void UpdateMappedBuffer( + D3D11Buffer* pDstBuffer, + UINT Offset, + UINT Length, + const void* pSrcData, + UINT CopyFlags); + void SynchronizeDevice(); bool WaitForResource( const Rc& Resource, + uint64_t SequenceNumber, D3D11_MAP MapType, UINT MapFlags); void EmitCsChunk(DxvkCsChunkRef&& chunk); + void TrackTextureSequenceNumber( + D3D11CommonTexture* pResource, + UINT Subresource); + + void TrackBufferSequenceNumber( + D3D11Buffer* pResource); + void FlushImplicit(BOOL StrongHint); void SignalEvent(HANDLE hEvent); diff --git a/src/d3d11/d3d11_enums.cpp b/src/d3d11/d3d11_enums.cpp index 0b9fc0f62..24af0276c 100644 --- a/src/d3d11/d3d11_enums.cpp +++ b/src/d3d11/d3d11_enums.cpp @@ -9,6 +9,8 @@ std::ostream& operator << (std::ostream& os, D3D_FEATURE_LEVEL e) { 
ENUM_NAME(D3D_FEATURE_LEVEL_10_1); ENUM_NAME(D3D_FEATURE_LEVEL_11_0); ENUM_NAME(D3D_FEATURE_LEVEL_11_1); + ENUM_NAME(D3D_FEATURE_LEVEL_12_0); + ENUM_NAME(D3D_FEATURE_LEVEL_12_1); ENUM_DEFAULT(e); } } diff --git a/src/d3d11/d3d11_interop.cpp b/src/d3d11/d3d11_interop.cpp index 9584b6ab1..8fa50288b 100644 --- a/src/d3d11/d3d11_interop.cpp +++ b/src/d3d11/d3d11_interop.cpp @@ -91,7 +91,7 @@ namespace dxvk { auto immediateContext = static_cast(deviceContext.ptr()); immediateContext->Flush(); - immediateContext->SynchronizeCsThread(); + immediateContext->SynchronizeCsThread(DxvkCsThread::SynchronizeAll); } diff --git a/src/d3d11/d3d11_main.cpp b/src/d3d11/d3d11_main.cpp index d3f6e0c8f..d80d5dec4 100644 --- a/src/d3d11/d3d11_main.cpp +++ b/src/d3d11/d3d11_main.cpp @@ -242,4 +242,24 @@ extern "C" { ppDevice, pFeatureLevel, ppImmediateContext); } + + DLLEXPORT HRESULT __stdcall D3D11On12CreateDevice( + IUnknown* pDevice, + UINT Flags, + const D3D_FEATURE_LEVEL* pFeatureLevels, + UINT FeatureLevels, + IUnknown* const* ppCommandQueues, + UINT NumQueues, + UINT NodeMask, + ID3D11Device** ppDevice, + ID3D11DeviceContext** ppImmediateContext, + D3D_FEATURE_LEVEL* pChosenFeatureLevel) { + static bool s_errorShown = false; + + if (!std::exchange(s_errorShown, true)) + Logger::err("D3D11On12CreateDevice: Not implemented"); + + return E_NOTIMPL; + } + } \ No newline at end of file diff --git a/src/d3d11/d3d11_resource.cpp b/src/d3d11/d3d11_resource.cpp index 7ba71367f..5ee5fbf96 100644 --- a/src/d3d11/d3d11_resource.cpp +++ b/src/d3d11/d3d11_resource.cpp @@ -198,11 +198,8 @@ namespace dxvk { } - HRESULT ResourceAddRefPrivate(ID3D11Resource* pResource) { - D3D11_RESOURCE_DIMENSION dim; - pResource->GetType(&dim); - - switch (dim) { + HRESULT ResourceAddRefPrivate(ID3D11Resource* pResource, D3D11_RESOURCE_DIMENSION Type) { + switch (Type) { case D3D11_RESOURCE_DIMENSION_BUFFER: static_cast (pResource)->AddRefPrivate(); return S_OK; case D3D11_RESOURCE_DIMENSION_TEXTURE1D: 
static_cast(pResource)->AddRefPrivate(); return S_OK; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: static_cast(pResource)->AddRefPrivate(); return S_OK; @@ -212,11 +209,16 @@ namespace dxvk { } - HRESULT ResourceReleasePrivate(ID3D11Resource* pResource) { + HRESULT ResourceAddRefPrivate(ID3D11Resource* pResource) { D3D11_RESOURCE_DIMENSION dim; pResource->GetType(&dim); - switch (dim) { + return ResourceAddRefPrivate(pResource, dim); + } + + + HRESULT ResourceReleasePrivate(ID3D11Resource* pResource, D3D11_RESOURCE_DIMENSION Type) { + switch (Type) { case D3D11_RESOURCE_DIMENSION_BUFFER: static_cast (pResource)->ReleasePrivate(); return S_OK; case D3D11_RESOURCE_DIMENSION_TEXTURE1D: static_cast(pResource)->ReleasePrivate(); return S_OK; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: static_cast(pResource)->ReleasePrivate(); return S_OK; @@ -225,4 +227,12 @@ namespace dxvk { } } + + HRESULT ResourceReleasePrivate(ID3D11Resource* pResource) { + D3D11_RESOURCE_DIMENSION dim; + pResource->GetType(&dim); + + return ResourceReleasePrivate(pResource, dim); + } + } \ No newline at end of file diff --git a/src/d3d11/d3d11_resource.h b/src/d3d11/d3d11_resource.h index 7d91a3052..8f0e4b6ce 100644 --- a/src/d3d11/d3d11_resource.h +++ b/src/d3d11/d3d11_resource.h @@ -128,20 +128,124 @@ namespace dxvk { * Helper method that figures out the exact type of * the resource and calls its \c AddRefPrivate method. * \param [in] pResource The resource to reference + * \param [in] Type Resource type * \returns \c S_OK, or \c E_INVALIDARG for an invalid resource */ HRESULT ResourceAddRefPrivate( - ID3D11Resource* pResource); + ID3D11Resource* pResource, + D3D11_RESOURCE_DIMENSION Type); + HRESULT ResourceAddRefPrivate( + ID3D11Resource* pResource); + /** * \brief Decrements private reference count of a resource * * Helper method that figures out the exact type of * the resource and calls its \c ReleasePrivate method. 
* \param [in] pResource The resource to reference + * \param [in] Type Resource type * \returns \c S_OK, or \c E_INVALIDARG for an invalid resource */ + HRESULT ResourceReleasePrivate( + ID3D11Resource* pResource, + D3D11_RESOURCE_DIMENSION Type); + HRESULT ResourceReleasePrivate( ID3D11Resource* pResource); + /** + * \brief Typed private resource pointer + * + * Stores a resource and its type, in order to avoid + * unnecessary GetType calls. Also optionally stores + * a subresource index to avoid struct padding. + */ + class D3D11ResourceRef { + + public: + + D3D11ResourceRef() + : m_type(D3D11_RESOURCE_DIMENSION_UNKNOWN), + m_subresource(0), m_resource(nullptr) { } + + D3D11ResourceRef(ID3D11Resource* pResource) + : D3D11ResourceRef(pResource, 0) { } + + D3D11ResourceRef(ID3D11Resource* pResource, UINT Subresource) + : m_type(D3D11_RESOURCE_DIMENSION_UNKNOWN), + m_subresource(Subresource), m_resource(pResource) { + if (m_resource) { + m_resource->GetType(&m_type); + ResourceAddRefPrivate(m_resource, m_type); + } + } + + D3D11ResourceRef(ID3D11Resource* pResource, UINT Subresource, D3D11_RESOURCE_DIMENSION Type) + : m_type(Type), m_subresource(Subresource), m_resource(pResource) { + if (m_resource) + ResourceAddRefPrivate(m_resource, m_type); + } + + D3D11ResourceRef(D3D11ResourceRef&& other) + : m_type(other.m_type), m_resource(other.m_resource) { + other.m_type = D3D11_RESOURCE_DIMENSION_UNKNOWN; + other.m_resource = nullptr; + } + + D3D11ResourceRef(const D3D11ResourceRef& other) + : m_type(other.m_type), m_resource(other.m_resource) { + if (m_resource) + ResourceAddRefPrivate(m_resource, m_type); + } + + ~D3D11ResourceRef() { + if (m_resource) + ResourceReleasePrivate(m_resource, m_type); + } + + D3D11ResourceRef& operator = (D3D11ResourceRef&& other) { + if (m_resource) + ResourceReleasePrivate(m_resource, m_type); + + m_type = other.m_type; + m_resource = other.m_resource; + + other.m_type = D3D11_RESOURCE_DIMENSION_UNKNOWN; + other.m_resource = nullptr; + 
return *this; + } + + D3D11ResourceRef& operator = (const D3D11ResourceRef& other) { + if (other.m_resource) + ResourceAddRefPrivate(other.m_resource, other.m_type); + + if (m_resource) + ResourceReleasePrivate(m_resource, m_type); + + m_type = other.m_type; + m_resource = other.m_resource; + return *this; + } + + D3D11_RESOURCE_DIMENSION GetType() const { + return m_type; + } + + UINT GetSubresource() const { + return m_subresource; + } + + ID3D11Resource* Get() const { + return m_resource; + } + + private: + + D3D11_RESOURCE_DIMENSION m_type; + UINT m_subresource; + ID3D11Resource* m_resource; + + }; + } \ No newline at end of file diff --git a/src/d3d11/d3d11_texture.cpp b/src/d3d11/d3d11_texture.cpp index 69d75602d..14c1379c9 100644 --- a/src/d3d11/d3d11_texture.cpp +++ b/src/d3d11/d3d11_texture.cpp @@ -5,12 +5,13 @@ namespace dxvk { D3D11CommonTexture::D3D11CommonTexture( + ID3D11Resource* pInterface, D3D11Device* pDevice, const D3D11_COMMON_TEXTURE_DESC* pDesc, D3D11_RESOURCE_DIMENSION Dimension, DXGI_USAGE DxgiUsage, VkImage vkImage) - : m_device(pDevice), m_dimension(Dimension), m_desc(*pDesc), m_dxgiUsage(DxgiUsage) { + : m_interface(pInterface), m_device(pDevice), m_dimension(Dimension), m_desc(*pDesc), m_dxgiUsage(DxgiUsage) { DXGI_VK_FORMAT_MODE formatMode = GetFormatMode(); DXGI_VK_FORMAT_INFO formatInfo = m_device->LookupFormat(m_desc.Format, formatMode); DXGI_VK_FORMAT_FAMILY formatFamily = m_device->LookupFamily(m_desc.Format, formatMode); @@ -162,7 +163,7 @@ namespace dxvk { if (m_mapMode != D3D11_COMMON_TEXTURE_MAP_MODE_DIRECT) m_buffers.push_back(CreateMappedBuffer(j)); - m_mapTypes.push_back(D3D11_MAP(~0u)); + m_mapInfo.push_back({ D3D11_MAP(~0u), 0ull }); } } } @@ -910,7 +911,7 @@ namespace dxvk { D3D11Device* pDevice, const D3D11_COMMON_TEXTURE_DESC* pDesc) : D3D11DeviceChild(pDevice), - m_texture (pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE1D, 0, VK_NULL_HANDLE), + m_texture (this, pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE1D, 0, 
VK_NULL_HANDLE), m_interop (this, &m_texture), m_surface (this, &m_texture), m_resource(this), @@ -1008,7 +1009,7 @@ namespace dxvk { D3D11Device* pDevice, const D3D11_COMMON_TEXTURE_DESC* pDesc) : D3D11DeviceChild(pDevice), - m_texture (pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE2D, 0, VK_NULL_HANDLE), + m_texture (this, pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE2D, 0, VK_NULL_HANDLE), m_interop (this, &m_texture), m_surface (this, &m_texture), m_resource(this), @@ -1023,7 +1024,7 @@ namespace dxvk { DXGI_USAGE DxgiUsage, VkImage vkImage) : D3D11DeviceChild(pDevice), - m_texture (pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE2D, DxgiUsage, vkImage), + m_texture (this, pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE2D, DxgiUsage, vkImage), m_interop (this, &m_texture), m_surface (this, &m_texture), m_resource(this), @@ -1139,7 +1140,7 @@ namespace dxvk { D3D11Device* pDevice, const D3D11_COMMON_TEXTURE_DESC* pDesc) : D3D11DeviceChild(pDevice), - m_texture (pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE3D, 0, VK_NULL_HANDLE), + m_texture (this, pDevice, pDesc, D3D11_RESOURCE_DIMENSION_TEXTURE3D, 0, VK_NULL_HANDLE), m_interop (this, &m_texture), m_resource(this), m_d3d10 (this) { diff --git a/src/d3d11/d3d11_texture.h b/src/d3d11/d3d11_texture.h index 80901a85f..e6115279d 100644 --- a/src/d3d11/d3d11_texture.h +++ b/src/d3d11/d3d11_texture.h @@ -1,5 +1,6 @@ #pragma once +#include "../dxvk/dxvk_cs.h" #include "../dxvk/dxvk_device.h" #include "../d3d10/d3d10_texture.h" @@ -72,6 +73,7 @@ namespace dxvk { public: D3D11CommonTexture( + ID3D11Resource* pInterface, D3D11Device* pDevice, const D3D11_COMMON_TEXTURE_DESC* pDesc, D3D11_RESOURCE_DIMENSION Dimension, @@ -80,6 +82,14 @@ namespace dxvk { ~D3D11CommonTexture(); + /** + * \brief Retrieves resource interface + * \returns Resource interface + */ + ID3D11Resource* GetInterface() const { + return m_interface; + } + /** * \brief Texture properties * @@ -91,6 +101,14 @@ namespace dxvk { return &m_desc; } + 
/** + * \brief Retrieves D3D11 texture type + * \returns D3D11 resource dimension + */ + D3D11_RESOURCE_DIMENSION GetDimension() const { + return m_dimension; + } + /** * \brief Retrieves Vulkan image type * @@ -147,8 +165,8 @@ namespace dxvk { * \returns Current map mode of that subresource */ D3D11_MAP GetMapType(UINT Subresource) const { - return Subresource < m_mapTypes.size() - ? D3D11_MAP(m_mapTypes[Subresource]) + return Subresource < m_mapInfo.size() + ? D3D11_MAP(m_mapInfo[Subresource].mapType) : D3D11_MAP(~0u); } @@ -159,8 +177,8 @@ namespace dxvk { * \param [in] MapType The map type */ void SetMapType(UINT Subresource, D3D11_MAP MapType) { - if (Subresource < m_mapTypes.size()) - m_mapTypes[Subresource] = MapType; + if (Subresource < m_mapInfo.size()) + m_mapInfo[Subresource].mapType = MapType; } /** @@ -223,6 +241,56 @@ namespace dxvk { return m_packedFormat; } + /** + * \brief Checks whether the resource is eligible for tracking + * + * Mapped resources with no bind flags can be tracked so that + * mapping them will not necessarily cause a CS thread sync. + * \returns \c true if tracking is supported for this resource + */ + bool HasSequenceNumber() const { + if (m_mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_NONE) + return false; + + // For buffer-mapped images we only need to track copies to + // and from that buffer, so we can safely ignore bind flags + if (m_mapMode == D3D11_COMMON_TEXTURE_MAP_MODE_BUFFER) + return true; + + // Otherwise we can only do accurate tracking if the + // image cannot be used in the rendering pipeline. + return m_desc.BindFlags == 0; + } + + /** + * \brief Tracks sequence number for a given subresource + * + * Stores which CS chunk the resource was last used on. 
+ * \param [in] Subresource Subresource index + * \param [in] Seq Sequence number + */ + void TrackSequenceNumber(UINT Subresource, uint64_t Seq) { + if (Subresource < m_mapInfo.size()) + m_mapInfo[Subresource].seq = Seq; + } + + /** + * \brief Queries sequence number for a given subresource + * + * Returns which CS chunk the resource was last used on. + * \param [in] Subresource Subresource index + * \returns Sequence number for the given subresource + */ + uint64_t GetSequenceNumber(UINT Subresource) { + if (HasSequenceNumber()) { + return Subresource < m_buffers.size() + ? m_mapInfo[Subresource].seq + : 0ull; + } else { + return DxvkCsThread::SynchronizeAll; + } + } + /** * \brief Computes pixel offset into mapped buffer * @@ -310,7 +378,13 @@ namespace dxvk { DxvkBufferSliceHandle slice; }; - D3D11Device* const m_device; + struct MappedInfo { + D3D11_MAP mapType; + uint64_t seq; + }; + + ID3D11Resource* m_interface; + D3D11Device* m_device; D3D11_RESOURCE_DIMENSION m_dimension; D3D11_COMMON_TEXTURE_DESC m_desc; D3D11_COMMON_TEXTURE_MAP_MODE m_mapMode; @@ -319,7 +393,7 @@ namespace dxvk { Rc m_image; std::vector m_buffers; - std::vector m_mapTypes; + std::vector m_mapInfo; MappedBuffer CreateMappedBuffer( UINT MipLevel) const; diff --git a/src/d3d9/d3d9_common_texture.cpp b/src/d3d9/d3d9_common_texture.cpp index ca49ae763..d18db4056 100644 --- a/src/d3d9/d3d9_common_texture.cpp +++ b/src/d3d9/d3d9_common_texture.cpp @@ -40,8 +40,9 @@ namespace dxvk { m_mapping = pDevice->LookupFormat(m_desc.Format); - m_mapMode = DetermineMapMode(); - m_shadow = DetermineShadowState(); + m_mapMode = DetermineMapMode(); + m_shadow = DetermineShadowState(); + m_supportsFetch4 = DetermineFetch4Compatibility(); if (m_mapMode == D3D9_COMMON_TEXTURE_MAP_MODE_BACKED) { bool plainSurface = m_type == D3DRTYPE_SURFACE && @@ -350,7 +351,7 @@ namespace dxvk { BOOL D3D9CommonTexture::DetermineShadowState() const { - static std::array blacklist = { + constexpr std::array blacklist = { 
D3D9Format::INTZ, D3D9Format::DF16, D3D9Format::DF24 }; @@ -359,6 +360,17 @@ namespace dxvk { } + BOOL D3D9CommonTexture::DetermineFetch4Compatibility() const { + constexpr std::array singleChannelFormats = { + D3D9Format::INTZ, D3D9Format::DF16, D3D9Format::DF24, + D3D9Format::R16F, D3D9Format::R32F, D3D9Format::A8, + D3D9Format::L8, D3D9Format::L16 + }; + + return std::find(singleChannelFormats.begin(), singleChannelFormats.end(), m_desc.Format) != singleChannelFormats.end(); + } + + BOOL D3D9CommonTexture::CheckImageSupport( const DxvkImageCreateInfo* pImageInfo, VkImageTiling Tiling) const { diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h index 5eefbdfd7..b3150e60d 100644 --- a/src/d3d9/d3d9_common_texture.h +++ b/src/d3d9/d3d9_common_texture.h @@ -191,6 +191,14 @@ namespace dxvk { return m_shadow; } + /** + * \brief FETCH4 compatibility + * \returns Whether the format of the texture supports the FETCH4 hack + */ + bool SupportsFetch4() const { + return m_supportsFetch4; + } + /** * \brief Null * \returns Whether the texture is D3DFMT_NULL or not @@ -448,6 +456,7 @@ namespace dxvk { D3D9_VK_FORMAT_MAPPING m_mapping; bool m_shadow; //< Depth Compare-ness + bool m_supportsFetch4; int64_t m_size = 0; @@ -487,6 +496,8 @@ namespace dxvk { BOOL DetermineShadowState() const; + BOOL DetermineFetch4Compatibility() const; + BOOL CheckImageSupport( const DxvkImageCreateInfo* pImageInfo, VkImageTiling Tiling) const; diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 6a0839afc..23cb205c4 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -81,7 +81,7 @@ namespace dxvk { , m_d3d9Options ( dxvkDevice, pParent->GetInstance()->config() ) , m_multithread ( BehaviorFlags & D3DCREATE_MULTITHREADED ) , m_isSWVP ( (BehaviorFlags & D3DCREATE_SOFTWARE_VERTEXPROCESSING) ? 
true : false ) - , m_csThread ( dxvkDevice->createRtxContext() ) + , m_csThread ( dxvkDevice, dxvkDevice->createRtxContext() ) , m_csChunk ( AllocCsChunk() ) // NV-DXVK start: unbound light indices , m_state ( Direct3DState9 { D3D9CapturableState{ static_cast(std::max(m_d3d9Options.maxEnabledLights, 0)) } } ) @@ -2770,6 +2770,9 @@ namespace dxvk { return D3D_OK; } + if (!VertexCount) + return D3D_OK; + D3D9CommonBuffer* dst = static_cast(pDestBuffer)->GetCommonBuffer(); D3D9VertexDecl* decl = static_cast (pVertexDecl); @@ -3859,10 +3862,14 @@ namespace dxvk { constexpr DWORD Fetch4Disabled = MAKEFOURCC('G', 'E', 'T', '1'); if (unlikely(Type == D3DSAMP_MIPMAPLODBIAS)) { + auto texture = GetCommonTexture(m_state.textures[StateSampler]); + bool textureSupportsFetch4 = texture != nullptr && texture->SupportsFetch4(); + if (unlikely(Value == Fetch4Enabled)) { m_fetch4Enabled |= 1u << StateSampler; - if (state[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT) + if (textureSupportsFetch4 && state[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT) { m_fetch4 |= 1u << StateSampler; + } } else if (unlikely(Value == Fetch4Disabled)) { m_fetch4Enabled &= ~(1u << StateSampler); @@ -3871,7 +3878,10 @@ namespace dxvk { } if (unlikely(Type == D3DSAMP_MAGFILTER && (m_fetch4Enabled & (1u << StateSampler)))) { - if (Value == D3DTEXF_POINT) + auto texture = GetCommonTexture(m_state.textures[StateSampler]); + bool textureSupportsFetch4 = texture != nullptr && texture->SupportsFetch4(); + + if (Value == D3DTEXF_POINT && textureSupportsFetch4) m_fetch4 |= 1u << StateSampler; else m_fetch4 &= ~(1u << StateSampler); @@ -3925,6 +3935,19 @@ namespace dxvk { m_dirtySamplerStates |= 1u << StateSampler; } + + if (unlikely(m_fetch4Enabled & (1u << StateSampler) && !(m_fetch4 & (1u << StateSampler)))) { + bool textureSupportsFetch4 = newTexture->SupportsFetch4(); + if (textureSupportsFetch4 + && m_state.samplerStates[StateSampler][D3DSAMP_MAGFILTER] == D3DTEXF_POINT + && 
m_state.samplerStates[StateSampler][D3DSAMP_MINFILTER] == D3DTEXF_POINT) { + m_fetch4 |= 1u << StateSampler; + m_dirtySamplerStates |= 1u << StateSampler; + } + } + } else if (unlikely(m_fetch4 & (1u << StateSampler))) { + m_fetch4 &= ~(1u << StateSampler); + m_dirtySamplerStates |= 1u << StateSampler; } DWORD combinedUsage = oldUsage | newUsage; @@ -5054,8 +5077,7 @@ namespace dxvk { // recorded prior to this function will be run FlushCsChunk(); - if (m_csThread.isBusy()) - m_csThread.synchronize(); + m_csThread.synchronize(DxvkCsThread::SynchronizeAll); } diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 61739621c..4b53b6e18 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -936,6 +936,9 @@ namespace dxvk { // NV-DXVK start: external API D3D9SwapchainExternal* GetExternalPresenter(); + D3D9Rtx& RTX() { + return m_rtx; + } // NV-DXVK end private: @@ -944,6 +947,9 @@ namespace dxvk { return DxvkCsChunkRef(chunk, &m_csChunkPool); } +// NV-DXVK start: external API + public: +// NV-DXVK end template void EmitCs(Cmd&& command) { if (unlikely(!m_csChunk->push(command))) { @@ -953,6 +959,9 @@ namespace dxvk { m_csChunk->push(command); } } +// NV-DXVK start: external API + private: +// NV-DXVK end void EmitCsChunk(DxvkCsChunkRef&& chunk); diff --git a/src/d3d9/d3d9_main.cpp b/src/d3d9/d3d9_main.cpp index 129910f10..8c1a5e21c 100644 --- a/src/d3d9/d3d9_main.cpp +++ b/src/d3d9/d3d9_main.cpp @@ -91,3 +91,12 @@ extern "C" { return 0; } } + +// NV-DXVK start: external API +#include + +void dummy() { + // need to reference a function so it's exported from d3d9.dll + remixapi_InitializeLibrary(nullptr, nullptr); +} +// NV-DXVK end diff --git a/src/d3d9/d3d9_options.cpp b/src/d3d9/d3d9_options.cpp index 9aceef225..7110ffb18 100644 --- a/src/d3d9/d3d9_options.cpp +++ b/src/d3d9/d3d9_options.cpp @@ -107,13 +107,20 @@ namespace dxvk { 0, 0); applyTristate(this->generalHazards, config.getOption("d3d9.generalHazards", Tristate::Auto)); - std::string 
floatEmulation = Config::toLower(config.getOption("d3d9.floatEmulation", "true")); + std::string floatEmulation = Config::toLower(config.getOption("d3d9.floatEmulation", "auto")); if (floatEmulation == "strict") { - d3d9FloatEmulation = D3D9FloatEmulation::Strict; + d3d9FloatEmulation = D3D9FloatEmulation::Strict; } else if (floatEmulation == "false") { - d3d9FloatEmulation = D3D9FloatEmulation::Disabled; + d3d9FloatEmulation = D3D9FloatEmulation::Disabled; + } else if (floatEmulation == "true") { + d3d9FloatEmulation = D3D9FloatEmulation::Enabled; } else { - d3d9FloatEmulation = D3D9FloatEmulation::Enabled; + bool hasMulz = adapter != nullptr + && adapter->matchesDriver(DxvkGpuVendor::Amd, + VK_DRIVER_ID_MESA_RADV, + VK_MAKE_VERSION(21, 99, 99), + 0); + d3d9FloatEmulation = hasMulz ? D3D9FloatEmulation::Strict : D3D9FloatEmulation::Enabled; } } diff --git a/src/d3d9/meson.build b/src/d3d9/meson.build index 78ea3a645..69c7ceed8 100644 --- a/src/d3d9/meson.build +++ b/src/d3d9/meson.build @@ -94,7 +94,7 @@ d3d9_dll = shared_library('d3d9', d3d9_src, dxvk_version, glsl_generator.process name_prefix : '', link_with : [ util_lib ], dependencies : [ dxso_dep, dxvk_dep, util_dep ], - include_directories : [ dxvk_include_path, dxvk_shader_include_path, usd_include_paths ], + include_directories : [ dxvk_include_path, dxvk_shader_include_path, usd_include_paths, remix_api_include_path ], install : true, objects : not dxvk_is_msvc ? 
'd3d9' + def_spec_ext : [], vs_module_defs : 'd3d9'+def_spec_ext, diff --git a/src/dxvk/dxvk_adapter.cpp b/src/dxvk/dxvk_adapter.cpp index c737f2b43..8246d20ce 100644 --- a/src/dxvk/dxvk_adapter.cpp +++ b/src/dxvk/dxvk_adapter.cpp @@ -938,13 +938,23 @@ namespace dxvk { // Query full device properties for all enabled extensions m_vki->vkGetPhysicalDeviceProperties2(m_handle, &m_deviceInfo.core); - - // Nvidia reports the driver version in a slightly different format - if (DxvkGpuVendor(m_deviceInfo.core.properties.vendorID) == DxvkGpuVendor::Nvidia) { - m_deviceInfo.core.properties.driverVersion = VK_MAKE_VERSION( - VK_VERSION_MAJOR(m_deviceInfo.core.properties.driverVersion), - VK_VERSION_MINOR(m_deviceInfo.core.properties.driverVersion >> 0) >> 2, - VK_VERSION_PATCH(m_deviceInfo.core.properties.driverVersion >> 2) >> 4); + + // Some drivers reports the driver version in a slightly different format + switch (m_deviceInfo.khrDeviceDriverProperties.driverID) { + case VK_DRIVER_ID_NVIDIA_PROPRIETARY: + m_deviceInfo.core.properties.driverVersion = VK_MAKE_VERSION( + (m_deviceInfo.core.properties.driverVersion >> 22) & 0x3ff, + (m_deviceInfo.core.properties.driverVersion >> 14) & 0x0ff, + (m_deviceInfo.core.properties.driverVersion >> 6) & 0x0ff); + break; + + case VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS: + m_deviceInfo.core.properties.driverVersion = VK_MAKE_VERSION( + m_deviceInfo.core.properties.driverVersion >> 14, + m_deviceInfo.core.properties.driverVersion & 0x3fff, 0); + break; + + default:; } } diff --git a/src/dxvk/dxvk_barrier.h b/src/dxvk/dxvk_barrier.h index f3629fa92..f7a7713d9 100644 --- a/src/dxvk/dxvk_barrier.h +++ b/src/dxvk/dxvk_barrier.h @@ -354,7 +354,7 @@ namespace dxvk { std::vector m_hashMap; static size_t computeHash(K key) { - return size_t(reinterpret_cast(key)); + return size_t(uint64_t(key)); } size_t computeIndex(K key) const { diff --git a/src/dxvk/dxvk_buffer.cpp b/src/dxvk/dxvk_buffer.cpp index 790081c0b..c018e4788 100644 --- 
a/src/dxvk/dxvk_buffer.cpp +++ b/src/dxvk/dxvk_buffer.cpp @@ -46,7 +46,7 @@ namespace dxvk { m_physSliceCount = std::max(1, 256 / m_physSliceStride); // Limit size of multi-slice buffers to reduce fragmentation - constexpr VkDeviceSize MaxBufferSize = 4 << 20; + constexpr VkDeviceSize MaxBufferSize = 256 << 10; m_physSliceMaxCount = MaxBufferSize >= m_physSliceStride ? MaxBufferSize / m_physSliceStride @@ -128,12 +128,6 @@ namespace dxvk { "\n usage: ", info.usage)); } - VkMemoryAllocateFlags memoryAllocateFlags = 0; - - if (info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { - memoryAllocateFlags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; - } - VkMemoryDedicatedRequirements dedicatedRequirements; dedicatedRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; dedicatedRequirements.pNext = VK_NULL_HANDLE; @@ -177,11 +171,15 @@ namespace dxvk { bool isGpuWritable = (m_info.access & ( VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT)) != 0; - float priority = isGpuWritable ? 
1.0f : 0.5f; + DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); + + if (isGpuWritable) + hints.set(DxvkMemoryFlag::GpuWritable); + // Ask driver whether we should be using a dedicated allocation handle.memory = m_memAlloc->alloc(&memReq.memoryRequirements, - dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, memoryAllocateFlags, priority, category); + dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, hints, category); if (vkd->vkBindBufferMemory(vkd->device(), handle.buffer, handle.memory.memory(), handle.memory.offset()) != VK_SUCCESS) diff --git a/src/dxvk/dxvk_buffer.h b/src/dxvk/dxvk_buffer.h index 0be450ff9..44937c476 100644 --- a/src/dxvk/dxvk_buffer.h +++ b/src/dxvk/dxvk_buffer.h @@ -348,19 +348,22 @@ namespace dxvk { VkDeviceAddress m_deviceAddress = 0; uint32_t m_vertexStride = 0; + + alignas(CACHE_LINE_SIZE) + sync::Spinlock m_freeMutex; + uint32_t m_lazyAlloc = false; + VkDeviceSize m_physSliceLength = 0; + VkDeviceSize m_physSliceStride = 0; + VkDeviceSize m_physSliceCount = 1; + VkDeviceSize m_physSliceMaxCount = 1; - sync::Spinlock m_freeMutex; - sync::Spinlock m_swapMutex; - - std::vector m_buffers; - std::vector m_freeSlices; - std::vector m_nextSlices; - - VkDeviceSize m_physSliceLength = 0; - VkDeviceSize m_physSliceStride = 0; - VkDeviceSize m_physSliceCount = 1; - VkDeviceSize m_physSliceMaxCount = 1; + std::vector m_buffers; + std::vector m_freeSlices; + + alignas(CACHE_LINE_SIZE) + sync::Spinlock m_swapMutex; + std::vector m_nextSlices; DxvkMemoryStats::Category m_category; diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h index 9546f8745..13ae5e770 100644 --- a/src/dxvk/dxvk_cmdlist.h +++ b/src/dxvk/dxvk_cmdlist.h @@ -144,7 +144,7 @@ namespace dxvk { * \param [in] ctr The counter to increment * \param [in] val The value to add */ - void addStatCtr(DxvkStatCounter ctr, uint32_t val) { + void addStatCtr(DxvkStatCounter ctr, uint64_t val) { m_statCounters.addCtr(ctr, val); } diff --git a/src/dxvk/dxvk_context.cpp 
b/src/dxvk/dxvk_context.cpp index f527f5cd1..f4af12175 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -2270,13 +2270,8 @@ namespace dxvk { const void* data, bool forceNoReplace) { bool isHostVisible = buffer->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - - bool replaceBuffer = !forceNoReplace - && (size == buffer->info().size) - && (size <= (1 << 20)) - && !isHostVisible; -// NV-DXVK end: - + bool replaceBuffer = !forceNoReplace && size == buffer->info().size && !isHostVisible; + DxvkBufferSliceHandle bufferSlice; DxvkCmdBuffer cmdBuffer; @@ -2303,20 +2298,42 @@ namespace dxvk { m_execBarriers.recordCommands(m_cmd); } - // Vulkan specifies that small amounts of data (up to 64kB) can - // be copied to a buffer directly if the size is a multiple of - // four. Anything else must be copied through a staging buffer. - // We'll limit the size to 4kB in order to keep command buffers - // reasonably small, we do not know how much data apps may upload. - if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) { - m_cmd->cmdUpdateBuffer( - cmdBuffer, - bufferSlice.handle, - bufferSlice.offset, - bufferSlice.length, - data); - } - else { + m_cmd->cmdUpdateBuffer(cmdBuffer, + bufferSlice.handle, + bufferSlice.offset, + bufferSlice.length, + data); + + auto& barriers = replaceBuffer + ? 
m_initBarriers + : m_execBarriers; + + barriers.accessBuffer( + bufferSlice, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + buffer->info().stages, + buffer->info().access); + + m_cmd->trackResource(buffer); + } + +// NV-DXVK begin: utility function for partial buffer uploads + void DxvkContext::writeToBuffer( + const Rc& buffer, + VkDeviceSize offset, + VkDeviceSize size, + const void* data, + bool forceNoReplace) { + + if (size < 65536 && size % 4 == 0) { + updateBuffer(buffer, offset, size, data, forceNoReplace); + } else { + this->spillRenderPass(true); + + DxvkBufferSliceHandle bufferSlice = buffer->getSliceHandle(offset, size); + DxvkCmdBuffer cmdBuffer = DxvkCmdBuffer::ExecBuffer; + auto stagingSlice = m_staging.alloc(CACHE_LINE_SIZE, size); auto stagingHandle = stagingSlice.getSliceHandle(); @@ -2328,25 +2345,25 @@ namespace dxvk { region.size = size; m_cmd->cmdCopyBuffer(cmdBuffer, - stagingHandle.handle, bufferSlice.handle, 1, &region); + stagingHandle.handle, + bufferSlice.handle, + 1, + &region); m_cmd->trackResource(stagingSlice.buffer()); - } - auto& barriers = replaceBuffer - ? 
m_initBarriers - : m_execBarriers; - - barriers.accessBuffer( - bufferSlice, - VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_ACCESS_TRANSFER_WRITE_BIT, - buffer->info().stages, - buffer->info().access); + auto& barriers = m_execBarriers; + barriers.accessBuffer( + bufferSlice, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + buffer->info().stages, + buffer->info().access); - m_cmd->trackResource(buffer); + m_cmd->trackResource(buffer); + } } - +// NV-DXVK end void DxvkContext::updateImage( const Rc& image, diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 9092aef78..ad7a302dd 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -857,6 +857,15 @@ namespace dxvk { bool forceNoReplace = false); // NV-DXVK end: + // NV-DXVK begin: utility function for partial buffer uploads + void writeToBuffer( + const Rc& buffer, + VkDeviceSize offset, + VkDeviceSize size, + const void* data, + bool forceNoReplace = false); + // NV-DXVK end + /** * \brief Updates an image * @@ -1213,6 +1222,19 @@ namespace dxvk { */ void insertDebugLabel(VkDebugUtilsLabelEXT *label); + /** + * \brief Increments a given stat counter + * + * The stat counters will be merged into the global + * stat counters upon execution of the command list. 
+ * \param [in] counter Stat counter to increment + * \param [in] value Increment value + */ + void addStatCtr(DxvkStatCounter counter, uint64_t value) { + if (m_cmd != nullptr) + m_cmd->addStatCtr(counter, value); + } + // NV-DXVK start: use EXT_debug_utils VkDescriptorSet allocateDescriptorSet(VkDescriptorSetLayout layout, const char *name = nullptr); // NV-DXVK end @@ -1229,6 +1251,7 @@ namespace dxvk { } protected: + Rc m_device; DxvkObjects* m_common; diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp index d28b5d650..5b91acf2c 100644 --- a/src/dxvk/dxvk_cs.cpp +++ b/src/dxvk/dxvk_cs.cpp @@ -97,8 +97,11 @@ namespace dxvk { } - DxvkCsThread::DxvkCsThread(const Rc& context) - : m_context(context), m_thread([this] { threadFunc(); }) { + DxvkCsThread::DxvkCsThread( + const Rc& device, + const Rc& context) + : m_device(device), m_context(context), + m_thread([this] { threadFunc(); }) { } @@ -113,26 +116,42 @@ namespace dxvk { } - void DxvkCsThread::dispatchChunk(DxvkCsChunkRef&& chunk) { + uint64_t DxvkCsThread::dispatchChunk(DxvkCsChunkRef&& chunk) { ScopedCpuProfileZone(); + uint64_t seq; + { std::unique_lock lock(m_mutex); + seq = ++m_chunksDispatched; m_chunksQueued.push(std::move(chunk)); - m_chunksPending += 1; } m_condOnAdd.notify_one(); + return seq; } - void DxvkCsThread::synchronize() { + void DxvkCsThread::synchronize(uint64_t seq) { ScopedCpuProfileZone(); - std::unique_lock lock(m_mutex); - - m_condOnSync.wait(lock, [this] { - return !m_chunksPending.load(); - }); + // Avoid locking if we know the sync is a no-op, may + // reduce overhead if this is being called frequently + if (seq > m_chunksExecuted.load(std::memory_order_acquire)) { + std::unique_lock lock(m_mutex); + + if (seq == SynchronizeAll) + seq = m_chunksDispatched.load(); + + auto t0 = dxvk::high_resolution_clock::now(); + m_condOnSync.wait(lock, [this, seq] { + return m_chunksExecuted.load() >= seq; + }); + auto t1 = dxvk::high_resolution_clock::now(); + auto ticks = 
std::chrono::duration_cast(t1 - t0); + + m_device->addStatCtr(DxvkStatCounter::CsSyncCount, 1); + m_device->addStatCtr(DxvkStatCounter::CsSyncTicks, ticks.count()); + } } @@ -147,8 +166,8 @@ namespace dxvk { while (!m_stopped.load()) { { std::unique_lock lock(m_mutex); if (chunk) { - if (--m_chunksPending == 0) - m_condOnSync.notify_one(); + m_chunksExecuted++; + m_condOnSync.notify_one(); chunk = DxvkCsChunkRef(); } @@ -166,8 +185,10 @@ namespace dxvk { } } - if (chunk) + if (chunk) { + m_context->addStatCtr(DxvkStatCounter::CsChunkCount, 1); chunk->executeAll(m_context.ptr()); + } } } catch (const DxvkError& e) { Logger::err("Exception on CS thread!"); diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h index ca34fa23f..94f980dfd 100644 --- a/src/dxvk/dxvk_cs.h +++ b/src/dxvk/dxvk_cs.h @@ -6,6 +6,8 @@ #include #include "../util/thread.h" + +#include "dxvk_device.h" #include "dxvk_context.h" namespace dxvk { @@ -45,7 +47,7 @@ namespace dxvk { * \brief Executes embedded commands * \param [in] ctx The target context */ - virtual void exec(DxvkContext* ctx) const = 0; + virtual void exec(DxvkContext* ctx) = 0; private: @@ -71,7 +73,7 @@ namespace dxvk { DxvkCsTypedCmd (DxvkCsTypedCmd&&) = delete; DxvkCsTypedCmd& operator = (DxvkCsTypedCmd&&) = delete; - void exec(DxvkContext* ctx) const { + void exec(DxvkContext* ctx) override { m_command(ctx); } @@ -102,7 +104,7 @@ namespace dxvk { DxvkCsDataCmd (DxvkCsDataCmd&&) = delete; DxvkCsDataCmd& operator = (DxvkCsDataCmd&&) = delete; - void exec(DxvkContext* ctx) const { + void exec(DxvkContext* ctx) override { m_command(ctx, &m_data); } @@ -379,8 +381,12 @@ namespace dxvk { class DxvkCsThread { public: - - DxvkCsThread(const Rc& context); + + constexpr static uint64_t SynchronizeAll = ~0ull; + + DxvkCsThread( + const Rc& device, + const Rc& context); ~DxvkCsThread(); /** @@ -389,41 +395,44 @@ namespace dxvk { * Can be used to efficiently play back large * command lists recorded on another thread. 
* \param [in] chunk The chunk to dispatch + * \returns Sequence number of the submission */ - void dispatchChunk(DxvkCsChunkRef&& chunk); + uint64_t dispatchChunk(DxvkCsChunkRef&& chunk); /** * \brief Synchronizes with the thread * - * This waits for all chunks in the dispatch - * queue to be processed by the thread. Note - * that this does \e not implicitly call - * \ref flush. + * This waits for all chunks in the dispatch queue to + * be processed by the thread, up to the given sequence + * number. If the sequence number is \c SynchronizeAll, this + * will wait for all pending chunks to complete execution. + * \param [in] seq Sequence number to wait for. */ - void synchronize(); + void synchronize(uint64_t seq); /** - * \brief Checks whether the worker thread is busy - * - * Note that this information is only reliable if - * only the calling thread dispatches jobs to the - * worker queue and if the result is \c false. - * \returns \c true if there is still work to do + * \brief Retrieves last executed sequence number + * + * Can be used to avoid synchronization in some cases. 
+ * \returns Sequence number of last executed chunk */ - bool isBusy() const { - return m_chunksPending.load() != 0; + uint64_t lastSequenceNumber() const { + return m_chunksExecuted.load(); } - + private: - const Rc m_context; + Rc m_device; + Rc m_context; + + std::atomic m_chunksDispatched = { 0ull }; + std::atomic m_chunksExecuted = { 0ull }; std::atomic m_stopped = { false }; dxvk::mutex m_mutex; dxvk::condition_variable m_condOnAdd; dxvk::condition_variable m_condOnSync; std::queue m_chunksQueued; - std::atomic m_chunksPending = { 0u }; dxvk::thread m_thread; void threadFunc(); diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h index 579219c90..f6ade334e 100644 --- a/src/dxvk/dxvk_device.h +++ b/src/dxvk/dxvk_device.h @@ -551,6 +551,17 @@ namespace dxvk { return m_submissionQueue.pendingSubmissions(); } + /** + * \brief Increments a given stat counter + * + * \param [in] counter Stat counter to increment + * \param [in] value Increment value + */ + void addStatCtr(DxvkStatCounter counter, uint64_t value) { + std::lock_guard lock(m_statLock); + m_statCounters.addCtr(counter, value); + } + /** * \brief Waits for a given submission * diff --git a/src/dxvk/dxvk_framebuffer.h b/src/dxvk/dxvk_framebuffer.h index eaeb62c10..b55e6e5fa 100644 --- a/src/dxvk/dxvk_framebuffer.h +++ b/src/dxvk/dxvk_framebuffer.h @@ -65,7 +65,7 @@ namespace dxvk { state.add(depthView); for (uint32_t i = 0; i < MaxNumRenderTargets; i++) state.add(colorViews[i]); - state.add(reinterpret_cast(renderPass)); + state.add(uint64_t(renderPass)); return state; } diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index ff13e1956..06fd3b99a 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -137,7 +137,7 @@ namespace dxvk { m_vkd->vkGetImageMemoryRequirements2( m_vkd->device(), &memReqInfo, &memReq); - + if (info.tiling != VK_IMAGE_TILING_LINEAR && !dedicatedRequirements.prefersDedicatedAllocation) { memReq.memoryRequirements.size = 
align(memReq.memoryRequirements.size, memAlloc.bufferImageGranularity()); memReq.memoryRequirements.alignment = align(memReq.memoryRequirements.alignment , memAlloc.bufferImageGranularity()); @@ -151,8 +151,11 @@ namespace dxvk { VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) != 0; - float priority = isGpuWritable ? 1.0f : 0.5f; + DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); + if (isGpuWritable) + hints.set(DxvkMemoryFlag::GpuWritable); + if (m_shared) { dedicatedRequirements.prefersDedicatedAllocation = VK_TRUE; dedicatedRequirements.requiresDedicatedAllocation = VK_TRUE; @@ -160,7 +163,7 @@ namespace dxvk { // Ask driver whether we should be using a dedicated allocation m_image.memory = memAlloc.alloc(&memReq.memoryRequirements, - dedicatedRequirements, dedMemoryAllocInfo, memFlags, 0, priority, category); + dedicatedRequirements, dedMemoryAllocInfo, memFlags, hints, category); // Try to bind the allocated memory slice to the image if (m_vkd->vkBindImageMemory(m_vkd->device(), m_image.image, diff --git a/src/dxvk/dxvk_instance.cpp b/src/dxvk/dxvk_instance.cpp index e4fb014a9..cb46d5001 100644 --- a/src/dxvk/dxvk_instance.cpp +++ b/src/dxvk/dxvk_instance.cpp @@ -236,12 +236,8 @@ namespace dxvk { // NV-DXVK end m_extProviders.push_back(&DxvkPlatformExts::s_instance); - - if (m_options.enableOpenVR) - m_extProviders.push_back(&VrInstance::s_instance); - - if (m_options.enableOpenXR) - m_extProviders.push_back(&DxvkXrProvider::s_instance); + m_extProviders.push_back(&VrInstance::s_instance); + m_extProviders.push_back(&DxvkXrProvider::s_instance); // NV-DXVK start: RTXIO #ifdef WITH_RTXIO @@ -408,7 +404,7 @@ namespace dxvk { // NV-DXVK start: custom pEngineName appInfo.pEngineName = "DXVK_NvRemix"; // NV-DXVK end - appInfo.engineVersion = VK_MAKE_VERSION(1, 9, 3); + appInfo.engineVersion = VK_MAKE_VERSION(1, 9, 4); // NV-DXVK start: Require Vulkan 1.3 appInfo.apiVersion = VK_MAKE_VERSION(1, 3, 0); // NV-DXVK end 
diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index fd4a2d010..adf1cb31c 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -19,6 +19,9 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ + +#include + #include "dxvk_device.h" #include "dxvk_memory.h" @@ -221,8 +224,9 @@ DxvkMemory::DxvkMemory() { } DxvkMemoryChunk::DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory) - : m_alloc(alloc), m_type(type), m_memory(memory) { + DxvkDeviceMemory memory, + DxvkMemoryFlags hints) + : m_alloc(alloc), m_type(type), m_memory(memory), m_hints(hints) { // Mark the entire chunk as free m_freeList.push_back(FreeSlice { 0, memory.memSize }); } @@ -236,17 +240,14 @@ DxvkMemory::DxvkMemory() { } DxvkMemory DxvkMemoryChunk::alloc( - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, + VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority, + DxvkMemoryFlags hints, DxvkMemoryStats::Category category) { - // Property and allocate flags must be compatible. This could + // Property flags must be compatible. This could // be refined a bit in the future if necessary. 
- if (m_memory.memPropertyFlags != propertyFlags - || m_memory.memAllocateFlags != allocateFlags - || m_memory.priority != priority) + if (m_memory.memFlags != flags || !checkHints(hints)) return DxvkMemory(); // If the chunk is full, return @@ -322,20 +323,30 @@ DxvkMemory::DxvkMemory() { } m_freeList.push_back({ offset, length }); } - // NV-DXVK start: Free unused memory - bool DxvkMemoryChunk::isWholeChunkFree() const { - if (m_freeList.size() != 1) - return false; + + bool DxvkMemoryChunk::isEmpty() const { + return m_freeList.size() == 1 + && m_freeList[0].length == m_memory.memSize; + } + + + bool DxvkMemoryChunk::isCompatible(const Rc& other) const { + return other->m_memory.memFlags == m_memory.memFlags && other->m_hints == m_hints; + } + - if (m_freeList[0].offset != 0) - return false; + bool DxvkMemoryChunk::checkHints(DxvkMemoryFlags hints) const { + DxvkMemoryFlags mask( + DxvkMemoryFlag::Small, + DxvkMemoryFlag::GpuReadable, + DxvkMemoryFlag::GpuWritable); - if (m_freeList[0].length != m_memory.memSize) - return false; + if (hints.test(DxvkMemoryFlag::IgnoreConstraints)) + mask = DxvkMemoryFlags(); - return true; + return (m_hints & mask) == (hints & mask); } - // NV-DXVK end + DxvkMemoryAllocator::DxvkMemoryAllocator(const DxvkDevice* device) : m_vkd (device->vkd()), @@ -358,7 +369,17 @@ DxvkMemory::DxvkMemory() { } m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; m_memTypes[i].memType = m_memProps.memoryTypes[i]; m_memTypes[i].memTypeId = i; - m_memTypes[i].chunkSize = pickChunkSize(i); + } + + /* Check what kind of heap the HVV memory type is on, if any. If the + * HVV memory type is on the largest device-local heap, we either have + * an UMA system or an RBAR-enabled system. Otherwise, there will likely + * be a separate, smaller heap for it. 
*/ + VkDeviceSize largestDeviceLocalHeap = 0; + + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { + if (m_memTypes[i].memType.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + largestDeviceLocalHeap = std::max(largestDeviceLocalHeap, m_memTypes[i].heap->properties.size); } /* Work around an issue on Nvidia drivers where using the entire @@ -371,12 +392,11 @@ DxvkMemory::DxvkMemory() { } if (shrinkNvidiaHvvHeap) { for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { - VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + VkMemoryPropertyFlags hvvFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - if ((m_memTypes[i].memType.propertyFlags & flags) == flags) { + if ((m_memTypes[i].memType.propertyFlags & hvvFlags) == hvvFlags + && (m_memTypes[i].heap->properties.size < largestDeviceLocalHeap)) m_memTypes[i].heap->budget = 32 << 20; - m_memTypes[i].chunkSize = 1 << 20; - } } } } @@ -392,9 +412,8 @@ DxvkMemory::DxvkMemory() { } const VkMemoryRequirements* req, const VkMemoryDedicatedRequirements& dedAllocReq, const VkMemoryDedicatedAllocateInfo& dedAllocInfo, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - float priority, + VkMemoryPropertyFlags flags, + DxvkMemoryFlags hints, DxvkMemoryStats::Category category) { ScopedCpuProfileZone(); @@ -402,26 +421,45 @@ DxvkMemory::DxvkMemory() { } // Note: The mutex here in DXVK has been removed in favor of the per-memory type mutex in tryAllocFromType. 
// NV-DXVK end + // Keep small allocations together to avoid fragmenting + // chunks for larger resources with lots of small gaps, + // as well as resources with potentially weird lifetimes + if (req->size <= SmallAllocationThreshold) { + hints.set(DxvkMemoryFlag::Small); + hints.clr(DxvkMemoryFlag::GpuWritable, DxvkMemoryFlag::GpuReadable); + } + + // Ignore all hints for host-visible allocations since they + // usually don't make much sense for those resources + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + hints = DxvkMemoryFlags(); + // Try to allocate from a memory type which supports the given flags exactly auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr; - DxvkMemory result = this->tryAlloc(req, dedAllocPtr, propertyFlags, allocateFlags, priority, category); + DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints, category); // If the first attempt failed, try ignoring the dedicated allocation if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) { - result = this->tryAlloc(req, nullptr, propertyFlags, allocateFlags, priority, category); + result = this->tryAlloc(req, nullptr, flags, hints, category); dedAllocPtr = nullptr; } + // Retry without the hint constraints + if (!result) { + hints.set(DxvkMemoryFlag::IgnoreConstraints); + result = this->tryAlloc(req, nullptr, flags, hints, category); + } + // If that still didn't work, probe slower memory types as well VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; VkMemoryPropertyFlags remFlags = 0; - while (!result && (propertyFlags & optFlags)) { + while (!result && (flags & optFlags)) { remFlags |= optFlags & (0 - optFlags); // Note: 0 - x is a more well defined version of -x for unsigned values optFlags &= ~remFlags; - result = this->tryAlloc(req, dedAllocPtr, propertyFlags & ~remFlags, allocateFlags, priority, category); + result = this->tryAlloc(req, dedAllocPtr, flags & 
~remFlags, hints, category); } if (!result) { @@ -431,8 +469,7 @@ DxvkMemory::DxvkMemory() { } "DxvkMemoryAllocator: Memory allocation failed", "\n Size: ", req->size, "\n Alignment: ", req->alignment, - "\n Mem property flags: ", "0x", std::hex, propertyFlags, - "\n Mem allocate flags: ", "0x", std::hex, allocateFlags, + "\n Mem property flags: ", "0x", std::hex, flags, "\n Mem types: ", "0x", std::hex, req->memoryTypeBits)); for (uint32_t i = 0; i < m_memProps.memoryHeapCount; i++) { @@ -454,36 +491,29 @@ DxvkMemory::DxvkMemory() { } return result; } - // NV-DXVK start: Free unused memory + //// NV-DXVK start: Free unused memory void DxvkMemoryAllocator::freeUnusedChunks() { - for (auto& type : m_memTypes) { - std::lock_guard lock(type.mutex); - - const auto new_end_iterator = std::remove_if(type.chunks.begin(), type.chunks.end(), [](const auto& chunk) { - return chunk->isWholeChunkFree(); - }); - - type.chunks.erase(new_end_iterator, type.chunks.end()); + for (auto& heap : m_memHeaps) { + freeEmptyChunks(&heap); } } - // NV-DXVK end + //// NV-DXVK end DxvkMemory DxvkMemoryAllocator::tryAlloc( const VkMemoryRequirements* req, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - float priority, + VkMemoryPropertyFlags flags, + DxvkMemoryFlags hints, DxvkMemoryStats::Category category) { DxvkMemory result; for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { const bool supported = (req->memoryTypeBits & (1u << i)) != 0; - const bool adequate = (m_memTypes[i].memType.propertyFlags & propertyFlags) == propertyFlags; + const bool adequate = (m_memTypes[i].memType.propertyFlags & flags) == flags; if (supported && adequate) { result = this->tryAllocFromType(&m_memTypes[i], - propertyFlags, allocateFlags, req->size, req->alignment, priority, dedAllocInfo, category); + flags, req->size, req->alignment, hints, dedAllocInfo, category); } } @@ -493,11 +523,10 @@ 
DxvkMemory::DxvkMemory() { } DxvkMemory DxvkMemoryAllocator::tryAllocFromType( DxvkMemoryType* type, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, + VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority, + DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, DxvkMemoryStats::Category category ) { @@ -506,32 +535,47 @@ DxvkMemory::DxvkMemory() { } // NV-DXVK end // Prevent unnecessary external host memory fragmentation - bool isDeviceLocal = (propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; - - if (!isDeviceLocal) - priority = 0.0f; + bool isDeviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; + + VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, hints); DxvkMemory memory; - if (size >= type->chunkSize || dedAllocInfo) { + if (size >= chunkSize || dedAllocInfo) { + if (this->shouldFreeEmptyChunks(type->heap, size)) { + // NV-DXVK start: use a per-memory-type mutex + type->mutex.unlock(); + this->freeEmptyChunks(type->heap); + type->mutex.lock(); + // NV-DXVK end + } + DxvkDeviceMemory devMem = this->tryAllocDeviceMemory( - type, propertyFlags, allocateFlags, size, priority, dedAllocInfo, category); + type, flags, size, hints, dedAllocInfo, category); if (devMem.memHandle != VK_NULL_HANDLE) memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer, category); } else { for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) - memory = type->chunks[i]->alloc(propertyFlags, allocateFlags, size, align, priority, category); + memory = type->chunks[i]->alloc(flags, size, align, hints, category); if (!memory) { DxvkDeviceMemory devMem; - - for (uint32_t i = 0; i < 6 && (type->chunkSize >> i) >= size && !devMem.memHandle; i++) - devMem = tryAllocDeviceMemory(type, propertyFlags, allocateFlags, type->chunkSize >> i, priority, nullptr, category); + + if (this->shouldFreeEmptyChunks(type->heap, chunkSize)) { + // NV-DXVK start: use a 
per-memory-type mutex + type->mutex.unlock(); + this->freeEmptyChunks(type->heap); + type->mutex.lock(); + // NV-DXVK end + } + + for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++) + devMem = tryAllocDeviceMemory(type, flags, chunkSize >> i, hints, nullptr, category); if (devMem.memHandle) { - Rc chunk = new DxvkMemoryChunk(this, type, devMem); - memory = chunk->alloc(propertyFlags, allocateFlags, size, align, priority, category); + Rc chunk = new DxvkMemoryChunk(this, type, devMem, hints); + memory = chunk->alloc(flags, size, align, hints, category); type->chunks.push_back(std::move(chunk)); } @@ -547,29 +591,38 @@ DxvkMemory::DxvkMemory() { } DxvkDeviceMemory DxvkMemoryAllocator::tryAllocDeviceMemory( DxvkMemoryType* type, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, + VkMemoryPropertyFlags flags, VkDeviceSize size, - float priority, + DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, DxvkMemoryStats::Category category) { ScopedCpuProfileZone(); - bool useMemoryPriority = (propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) + bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && (m_device->features().extMemoryPriority.memoryPriority); if (type->heap->budget && type->heap->stats.totalAllocated() + size > type->heap->budget) return DxvkDeviceMemory(); + float priority = 0.0f; + + if (hints.test(DxvkMemoryFlag::GpuReadable)) + priority = 0.5f; + if (hints.test(DxvkMemoryFlag::GpuWritable)) + priority = 1.0f; + DxvkDeviceMemory result; result.memSize = size; - result.memPropertyFlags = propertyFlags; - result.memAllocateFlags = allocateFlags; + result.memFlags = flags; result.priority = priority; VkMemoryAllocateFlagsInfo allocateFlagsInfo; allocateFlagsInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; allocateFlagsInfo.pNext = dedAllocInfo; - allocateFlagsInfo.flags = allocateFlags; + // NV-DXVK begin: use device address bit for allocations + // 
dxvk-remix requires buffer device addresses on some allocations; setting this bit + // is essentially free, so we set it unconditionally to avoid having to plumb it through + allocateFlagsInfo.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + // NV-DXVK end allocateFlagsInfo.deviceMask = 0; VkMemoryPriorityAllocateInfoEXT prio; @@ -586,7 +639,7 @@ DxvkMemory::DxvkMemory() { } if (m_vkd->vkAllocateMemory(m_vkd->device(), &info, nullptr, &result.memHandle) != VK_SUCCESS) return DxvkDeviceMemory(); - if (propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { VkResult status = m_vkd->vkMapMemory(m_vkd->device(), result.memHandle, 0, VK_WHOLE_SIZE, 0, &result.memPointer); if (status != VK_SUCCESS) { @@ -631,6 +684,18 @@ DxvkMemory::DxvkMemory() { } VkDeviceSize offset, VkDeviceSize length) { chunk->free(offset, length); + + if (chunk->isEmpty()) { + Rc chunkRef = chunk; + + // Free the chunk if we have to, or at least put it at the end of + // the list so that chunks that are already in use and cannot be + // freed are prioritized for allocations to reduce memory pressure. + type->chunks.erase(std::remove(type->chunks.begin(), type->chunks.end(), chunkRef)); + + if (!this->shouldFreeChunk(type, chunkRef)) + type->chunks.push_back(std::move(chunkRef)); + } } @@ -643,7 +708,7 @@ DxvkMemory::DxvkMemory() { } } - VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId) const { + VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, DxvkMemoryFlags hints) const { VkMemoryType type = m_memProps.memoryTypes[memTypeId]; VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex]; @@ -653,11 +718,14 @@ DxvkMemory::DxvkMemory() { } VkDeviceSize chunkSize = (isDeviceLocal ? 
options.deviceLocalMemoryChunkSizeMB : options.otherMemoryChunkSizeMB) << 20; // NV-DXVK end - // Try to waste a bit less system memory in 32-bit - // applications due to address space constraints + if (hints.test(DxvkMemoryFlag::Small)) + chunkSize = 16 << 20; + + // Try to waste a bit less system memory especially in + // 32-bit applications due to address space constraints if (env::is32BitHostPlatform()) { if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - chunkSize = 32 << 20; + chunkSize = 16 << 20; } // Reduce the chunk size on small heaps so @@ -667,5 +735,55 @@ DxvkMemory::DxvkMemory() { } return chunkSize; } - + + + bool DxvkMemoryAllocator::shouldFreeChunk( + const DxvkMemoryType* type, + const Rc& chunk) const { + // Under memory pressure, we should start freeing everything. + if (this->shouldFreeEmptyChunks(type->heap, 0)) + return true; + + // Even if we have enough memory to spare, only keep + // one chunk of each type around to save memory. + for (const auto& c : type->chunks) { + if (c != chunk && c->isEmpty() && c->isCompatible(chunk)) + return true; + } + + return false; + } + + + bool DxvkMemoryAllocator::shouldFreeEmptyChunks( + const DxvkMemoryHeap* heap, + VkDeviceSize allocationSize) const { + VkDeviceSize budget = heap->budget; + + if (!budget) + budget = (heap->properties.size * 4) / 5; + + return heap->stats.totalAllocated() + allocationSize > budget; + } + + + void DxvkMemoryAllocator::freeEmptyChunks( + const DxvkMemoryHeap* heap) { + for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { + DxvkMemoryType* type = &m_memTypes[i]; + + if (type->heap != heap) + continue; + + // NV-DXVK start: use a per-memory-type mutex + std::lock_guard lock(type->mutex); + // NV-DXVK end + + type->chunks.erase( + std::remove_if(type->chunks.begin(), type->chunks.end(), + [] (const Rc& chunk) { return chunk->isEmpty(); }), + type->chunks.end()); + } + } + } diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 
35f555f3d..a74b5b39d 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -125,8 +125,7 @@ namespace dxvk { VkDeviceMemory memHandle = VK_NULL_HANDLE; void* memPointer = nullptr; VkDeviceSize memSize = 0; - VkMemoryPropertyFlags memPropertyFlags = 0; - VkMemoryAllocateFlags memAllocateFlags = 0; + VkMemoryPropertyFlags memFlags = 0; float priority = 0.0f; }; @@ -158,8 +157,6 @@ namespace dxvk { VkMemoryType memType; uint32_t memTypeId; - VkDeviceSize chunkSize; - std::vector> chunks; // NV-DXVK start: use a per-memory-type mutex rather than an allocator-wide mutex @@ -259,6 +256,22 @@ namespace dxvk { void free(); }; + + + /** + * \brief Memory allocation flags + * + * Used to batch similar allocations into the same + * set of chunks, which may help with fragmentation. + */ + enum class DxvkMemoryFlag : uint32_t { + Small = 0, ///< Small allocation + GpuReadable = 1, ///< Medium-priority resource + GpuWritable = 2, ///< High-priority resource + IgnoreConstraints = 3, ///< Ignore most allocation flags + }; + + using DxvkMemoryFlags = Flags; /** @@ -274,7 +287,8 @@ namespace dxvk { DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory); + DxvkDeviceMemory memory, + DxvkMemoryFlags m_hints); ~DxvkMemoryChunk(); @@ -283,19 +297,18 @@ namespace dxvk { * * On failure, this returns a slice with * \c VK_NULL_HANDLE as the memory handle. 
- * \param [in] flags Requested memory flags + * \param [in] flags Requested memory type flags * \param [in] size Number of bytes to allocate * \param [in] align Required alignment - * \param [in] priority Requested priority + * \param [in] hints Memory category * \returns The allocated memory slice */ DxvkMemory alloc( - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - VkDeviceSize size, - VkDeviceSize align, - float priority, - DxvkMemoryStats::Category category); + VkMemoryPropertyFlags flags, + VkDeviceSize size, + VkDeviceSize align, + DxvkMemoryFlags hints, + DxvkMemoryStats::Category category); /** * \brief Frees memory @@ -309,16 +322,18 @@ namespace dxvk { void free( VkDeviceSize offset, VkDeviceSize length); - - // NV-DXVK start: Free unused memory + /** - * \brief Queries if an entire chunk is considered free. - * - * Returns true if no allocations exist - * on this chunk. + * \brief Checks whether the chunk is being used + * \returns \c true if there are no allocations left */ - bool isWholeChunkFree() const; - // NV-DXVK end + bool isEmpty() const; + + /** + * \brief Checks whether hints and flags of another chunk match + * \param [in] other The chunk to compare to + */ + bool isCompatible(const Rc& other) const; private: @@ -330,8 +345,11 @@ namespace dxvk { DxvkMemoryAllocator* m_alloc; DxvkMemoryType* m_type; DxvkDeviceMemory m_memory; + DxvkMemoryFlags m_hints; std::vector m_freeList; + + bool checkHints(DxvkMemoryFlags hints) const; }; @@ -345,6 +363,8 @@ namespace dxvk { class DxvkMemoryAllocator { friend class DxvkMemory; friend class DxvkMemoryChunk; + + constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10; public: DxvkMemoryAllocator(const DxvkDevice* device); @@ -369,16 +389,15 @@ namespace dxvk { * \param [in] dedAllocReq Dedicated allocation requirements * \param [in] dedAllocInfo Dedicated allocation info * \param [in] flags Memory type flags - * \param [in] priority Device-local memory priority + 
* \param [in] hints Memory hints * \returns Allocated memory slice */ DxvkMemory alloc( const VkMemoryRequirements* req, const VkMemoryDedicatedRequirements& dedAllocReq, const VkMemoryDedicatedAllocateInfo& dedAllocInfo, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - float priority, + VkMemoryPropertyFlags flags, + DxvkMemoryFlags hints, DxvkMemoryStats::Category category); /** @@ -434,31 +453,28 @@ namespace dxvk { std::array m_memTypes; DxvkMemory tryAlloc( - const VkMemoryRequirements* req, - const VkMemoryDedicatedAllocateInfo* dedAllocInfo, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - float priority, - DxvkMemoryStats::Category category); + const VkMemoryRequirements* req, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo, + VkMemoryPropertyFlags flags, + DxvkMemoryFlags hints, + DxvkMemoryStats::Category category); DxvkMemory tryAllocFromType( - DxvkMemoryType* type, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - VkDeviceSize size, - VkDeviceSize align, - float priority, - const VkMemoryDedicatedAllocateInfo* dedAllocInfo, - DxvkMemoryStats::Category category); + DxvkMemoryType* type, + VkMemoryPropertyFlags flags, + VkDeviceSize size, + VkDeviceSize align, + DxvkMemoryFlags hints, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo, + DxvkMemoryStats::Category category); DxvkDeviceMemory tryAllocDeviceMemory( - DxvkMemoryType* type, - VkMemoryPropertyFlags propertyFlags, - VkMemoryAllocateFlags allocateFlags, - VkDeviceSize size, - float priority, - const VkMemoryDedicatedAllocateInfo* dedAllocInfo, - DxvkMemoryStats::Category category); + DxvkMemoryType* type, + VkMemoryPropertyFlags flags, + VkDeviceSize size, + DxvkMemoryFlags hints, + const VkMemoryDedicatedAllocateInfo* dedAllocInfo, + DxvkMemoryStats::Category category); void free( const DxvkMemory& memory); @@ -474,7 +490,19 @@ namespace dxvk { DxvkDeviceMemory memory); VkDeviceSize pickChunkSize( - 
uint32_t memTypeId) const; + uint32_t memTypeId, + DxvkMemoryFlags hints) const; + + bool shouldFreeChunk( + const DxvkMemoryType* type, + const Rc& chunk) const; + + bool shouldFreeEmptyChunks( + const DxvkMemoryHeap* heap, + VkDeviceSize allocationSize) const; + + void freeEmptyChunks( + const DxvkMemoryHeap* heap); }; diff --git a/src/dxvk/dxvk_options.cpp b/src/dxvk/dxvk_options.cpp index f05179e15..565a296dc 100644 --- a/src/dxvk/dxvk_options.cpp +++ b/src/dxvk/dxvk_options.cpp @@ -26,8 +26,6 @@ namespace dxvk { DxvkOptions::DxvkOptions(const Config& config) { enableStateCache = config.getOption ("dxvk.enableStateCache", true); - enableOpenVR = config.getOption ("dxvk.enableOpenVR", true); - enableOpenXR = config.getOption ("dxvk.enableOpenXR", true); numCompilerThreads = config.getOption ("dxvk.numCompilerThreads", 0); useRawSsbo = config.getOption("dxvk.useRawSsbo", Tristate::Auto); shrinkNvidiaHvvHeap = config.getOption("dxvk.shrinkNvidiaHvvHeap", Tristate::Auto); diff --git a/src/dxvk/dxvk_options.h b/src/dxvk/dxvk_options.h index b45eceea3..478b32aac 100644 --- a/src/dxvk/dxvk_options.h +++ b/src/dxvk/dxvk_options.h @@ -32,12 +32,6 @@ namespace dxvk { /// Enable state cache bool enableStateCache; - /// Enables OpenVR loading - bool enableOpenVR; - - /// Enables OpenXR loading - bool enableOpenXR; - /// Number of compiler threads /// when using the state cache int32_t numCompilerThreads; diff --git a/src/dxvk/dxvk_stats.h b/src/dxvk/dxvk_stats.h index bf556bbd1..6c900ba97 100644 --- a/src/dxvk/dxvk_stats.h +++ b/src/dxvk/dxvk_stats.h @@ -11,16 +11,21 @@ namespace dxvk { * thogether with \ref DxvkStatCounters. 
*/ enum class DxvkStatCounter : uint32_t { - CmdDrawCalls, ///< Number of draw calls - CmdDispatchCalls, ///< Number of compute calls + CmdDrawCalls, ///< Number of draw calls + CmdDispatchCalls, ///< Number of compute calls + CmdRenderPassCount, ///< Number of render passes + PipeCountGraphics, ///< Number of graphics pipelines + PipeCountCompute, ///< Number of compute pipelines + PipeCompilerBusy, ///< Boolean indicating compiler activity + QueueSubmitCount, ///< Number of command buffer submissions + QueuePresentCount, ///< Number of present calls / frames + GpuIdleTicks, ///< GPU idle time in microseconds + CsSyncCount, ///< CS thread synchronizations + CsSyncTicks, ///< Time spent waiting on CS + CsChunkCount, ///< Submitted CS chunks + + // NV-DXVK begin: RTX Remix counters CmdTraceRaysCalls, ///< Number of traceRays calls - CmdRenderPassCount, ///< Number of render passes - PipeCountGraphics, ///< Number of graphics pipelines - PipeCountCompute, ///< Number of compute pipelines - PipeCompilerBusy, ///< Boolean indicating compiler activity - QueueSubmitCount, ///< Number of command buffer submissions - QueuePresentCount, ///< Number of present calls / frames - GpuIdleTicks, ///< GPU idle time in microseconds RtxBlasCount, ///< Number of unique BLAS's in the scene/geometry cache RtxBufferCount, ///< Number of unique buffers being tracked for RT rendering RtxTextureCount, ///< Number of unique textures being tracked for RT rendering @@ -32,7 +37,9 @@ namespace dxvk { RtxSamplers, ///< Number of samplers currently present in the scene RtxTexturesInFlight, ///< Number of texture currently being loaded RtxLastTextureBatchDuration, ///< Duration in ms of the last processed texture batch - NumCounters, ///< Number of counters available + // NV-DXVK end + + NumCounters, ///< Number of counters available }; diff --git a/src/dxvk/hud/dxvk_hud.cpp b/src/dxvk/hud/dxvk_hud.cpp index 5b4490f05..c64632f91 100644 --- a/src/dxvk/hud/dxvk_hud.cpp +++ 
b/src/dxvk/hud/dxvk_hud.cpp @@ -45,6 +45,7 @@ namespace dxvk::hud { addItem("pipelines", -1, device); addItem("memory", -1, device); addItem("raytracingMode", -1); + addItem("cs", -1, device); addItem("gpuload", -1, device); addItem("compiler", -1, device); addItem("rtx", -1, device); diff --git a/src/dxvk/hud/dxvk_hud_item.cpp b/src/dxvk/hud/dxvk_hud_item.cpp index 1873a9728..8fb09117b 100644 --- a/src/dxvk/hud/dxvk_hud_item.cpp +++ b/src/dxvk/hud/dxvk_hud_item.cpp @@ -570,8 +570,8 @@ namespace dxvk::hud { bool isDeviceLocal = m_memory.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; VkDeviceSize memSizeMib = m_memory.memoryHeaps[i].size >> 20; - VkDeviceSize memAllocatedMib = m_heaps[i].totalAllocated() >> 20; VkDeviceSize memUsedMib = m_heaps[i].totalUsed() >> 20; + VkDeviceSize memAllocatedMib = m_heaps[i].totalAllocated() >> 20; uint64_t percentage = (100 * memUsedMib) / memSizeMib; std::string label = str::format(isDeviceLocal ? "Vidmem" : "Sysmem", " heap ", i, ":"); @@ -614,6 +614,85 @@ namespace dxvk::hud { } + HudCsThreadItem::HudCsThreadItem(const Rc& device) + : m_device(device) { + + } + + + HudCsThreadItem::~HudCsThreadItem() { + + } + + + void HudCsThreadItem::update(dxvk::high_resolution_clock::time_point time) { + uint64_t ticks = std::chrono::duration_cast(time - m_lastUpdate).count(); + + // Capture the maximum here since it's more useful to + // identify stutters than using any sort of average + DxvkStatCounters counters = m_device->getStatCounters(); + uint64_t currCsSyncCount = counters.getCtr(DxvkStatCounter::CsSyncCount); + uint64_t currCsSyncTicks = counters.getCtr(DxvkStatCounter::CsSyncTicks); + + m_maxCsSyncCount = std::max(m_maxCsSyncCount, currCsSyncCount - m_prevCsSyncCount); + m_maxCsSyncTicks = std::max(m_maxCsSyncTicks, currCsSyncTicks - m_prevCsSyncTicks); + + m_prevCsSyncCount = currCsSyncCount; + m_prevCsSyncTicks = currCsSyncTicks; + + m_updateCount++; + + if (ticks >= UpdateInterval) { + uint64_t currCsChunks = 
counters.getCtr(DxvkStatCounter::CsChunkCount); + uint64_t diffCsChunks = (currCsChunks - m_prevCsChunks) / m_updateCount; + m_prevCsChunks = currCsChunks; + + uint64_t syncTicks = m_maxCsSyncTicks / 100; + + m_csChunkString = str::format(diffCsChunks); + m_csSyncString = m_maxCsSyncCount + ? str::format(m_maxCsSyncCount, " (", (syncTicks / 10), ".", (syncTicks % 10), " ms)") + : str::format(m_maxCsSyncCount); + + m_maxCsSyncCount = 0; + m_maxCsSyncTicks = 0; + + m_updateCount = 0; + m_lastUpdate = time; + } + } + + + HudPos HudCsThreadItem::render( + HudRenderer& renderer, + HudPos position) { + position.y += 16.0f; + renderer.drawText(16.0f, + { position.x, position.y }, + { 0.25f, 1.0f, 0.25f, 1.0f }, + "CS chunks:"); + + renderer.drawText(16.0f, + { position.x + 132.0f, position.y }, + { 1.0f, 1.0f, 1.0f, 1.0f }, + m_csChunkString); + + position.y += 20.0f; + renderer.drawText(16.0f, + { position.x, position.y }, + { 0.25f, 1.0f, 0.25f, 1.0f }, + "CS syncs:"); + + renderer.drawText(16.0f, + { position.x + 132.0f, position.y }, + { 1.0f, 1.0f, 1.0f, 1.0f }, + m_csSyncString); + + position.y += 8.0f; + return position; + } + + HudGpuLoadItem::HudGpuLoadItem(const Rc& device) : m_device(device) { diff --git a/src/dxvk/hud/dxvk_hud_item.h b/src/dxvk/hud/dxvk_hud_item.h index bafd3b7f9..963841e96 100644 --- a/src/dxvk/hud/dxvk_hud_item.h +++ b/src/dxvk/hud/dxvk_hud_item.h @@ -383,6 +383,45 @@ namespace dxvk::hud { }; + /** + * \brief HUD item to display CS thread statistics + */ + class HudCsThreadItem : public HudItem { + constexpr static int64_t UpdateInterval = 500'000; + public: + + HudCsThreadItem(const Rc& device); + + ~HudCsThreadItem(); + + void update(dxvk::high_resolution_clock::time_point time); + + HudPos render( + HudRenderer& renderer, + HudPos position); + + private: + + Rc m_device; + + uint64_t m_prevCsSyncCount = 0; + uint64_t m_prevCsSyncTicks = 0; + uint64_t m_prevCsChunks = 0; + + uint64_t m_maxCsSyncCount = 0; + uint64_t m_maxCsSyncTicks = 0; + 
+ uint64_t m_updateCount = 0; + + std::string m_csSyncString; + std::string m_csChunkString; + + dxvk::high_resolution_clock::time_point m_lastUpdate + = dxvk::high_resolution_clock::now(); + + }; + + /** * \brief HUD item to display GPU load */ diff --git a/src/dxvk/imgui/dxvk_imgui.cpp b/src/dxvk/imgui/dxvk_imgui.cpp index cf497aad9..3abedbfa7 100644 --- a/src/dxvk/imgui/dxvk_imgui.cpp +++ b/src/dxvk/imgui/dxvk_imgui.cpp @@ -2600,7 +2600,7 @@ namespace dxvk { ImGui::Checkbox("Enable Volumetric Lighting", &RtxOptions::Get()->enableVolumetricLightingObject()); if (RtxOptions::Get()->enableVolumetricLighting()) { - ImGui::DragFloat3("Transmittance Color", &RtxOptions::Get()->volumetricTransmittanceColorObject(), 0.01f, 0.0f, 1.0f, "%.3f"); + ImGui::DragFloat3("Transmittance Color", &RtxOptions::Get()->volumetricTransmittanceColorObject(), 0.01f, 0.0f, VolumeManager::MaxTransmittanceValue, "%.3f"); ImGui::DragFloat("Transmittance Measurement Distance", &RtxOptions::Get()->volumetricTransmittanceMeasurementDistanceObject(), 0.25f, 0.0f, FLT_MAX, "%.2f", sliderFlags); ImGui::DragFloat3("Single Scattering Albedo", &RtxOptions::Get()->volumetricSingleScatteringAlbedoObject(), 0.01f, 0.0f, 1.0f, "%.3f"); ImGui::DragFloat("Anisotropy", &RtxOptions::Get()->volumetricAnisotropyObject(), 0.01f, -1.0f, 1.0f, "%.3f", sliderFlags); @@ -2647,6 +2647,14 @@ namespace dxvk { ImGui::Checkbox("Enable Thin Opaque", &RtxOptions::SubsurfaceScattering::enableThinOpaqueObject()); + if (RtxOptions::SubsurfaceScattering::enableThinOpaque()) { + ImGui::Indent(); + + ImGui::Checkbox("Enable Texture Maps", &RtxOptions::SubsurfaceScattering::enableTextureMapsObject()); + + ImGui::Unindent(); + } + ImGui::Unindent(); } diff --git a/src/dxvk/imgui/dxvk_imgui_capture.cpp b/src/dxvk/imgui/dxvk_imgui_capture.cpp index 4d01c7397..6c415fb52 100644 --- a/src/dxvk/imgui/dxvk_imgui_capture.cpp +++ b/src/dxvk/imgui/dxvk_imgui_capture.cpp @@ -54,6 +54,7 @@ namespace dxvk { } void ImGuiCapture::show(const 
Rc& ctx) { + auto capturer = ctx->getCommonObjects()->capturer(); const bool disableCapture = ctx->getCommonObjects()->getSceneManager().areReplacementsLoaded() && RtxOptions::Get()->getEnableAnyReplacements(); @@ -68,6 +69,7 @@ namespace dxvk { showContinuousCapture(ctx); } ImGui::Separator(); + ImGui::Checkbox("Correct baked world transforms", &capturer->correctBakedTransformsRef()); ImGui::Checkbox("Show menu on capture hotkey", &RtxOptions::Get()->m_captureShowMenuOnHotkey); if(RtxOptions::Get()->m_captureShowMenuOnHotkey) { ImGui::PushTextWrapPos(ImGui::GetCurrentWindow()->Size.x); diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index b6bb312c9..c10f47576 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -215,6 +215,7 @@ dxvk_src = files([ 'rtx_render/rtx_game_capturer.cpp', 'rtx_render/rtx_game_capturer.h', 'rtx_render/rtx_game_capturer_paths.h', + 'rtx_render/rtx_game_capturer_utils.h', 'rtx_render/rtx_geometry_utils.cpp', 'rtx_render/rtx_geometry_utils.h', 'rtx_render/rtx_hashing.cpp', @@ -280,6 +281,7 @@ dxvk_src = files([ 'rtx_render/rtx_ray_portal_manager.h', 'rtx_render/rtx_reflex.cpp', 'rtx_render/rtx_reflex.h', + 'rtx_render/rtx_remix_api.cpp', 'rtx_render/rtx_resources.cpp', 'rtx_render/rtx_resources.h', 'rtx_render/rtx_restir_gi_rayquery.cpp', @@ -424,7 +426,7 @@ endif dxvk_lib = static_library('dxvk', dxvk_src, dxvk_version, [generated_dxvk_shaders, rtx_shaders, dxvk_generated_embedded_files], link_with : [ util_lib, spirv_lib ], dependencies : [ dxvk_deps ], - include_directories : [ dxvk_include_path, dxvk_shader_include_path, rtxdi_include_path ], + include_directories : [ dxvk_include_path, dxvk_shader_include_path, rtxdi_include_path, remix_api_include_path ], override_options : ['cpp_std='+dxvk_cpp_std]) dxvk_dep = declare_dependency( diff --git a/src/dxvk/rtx_render/rtx_accel_manager.cpp b/src/dxvk/rtx_render/rtx_accel_manager.cpp index f4c79ae67..421cbb495 100644 --- a/src/dxvk/rtx_render/rtx_accel_manager.cpp +++ 
b/src/dxvk/rtx_render/rtx_accel_manager.cpp @@ -222,6 +222,10 @@ namespace dxvk { } } + int AccelManager::getCurrentFramePrimitiveIDPrefixSumBufferID() const { + return m_device->getCurrentFrameId() & 0x1; + } + void AccelManager::createAndBuildIntersectionBlas(Rc ctx, DxvkBarrierSet& execBarriers) { if (m_intersectionBlas.ptr()) return; @@ -568,7 +572,7 @@ namespace dxvk { // Copy the instance transform data to the device if(instanceTransforms.size() > 0) - ctx->updateBuffer(m_transformBuffer, 0, instanceTransforms.size() * sizeof(VkTransformMatrixKHR), instanceTransforms.data()); + ctx->writeToBuffer(m_transformBuffer, 0, instanceTransforms.size() * sizeof(VkTransformMatrixKHR), instanceTransforms.data()); ctx->getCommandList()->trackResource(m_transformBuffer); ctx->getCommandList()->trackResource(m_transformBuffer); @@ -595,6 +599,7 @@ namespace dxvk { // Build prefix sum array // Collect primitive count for each surface object + m_reorderedSurfacesPrimitiveIDPrefixSumLastFrame = m_reorderedSurfacesPrimitiveIDPrefixSum; m_reorderedSurfacesPrimitiveIDPrefixSum.resize(m_reorderedSurfaces.size()); for (uint32_t i = 0; i < m_reorderedSurfaces.size(); i++) { auto surface = m_reorderedSurfaces[i]; @@ -810,7 +815,7 @@ namespace dxvk { for (const auto& instances : m_mergedInstances) { if (!instances.empty()) { const size_t size = instances.size() * sizeof(VkAccelerationStructureInstanceKHR); - ctx->updateBuffer(m_vkInstanceBuffer, offset, size, instances.data()); + ctx->writeToBuffer(m_vkInstanceBuffer, offset, size, instances.data()); offset += size; } } @@ -823,7 +828,7 @@ namespace dxvk { } // Write billboard data - ctx->updateBuffer(m_billboardsBuffer, 0, numActiveBillboards * sizeof(MemoryBillboard), memoryBillboards.data()); + ctx->writeToBuffer(m_billboardsBuffer, 0, numActiveBillboards * sizeof(MemoryBillboard), memoryBillboards.data()); } } @@ -862,7 +867,7 @@ namespace dxvk { assert(dataOffset == surfacesGPUSize); assert(surfacesGPUData.size() == 
surfacesGPUSize); - ctx->updateBuffer(m_surfaceBuffer, 0, surfacesGPUData.size(), surfacesGPUData.data()); + ctx->writeToBuffer(m_surfaceBuffer, 0, surfacesGPUData.size(), surfacesGPUData.data()); // Find the size of the surface mapping buffer uint32_t maxPreviousSurfaceIndex = 0; @@ -894,11 +899,20 @@ namespace dxvk { } // Create and upload the primitive id prefix sum buffer - info.size = align(m_reorderedSurfacesPrimitiveIDPrefixSum.size() * sizeof(m_reorderedSurfacesPrimitiveIDPrefixSum[0]), kBufferAlignment); - if (m_primitiveIDPrefixSumBuffer == nullptr || info.size > m_primitiveIDPrefixSumBuffer->info().size) { - m_primitiveIDPrefixSumBuffer = m_device->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, DxvkMemoryStats::Category::RTXAccelerationStructure); - } - ctx->updateBuffer(m_primitiveIDPrefixSumBuffer, 0, m_reorderedSurfacesPrimitiveIDPrefixSum.size() * sizeof(m_reorderedSurfacesPrimitiveIDPrefixSum[0]), m_reorderedSurfacesPrimitiveIDPrefixSum.data()); + auto updatePrefixSumBuffer = [&info, this, ctx](std::vector& prefixSumList, Rc& prefixSumBuffer) { + info.size = std::max(prefixSumList.size(), 1llu) * sizeof(prefixSumList[0]); + + if (prefixSumBuffer == nullptr || info.size > prefixSumBuffer->info().size) { + prefixSumBuffer = m_device->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, DxvkMemoryStats::Category::RTXAccelerationStructure); + } + + if (prefixSumList.size() > 0) { + ctx->writeToBuffer(prefixSumBuffer, 0, prefixSumList.size() * sizeof(prefixSumList[0]), prefixSumList.data()); + } + }; + + updatePrefixSumBuffer(m_reorderedSurfacesPrimitiveIDPrefixSum, m_primitiveIDPrefixSumBuffer); + updatePrefixSumBuffer(m_reorderedSurfacesPrimitiveIDPrefixSumLastFrame, m_primitiveIDPrefixSumBufferLastFrame); // Create and upload the surface mapping buffer if (!surfaceIndexMapping.empty()) { @@ -907,7 +921,7 @@ namespace dxvk { m_surfaceMappingBuffer = m_device->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 
DxvkMemoryStats::Category::RTXAccelerationStructure); } - ctx->updateBuffer(m_surfaceMappingBuffer, 0, surfaceIndexMapping.size() * sizeof(surfaceIndexMapping[0]), surfaceIndexMapping.data()); + ctx->writeToBuffer(m_surfaceMappingBuffer, 0, surfaceIndexMapping.size() * sizeof(surfaceIndexMapping[0]), surfaceIndexMapping.data()); } } diff --git a/src/dxvk/rtx_render/rtx_accel_manager.h b/src/dxvk/rtx_render/rtx_accel_manager.h index 858bd83d4..f9e2b9ea7 100644 --- a/src/dxvk/rtx_render/rtx_accel_manager.h +++ b/src/dxvk/rtx_render/rtx_accel_manager.h @@ -78,7 +78,13 @@ class AccelManager : public CommonDeviceObject { const Rc getSurfaceMappingBuffer() const { return m_surfaceMappingBuffer; } - const Rc getPrimitiveIDPrefixSumBuffer() const { return m_primitiveIDPrefixSumBuffer; } + const Rc getCurrentFramePrimitiveIDPrefixSumBuffer() const { + return m_primitiveIDPrefixSumBuffer; + } + + const Rc getLastFramePrimitiveIDPrefixSumBuffer() const { + return m_primitiveIDPrefixSumBufferLastFrame; + } const Rc getBillboardsBuffer() const { return m_billboardsBuffer; } @@ -123,6 +129,7 @@ class AccelManager : public CommonDeviceObject { std::vector m_reorderedSurfaces; std::vector m_reorderedSurfacesFirstIndexOffset; std::vector m_reorderedSurfacesPrimitiveIDPrefixSum; + std::vector m_reorderedSurfacesPrimitiveIDPrefixSumLastFrame; std::vector m_mergedInstances[Tlas::Count]; std::vector> m_blasPool; @@ -131,6 +138,9 @@ class AccelManager : public CommonDeviceObject { Rc m_surfaceMappingBuffer; Rc m_transformBuffer; Rc m_primitiveIDPrefixSumBuffer; + Rc m_primitiveIDPrefixSumBufferLastFrame; + + int getCurrentFramePrimitiveIDPrefixSumBufferID() const; Rc m_intersectionBlas; Rc m_aabbBuffer; diff --git a/src/dxvk/rtx_render/rtx_asset_replacer.cpp b/src/dxvk/rtx_render/rtx_asset_replacer.cpp index 216a435ee..2091eaf51 100644 --- a/src/dxvk/rtx_render/rtx_asset_replacer.cpp +++ b/src/dxvk/rtx_render/rtx_asset_replacer.cpp @@ -19,7 +19,6 @@ * FROM, OUT OF OR IN CONNECTION WITH 
THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#pragma once #include "rtx_asset_replacer.h" @@ -162,5 +161,59 @@ void AssetReplacer::updateSecretReplacements() { m_bSecretReplacementsUpdated = updated; } +namespace { + std::string tostr(const remixapi_MaterialHandle& h) { + static_assert(sizeof h == sizeof uint64_t); + return std::to_string(reinterpret_cast(h)); + } + std::string tostr(const remixapi_MeshHandle& h) { + static_assert(sizeof h == sizeof uint64_t); + return std::to_string(reinterpret_cast(h)); + } +} + +void AssetReplacer::makeMaterialWithTexturePreload(DxvkContext& ctx, remixapi_MaterialHandle handle, MaterialData&& data) { + auto [iter, isNew] = m_extMaterials.emplace(handle, std::move(data)); + + if (!isNew) { + Logger::info("Ignoring repeated material registration (handle=" + tostr(handle) + ") "); + return; + } +} + +const MaterialData* AssetReplacer::accessExternalMaterial(remixapi_MaterialHandle handle) const { + auto found = m_extMaterials.find(handle); + if (found == m_extMaterials.end()) { + return nullptr; + } + return found->second ? 
&found->second.value() : nullptr; +} + +void AssetReplacer::destroyExternalMaterial(remixapi_MaterialHandle handle) { + m_extMaterials.erase(handle); +} + +void AssetReplacer::registerExternalMesh(remixapi_MeshHandle handle, std::vector&& submeshes) { + if (m_extMeshes.count(handle) > 0) { + Logger::info("Ignoring repeated mesh registration (handle=" + tostr(handle) + ") "); + return; + } + + m_extMeshes.emplace(handle, std::move(submeshes)); +} + +const std::vector& AssetReplacer::accessExternalMesh(remixapi_MeshHandle handle) const { + auto found = m_extMeshes.find(handle); + if (found == m_extMeshes.end()) { + static const auto s_empty = std::vector {}; + return s_empty; + } + return found->second; +} + +void AssetReplacer::destroyExternalMesh(remixapi_MeshHandle handle) { + m_extMeshes.erase(handle); +} + } // namespace dxvk diff --git a/src/dxvk/rtx_render/rtx_asset_replacer.h b/src/dxvk/rtx_render/rtx_asset_replacer.h index c3f94e0a8..742ecd7db 100644 --- a/src/dxvk/rtx_render/rtx_asset_replacer.h +++ b/src/dxvk/rtx_render/rtx_asset_replacer.h @@ -217,6 +217,14 @@ namespace dxvk { (bEnabled) ? 
variantId : VariantInfo::kDefaultVariant; } + void makeMaterialWithTexturePreload(DxvkContext& ctx, remixapi_MaterialHandle handle, MaterialData&& data); + [[nodiscard]] const MaterialData* accessExternalMaterial(remixapi_MaterialHandle handle) const; + void destroyExternalMaterial(remixapi_MaterialHandle handle); + + void registerExternalMesh(remixapi_MeshHandle handle, std::vector&& submeshes); + [[nodiscard]] const std::vector& accessExternalMesh(remixapi_MeshHandle handle) const; + void destroyExternalMesh(remixapi_MeshHandle handle); + private: void updateSecretReplacements(); @@ -232,6 +240,9 @@ namespace dxvk { SecretReplacements m_secretReplacements; ModManager m_modManager; + + std::unordered_map> m_extMaterials {}; + std::unordered_map> m_extMeshes {}; }; } // namespace dxvk diff --git a/src/dxvk/rtx_render/rtx_bindless_resource_manager.cpp b/src/dxvk/rtx_render/rtx_bindless_resource_manager.cpp index 8a598740b..ea2f6475f 100644 --- a/src/dxvk/rtx_render/rtx_bindless_resource_manager.cpp +++ b/src/dxvk/rtx_render/rtx_bindless_resource_manager.cpp @@ -86,18 +86,20 @@ namespace dxvk { assert(idx <= kMaxBindlessResources); - VkWriteDescriptorSet descWrites; - descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descWrites.pNext = nullptr; - descWrites.dstSet = 0;// This will be filled in by the BindlessTable - descWrites.dstBinding = 0; - descWrites.dstArrayElement = 0; - descWrites.descriptorCount = idx; - descWrites.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - descWrites.pImageInfo = &imageInfo[0]; - descWrites.pBufferInfo = nullptr; - descWrites.pTexelBufferView = nullptr; - m_tables[Table::Textures][currentIdx()]->updateDescriptors(descWrites); + if (idx > 0) { + VkWriteDescriptorSet descWrites; + descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descWrites.pNext = nullptr; + descWrites.dstSet = 0;// This will be filled in by the BindlessTable + descWrites.dstBinding = 0; + descWrites.dstArrayElement = 0; + 
descWrites.descriptorCount = idx; + descWrites.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + descWrites.pImageInfo = &imageInfo[0]; + descWrites.pBufferInfo = nullptr; + descWrites.pTexelBufferView = nullptr; + m_tables[Table::Textures][currentIdx()]->updateDescriptors(descWrites); + } } // Buffers @@ -117,18 +119,20 @@ namespace dxvk { assert(idx <= kMaxBindlessResources); - VkWriteDescriptorSet descWrites; - descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descWrites.pNext = nullptr; - descWrites.dstSet = 0;// This will be filled in by the BindlessTable - descWrites.dstBinding = 0; - descWrites.dstArrayElement = 0; - descWrites.descriptorCount = idx; - descWrites.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - descWrites.pImageInfo = nullptr; - descWrites.pBufferInfo = &bufferInfo[0]; - descWrites.pTexelBufferView = nullptr; - m_tables[Table::Buffers][currentIdx()]->updateDescriptors(descWrites); + if (idx > 0) { + VkWriteDescriptorSet descWrites; + descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descWrites.pNext = nullptr; + descWrites.dstSet = 0;// This will be filled in by the BindlessTable + descWrites.dstBinding = 0; + descWrites.dstArrayElement = 0; + descWrites.descriptorCount = idx; + descWrites.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descWrites.pImageInfo = nullptr; + descWrites.pBufferInfo = &bufferInfo[0]; + descWrites.pTexelBufferView = nullptr; + m_tables[Table::Buffers][currentIdx()]->updateDescriptors(descWrites); + } } // Samplers @@ -150,18 +154,20 @@ namespace dxvk { assert(idx <= kMaxBindlessSamplers); - VkWriteDescriptorSet descWrites; - descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - descWrites.pNext = nullptr; - descWrites.dstSet = 0;// This will be filled in by the BindlessTable - descWrites.dstBinding = 0; - descWrites.dstArrayElement = 0; - descWrites.descriptorCount = idx; - descWrites.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - descWrites.pImageInfo = &imageInfo[0]; - 
descWrites.pBufferInfo = nullptr; - descWrites.pTexelBufferView = nullptr; - m_tables[Table::Samplers][currentIdx()]->updateDescriptors(descWrites); + if (idx > 0) { + VkWriteDescriptorSet descWrites; + descWrites.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descWrites.pNext = nullptr; + descWrites.dstSet = 0;// This will be filled in by the BindlessTable + descWrites.dstBinding = 0; + descWrites.dstArrayElement = 0; + descWrites.descriptorCount = idx; + descWrites.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + descWrites.pImageInfo = &imageInfo[0]; + descWrites.pBufferInfo = nullptr; + descWrites.pTexelBufferView = nullptr; + m_tables[Table::Samplers][currentIdx()]->updateDescriptors(descWrites); + } } m_frameLastUpdated = m_device->getCurrentFrameId(); diff --git a/src/dxvk/rtx_render/rtx_camera_manager.cpp b/src/dxvk/rtx_render/rtx_camera_manager.cpp index bf2c916e4..379b327fe 100644 --- a/src/dxvk/rtx_render/rtx_camera_manager.cpp +++ b/src/dxvk/rtx_render/rtx_camera_manager.cpp @@ -256,4 +256,16 @@ namespace dxvk { return m_lastCameraCutFrameId == m_device->getCurrentFrameId(); } + void CameraManager::processExternalCamera(CameraType::Enum type, + const Matrix4& worldToView, + const Matrix4& viewToProjection) { + float fov, aspectRatio, nearPlane, farPlane, shearX, shearY; + bool isLHS; + bool isReverseZ; + decomposeProjection(viewToProjection, aspectRatio, fov, nearPlane, farPlane, shearX, shearY, isLHS, isReverseZ); + + getCamera(type).update( + m_device->getCurrentFrameId(), + worldToView, viewToProjection, fov, aspectRatio, nearPlane, farPlane, isLHS); + } } // namespace dxvk diff --git a/src/dxvk/rtx_render/rtx_camera_manager.h b/src/dxvk/rtx_render/rtx_camera_manager.h index f52f95ad7..7c8e69d33 100644 --- a/src/dxvk/rtx_render/rtx_camera_manager.h +++ b/src/dxvk/rtx_render/rtx_camera_manager.h @@ -55,6 +55,7 @@ namespace dxvk { // Calculates a camera type for the specified draw call. 
CameraType::Enum processCameraData(const DrawCallState& input); + void processExternalCamera(CameraType::Enum type, const Matrix4& worldToView, const Matrix4& viewToProjection); uint32_t getLastCameraCutFrameId() const { return m_lastCameraCutFrameId; } bool isCameraCutThisFrame() const; diff --git a/src/dxvk/rtx_render/rtx_composite.cpp b/src/dxvk/rtx_render/rtx_composite.cpp index fd6313ee5..d6a7937fc 100644 --- a/src/dxvk/rtx_render/rtx_composite.cpp +++ b/src/dxvk/rtx_render/rtx_composite.cpp @@ -348,7 +348,7 @@ namespace dxvk { compositeArgs.stochasticAlphaBlendRadianceVolumeMultiplier = stochasticAlphaBlendRadianceVolumeMultiplier(); Rc cb = getCompositeConstantsBuffer(); - ctx->updateBuffer(cb, 0, sizeof(CompositeArgs), &compositeArgs); + ctx->writeToBuffer(cb, 0, sizeof(CompositeArgs), &compositeArgs); ctx->getCommandList()->trackResource(cb); ctx->bindResourceBuffer(COMPOSITE_CONSTANTS_INPUT, DxvkBufferSlice(cb, 0, cb->info().size)); diff --git a/src/dxvk/rtx_render/rtx_context.cpp b/src/dxvk/rtx_render/rtx_context.cpp index d34d30eef..545ae1f9c 100644 --- a/src/dxvk/rtx_render/rtx_context.cpp +++ b/src/dxvk/rtx_render/rtx_context.cpp @@ -67,6 +67,8 @@ namespace dxvk { Metrics Metrics::s_instance; + bool g_allowSrgbConversionForOutput = true; + void RtxContext::takeScreenshot(std::string imageName, Rc image) { // NOTE: Improve this, I'd like all textures from the same frame to have the same time code... Currently sampling the time on each "dump op" results in different timecodes. 
auto t = std::time(nullptr); @@ -513,7 +515,7 @@ namespace dxvk { // Tone mapping // WAR for TREX-553 - disable sRGB conversion as NVTT implicitly applies it during dds->png // conversion for 16bit float formats - const bool performSRGBConversion = !captureScreenImage; + const bool performSRGBConversion = !captureScreenImage && g_allowSrgbConversionForOutput; dispatchToneMapping(rtOutput, performSRGBConversion, frameTimeSecs); if (captureScreenImage) { @@ -694,6 +696,10 @@ namespace dxvk { } } + void RtxContext::commitExternalGeometryToRT(ExternalDrawState&& state) { + getSceneManager().submitExternalDraw(this, std::move(state)); + } + static uint32_t jenkinsHash(uint32_t a) { // http://burtleburtle.net/bob/hash/integer.html a = (a + 0x7ed55d16) + (a << 12); @@ -850,6 +856,11 @@ namespace dxvk { constants.totalMipBias = getSceneManager().getTotalMipBias(); + const VkExtent3D& rtExtent = rtOutput.m_finalOutput.image->info().extent; + constants.upscaleFactor = float2 { + rtOutput.m_compositeOutputExtent.width / static_cast(rtExtent.width), + rtOutput.m_compositeOutputExtent.height / static_cast(rtExtent.height) }; + constants.terrainArgs = getSceneManager().getTerrainBaker().getTerrainArgs(); constants.thinOpaqueEnable = RtxOptions::SubsurfaceScattering::enableThinOpaque(); @@ -887,7 +898,7 @@ namespace dxvk { constants.enableReSTIRGIDemodulatedTargetFunction = restirGI.useDemodulatedTargetFunction(); - m_common->metaNeeCache().setRaytraceArgs(constants); + m_common->metaNeeCache().setRaytraceArgs(constants, m_resetHistory); constants.surfaceCount = getSceneManager().getAccelManager().getSurfaceCount(); auto* cameraTeleportDirectionInfo = getSceneManager().getRayPortalManager().getCameraTeleportationRayPortalDirectionInfo(); @@ -984,11 +995,13 @@ namespace dxvk { constants.isZUp = RtxOptions::Get()->isZUp(); constants.enableCullingSecondaryRays = RtxOptions::Get()->enableCullingInSecondaryRays(); + constants.domeLightArgs = 
getSceneManager().getLightManager().getDomeLightArgs(); + // Upload the constants to the GPU { Rc cb = getResourceManager().getConstantsBuffer(); - updateBuffer(cb, 0, sizeof(constants), &constants); + writeToBuffer(cb, 0, sizeof(constants), &constants); m_cmd->trackResource(cb); } diff --git a/src/dxvk/rtx_render/rtx_context.h b/src/dxvk/rtx_render/rtx_context.h index 6ce4c3ca3..01a4838db 100644 --- a/src/dxvk/rtx_render/rtx_context.h +++ b/src/dxvk/rtx_render/rtx_context.h @@ -39,6 +39,7 @@ namespace dxvk { class AssetExporter; class SceneManager; class TerrainBaker; + struct ExternalDrawState; struct D3D9RtxVertexCaptureData; @@ -109,6 +110,7 @@ namespace dxvk { void clearImageView(const Rc& imageView, VkOffset3D offset, VkExtent3D extent, VkImageAspectFlags aspect, VkClearValue value); void commitGeometryToRT(const DrawParameters& params, DrawCallState& drawCallState); + void commitExternalGeometryToRT(ExternalDrawState&& state); static void blitImageHelper(Rc ctx, const Rc& srcImage, const Rc& dstImage, VkFilter filter); diff --git a/src/dxvk/rtx_render/rtx_debug_view.cpp b/src/dxvk/rtx_render/rtx_debug_view.cpp index 9dc7551ee..a6b9c564e 100644 --- a/src/dxvk/rtx_render/rtx_debug_view.cpp +++ b/src/dxvk/rtx_render/rtx_debug_view.cpp @@ -66,6 +66,8 @@ namespace dxvk { {DEBUG_VIEW_POSITION, "Position"}, {DEBUG_VIEW_TEXCOORDS, "Texture Coordinates"}, + {DEBUG_VIEW_TEXCOORDS_GRADIENT_X, "Texture Coordinates Gradient X"}, + {DEBUG_VIEW_TEXCOORDS_GRADIENT_Y, "Texture Coordinates Gradient Y"}, {DEBUG_VIEW_TEXCOORD_GENERATION_MODE, "Texture Coordinates Generation Mode"}, {DEBUG_VIEW_VIRTUAL_MOTION_VECTOR, "Virtual Motion Vector"}, {DEBUG_VIEW_SCREEN_SPACE_MOTION_VECTOR, "Screen-Space Motion Vector"}, @@ -82,7 +84,19 @@ namespace dxvk { {DEBUG_VIEW_MATERIAL_TYPE, "Material Type"}, {DEBUG_VIEW_ALBEDO, "Diffuse Albedo"}, - {DEBUG_VIEW_RAW_ALBEDO, "Diffuse Raw Albedo (RGS only)"}, + {DEBUG_VIEW_RAW_ALBEDO, "Diffuse Raw Albedo (RGS only)"}, + 
{DEBUG_VIEW_OPAQUE_RAW_ALBEDO_RESOLUTION_CHECKERS, "Opaque Material Raw Albedo + Texture Resolution Checkers (RGS only)", + "Parameterize via:\n" + "Debug Knob [0]: num texels per checker box [Default: 64]\n" + "Debug Knob [1]: checkers overlay strength [Default: 0.5]"}, + {DEBUG_VIEW_OPAQUE_NORMAL_RESOLUTION_CHECKERS, "Opaque Material Normal + Texture Resolution Checkers (RGS only)", + "Parameterize via:\n" + "Debug Knob [0]: num texels per checker box [Default: 64]\n" + "Debug Knob [1]: checkers overlay strength [Default: 0.5]"}, + {DEBUG_VIEW_OPAQUE_ROUGHNESS_RESOLUTION_CHECKERS, "Opaque Material Roughness + Texture Resolution Checkers (RGS only)", + "Parameterize via:\n" + "Debug Knob [0]: num texels per checker box [Default: 64]\n" + "Debug Knob [1]: checkers overlay strength [Default: 0.5]"}, {DEBUG_VIEW_BASE_REFLECTIVITY, "Base Reflectivity"}, {DEBUG_VIEW_ROUGHNESS, "Isotropic Roughness"}, {DEBUG_VIEW_PERCEPTUAL_ROUGHNESS, "Perceptual Roughness"}, @@ -100,11 +114,11 @@ namespace dxvk { {DEBUG_VIEW_CASCADE_LEVEL, "Terrain: Cascade Level (RGS only)"}, {DEBUG_VIEW_VIRTUAL_HIT_DISTANCE, "Virtual Hit Distance"}, - {DEBUG_VIEW_PRIMARY_DEPTH, "Primary Depth" }, + {DEBUG_VIEW_PRIMARY_DEPTH, "Primary Depth"}, {DEBUG_VIEW_SHARED_BIAS_CURRENT_COLOR_MASK, "DLSS Bias Color Mask"}, - {DEBUG_VIEW_IS_INSIDE_FRUSTUM, "Is Inside Frustum" }, + {DEBUG_VIEW_IS_INSIDE_FRUSTUM, "Is Inside Frustum"}, {DEBUG_VIEW_BLUE_NOISE, "Blue Noise"}, {DEBUG_VIEW_PIXEL_CHECKERBOARD, "Pixel Checkerboard"}, @@ -131,6 +145,8 @@ namespace dxvk { {DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM, "NEE Cache Light Histogram"}, {DEBUG_VIEW_NEE_CACHE_HISTOGRAM, "NEE Cache Triangle Histogram"}, + {DEBUG_VIEW_NEE_CACHE_HASH_MAP, "NEE Cache Hash Map"}, + {DEBUG_VIEW_NEE_CACHE_ACCUMULATE_MAP, "NEE Cache Accumulate Map"}, {DEBUG_VIEW_NEE_CACHE_SAMPLE_RADIANCE, "NEE Cache Sample Radiance"}, {DEBUG_VIEW_NEE_CACHE_TASK, "NEE Cache Task"}, @@ -208,8 +224,8 @@ namespace dxvk { {DEBUG_VIEW_NAN, "Inf/NaN Check"}, 
{DEBUG_SURFACE_LOBE_CONSISTENCY, "Surface/Lobe Consistency Check"}, {DEBUG_VIEW_SCROLLING_LINE, "Scrolling Line"}, - {DEBUG_VIEW_POM_ITERATIONS, "POM Iterations" }, - {DEBUG_VIEW_POM_DIRECT_HIT_POS, "POM Direct Hit Position (Tangent Space)" }, + {DEBUG_VIEW_POM_ITERATIONS, "POM Iterations"}, + {DEBUG_VIEW_POM_DIRECT_HIT_POS, "POM Direct Hit Position (Tangent Space)"}, } }; ImGui::ComboWithKey compositeDebugViewCombo = ImGui::ComboWithKey( @@ -318,23 +334,23 @@ namespace dxvk { bool filterWords = searchWord.length() > 0; // Hide unmatched options - std::vector items; + std::vector> items; items.reserve(debugViewEntries.size()); int itemIndex = -1; for (int i = 0; i < debugViewEntries.size(); i++) { - if (debugViewEntries[i].first == lastView) { + if (debugViewEntries[i].key == lastView) { itemIndex = items.size(); } if (filterWords) { - std::string name(debugViewEntries[i].second); + std::string name(debugViewEntries[i].name); toLowerCase(name); - if (debugViewEntries[i].first == lastView || name.find(searchWord) != std::string::npos) { - items.push_back(debugViewEntries[i].second); + if (debugViewEntries[i].key == lastView || name.find(searchWord) != std::string::npos) { + items.emplace_back(debugViewEntries[i].name, debugViewEntries[i].tooltip); } } else { - items.push_back(debugViewEntries[i].second); + items.emplace_back(debugViewEntries[i].name, debugViewEntries[i].tooltip); } } @@ -348,8 +364,8 @@ namespace dxvk { ImGui::PopItemWidth(); for (int i = 0; i < debugViewEntries.size(); i++) { - if (itemIndex != -1 && debugViewEntries[i].second == items[itemIndex]) { - lastView = debugViewEntries[i].first; + if (itemIndex != -1 && debugViewEntries[i].name == items[itemIndex].first) { + lastView = debugViewEntries[i].key; } } } @@ -691,7 +707,7 @@ namespace dxvk { auto&& debugViewArgs = getCommonDebugViewArgs(ctx.ptr(), rtOutput, common); Rc cb = getDebugViewConstantsBuffer(); - ctx->updateBuffer(cb, 0, sizeof(DebugViewArgs), &debugViewArgs); + ctx->writeToBuffer(cb, 
0, sizeof(DebugViewArgs), &debugViewArgs); ctx->getCommandList()->trackResource(cb); if (displayType() == DebugViewDisplayType::HDRWaveform) { diff --git a/src/dxvk/rtx_render/rtx_game_capturer.cpp b/src/dxvk/rtx_render/rtx_game_capturer.cpp index b625d2415..c68ccb889 100644 --- a/src/dxvk/rtx_render/rtx_game_capturer.cpp +++ b/src/dxvk/rtx_render/rtx_game_capturer.cpp @@ -159,10 +159,10 @@ namespace dxvk { } void GameCapturer::setInstanceUpdateFlag(const RtInstance& rtInstance, const InstFlag flag) { - if (isIdle()) { + if (!m_state.has()) { return; } - m_cap.instanceFlags[rtInstance.getId()] |= (1 << uint8_t(flag)); + m_pCap->instanceFlags[rtInstance.getId()] |= (1 << uint8_t(flag)); } void GameCapturer::trigger(const Rc ctx) { @@ -185,15 +185,16 @@ namespace dxvk { assert(!m_state.has()); m_options = getOptions(); - - m_cap.idStr = hashToString(Capture::nextId++).substr(4, 4); - m_cap.bCaptureInstances = m_options.bCaptureInstances; - m_cap.bSkyProbeBaked = false; - if (m_cap.bCaptureInstances) { + + m_pCap = std::make_unique(); + m_pCap->idStr = hashToString(Capture::nextId++).substr(4, 4); + m_pCap->bCaptureInstances = m_options.bCaptureInstances; + m_pCap->bSkyProbeBaked = false; + if (m_pCap->bCaptureInstances) { prepareInstanceStage(ctx); } - Logger::info("[GameCapturer][" + m_cap.idStr + "] New capture"); - m_cap.instanceFlags.clear(); + Logger::info("[GameCapturer][" + m_pCap->idStr + "] New capture"); + m_pCap->instanceFlags.clear(); m_state.set(); m_state.set(); @@ -202,67 +203,67 @@ namespace dxvk { void GameCapturer::prepareInstanceStage(const Rc ctx) { const auto stagePathStr = buildStagePath(m_options.instanceStageName); - m_cap.instance.stageName = m_options.instanceStageName; - m_cap.instance.stagePath = stagePathStr; + m_pCap->instance.stageName = m_options.instanceStageName; + m_pCap->instance.stagePath = stagePathStr; m_exporter.generateSceneThumbnail(ctx, BASE_DIR + lss::commonDirName::thumbDir, m_options.instanceStageName); } void 
GameCapturer::capture(const Rc ctx, const float dt) { assert(m_state.has()); - m_cap.currentFrameNum += dt * static_cast(m_options.fps); + m_pCap->currentFrameNum += dt * static_cast(m_options.fps); captureFrame(ctx); - if (m_cap.numFramesCaptured >= m_options.numFrames) { + if (m_pCap->numFramesCaptured >= m_options.numFrames) { m_state.set(); m_state.set(); } } void GameCapturer::captureFrame(const Rc ctx) { - Logger::debug("[GameCapturer][" + m_cap.idStr + "] Begin frame capture"); - if (m_cap.bCaptureInstances) { + Logger::debug("[GameCapturer][" + m_pCap->idStr + "] Begin frame capture"); + if (m_pCap->bCaptureInstances) { captureCamera(); captureLights(); } captureInstances(ctx); - ++m_cap.numFramesCaptured; - Logger::debug("[GameCapturer][" + m_cap.idStr + "] End frame capture"); + ++m_pCap->numFramesCaptured; + Logger::debug("[GameCapturer][" + m_pCap->idStr + "] End frame capture"); } void GameCapturer::captureCamera() { - if (isnan(m_cap.camera.fov) || - isnan(m_cap.camera.aspectRatio) || - isnan(m_cap.camera.nearPlane) || - isnan(m_cap.camera.farPlane)) { - Logger::debug("[GameCapturer][" + m_cap.idStr + "][Camera] New"); + if (isnan(m_pCap->camera.fov) || + isnan(m_pCap->camera.aspectRatio) || + isnan(m_pCap->camera.nearPlane) || + isnan(m_pCap->camera.farPlane)) { + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][Camera] New"); float shearX, shearY; const auto projMat = m_sceneManager.getCamera().getViewToProjection(); decomposeProjection(projMat, - m_cap.camera.aspectRatio, - m_cap.camera.fov, - m_cap.camera.nearPlane, - m_cap.camera.farPlane, + m_pCap->camera.aspectRatio, + m_pCap->camera.fov, + m_pCap->camera.nearPlane, + m_pCap->camera.farPlane, shearX, shearY, - m_cap.camera.isLHS, - m_cap.camera.isReverseZ); + m_pCap->camera.isLHS, + m_pCap->camera.isReverseZ); // Infinite projection is legit, but USD doesnt take kindly to it constexpr float kMaxFarPlane = 100000000; - if (m_cap.camera.farPlane > kMaxFarPlane) { - m_cap.camera.farPlane = 
kMaxFarPlane; + if (m_pCap->camera.farPlane > kMaxFarPlane) { + m_pCap->camera.farPlane = kMaxFarPlane; } // If the app is being rendered upside-down, we need to plan accordingly - m_cap.camera.bFlipVertAperture = (projMat[0][0] * projMat[1][1] < 0.0f); - m_cap.camera.firstTime = m_cap.currentFrameNum; + m_pCap->camera.bFlipMeshes = (projMat[0][0] * projMat[1][1] < 0.0f); + m_pCap->camera.firstTime = m_pCap->currentFrameNum; } - assert(!isnan(m_cap.camera.fov)); - assert(!isnan(m_cap.camera.aspectRatio)); - assert(!isnan(m_cap.camera.nearPlane)); - assert(!isnan(m_cap.camera.farPlane)); + assert(!isnan(m_pCap->camera.fov)); + assert(!isnan(m_pCap->camera.aspectRatio)); + assert(!isnan(m_pCap->camera.nearPlane)); + assert(!isnan(m_pCap->camera.farPlane)); const Matrix4 xform = m_sceneManager.getCamera().getViewToWorld(); - m_cap.camera.finalTime = m_cap.currentFrameNum; - m_cap.camera.xforms.push_back({ m_cap.currentFrameNum, matrix4ToGfMatrix4d(xform) }); + m_pCap->camera.finalTime = m_pCap->currentFrameNum; + m_pCap->camera.xforms.push_back({ m_pCap->currentFrameNum, matrix4ToGfMatrix4d(xform) }); } void GameCapturer::captureLights() { @@ -276,17 +277,17 @@ namespace dxvk { break; case RtLightType::Rect: // Todo: Handle Rect lights - Logger::err("[GameCapturer][" + m_cap.idStr + "] RectLight not implemented"); + Logger::err("[GameCapturer][" + m_pCap->idStr + "] RectLight not implemented"); assert(false); break; case RtLightType::Disk: // Todo: Handle Disk lights - Logger::err("[GameCapturer][" + m_cap.idStr + "] DiskLight not implemented"); + Logger::err("[GameCapturer][" + m_pCap->idStr + "] DiskLight not implemented"); assert(false); break; case RtLightType::Cylinder: // Todo: Handle Cylinder lights - Logger::err("[GameCapturer][" + m_cap.idStr + "] CylinderLight not implemented"); + Logger::err("[GameCapturer][" + m_pCap->idStr + "] CylinderLight not implemented"); assert(false); break; case RtLightType::Distant: @@ -300,9 +301,9 @@ namespace dxvk { const auto 
hash = rtLight.getHash(); pxr::GfRotation rotation; rotation.SetIdentity(); - if (m_cap.sphereLights.count(hash) == 0) { + if (m_pCap->sphereLights.count(hash) == 0) { const std::string name = dxvk::hashToString(hash); - lss::SphereLight& sphereLight = m_cap.sphereLights[hash]; + lss::SphereLight& sphereLight = m_pCap->sphereLights[hash]; sphereLight.lightName = name; const auto colorAndIntensity = rtLight.getColorAndIntensity(); sphereLight.color[0] = colorAndIntensity.r; @@ -310,8 +311,8 @@ namespace dxvk { sphereLight.color[2] = colorAndIntensity.b; sphereLight.intensity = colorAndIntensity.w; sphereLight.radius = rtLight.getRadius(); - sphereLight.xforms.reserve(m_options.numFrames - m_cap.numFramesCaptured); - sphereLight.firstTime = m_cap.currentFrameNum; + sphereLight.xforms.reserve(m_options.numFrames - m_pCap->numFramesCaptured); + sphereLight.firstTime = m_pCap->currentFrameNum; const dxvk::RtLightShaping& shaping = rtLight.getShaping(); if (shaping.enabled) { sphereLight.shapingEnabled = true; @@ -320,21 +321,21 @@ namespace dxvk { sphereLight.focusExponent = shaping.focusExponent; rotation = pxr::GfRotation(-pxr::GfVec3d::ZAxis(), pxr::GfVec3f(&shaping.primaryAxis[0])); } - Logger::debug("[GameCapturer][" + m_cap.idStr + "][SphereLight:" + name + "] New"); + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][SphereLight:" + name + "] New"); } - lss::SphereLight& sphereLight = m_cap.sphereLights[hash]; + lss::SphereLight& sphereLight = m_pCap->sphereLights[hash]; const auto position = rtLight.getPosition(); pxr::GfMatrix4d usdXform(rotation, pxr::GfVec3f(&position[0])); - sphereLight.xforms.push_back({ m_cap.currentFrameNum, usdXform }); - sphereLight.finalTime = m_cap.currentFrameNum; + sphereLight.xforms.push_back({ m_pCap->currentFrameNum, usdXform }); + sphereLight.finalTime = m_pCap->currentFrameNum; } void GameCapturer::captureDistantLight(const RtDistantLight& rtLight) { const auto hash = rtLight.getHash(); - if (m_cap.sphereLights.count(hash) 
== 0) { + if (m_pCap->sphereLights.count(hash) == 0) { const std::string name = dxvk::hashToString(hash); - lss::DistantLight& distantLight = m_cap.distantLights[hash]; + lss::DistantLight& distantLight = m_pCap->distantLights[hash]; distantLight.lightName = name; const auto colorAndIntensity = rtLight.getColorAndIntensity(); distantLight.color[0] = colorAndIntensity.r; @@ -343,53 +344,53 @@ namespace dxvk { distantLight.intensity = colorAndIntensity.w; distantLight.angle = rtLight.getHalfAngle() * 2.0; distantLight.direction = pxr::GfVec3f(rtLight.getDirection().data); - distantLight.firstTime = m_cap.currentFrameNum; - Logger::debug("[GameCapturer][" + m_cap.idStr + "][DistantLight:" + name + "] New"); + distantLight.firstTime = m_pCap->currentFrameNum; + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][DistantLight:" + name + "] New"); } - lss::DistantLight& distantLight = m_cap.distantLights[hash]; - distantLight.finalTime = m_cap.currentFrameNum; + lss::DistantLight& distantLight = m_pCap->distantLights[hash]; + distantLight.finalTime = m_pCap->currentFrameNum; } void GameCapturer::captureInstances(const Rc ctx) { - for (const RtInstance* rtInstancePtr : m_sceneManager.getInstanceTable()) { - assert(rtInstancePtr->getBlas() != nullptr); + for (const RtInstance* pRtInstance : m_sceneManager.getInstanceTable()) { + assert(pRtInstance->getBlas() != nullptr); - if (rtInstancePtr->getBlas()->input.cameraType == CameraType::Sky) { - if (!m_cap.bSkyProbeBaked) { + if (pRtInstance->getBlas()->input.cameraType == CameraType::Sky) { + if (!m_pCap->bSkyProbeBaked) { m_exporter.bakeSkyProbe(ctx, BASE_DIR + lss::commonDirName::texDir, commonFileName::bakedSkyProbe); - m_cap.bSkyProbeBaked = true; - Logger::debug("[GameCapturer][" + m_cap.idStr + "][SkyProbe] Bake scheduled to " + + m_pCap->bSkyProbeBaked = true; + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][SkyProbe] Bake scheduled to " + commonFileName::bakedSkyProbe); } } - const XXH64_hash_t instanceId = 
rtInstancePtr->getId(); - const uint8_t instanceFlags = m_cap.instanceFlags[instanceId]; - const bool bIsNew = m_cap.instances.count(instanceId) == 0; + const XXH64_hash_t instanceId = pRtInstance->getId(); + const uint8_t instanceFlags = m_pCap->instanceFlags[instanceId]; + const bool bIsNew = m_pCap->instances.count(instanceId) == 0; const bool bPointsUpdate = checkInstanceUpdateFlag(instanceFlags, InstFlag::PositionsUpdate); const bool bNormalsUpdate = checkInstanceUpdateFlag(instanceFlags, InstFlag::NormalsUpdate); const bool bIndexUpdate = checkInstanceUpdateFlag(instanceFlags, InstFlag::IndexUpdate); const bool bXformUpdate = checkInstanceUpdateFlag(instanceFlags, InstFlag::XformUpdate); - Instance& instance = m_cap.instances[instanceId]; + Instance& instance = m_pCap->instances[instanceId]; if (bIsNew) { - newInstance(ctx, *rtInstancePtr); + newInstance(ctx, *pRtInstance); } - if (m_cap.bCaptureInstances && !bIsNew && (bPointsUpdate || bNormalsUpdate || bIndexUpdate)) { - const BlasEntry* pBlas = rtInstancePtr->getBlas(); + if (m_pCap->bCaptureInstances && !bIsNew && (bPointsUpdate || bNormalsUpdate || bIndexUpdate)) { + const BlasEntry* pBlas = pRtInstance->getBlas(); assert(pBlas != nullptr); - captureMesh(ctx, instance.meshHash, *pBlas, rtInstancePtr->getCategoryFlags(), false, bPointsUpdate, bNormalsUpdate, bIndexUpdate); + captureMesh(ctx, instance.meshHash, *pBlas, pRtInstance->getCategoryFlags(), false, bPointsUpdate, bNormalsUpdate, bIndexUpdate); } - if (m_cap.bCaptureInstances && (bIsNew || bXformUpdate)) { - instance.lssData.xforms.push_back({ m_cap.currentFrameNum, matrix4ToGfMatrix4d(rtInstancePtr->getTransform()) }); - const SkinningData& skinData = rtInstancePtr->getBlas()->input.getSkinningState(); + if (m_pCap->bCaptureInstances && (bIsNew || bXformUpdate)) { + instance.lssData.xforms.push_back({ m_pCap->currentFrameNum, matrix4ToGfMatrix4d(pRtInstance->getTransform()) }); + const SkinningData& skinData = 
pRtInstance->getBlas()->input.getSkinningState(); if (skinData.numBones > 0) { - instance.lssData.boneXForms.push_back({ m_cap.currentFrameNum, matrix4VecToGfMatrix4dVec(skinData.pBoneMatrices) }); + instance.lssData.boneXForms.push_back({ m_pCap->currentFrameNum, matrix4VecToGfMatrix4dVec(skinData.pBoneMatrices) }); } } - instance.lssData.finalTime = m_cap.currentFrameNum; - instance.lssData.isSky = (rtInstancePtr->getBlas()->input.cameraType == CameraType::Sky); - instance.lssData.metadata = createDrawCallMetadata(*rtInstancePtr); + instance.lssData.finalTime = m_pCap->currentFrameNum; + instance.lssData.isSky = (pRtInstance->getBlas()->input.cameraType == CameraType::Sky); + instance.lssData.metadata = createDrawCallMetadata(*pRtInstance); } } @@ -402,7 +403,7 @@ namespace dxvk { const LegacyMaterialData& material = pBlas->getMaterialData(matHash); - const bool bIsNewMat = (matHash != 0x0) && (m_cap.materials.count(matHash) == 0); + const bool bIsNewMat = (matHash != 0x0) && (m_pCap->materials.count(matHash) == 0); if (bIsNewMat) { captureMaterial(ctx, material, !rtInstance.surface.alphaState.isFullyOpaque); } @@ -411,48 +412,53 @@ namespace dxvk { size_t instanceNum = 0; { std::lock_guard lock(m_meshMutex); - bIsNewMesh = m_cap.meshes.count(meshHash) == 0; + bIsNewMesh = m_pCap->meshes.count(meshHash) == 0; if (bIsNewMesh) { - m_cap.meshes[meshHash] = std::make_shared(); - m_cap.meshes[meshHash]->instanceCount = 0; - m_cap.meshes[meshHash]->matHash = matHash; + m_pCap->meshes[meshHash] = std::make_shared(); + m_pCap->meshes[meshHash]->instanceCount = 0; + m_pCap->meshes[meshHash]->matHash = matHash; } - instanceNum = m_cap.meshes[meshHash]->instanceCount++; + instanceNum = m_pCap->meshes[meshHash]->instanceCount++; } if (bIsNewMesh) { captureMesh(ctx, meshHash, *pBlas, rtInstance.getCategoryFlags(), true, true, true, true); } const XXH64_hash_t instanceId = rtInstance.getId(); - Instance& instance = m_cap.instances[instanceId]; + Instance& instance = 
m_pCap->instances[instanceId]; instance.meshHash = meshHash; instance.matHash = matHash; instance.meshInstNum = instanceNum; - instance.lssData.firstTime = m_cap.currentFrameNum; - instance.lssData.xforms.reserve(m_options.numFrames - m_cap.numFramesCaptured); - instance.lssData.metadata = createDrawCallMetadata(rtInstance); + instance.lssData.firstTime = m_pCap->currentFrameNum; - Logger::debug("[GameCapturer][" + m_cap.idStr + "][Inst:" + hashToString(instanceId) + "] New"); + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][Inst:" + hashToString(instanceId) + "] New"); } void GameCapturer::captureMaterial(const Rc ctx, const LegacyMaterialData& materialData, const bool bEnableOpacity) { - const std::string matName = dxvk::hashToString(materialData.getHash()); + lss::Material lssMat; // to be populated - //Export Textures + // Resolve material name + const std::string matName = dxvk::hashToString(materialData.getHash()); + lssMat.matName = matName; + // Export Textures const std::string albedoTexFilename(matName + lss::ext::dds); m_exporter.dumpImageToFile(ctx, BASE_DIR + lss::commonDirName::texDir, albedoTexFilename, materialData.getColorTexture().getImageView()->image()); - const std::string albedoTexPath = str::format(BASE_DIR + lss::commonDirName::texDir, albedoTexFilename); - - // Export Material - lss::Material lssMat; - lssMat.matName = matName; lssMat.albedoTexPath = albedoTexPath; + // Opacity lssMat.enableOpacity = bEnableOpacity; - m_cap.materials[materialData.getHash()].lssData = lssMat; - Logger::debug("[GameCapturer][" + m_cap.idStr + "][Mat:" + matName + "] New"); + // Collect sampler info + const auto& samplerCreateInfo = materialData.getSampler()->info(); + lssMat.sampler.addrModeU = samplerCreateInfo.addressModeU; + lssMat.sampler.addrModeV = samplerCreateInfo.addressModeV; + lssMat.sampler.filter = samplerCreateInfo.magFilter; + lssMat.sampler.borderColor = samplerCreateInfo.borderColor; + + // Set populated LSS Material in our cache + 
m_pCap->materials[materialData.getHash()].lssData = lssMat; + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][Mat:" + matName + "] New"); } void GameCapturer::captureMesh(const Rc ctx, @@ -472,7 +478,7 @@ namespace dxvk { std::shared_ptr pMesh; { std::lock_guard lock(m_meshMutex); - pMesh = m_cap.meshes[currentMeshHash]; + pMesh = m_pCap->meshes[currentMeshHash]; } // Note: Ensures that reading a Vec3 from the position buffer will result in the proper values. This can be extended if @@ -504,39 +510,39 @@ namespace dxvk { pMesh->lssData.isDoubleSided = isDoubleSided; pMesh->lssData.numBones = skinData.numBones; pMesh->lssData.bonesPerVertex = skinData.numBonesPerVertex; - Logger::debug("[GameCapturer][" + m_cap.idStr + "][Mesh:" + pMesh->lssData.meshName + "] New"); + Logger::debug("[GameCapturer][" + m_pCap->idStr + "][Mesh:" + pMesh->lssData.meshName + "] New"); } if (bCapturePositions && geomData.positionBuffer.defined()) { if (skinData.numBones > 0) { - captureMeshPositions(ctx, rasterGeomData.vertexCount, rasterGeomData.positionBuffer, m_cap.currentFrameNum, pMesh); + captureMeshPositions(ctx, rasterGeomData.vertexCount, rasterGeomData.positionBuffer, m_pCap->currentFrameNum, pMesh); } else { - captureMeshPositions(ctx, geomData.vertexCount, geomData.positionBuffer, m_cap.currentFrameNum, pMesh); + captureMeshPositions(ctx, geomData.vertexCount, geomData.positionBuffer, m_pCap->currentFrameNum, pMesh); } } if (bCaptureNormals && geomData.normalBuffer.defined()) { if (skinData.numBones > 0) { - captureMeshNormals(ctx, rasterGeomData.vertexCount, rasterGeomData.normalBuffer, m_cap.currentFrameNum, pMesh); + captureMeshNormals(ctx, rasterGeomData.vertexCount, rasterGeomData.normalBuffer, m_pCap->currentFrameNum, pMesh); } else { - captureMeshNormals(ctx, geomData.vertexCount, geomData.normalBuffer, m_cap.currentFrameNum, pMesh); + captureMeshNormals(ctx, geomData.vertexCount, geomData.normalBuffer, m_pCap->currentFrameNum, pMesh); } } if (bCaptureIndices && 
geomData.indexBuffer.defined()) { - captureMeshIndices(ctx, geomData, m_cap.currentFrameNum, pMesh); + captureMeshIndices(ctx, geomData, m_pCap->currentFrameNum, pMesh); } if (bIsNewMesh && geomData.texcoordBuffer.defined()) { - captureMeshTexCoords(ctx, geomData, m_cap.currentFrameNum, pMesh); + captureMeshTexCoords(ctx, geomData, m_pCap->currentFrameNum, pMesh); } if (bIsNewMesh && geomData.color0Buffer.defined()) { - captureMeshColor(ctx, geomData, m_cap.currentFrameNum, pMesh); + captureMeshColor(ctx, geomData, m_pCap->currentFrameNum, pMesh); } if (bIsNewMesh && skinData.numBones > 0) { - captureMeshBlending(ctx, rasterGeomData, m_cap.currentFrameNum, pMesh); + captureMeshBlending(ctx, rasterGeomData, m_pCap->currentFrameNum, pMesh); pMesh->lssData.boneXForms = matrix4VecToGfMatrix4dVec(skinData.pBoneMatrices); } } @@ -548,7 +554,7 @@ namespace dxvk { const float currentFrameNum, std::shared_ptr pMesh) { - AssetExporter::BufferCallback captureMeshPositionsAsync = [ctx, numVertices, inputPositionBuffer, currentFrameNum, pMesh](Rc posBuf) { + AssetExporter::BufferCallback captureMeshPositionsAsync = [this, ctx, numVertices, inputPositionBuffer, currentFrameNum, pMesh](Rc posBuf) { // Prep helper vars constexpr size_t positionSubElementSize = sizeof(float); const size_t positionStride = inputPositionBuffer.stride() / positionSubElementSize; @@ -562,8 +568,16 @@ namespace dxvk { // Copy GPU buffer to local VtArray pxr::VtArray positions; positions.reserve(numVertices); + OriginCalc originCalc; for (size_t idx = 0; idx < numVertices; ++idx) { - positions.push_back(pxr::GfVec3f(&pVkPosBuf[idx * positionStride])); + const pxr::GfVec3f& pos = *reinterpret_cast(&pVkPosBuf[idx * positionStride]); + if(m_correctBakedTransforms) { + originCalc.compareAndSwap(pos); + } + positions.push_back(pos); + } + if(m_correctBakedTransforms) { + pMesh->originCalc.compareAndSwap(originCalc); } assert(positions.size() > 0); // Create comparison function that returns float @@ -866,11 
+880,12 @@ namespace dxvk { assert(!m_state.has()); assert(!m_state.has()); static auto exportThreadTask = [this](const Rc ctx, - Capture cap, + std::unique_ptr pCap, State* pState, CompletedCapture* complete, const float framesPerSecond, const bool bUseLssUsdPlugins) { + Capture& cap = *pCap; m_exporter.waitForAllExportsToComplete(); assert(pState->has()); const auto exportPrep = prepExport(cap, framesPerSecond, bUseLssUsdPlugins); @@ -897,17 +912,16 @@ namespace dxvk { m_state.set(); std::thread(exportThreadTask, ctx, - std::move(m_cap), + std::move(m_pCap), &m_state, &m_completeCapture, static_cast(m_options.fps), m_bUseLssUsdPlugins).detach(); - m_cap = Capture(); // reset to default } lss::Export GameCapturer::prepExport(const Capture& cap, - const float framesPerSecond, - const bool bUseLssUsdPlugins) { + const float framesPerSecond, + const bool bUseLssUsdPlugins) { lss::Export exportPrep; prepExportMetaData(cap, framesPerSecond, bUseLssUsdPlugins, exportPrep); prepExportMaterials(cap, exportPrep); @@ -944,6 +958,7 @@ namespace dxvk { exportPrep.meta.renderingSettingsDict[pair.first] = pair.second->genericValueToString(RtxOptionImpl::ValueType::Value); } } + exportPrep.meta.bCorrectBakedTransforms = m_correctBakedTransforms; exportPrep.debugId = cap.idStr; exportPrep.baseExportPath = BASE_DIR; @@ -959,38 +974,45 @@ namespace dxvk { } } - void GameCapturer::prepExportMeshes(const Capture& cap, - lss::Export& exportPrep) { + void GameCapturer::prepExportMeshes(const Capture& cap, lss::Export& exportPrep) { + OriginCalc stageOriginCalc; for (auto& [hash, pMesh] : cap.meshes) { std::unique_lock lock(pMesh->meshSync.mutex); pMesh->meshSync.cond.wait(lock, [pNumOutstanding = &pMesh->meshSync.numOutstanding] { return *pNumOutstanding == 0; }); - auto& exportMesh = exportPrep.meshes[hash]; if (pMesh->lssData.numIndices == 0 && pMesh->lssData.numVertices == 0) { continue; } if (cap.materials.count(pMesh->matHash) > 0) { pMesh->lssData.matId = pMesh->matHash; } - 
exportMesh = pMesh->lssData; + if(m_correctBakedTransforms) { + pMesh->lssData.origin = pMesh->originCalc.calc(); + stageOriginCalc.compareAndSwap(pMesh->lssData.origin); + } + exportPrep.meshes[hash] = pMesh->lssData; + } + if(m_correctBakedTransforms) { + exportPrep.stageOrigin = stageOriginCalc.calc(); } } - void GameCapturer::prepExportInstances(const Capture& cap, - lss::Export& exportPrep) { + void GameCapturer::prepExportInstances(const Capture& cap, lss::Export& exportPrep) { for (auto& [hash, instance] : cap.instances) { if (instance.meshHash == 0) { continue; } - auto& exportInstance = - exportPrep.instances.emplace(hash, instance.lssData).first->second; + auto& exportInstance = exportPrep.instances[hash]; + exportInstance = instance.lssData; + assert(cap.meshes.count(instance.meshHash) > 0); + exportInstance.meshId = instance.meshHash; + const auto pMesh = cap.meshes.at(instance.meshHash); - exportInstance.instanceName = pMesh->lssData.meshName + "_" + std::to_string(instance.meshInstNum); if (cap.materials.count(pMesh->matHash) > 0) { exportInstance.matId = instance.matHash; } - exportInstance.meshId = instance.meshHash; + exportInstance.instanceName = pMesh->lssData.meshName + "_" + std::to_string(instance.meshInstNum); } } @@ -1015,5 +1037,4 @@ namespace dxvk { assert(pFlattenedStage); Logger::info("[GameCapturer][" + exportPrep.debugId + "] USD capture flattened."); } - } diff --git a/src/dxvk/rtx_render/rtx_game_capturer.h b/src/dxvk/rtx_render/rtx_game_capturer.h index 5a6c980f7..906f56393 100644 --- a/src/dxvk/rtx_render/rtx_game_capturer.h +++ b/src/dxvk/rtx_render/rtx_game_capturer.h @@ -21,6 +21,7 @@ */ #pragma once +#include "rtx_game_capturer_utils.h" #include "rtx_options.h" #include "../../lssusd/game_exporter_types.h" @@ -57,6 +58,13 @@ struct LegacyMaterialData; class GameCapturer : public RcObject { public: + RW_RTX_OPTION("rtx.capture", bool, correctBakedTransforms, false, + "Some games bake world transforms into mesh vertices. 
If individually captured\n" + "meshes appear to be way off in the middle of nowhere OR instanced meshes appear\n" + "to all have identity xform matrices, enabling will attempt to correct this and\n" + "improve stage + mesh viewability in tools.\n" + "Hashes are unaffected."); + GameCapturer(DxvkDevice* const pDevice, SceneManager& sceneManager, AssetExporter& exporter); ~GameCapturer(); @@ -125,10 +133,11 @@ class GameCapturer : public RcObject }; struct Mesh { - lss::Mesh lssData; - size_t instanceCount = 0; - XXH64_hash_t matHash; - MeshSync meshSync; + lss::Mesh lssData; + size_t instanceCount = 0; + XXH64_hash_t matHash; + MeshSync meshSync; + AtomicOriginCalc originCalc; }; struct Instance { @@ -280,7 +289,8 @@ class GameCapturer : public RcObject std::unordered_map materials; std::unordered_map instances; std::unordered_map instanceFlags; - } m_cap; + }; + std::unique_ptr m_pCap; }; } \ No newline at end of file diff --git a/src/dxvk/rtx_render/rtx_game_capturer_utils.h b/src/dxvk/rtx_render/rtx_game_capturer_utils.h new file mode 100644 index 000000000..b1afb5966 --- /dev/null +++ b/src/dxvk/rtx_render/rtx_game_capturer_utils.h @@ -0,0 +1,110 @@ +/* +* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +*/ +#pragma once + +#include "../../lssusd/usd_include_begin.h" +#include +#include "../../lssusd/usd_include_end.h" + +#include + +namespace dxvk { + +struct AtomicOriginCalc; // Forward declare +struct OriginCalc { + inline void compareAndSwap(const pxr::GfVec3f& vec3) { + replaceMin(&vec3[0]); + replaceMax(&vec3[0]); + } + inline void compareAndSwap(const OriginCalc& other) { + replaceMin(other.min); + replaceMax(other.max); + } + inline pxr::GfVec3f calc() const { + return (pxr::GfVec3f(&min[0]) + pxr::GfVec3f(&max[0])) / 2; + } +private: + static constexpr float fMax = std::numeric_limits::max(); + float min[3] = { fMax, fMax, fMax}; + float max[3] = {-fMax,-fMax,-fMax}; + inline void replaceMin(const float* const vec3) { + min[0] = std::min(min[0],vec3[0]); + min[1] = std::min(min[1],vec3[1]); + min[2] = std::min(min[2],vec3[2]); + } + inline void replaceMax(const float* const vec3) { + max[0] = std::max(max[0],vec3[0]); + max[1] = std::max(max[1],vec3[1]); + max[2] = std::max(max[2],vec3[2]); + } + friend AtomicOriginCalc; +}; + +struct AtomicOriginCalc { + inline void compareAndSwap(const pxr::GfVec3f& vec3) { + replaceMin(&vec3[0]); + replaceMax(&vec3[0]); + } + inline void compareAndSwap(const OriginCalc& other) { + replaceMin(other.min); + replaceMax(other.max); + } + inline pxr::GfVec3f calc() const { + return (pxr::GfVec3f{min[0].load(),min[1].load(),min[2].load()} + + pxr::GfVec3f{max[0].load(),max[1].load(),max[2].load()}) + / 2; + } + inline void reset() { + for(size_t i = 
0; i < 3; ++i) { + min[i].store(fMax); + max[i].store(-fMax); + } + } +private: + static constexpr float fMax = std::numeric_limits::max(); + std::atomic min[3] = { fMax, fMax, fMax}; + std::atomic max[3] = {-fMax,-fMax,-fMax}; + inline void replaceMin(const float* const vec3) { + swapIfLess(min[0],vec3[0]); + swapIfLess(min[1],vec3[1]); + swapIfLess(min[2],vec3[2]); + } + inline void replaceMax(const float* const vec3) { + swapIfGreater(max[0],vec3[0]); + swapIfGreater(max[1],vec3[1]); + swapIfGreater(max[2],vec3[2]); + } + static inline void swapIfLess(std::atomic& atomic, const float other) { + float val = fMax; + do { + val = atomic.load(); + } while(val > other && !atomic.compare_exchange_weak(val, other)); + } + static void swapIfGreater(std::atomic& atomic, const float other) { + float val = -fMax; + do { + val = atomic.load(); + } while(val < other && !atomic.compare_exchange_weak(val, other)); + } +}; + +} diff --git a/src/dxvk/rtx_render/rtx_geometry_utils.cpp b/src/dxvk/rtx_render/rtx_geometry_utils.cpp index 9b3a4ec91..a00c3b1ce 100644 --- a/src/dxvk/rtx_render/rtx_geometry_utils.cpp +++ b/src/dxvk/rtx_render/rtx_geometry_utils.cpp @@ -245,8 +245,8 @@ namespace dxvk { for (uint32_t idx = 0; idx < params.numVertices; idx++) { skinning(idx, &dstPosition[0], &dstNormal[0], srcPosition, srcBlendWeight, srcBlendIndices, srcNormal, params); - ctx->updateBuffer(geo.positionBuffer.buffer(), geo.positionBuffer.offsetFromSlice() + idx * geo.positionBuffer.stride(), sizeof(dstPosition), &dstPosition[0], true); - ctx->updateBuffer(geo.normalBuffer.buffer(), geo.normalBuffer.offsetFromSlice() + idx * geo.normalBuffer.stride(), sizeof(dstNormal), &dstNormal[0], true); + ctx->writeToBuffer(geo.positionBuffer.buffer(), geo.positionBuffer.offsetFromSlice() + idx * geo.positionBuffer.stride(), sizeof(dstPosition), &dstPosition[0], true); + ctx->writeToBuffer(geo.normalBuffer.buffer(), geo.normalBuffer.offsetFromSlice() + idx * geo.normalBuffer.stride(), 
sizeof(dstNormal), &dstNormal[0], true); } } } @@ -560,7 +560,7 @@ namespace dxvk { generateIndices(idx, dst, src, cb); } - ctx->updateBuffer(dstSlice.buffer(), 0, cb.primCount * 3 * sizeof(uint16_t), dst, true); + ctx->writeToBuffer(dstSlice.buffer(), 0, cb.primCount * 3 * sizeof(uint16_t), dst, true); } } @@ -733,7 +733,7 @@ namespace dxvk { interleaver::interleave(i, dst, inputData.positionData, inputData.normalData, inputData.texcoordData, inputData.vertexColorData, args); } - ctx->updateBuffer(output.buffer, 0, input.vertexCount * output.stride, dst, true); + ctx->writeToBuffer(output.buffer, 0, input.vertexCount * output.stride, dst, true); } uint32_t offset = 0; diff --git a/src/dxvk/rtx_render/rtx_imgui.cpp b/src/dxvk/rtx_render/rtx_imgui.cpp index 8968f68e4..9387c14a5 100644 --- a/src/dxvk/rtx_render/rtx_imgui.cpp +++ b/src/dxvk/rtx_render/rtx_imgui.cpp @@ -24,4 +24,137 @@ namespace ImGui { bool Checkbox(const char* label, dxvk::RtxOption* rtxOption) { return IMGUI_ADD_TOOLTIP(Checkbox(label, &rtxOption->getValue()), rtxOption->getDescription()); } + + static bool Items_PairGetter(void* data, int idx, const char** out_text, const char** out_tooltip) { + std::pair* items = reinterpret_cast*>(data); + if (out_text) { + *out_text = items[idx].first; + } + if (out_tooltip) { + *out_tooltip = items[idx].second; + } + return true; + } + + // Copied from imgui_widgets.cpp + static float CalcMaxPopupHeightFromItemCount(int items_count) { + ImGuiContext& g = *GImGui; + if (items_count <= 0) + return FLT_MAX; + return (g.FontSize + g.Style.ItemSpacing.y) * items_count - g.Style.ItemSpacing.y + (g.Style.WindowPadding.y * 2); + } + + bool ImGui::Combo(const char* label, int* current_item, const std::pair items[], int items_count, int height_in_items) { + const bool value_changed = Combo(label, current_item, Items_PairGetter, (void*) items, items_count, height_in_items); + return value_changed; + } + + // Old API, prefer using BeginCombo() nowadays if you can. 
+ bool ImGui::Combo(const char* label, int* current_item, bool (*items_getter)(void*, int, const char**, const char**), void* data, int items_count, int popup_max_height_in_items) { + ImGuiContext& g = *GImGui; + + // Call the getter to obtain the preview string which is a parameter to BeginCombo() + const char* preview_value = NULL; + if (*current_item >= 0 && *current_item < items_count) { + items_getter(data, *current_item, &preview_value, nullptr); + } + + // The old Combo() API exposed "popup_max_height_in_items". The new more general BeginCombo() API doesn't have/need it, but we emulate it here. + if (popup_max_height_in_items != -1 && !(g.NextWindowData.Flags & ImGuiNextWindowDataFlags_HasSizeConstraint)) { + SetNextWindowSizeConstraints(ImVec2(0, 0), ImVec2(FLT_MAX, CalcMaxPopupHeightFromItemCount(popup_max_height_in_items))); + } + + if (!BeginCombo(label, preview_value, ImGuiComboFlags_None)) { + return false; + } + + // Display items + // FIXME-OPT: Use clipper (but we need to disable it on the appearing frame to make sure our call to SetItemDefaultFocus() is processed) + bool value_changed = false; + for (int i = 0; i < items_count; i++) { + PushID(i); + const bool item_selected = (i == *current_item); + const char* item_text; + const char* item_tooltip = nullptr; // must start null: a getter that returns false never writes it, and it is read below + if (!items_getter(data, i, &item_text, &item_tooltip)) { + item_text = "*Unknown item*"; + } + if (Selectable(item_text, item_selected)) { + value_changed = true; + *current_item = i; + } + if (item_selected) { + SetItemDefaultFocus(); + } + if (item_tooltip && item_tooltip[0] != '\0' && ImGui::IsItemHovered()) { + SetTooltipUnformatted(item_tooltip); + } + PopID(); + } + + EndCombo(); + + if (value_changed) { + MarkItemEdited(g.LastItemData.ID); + } + + return value_changed; + } + + bool ImGui::ListBox(const char* label, int* current_item, const std::pair items[], int items_count, int height_items) { + const bool value_changed = ListBox(label, current_item, Items_PairGetter, (void*) items,
items_count, height_items); + return value_changed; + } + + // This is merely a helper around BeginListBox(), EndListBox(). + // Considering using those directly to submit custom data or store selection differently. + bool ImGui::ListBox(const char* label, int* current_item, bool (*items_getter)(void*, int, const char**, const char**), void* data, int items_count, int height_in_items) { + ImGuiContext& g = *GImGui; + + // Calculate size from "height_in_items" + if (height_in_items < 0) { + height_in_items = ImMin(items_count, 7); + } + float height_in_items_f = height_in_items + 0.25f; + ImVec2 size(0.0f, ImFloor(GetTextLineHeightWithSpacing() * height_in_items_f + g.Style.FramePadding.y * 2.0f)); + + if (!BeginListBox(label, size)) { + return false; + } + + // Assume all items have even height (= 1 line of text). If you need items of different height, + // you can create a custom version of ListBox() in your code without using the clipper. + bool value_changed = false; + ImGuiListClipper clipper; + clipper.Begin(items_count, GetTextLineHeightWithSpacing()); // We know exactly our line height here so we pass it as a minor optimization, but generally you don't need to. 
+ while (clipper.Step()) + for (int i = clipper.DisplayStart; i < clipper.DisplayEnd; i++) { + const char* item_text; + const char* item_tooltip = nullptr; // must start null: a getter that returns false never writes it, and it is read below + if (!items_getter(data, i, &item_text, &item_tooltip)) { + item_text = "*Unknown item*"; + } + + PushID(i); + const bool item_selected = (i == *current_item); + if (Selectable(item_text, item_selected)) { + *current_item = i; + value_changed = true; + } + if (item_selected) { + SetItemDefaultFocus(); + } + if (item_tooltip && item_tooltip[0] != '\0' && ImGui::IsItemHovered()) { + SetTooltipUnformatted(item_tooltip); + } + PopID(); + } + EndListBox(); + + if (value_changed) { + MarkItemEdited(g.LastItemData.ID); + } + + return value_changed; + } } diff --git a/src/dxvk/rtx_render/rtx_imgui.h b/src/dxvk/rtx_render/rtx_imgui.h index 93d1f63a2..b5a34b7e4 100644 --- a/src/dxvk/rtx_render/rtx_imgui.h +++ b/src/dxvk/rtx_render/rtx_imgui.h @@ -41,6 +41,11 @@ namespace ImGui { #define IMGUI_ADD_TOOLTIP(imguiCommand, tooltip) ImGui::addTooltipAndPassthroughValue((imguiCommand), tooltip) IMGUI_API bool Checkbox(const char* label, dxvk::RtxOption* rtxOption); + IMGUI_API bool Combo(const char* label, int* current_item, const std::pair items[], int items_count, int popup_max_height_in_items = -1); + IMGUI_API bool Combo(const char* label, int* current_item, bool(*items_getter)(void* data, int idx, const char** out_text, const char** out_tooltip), void* data, int items_count, int popup_max_height_in_items = -1); + IMGUI_API bool ListBox(const char* label, int* current_item, const std::pair items[], int items_count, int height_in_items = -1); + IMGUI_API bool ListBox(const char* label, int* current_item, bool (*items_getter)(void* data, int idx, const char** out_text, const char** out_tooltip), void* data, int items_count, int height_in_items = -1); + // Variant handling RtxOption as input template @@ -225,19 +230,24 @@ namespace ImGui { IMGUI_API bool SliderFloat4(const char* label, dxvk::RtxOption* rtxOption, Args&& ...
args) { return IMGUI_ADD_TOOLTIP(SliderFloat4(label, rtxOption->getValue().data, std::forward(args)...), rtxOption->getDescription()); } - + // Combo Box with unique key per combo entry // The combo entries are displayed in the order they appear in ComboEntries template class ComboWithKey { public: - using ComboEntries = std::vector>; + struct ComboEntry { + T key; + const char* name = nullptr; + const char* tooltip = nullptr; + }; + using ComboEntries = std::vector; ComboWithKey(const char* widgetName, ComboEntries&& comboEntries) : m_comboEntries { std::move(comboEntries) } , m_widgetName { widgetName } { for (int i = 0; i < m_comboEntries.size(); i++) { - T key = m_comboEntries[i].first; + T key = m_comboEntries[i].key; assert(m_keyToComboIdx.find(key) == m_keyToComboIdx.end() && "Duplicate key found"); m_keyToComboIdx[key] = i; } @@ -258,7 +268,7 @@ namespace ImGui { bool isChanged = Combo(m_widgetName, &comboIdx, getString, static_cast(&m_comboEntries), static_cast(m_comboEntries.size())); - *key = m_comboEntries[comboIdx].first; + *key = m_comboEntries[comboIdx].key; return isChanged; } @@ -270,13 +280,18 @@ namespace ImGui { } private: - static bool getString(void* data, int entryIdx, const char** out_text) { + static bool getString(void* data, int entryIdx, const char** out_text, const char** out_tooltip) { const ComboEntries& v = *reinterpret_cast(data); if (entryIdx >= v.size()) return false; - *out_text = v[entryIdx].second; + if (out_text) { + *out_text = v[entryIdx].name; + } + if (out_tooltip) { + *out_tooltip = v[entryIdx].tooltip; + } return true; } diff --git a/src/dxvk/rtx_render/rtx_instance_manager.cpp b/src/dxvk/rtx_render/rtx_instance_manager.cpp index 61c9defb2..26f685cbb 100644 --- a/src/dxvk/rtx_render/rtx_instance_manager.cpp +++ b/src/dxvk/rtx_render/rtx_instance_manager.cpp @@ -898,6 +898,7 @@ namespace dxvk { currentInstance.surface.spriteSheetRows = spriteSheetRows; currentInstance.surface.spriteSheetCols = spriteSheetCols; 
currentInstance.surface.spriteSheetFPS = spriteSheetFPS; + currentInstance.surface.objectPickingValue = drawCall.drawCallID; // For worldspace UI, we want to show the UI (unlit) in the world. So configure the blend mode if blending is used accordingly. if (currentInstance.m_isWorldSpaceUI) { diff --git a/src/dxvk/rtx_render/rtx_light_manager.cpp b/src/dxvk/rtx_render/rtx_light_manager.cpp index f15f951e4..d0942942f 100644 --- a/src/dxvk/rtx_render/rtx_light_manager.cpp +++ b/src/dxvk/rtx_render/rtx_light_manager.cpp @@ -324,6 +324,13 @@ namespace dxvk { m_linearizedLights.emplace_back(&light); } + for (auto& handle : m_externalActiveLightList) { + auto found = m_externalLights.find(handle); // by value: find() returns a temporary iterator, which a non-const lvalue reference cannot bind to + if (found != m_externalLights.end()) { + m_linearizedLights.emplace_back(&found->second); + } + } + // Count the active light of each type m_lightTypeRanges.fill(LightRange {}); @@ -366,6 +373,7 @@ namespace dxvk { // is not an issue and the buffers are allowed to keep whatever capacity they have allocated between calls for the sake of performance. m_lightsGPUData.resize(lightsGPUSize); + memset(m_lightsGPUData.data(), 0xff, sizeof(char)* m_lightsGPUData.size()); m_lightMappingData.resize(lightMappingBufferEntries); // Clear all slots to new light @@ -425,16 +433,36 @@ namespace dxvk { } if (!m_lightsGPUData.empty()) { - ctx->updateBuffer(m_lightBuffer, 0, m_lightsGPUData.size(), m_lightsGPUData.data()); + ctx->writeToBuffer(m_lightBuffer, 0, m_lightsGPUData.size(), m_lightsGPUData.data()); } if (!m_lightMappingData.empty()) { - ctx->updateBuffer(m_lightMappingBuffer, 0, m_lightMappingData.size() * sizeof(uint16_t), m_lightMappingData.data()); + ctx->writeToBuffer(m_lightMappingBuffer, 0, m_lightMappingData.size() * sizeof(uint16_t), m_lightMappingData.data()); } // If there are no lights with >0 intensity, then clear the list...
if (m_currentActiveLightCount == 0) clear(); + + // Generate a GPU dome light if necessary + DomeLight activeDomeLight; + if (getActiveDomeLight(activeDomeLight)) { + // Ensures a texture stays in VidMem + SceneManager& sceneManager = device()->getCommon()->getSceneManager(); + sceneManager.trackTexture(ctx, activeDomeLight.texture, m_gpuDomeLightArgs.textureIndex, true, false); + + m_gpuDomeLightArgs.active = true; + m_gpuDomeLightArgs.radiance = activeDomeLight.radiance; + m_gpuDomeLightArgs.worldToLightTransform = activeDomeLight.worldToLight; + } else { + m_gpuDomeLightArgs.active = false; + m_gpuDomeLightArgs.radiance = Vector3(0.0f); + m_gpuDomeLightArgs.textureIndex = BINDING_INDEX_INVALID; + } + + // Reset external active light list. + m_externalActiveDomeLight = nullptr; + m_externalActiveLightList.clear(); } float LightManager::isSimilar(const RtLight& a, const RtLight& b, float distanceThreshold) { @@ -604,6 +632,58 @@ namespace dxvk { } } + void LightManager::addExternalLight(remixapi_LightHandle handle, const RtLight& rtlight) { + auto found = m_externalLights.find(handle); + if (found != m_externalLights.end()) { + // TODO: warn the user about id collision, + // or just overwriting existing one is fine? 
+ found->second = rtlight; + } else { + m_externalLights.emplace(handle, rtlight); + } + } + + void LightManager::removeExternalLight(remixapi_LightHandle handle) { + m_externalLights.erase(handle); + m_externalDomeLights.erase(handle); + } + + bool LightManager::getActiveDomeLight(DomeLight& domeLightOut) { + if (m_externalDomeLights.size() == 0 || m_externalActiveDomeLight == nullptr) { + return false; + } + + auto found = m_externalDomeLights.find(m_externalActiveDomeLight); + if (found == m_externalDomeLights.end()) { + // Invalid active dome light, reset it + m_externalActiveDomeLight = nullptr; + return false; + } + + domeLightOut = found->second; + + return true; + } + + void LightManager::addExternalDomeLight(remixapi_LightHandle handle, const DomeLight& domeLight) { + auto found = m_externalDomeLights.find(handle); + if (found != m_externalDomeLights.end()) { + // TODO: warn the user about id collision, + // or just overwriting existing one is fine? + found->second = domeLight; + } else { + m_externalDomeLights.emplace(handle, domeLight); + } + } + + void LightManager::addExternalLightInstance(remixapi_LightHandle enabledLight) { + if (m_externalLights.find(enabledLight) != m_externalLights.end()) { + m_externalActiveLightList.insert(enabledLight); + } else if (m_externalDomeLights.find(enabledLight) != m_externalDomeLights.end() && m_externalActiveDomeLight == nullptr) { + m_externalActiveDomeLight = enabledLight; + } + } + void LightManager::setRaytraceArgs(RaytraceArgs& raytraceArgs, uint32_t rtxdiInitialLightSamples, uint32_t volumeRISInitialLightSamples, uint32_t risLightSamples) const { // The algorithm below performs two tasks: diff --git a/src/dxvk/rtx_render/rtx_light_manager.h b/src/dxvk/rtx_render/rtx_light_manager.h index 29ac26f58..1bd6ecf22 100644 --- a/src/dxvk/rtx_render/rtx_light_manager.h +++ b/src/dxvk/rtx_render/rtx_light_manager.h @@ -31,6 +31,10 @@ #include "rtx_lights.h" #include "rtx_camera_manager.h" #include "rtx_common_object.h" 
+#include "rtx/pass/common_binding_indices.h" +#include "rtx/pass/raytrace_args.h" + +using remixapi_LightHandle = struct remixapi_LightHandle_T*; struct RaytraceArgs; @@ -71,6 +75,7 @@ struct LightManager : public CommonDeviceObject { const Rc getPreviousLightBuffer() const { return m_previousLightBuffer.ptr() ? m_previousLightBuffer : m_lightBuffer; } const Rc getLightMappingBuffer() const { return m_lightMappingBuffer; } const uint32_t getActiveCount() const { return m_currentActiveLightCount; } + const DomeLightArgs& getDomeLightArgs() const { return m_gpuDomeLightArgs; } void clear(); @@ -84,15 +89,27 @@ struct LightManager : public CommonDeviceObject { void addLight(const RtLight& light, const RtLightAntiCullingType antiCullingType); void addLight(const RtLight& light, const DrawCallState& drawCallState, const RtLightAntiCullingType antiCullingType); + void addExternalLight(remixapi_LightHandle handle, const RtLight& rtlight); + void addExternalDomeLight(remixapi_LightHandle handle, const DomeLight& domeLight); + void removeExternalLight(remixapi_LightHandle handle); + void addExternalLightInstance(remixapi_LightHandle enabledLight); + void setRaytraceArgs(RaytraceArgs& raytraceArgs, uint32_t rtxdiInitialLightSamples, uint32_t volumeRISInitialLightSamples, uint32_t risLightSamples) const; uint getLightCount(uint type); + private: std::unordered_map m_lights; // Note: A fallback light tracked seperately and handled specially to not be mixed up with // lights provided from the application. 
std::optional m_fallbackLight{}; + std::unordered_map m_externalLights; + std::unordered_map m_externalDomeLights; + std::unordered_set m_externalActiveLightList; + remixapi_LightHandle m_externalActiveDomeLight = nullptr; + DomeLightArgs m_gpuDomeLightArgs; + Rc m_lightBuffer; Rc m_previousLightBuffer; Rc m_lightMappingBuffer; @@ -107,6 +124,8 @@ struct LightManager : public CommonDeviceObject { std::vector m_lightsGPUData{}; std::vector m_lightMappingData{}; + bool getActiveDomeLight(DomeLight& lightOut); + void garbageCollectionInternal(); // Similarity check. diff --git a/src/dxvk/rtx_render/rtx_lights.h b/src/dxvk/rtx_render/rtx_lights.h index fe6ec909b..cefd579ca 100644 --- a/src/dxvk/rtx_render/rtx_lights.h +++ b/src/dxvk/rtx_render/rtx_lights.h @@ -327,6 +327,12 @@ struct RtDistantLight { XXH64_hash_t m_cachedHash; }; +struct DomeLight { + Vector3 radiance; + TextureRef texture; + Matrix4 worldToLight; +}; + struct RtLight { RtLight(); diff --git a/src/dxvk/rtx_render/rtx_lights_data.cpp b/src/dxvk/rtx_render/rtx_lights_data.cpp index a26f206eb..c83e091e0 100644 --- a/src/dxvk/rtx_render/rtx_lights_data.cpp +++ b/src/dxvk/rtx_render/rtx_lights_data.cpp @@ -56,6 +56,7 @@ pxr::VtValue val; \ getLightAttribute(prim, pxr::TfToken(#usd_attr), pxr::TfToken("inputs:"#usd_attr)).Get(&val); \ if(!val.IsEmpty()) { \ + static_assert(uint32_t(DirtyFlags::k_##name) < 32); \ m_dirty.set(DirtyFlags::k_##name); \ m_##name = val.UncheckedGet(); \ } \ diff --git a/src/dxvk/rtx_render/rtx_material_data.h b/src/dxvk/rtx_render/rtx_material_data.h index d5b127a93..18b575a3b 100644 --- a/src/dxvk/rtx_render/rtx_material_data.h +++ b/src/dxvk/rtx_render/rtx_material_data.h @@ -21,6 +21,8 @@ */ #pragma once +#include "../../lssusd/mdl_helpers.h" + #include "../../lssusd/usd_include_begin.h" #include #include @@ -31,47 +33,55 @@ // clang-format off #define LIST_OPAQUE_MATERIAL_TEXTURES(X) \ - /*Parameter Name, USD Token String, Type, UNUSED... 
Default Value */ \ - X(AlbedoOpacityTexture, diffuse_texture, TextureRef, void, void, {}) \ - X(NormalTexture, normalmap_texture, TextureRef, void, void, {}) \ - X(TangentTexture, tangent_texture, TextureRef, void, void, {}) \ - X(HeightTexture, height_texture, TextureRef, void, void, {}) \ - X(RoughnessTexture, reflectionroughness_texture, TextureRef, void, void, {}) \ - X(MetallicTexture, metallic_texture, TextureRef, void, void, {}) \ - X(EmissiveColorTexture, emissive_mask_texture, TextureRef, void, void, {}) + /*Parameter Name, USD Token String, Type, UNUSED... Default Value */ \ + X(AlbedoOpacityTexture, diffuse_texture, TextureRef, void, void, {}) \ + X(NormalTexture, normalmap_texture, TextureRef, void, void, {}) \ + X(TangentTexture, tangent_texture, TextureRef, void, void, {}) \ + X(HeightTexture, height_texture, TextureRef, void, void, {}) \ + X(RoughnessTexture, reflectionroughness_texture, TextureRef, void, void, {}) \ + X(MetallicTexture, metallic_texture, TextureRef, void, void, {}) \ + X(EmissiveColorTexture, emissive_mask_texture, TextureRef, void, void, {}) \ + X(SubsurfaceTransmittanceTexture, subsurface_transmittance_texture, TextureRef, void, void, {}) \ + X(SubsurfaceThicknessTexture, subsurface_thickness_texture, TextureRef, void, void, {}) \ + X(SubsurfaceSingleScatteringAlbedoTexture, subsurface_single_scattering_texture, TextureRef, void, void, {}) + #define LIST_OPAQUE_MATERIAL_CONSTANTS(X) \ - /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ - X(AnisotropyConstant, anisotropy, float, 0.f, 1.f, 0.f) \ + /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ + X(AnisotropyConstant, anisotropy, float, 0.f, 1.f, 0.f) \ /* Note: Maximum clamped to float 16 max due to GPU encoding requirements. 
*/ \ - X(EmissiveIntensity, emissive_intensity, float, 0.f, 65504.0f, 40.f) \ - X(AlbedoConstant, diffuse_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.2f, 0.2f, 0.2f)) \ - X(OpacityConstant, opacity_constant, float, 0.f, 1.f, 1.f) \ - X(RoughnessConstant, reflection_roughness_constant, float, 0.f, 1.f, .5f) \ - X(MetallicConstant, metallic_constant, float, 0.f, 1.f, 0.f) \ - X(EmissiveColorConstant, emissive_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(1.0f, 0.1f, 0.1f)) \ - X(EnableEmission, enable_emission, bool, false, true, false) \ - X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ - X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ - X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ - X(EnableThinFilm, enable_thin_film, bool, false, true, false) \ - X(AlphaIsThinFilmThickness, thin_film_thickness_from_albedo_alpha, bool, false, true, false) \ + X(EmissiveIntensity, emissive_intensity, float, 0.f, 65504.0f, 40.f) \ + X(AlbedoConstant, diffuse_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.2f, 0.2f, 0.2f)) \ + X(OpacityConstant, opacity_constant, float, 0.f, 1.f, 1.f) \ + X(RoughnessConstant, reflection_roughness_constant, float, 0.f, 1.f, .5f) \ + X(MetallicConstant, metallic_constant, float, 0.f, 1.f, 0.f) \ + X(EmissiveColorConstant, emissive_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(1.0f, 0.1f, 0.1f)) \ + X(EnableEmission, enable_emission, bool, false, true, false) \ + X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ + X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ + X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ + X(EnableThinFilm, enable_thin_film, bool, false, true, false) \ + X(AlphaIsThinFilmThickness, thin_film_thickness_from_albedo_alpha, bool, false, true, false) \ /* Note: Thickness cannot be 0 so should be kept above this minimum small value (though in practice it'll likely be */ \ /* quantized to 0 with values this 
small anyways, but it's good to be careful about it for potential future changes). */ \ /* Note: Max thickness constant be less than the float 16 max due to float 16 usage on the GPU. */ \ - X(ThinFilmThicknessConstant, thin_film_thickness_constant, float, .001f, OPAQUE_SURFACE_MATERIAL_THIN_FILM_MAX_THICKNESS, 200.f) \ - X(UseLegacyAlphaState, use_legacy_alpha_state, bool, false, true, true) \ - X(BlendEnabled, blend_enabled, bool, false, true, false) \ - X(BlendType, blend_type, BlendType, BlendType::kMinValue, BlendType::kMaxValue, BlendType::kAlpha) \ - X(InvertedBlend, inverted_blend, bool, false, true, false) \ - X(AlphaTestType, alpha_test_type, AlphaTestType, AlphaTestType::kMinValue, AlphaTestType::kMaxValue, AlphaTestType::kAlways) \ - X(AlphaTestReferenceValue, alpha_test_reference_value, uint8_t, 0, 255, 0) \ + X(ThinFilmThicknessConstant, thin_film_thickness_constant, float, .001f, OPAQUE_SURFACE_MATERIAL_THIN_FILM_MAX_THICKNESS, 200.f) \ + X(UseLegacyAlphaState, use_legacy_alpha_state, bool, false, true, true) \ + X(BlendEnabled, blend_enabled, bool, false, true, false) \ + X(BlendType, blend_type, BlendType, BlendType::kMinValue, BlendType::kMaxValue, BlendType::kAlpha) \ + X(InvertedBlend, inverted_blend, bool, false, true, false) \ + X(AlphaTestType, alpha_test_type, AlphaTestType, AlphaTestType::kMinValue, AlphaTestType::kMaxValue, AlphaTestType::kAlways) \ + X(AlphaTestReferenceValue, alpha_test_reference_value, uint8_t, 0, 255, 0) \ /* Note: Maximum clamped to float 16 max due to GPU encoding requirements. 
*/ \ - X(DisplaceIn, displace_in, float, 0.f, 65504.0f, 0.f) \ - X(SubsurfaceTransmittanceColor, subsurface_transmittance_color, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.5f, 0.5f, 0.5f)) \ - X(SubsurfaceMeasurementDistance, subsurface_measurement_distance, float, 0.f, 65504.0f, 0.f) \ - X(SubsurfaceSingleScatteringAlbedo, subsurface_single_scattering_albedo, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.5f, 0.5f, 0.5f)) \ - X(SubsurfaceVolumetricAnisotropy, subsurface_volumetric_anisotropy, float, -1.f, 1.f, 0.f) + X(DisplaceIn, displace_in, float, 0.f, 65504.0f, 0.f) \ + X(SubsurfaceTransmittanceColor, subsurface_transmittance_color, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.5f, 0.5f, 0.5f)) \ + X(SubsurfaceMeasurementDistance, subsurface_measurement_distance, float, 0.f, 65504.0f, 0.f) \ + X(SubsurfaceSingleScatteringAlbedo, subsurface_single_scattering_albedo, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.5f, 0.5f, 0.5f)) \ + X(SubsurfaceVolumetricAnisotropy, subsurface_volumetric_anisotropy, float, -1.f, 1.f, 0.f) \ + /* Sampler State */ \ + X(FilterMode, filter_mode, uint8_t, lss::Mdl::Filter::Nearest, lss::Mdl::Filter::Linear, lss::Mdl::Filter::Nearest) \ + X(WrapModeU, wrap_mode_u, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) \ + X(WrapModeV, wrap_mode_v, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) #define LIST_OPAQUE_MATERIAL_PARAMS(X)\ LIST_OPAQUE_MATERIAL_TEXTURES(X) \ @@ -88,23 +98,27 @@ /* Note: IoR values less than 1 are physically impossible for typical translucent materials. */ \ /* Note: 3 chosen due to virtually no physical materials having an IoR greater to this, and because this */ \ /* is currently the maximum IoR value the GPU supports encoding of as well. 
*/ \ - /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ - X(RefractiveIndex, ior_constant, float, 1.f, 3.f, 1.3f) \ - X(TransmittanceColor, transmittance_color, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.97f, 0.97f, 0.97f)) \ - X(TransmittanceMeasurementDistance, transmittance_measurement_distance, float, .001f, 65504.0f, 1.f) \ - X(EnableEmission, enable_emission, bool, false, true, false) \ + /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ + X(RefractiveIndex, ior_constant, float, 1.f, 3.f, 1.3f) \ + X(TransmittanceColor, transmittance_color, Vector3, Vector3(0.f), Vector3(1.f), Vector3(0.97f, 0.97f, 0.97f)) \ + X(TransmittanceMeasurementDistance, transmittance_measurement_distance, float, .001f, 65504.0f, 1.f) \ + X(EnableEmission, enable_emission, bool, false, true, false) \ /* Note: Maximum clamped to float 16 max due to GPU encoding requirements. */ \ - X(EmissiveIntensity, emissive_intensity, float, 0.f, 65504.0f, 40.f) \ - X(EmissiveColorConstant, emissive_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(1.0f, 0.1f, 0.1f)) \ - X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ - X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ - X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ - X(EnableThinWalled, thin_walled, bool, false, true, false) \ + X(EmissiveIntensity, emissive_intensity, float, 0.f, 65504.0f, 40.f) \ + X(EmissiveColorConstant, emissive_color_constant, Vector3, Vector3(0.f), Vector3(1.f), Vector3(1.0f, 0.1f, 0.1f)) \ + X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ + X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ + X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ + X(EnableThinWalled, thin_walled, bool, false, true, false) \ /* Note: 0.001 to be safe around the minimum of float16 values, as well as due to the fact that we cut off */ \ /* 2 bits of the value in some cases. 
*/ \ /* Note: Maximum clamped to float 16 max due to GPU encoding requirements. */ \ - X(ThinWallThickness, thin_wall_thickness, float, .001f, 65504.0f, .001f) \ - X(EnableDiffuseLayer, use_diffuse_layer, bool, false, true, false) + X(ThinWallThickness, thin_wall_thickness, float, .001f, 65504.0f, .001f) \ + X(EnableDiffuseLayer, use_diffuse_layer, bool, false, true, false) \ + /* Sampler State */ \ + X(FilterMode, filter_mode, uint8_t, lss::Mdl::Filter::Nearest, lss::Mdl::Filter::Linear, lss::Mdl::Filter::Nearest) \ + X(WrapModeU, wrap_mode_u, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) \ + X(WrapModeV, wrap_mode_v, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) #define LIST_TRANSLUCENT_MATERIAL_PARAMS(X)\ LIST_TRANSLUCENT_MATERIAL_TEXTURES(X) \ @@ -117,14 +131,19 @@ X(MaskTexture2, unused_in_usd_so_dont, TextureRef, void, void, {}) #define LIST_PORTAL_MATERIAL_CONSTANTS(X) \ - /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ - X(RayPortalIndex, portal_index, uint8_t, 0, 255, 0) \ - X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ - X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ - X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ - X(RotationSpeed, rotation_speed, float, 0.f, 65504.0f, 0.f) \ - X(EnableEmission, enable_emission, bool, false, true, false) \ - X(EmissiveIntensity, emissive_intensity, float, 0.f, 65504.0f, 40.f) \ + /*Parameter Name, USD Token String, Type, Min Value, Max Value, Default Value */ \ + X(RayPortalIndex, portal_index, uint8_t, 0, 255, 0) \ + X(SpriteSheetRows, sprite_sheet_rows, uint8_t, 0, 255, 0) \ + X(SpriteSheetCols, sprite_sheet_cols, uint8_t, 0, 255, 0) \ + X(SpriteSheetFPS, sprite_sheet_fps, uint8_t, 0, 255, 0) \ + X(RotationSpeed, rotation_speed, float, 0.f, 65504.0f, 0.f) \ + X(EnableEmission, enable_emission, bool, false, true, false) \ + X(EmissiveIntensity, 
emissive_intensity, float, 0.f, 65504.0f, 40.f) \ + /* Sampler State */ \ + X(FilterMode, filter_mode, uint8_t, lss::Mdl::Filter::Nearest, lss::Mdl::Filter::Linear, lss::Mdl::Filter::Nearest) \ + X(WrapModeU, wrap_mode_u, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) \ + X(WrapModeV, wrap_mode_v, uint8_t, lss::Mdl::WrapMode::Clamp, lss::Mdl::WrapMode::Clip, lss::Mdl::WrapMode::Repeat) + #define LIST_PORTAL_MATERIAL_PARAMS(X)\ LIST_PORTAL_MATERIAL_TEXTURES(X) \ @@ -138,6 +157,7 @@ m_##name{name}, #define WRITE_CONSTANT_MEMBER_FUNC(name, usd_attr, type, minVal, maxVal, defaultVal) \ + type& get##name() { return m_##name; } \ const type& get##name() const { return m_##name; } \ static pxr::TfToken get##name##Token() { return pxr::TfToken("inputs:"#usd_attr); } @@ -148,6 +168,7 @@ #define WRITE_CONSTANT_DESERIALIZER(name, usd_attr, type, minVal, maxVal, defaultVal) \ if(shader.HasAttribute(get##name##Token())) { \ + static_assert(uint64_t(DirtyFlags::k_##name) < 64); \ target.m_dirty.set(DirtyFlags::k_##name); \ pxr::VtValue val; \ shader.GetAttribute(get##name##Token()).Get(&val); \ @@ -157,6 +178,7 @@ #define WRITE_TEXTURE_DESERIALIZER(name, usd_attr, type, minVal, maxVal, defaultVal) \ if(shader.HasAttribute(get##name##Token())) { \ + static_assert(uint64_t(DirtyFlags::k_##name) < 64); \ target.m_dirty.set(DirtyFlags::k_##name); \ target.m_##name = TextureRef(getTexture(shader, get##name##Token())); \ } @@ -193,7 +215,7 @@ struct name##Data { /* Instantiates a material, must explicitly set all parameters */ \ name##Data( \ X_PARAMS(WRITE_CTOR_ARGS) \ - uint32_t dirtyFlags = 0 \ + uint64_t dirtyFlags = 0 \ ) \ : X_PARAMS(WRITE_CTOR_INIT) \ m_dirty {dirtyFlags} \ @@ -248,7 +270,7 @@ private: \ X_PARAMS(WRITE_PARAMETER_MEMBERS) \ \ - enum class DirtyFlags : uint32_t { \ + enum class DirtyFlags : uint64_t { \ X_PARAMS(WRITE_DIRTY_FLAGS) \ }; \ \ diff --git a/src/dxvk/rtx_render/rtx_materials.h 
b/src/dxvk/rtx_render/rtx_materials.h index 79bc8db19..796d3e554 100644 --- a/src/dxvk/rtx_render/rtx_materials.h +++ b/src/dxvk/rtx_render/rtx_materials.h @@ -29,6 +29,7 @@ #include "../shaders/rtx/concept/surface/surface_shared.h" #include "../../dxso/dxso_util.h" #include "rtx_material_data.h" +#include "../../lssusd/mdl_helpers.h" namespace dxvk { // Surfaces @@ -110,7 +111,7 @@ struct RtSurface { writeGPUHelper(data, offset, packedHash); writeGPUHelper(data, offset, positionOffset); - writeGPUPadding<4>(data, offset); + writeGPUHelper(data, offset, objectPickingValue); writeGPUHelper(data, offset, normalOffset); writeGPUHelper(data, offset, texcoordOffset); writeGPUHelper(data, offset, color0Offset); @@ -320,6 +321,7 @@ struct RtSurface { uint8_t spriteSheetFPS = 0; XXH64_hash_t associatedGeometryHash; // NOTE: This is used for the debug view + uint32_t objectPickingValue = 0; // NOTE: a value to fill GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT }; // Shared Material Defaults/Limits @@ -921,13 +923,21 @@ struct RtRayPortalSurfaceMaterial { // Subsurface Material struct RtSubsurfaceMaterial { RtSubsurfaceMaterial( + const uint32_t subsurfaceTransmittanceTextureIndex, + const uint32_t subsurfaceThicknessTextureIndex, + const uint32_t subsurfaceSingleScatteringAlbedoTextureIndex, const Vector3& subsurfaceTransmittanceColor, const float subsurfaceMeasurementDistance, const Vector3& subsurfaceSingleScatteringAlbedo, const float subsurfaceVolumetricAnisotropy) : + m_subsurfaceTransmittanceTextureIndex(subsurfaceTransmittanceTextureIndex), + m_subsurfaceThicknessTextureIndex(subsurfaceThicknessTextureIndex), + m_subsurfaceSingleScatteringAlbedoTextureIndex(subsurfaceSingleScatteringAlbedoTextureIndex), m_subsurfaceTransmittanceColor { subsurfaceTransmittanceColor }, m_subsurfaceMeasurementDistance { subsurfaceMeasurementDistance }, m_subsurfaceSingleScatteringAlbedo { subsurfaceSingleScatteringAlbedo }, m_subsurfaceVolumetricAnisotropy { 
subsurfaceVolumetricAnisotropy }, - m_subsurfaceVolumetricAttenuationCoefficient { Vector3(1.0f, 1.0f, 1.0f) / (subsurfaceTransmittanceColor * subsurfaceMeasurementDistance) } + m_subsurfaceVolumetricAttenuationCoefficient { Vector3(-std::log(subsurfaceTransmittanceColor.x), + -std::log(subsurfaceTransmittanceColor.y), + -std::log(subsurfaceTransmittanceColor.z)) / subsurfaceMeasurementDistance } { updateCachedHash(); } @@ -935,25 +945,57 @@ struct RtSubsurfaceMaterial { void writeGPUData(unsigned char* data, std::size_t& offset) const { [[maybe_unused]] const std::size_t oldOffset = offset; - // Bytes 0-5 + uint32_t flags = (1u << 31); // Set bit 31 to 1 for subsurface scattering type + + // Bytes 0-1 + if (m_subsurfaceTransmittanceTextureIndex != kSurfaceMaterialInvalidTextureIndex) { + writeGPUHelperExplicit<2>(data, offset, m_subsurfaceTransmittanceTextureIndex); + flags |= SUBSURFACE_MATERIAL_FLAG_HAS_TRANSMITTANCE_TEXTURE; + } else { + // Note: We currently have enough space in SSS material, so no need to compress transmittance from f16v3 to f8v3. + // But it's an option if we run out of space in the future. 
+ + writeGPUPadding<2>(data, offset); // Note: Padding for unused space + } + + // Bytes 2-3 + if (m_subsurfaceThicknessTextureIndex != kSurfaceMaterialInvalidTextureIndex) { + writeGPUHelperExplicit<2>(data, offset, m_subsurfaceThicknessTextureIndex); + flags |= SUBSURFACE_MATERIAL_FLAG_HAS_THICKNESS_TEXTURE; + } else { + assert(m_subsurfaceMeasurementDistance <= FLOAT16_MAX); + writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceMeasurementDistance)); + } + + // Bytes 4-5 + if (m_subsurfaceSingleScatteringAlbedoTextureIndex != kSurfaceMaterialInvalidTextureIndex) { + writeGPUHelperExplicit<2>(data, offset, m_subsurfaceSingleScatteringAlbedoTextureIndex); + flags |= SUBSURFACE_MATERIAL_FLAG_HAS_SINGLE_SCATTERING_ALBEDO_TEXTURE; + } else { + // Note: We currently have enough space in SSS material, so no need to compress scattering-albedo from f16v3 to f8v3. + // But it's an option if we run out of space in the future. + + writeGPUPadding<2>(data, offset); // Note: Padding for unused space + } + + // Bytes 6-11 writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceVolumetricAttenuationCoefficient.x)); writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceVolumetricAttenuationCoefficient.y)); writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceVolumetricAttenuationCoefficient.z)); - // Bytes 6-7 - assert(m_subsurfaceMeasurementDistance <= FLOAT16_MAX); - writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceMeasurementDistance)); - - // Bytes 8-13 + // Bytes 12-17 writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceSingleScatteringAlbedo.x)); writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceSingleScatteringAlbedo.y)); writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceSingleScatteringAlbedo.z)); - // Bytes 14-15 + // Bytes 18-19 writeGPUHelper(data, offset, glm::packHalf1x16(m_subsurfaceVolumetricAnisotropy)); - // Padding 16-31 - writeGPUPadding<16>(data, offset); + // 8 Bytes padding (20-27) + 
writeGPUPadding<8>(data, offset); + + // Bytes 28-31 + writeGPUHelper(data, offset, flags); } bool operator==(const RtSubsurfaceMaterial& r) const { @@ -968,6 +1010,18 @@ struct RtSubsurfaceMaterial { return m_cachedHash; } + uint32_t getSubsurfaceTransmittanceTextureIndex() const { + return m_subsurfaceTransmittanceTextureIndex; + } + + uint32_t getSubsurfaceThicknessTextureIndex() const { + return m_subsurfaceThicknessTextureIndex; + } + + uint32_t getSubsurfaceSingleScatteringAlbedoTextureIndex() const { + return m_subsurfaceSingleScatteringAlbedoTextureIndex; + } + float getSubsurfaceMeasurementDistance() const { return m_subsurfaceMeasurementDistance; } @@ -985,17 +1039,39 @@ struct RtSubsurfaceMaterial { } private: - void updateCachedHash() { - XXH64_hash_t h = 0; - - h = XXH64(&m_subsurfaceTransmittanceColor, sizeof(m_subsurfaceTransmittanceColor), h); - h = XXH64(&m_subsurfaceMeasurementDistance, sizeof(m_subsurfaceMeasurementDistance), h); - h = XXH64(&m_subsurfaceSingleScatteringAlbedo, sizeof(m_subsurfaceSingleScatteringAlbedo), h); - h = XXH64(&m_subsurfaceVolumetricAnisotropy, sizeof(m_subsurfaceVolumetricAnisotropy), h); - h = XXH64(&m_subsurfaceVolumetricAttenuationCoefficient, sizeof(m_subsurfaceVolumetricAttenuationCoefficient), h); + struct HashStruct { + uint32_t m_subsurfaceTransmittanceTextureIndex; + uint32_t m_subsurfaceThicknessTextureIndex; + uint32_t m_subsurfaceSingleScatteringAlbedoTextureIndex; + Vector3 m_subsurfaceTransmittanceColor; + float m_subsurfaceMeasurementDistance; + Vector3 m_subsurfaceSingleScatteringAlbedo; + float m_subsurfaceVolumetricAnisotropy; + Vector3 m_subsurfaceVolumetricAttenuationCoefficient; + + XXH64_hash_t calculateHash() { + static_assert(sizeof(HashStruct) == sizeof(uint32_t) * 14); + return XXH3_64bits(this, sizeof(HashStruct)); + } + }; - m_cachedHash = h; - } + void updateCachedHash() { + HashStruct hashData = { + m_subsurfaceTransmittanceTextureIndex, + m_subsurfaceThicknessTextureIndex, + 
m_subsurfaceSingleScatteringAlbedoTextureIndex, + m_subsurfaceTransmittanceColor, + m_subsurfaceMeasurementDistance, + m_subsurfaceSingleScatteringAlbedo, + m_subsurfaceVolumetricAnisotropy, + m_subsurfaceVolumetricAttenuationCoefficient }; + m_cachedHash = hashData.calculateHash(); + } + + // Thin Opaque Textures Index + uint32_t m_subsurfaceTransmittanceTextureIndex; + uint32_t m_subsurfaceThicknessTextureIndex; + uint32_t m_subsurfaceSingleScatteringAlbedoTextureIndex; // Thin Opaque Properties Vector3 m_subsurfaceTransmittanceColor; @@ -1304,17 +1380,27 @@ struct LegacyMaterialData { operator OpaqueMaterialData() const { OpaqueMaterialData opaqueMat; opaqueMat.getAlbedoOpacityTexture() = getColorTexture(); + opaqueMat.getFilterMode() = lss::Mdl::Filter::vkToMdl(getSampler()->info().magFilter); + opaqueMat.getWrapModeU() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeU); + opaqueMat.getWrapModeV() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeV); return opaqueMat; } operator TranslucentMaterialData() const { - return TranslucentMaterialData(); + TranslucentMaterialData transluscentMat; + transluscentMat.getFilterMode() = lss::Mdl::Filter::vkToMdl(getSampler()->info().magFilter); + transluscentMat.getWrapModeU() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeU); + transluscentMat.getWrapModeV() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeV); + return transluscentMat; } operator RayPortalMaterialData() const { RayPortalMaterialData portalMat; portalMat.getMaskTexture() = getColorTexture(); portalMat.getMaskTexture2() = getColorTexture2(); + portalMat.getFilterMode() = lss::Mdl::Filter::vkToMdl(getSampler()->info().magFilter); + portalMat.getWrapModeU() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeU); + portalMat.getWrapModeV() = lss::Mdl::WrapMode::vkToMdl(getSampler()->info().addressModeV); return portalMat; } @@ -1371,10 +1457,16 @@ struct LegacyMaterialData { D3DMATERIAL9 
d3dMaterial = {}; bool isTextureFactorBlend = false; + void setHashOverride(XXH64_hash_t hash) { + m_cachedHash = hash; + } + private: friend class RtxContext; friend struct D3D9Rtx; friend class TerrainBaker; + friend class SceneManager; + friend struct RemixAPIPrivateAccessor; void updateCachedHash() { // Note: Currently only based on the color texture's data hash. This may have to be changed later to @@ -1554,6 +1646,42 @@ struct MaterialData { } } +#define POPULATE_SAMPLER_INFO(info, material) \ + info.magFilter = \ + lss::Mdl::Filter::mdlToVk(material.getFilterMode()); \ + info.minFilter = \ + lss::Mdl::Filter::mdlToVk(material.getFilterMode()); \ + info.addressModeU = \ + lss::Mdl::WrapMode::mdlToVk(material.getWrapModeU(), &info.borderColor); \ + info.addressModeV = \ + lss::Mdl::WrapMode::mdlToVk(material.getWrapModeV(), &info.borderColor); + + void populateSamplerInfo(DxvkSamplerCreateInfo& toPopulate) const { + switch (m_type) { + default: + assert(false); + [[fallthrough]]; + case MaterialDataType::Opaque: + POPULATE_SAMPLER_INFO(toPopulate, m_opaqueMaterialData); + break; + case MaterialDataType::Translucent: + POPULATE_SAMPLER_INFO(toPopulate, m_translucentMaterialData); + break; + case MaterialDataType::RayPortal: + POPULATE_SAMPLER_INFO(toPopulate, m_rayPortalMaterialData); + break; + } + } +#undef POPULATE_SAMPLER_INFO + + void setReplacement() { + m_isReplacement = true; + } + + bool isReplacement() const { + return m_isReplacement; + } + private: // Type-specific Material Data Information bool m_ignored = false; @@ -1565,6 +1693,8 @@ struct MaterialData { TranslucentMaterialData m_translucentMaterialData; RayPortalMaterialData m_rayPortalMaterialData; }; + + bool m_isReplacement = false; }; enum class HighlightColor { diff --git a/src/dxvk/rtx_render/rtx_nee_cache.cpp b/src/dxvk/rtx_render/rtx_nee_cache.cpp index 849a7e762..2f9c9cc2f 100644 --- a/src/dxvk/rtx_render/rtx_nee_cache.cpp +++ b/src/dxvk/rtx_render/rtx_nee_cache.cpp @@ -34,7 +34,6 @@ 
#include #include -#include namespace dxvk { @@ -61,28 +60,11 @@ namespace dxvk { RW_STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_SAMPLE) TEXTURE2D(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_THREAD_TASK) STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM) + STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_LAST_PRIMITIVE_ID_PREFIX_SUM) END_PARAMETER() }; PREWARM_SHADER_PIPELINE(UpdateNEECacheShader); - - - class UpdateNEETaskShader : public ManagedShader { - SHADER_SOURCE(UpdateNEETaskShader, VK_SHADER_STAGE_COMPUTE_BIT, update_nee_task) - - BINDLESS_ENABLED() - - BEGIN_PARAMETER() - COMMON_RAYTRACING_BINDINGS - RW_STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_NEE_CACHE) - RW_STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_TASK) - RW_STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_SAMPLE) - TEXTURE2D(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_THREAD_TASK) - STRUCTURED_BUFFER(UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM) - END_PARAMETER() - }; - - PREWARM_SHADER_PIPELINE(UpdateNEETaskShader); } NeeCachePass::NeeCachePass(dxvk::DxvkDevice* device) @@ -100,14 +82,16 @@ namespace dxvk { enableModeAfterFirstBounceCombo.getKey(&enableModeAfterFirstBounceObject()); ImGui::Checkbox("Enable Analytical Light", &enableAnalyticalLightObject()); ImGui::DragFloat("Specular Factor", &specularFactorObject(), 0.01f, 0.f, 20.f, "%.3f"); + ImGui::DragFloat("Learning Rate", &learningRateObject(), 0.01f, 0.f, 1.f, "%.3f"); ImGui::DragFloat("Uniform Sampling Probability", &uniformSamplingProbabilityObject(), 0.01f, 0.f, 1.f, "%.3f"); ImGui::DragFloat("Culling Threshold", &cullingThresholdObject(), 0.001f, 0.f, 1.f, "%.3f"); ImGui::DragFloat("Emissive Texture Sample Footprint Scale", &emissiveTextureSampleFootprintScaleObject(), 0.001f, 0.f, 20.f, "%.3f"); ImGui::DragFloat("Age Culling Speed", &ageCullingSpeedObject(), 0.001f, 0.0f, 0.99f, "%.3f"); - ImGui::DragFloat("Cache Range", &rangeObject(), 1.f, 0.1f, 10000000.0f, "%.3f"); + ImGui::DragFloat("Cell Resolution", 
&resolutionObject(), 0.01f, 0.01f, 100.0f, "%.3f"); + ImGui::DragFloat("Min Range", &minRangeObject(), 1.f, 0.1f, 10000.0f, "%.3f"); } - void NeeCachePass::setRaytraceArgs(RaytraceArgs& constants) const { + void NeeCachePass::setRaytraceArgs(RaytraceArgs& constants, bool resetHistory) const { constants.neeCacheArgs.enable = enable(); constants.neeCacheArgs.enableImportanceSampling = enableImportanceSampling(); constants.neeCacheArgs.enableMIS = enableMIS(); @@ -116,13 +100,15 @@ namespace dxvk { constants.neeCacheArgs.specularFactor = specularFactor(); constants.neeCacheArgs.uniformSamplingProbability = uniformSamplingProbability(); constants.neeCacheArgs.enableModeAfterFirstBounce = enableModeAfterFirstBounce(); - constants.neeCacheArgs.range = range() * RtxOptions::Get()->sceneScale(); constants.neeCacheArgs.emissiveTextureSampleFootprintScale = emissiveTextureSampleFootprintScale(); constants.neeCacheArgs.ageCullingSpeed = ageCullingSpeed(); constants.neeCacheArgs.cullingThreshold = cullingThreshold(); + constants.neeCacheArgs.learningRate = learningRate(); + constants.neeCacheArgs.resolution = resolution(); + constants.neeCacheArgs.minRange = minRange() * RtxOptions::Get()->sceneScale(); - static uvec2 oldResolution = constants.camera.resolution; - constants.neeCacheArgs.clearCache = oldResolution.x != constants.camera.resolution.x || oldResolution.y != constants.camera.resolution.y; + static uvec2 oldResolution {0, 0}; + constants.neeCacheArgs.clearCache = resetHistory || oldResolution.x != constants.camera.resolution.x || oldResolution.y != constants.camera.resolution.y; oldResolution = constants.camera.resolution; } @@ -133,24 +119,12 @@ namespace dxvk { const auto& numRaysExtent = rtOutput.m_compositeOutputExtent; VkExtent3D workgroups = util::computeBlockCount(numRaysExtent, VkExtent3D{ 16, 8, 1 }); - Rc primitiveIDPrefixSumBuffer = ctx->getSceneManager().getPrimitiveIDPrefixSumBuffer(); + Rc primitiveIDPrefixSumBuffer = 
ctx->getSceneManager().getCurrentFramePrimitiveIDPrefixSumBuffer(); + Rc lastPrimitiveIDPrefixSumBuffer = ctx->getSceneManager().getLastFramePrimitiveIDPrefixSumBuffer(); ScopedGpuProfileZone(ctx, "NEE Cache"); // Bind resources - { - ScopedGpuProfileZone(ctx, "UpdateNEETaskShader"); - ctx->bindCommonRayTracingResources(rtOutput); - ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_NEE_CACHE, DxvkBufferSlice(rtOutput.m_neeCache, 0, rtOutput.m_neeCache->info().size)); - ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_TASK, DxvkBufferSlice(rtOutput.m_neeCacheTask, 0, rtOutput.m_neeCacheTask->info().size)); - ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_SAMPLE, DxvkBufferSlice(rtOutput.m_neeCacheSample, 0, rtOutput.m_neeCacheSample->info().size)); - ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM, DxvkBufferSlice(primitiveIDPrefixSumBuffer, 0, primitiveIDPrefixSumBuffer->info().size)); - ctx->bindResourceView(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_THREAD_TASK, rtOutput.m_neeCacheThreadTask.view, nullptr); - - ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, UpdateNEETaskShader::getShader()); - ctx->dispatch(workgroups.width, workgroups.height, workgroups.depth); - } - { ScopedGpuProfileZone(ctx, "UpdateNEECacheShader"); ctx->bindCommonRayTracingResources(rtOutput); @@ -158,6 +132,7 @@ namespace dxvk { ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_TASK, DxvkBufferSlice(rtOutput.m_neeCacheTask, 0, rtOutput.m_neeCacheTask->info().size)); ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_SAMPLE, DxvkBufferSlice(rtOutput.m_neeCacheSample, 0, rtOutput.m_neeCacheSample->info().size)); ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM, DxvkBufferSlice(primitiveIDPrefixSumBuffer, 0, primitiveIDPrefixSumBuffer->info().size)); + ctx->bindResourceBuffer(UPDATE_NEE_CACHE_BINDING_LAST_PRIMITIVE_ID_PREFIX_SUM, DxvkBufferSlice(lastPrimitiveIDPrefixSumBuffer, 0, 
lastPrimitiveIDPrefixSumBuffer->info().size)); ctx->bindResourceView(UPDATE_NEE_CACHE_BINDING_NEE_CACHE_THREAD_TASK, rtOutput.m_neeCacheThreadTask.view, nullptr); ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, UpdateNEECacheShader::getShader()); diff --git a/src/dxvk/rtx_render/rtx_nee_cache.h b/src/dxvk/rtx_render/rtx_nee_cache.h index 4c8de5717..f4b99f821 100644 --- a/src/dxvk/rtx_render/rtx_nee_cache.h +++ b/src/dxvk/rtx_render/rtx_nee_cache.h @@ -45,7 +45,7 @@ namespace dxvk { void showImguiSettings(); - void setRaytraceArgs(RaytraceArgs& raytraceArgs) const; + void setRaytraceArgs(RaytraceArgs& raytraceArgs, bool resetHistory) const; RW_RTX_OPTION("rtx.neeCache", bool, enable, true, "[Experimental] Enable NEE cache. The integrator will perform NEE on emissive triangles, which usually have significant light contributions, stored in the cache."); RTX_OPTION("rtx.neeCache", bool, enableImportanceSampling, true, "Enable importance sampling."); @@ -55,9 +55,11 @@ namespace dxvk { RTX_OPTION("rtx.neeCache", NeeEnableMode, enableModeAfterFirstBounce, NeeEnableMode::SpecularOnly, "NEE Cache enable mode on a second and higher bounces. 0 means off, 1 means enabled for specular rays only, 2 means always enabled."); RTX_OPTION("rtx.neeCache", bool, enableAnalyticalLight, true, "Enable NEE Cache on analytical light."); RTX_OPTION("rtx.neeCache", float, specularFactor, 1.0, "Specular component factor."); + RTX_OPTION("rtx.neeCache", float, learningRate, 0.02, "Learning rate. Higher values makes the cache adapt to lighting changes more quickly."); RTX_OPTION("rtx.neeCache", float, uniformSamplingProbability, 0.1, "Uniform sampling probability."); - RTX_OPTION("rtx.neeCache", float, cullingThreshold, 0.001, "Culling threshold."); - RTX_OPTION("rtx.neeCache", float, range, 3000, "World space range."); + RTX_OPTION("rtx.neeCache", float, cullingThreshold, 0.01, "Culling threshold."); + RTX_OPTION("rtx.neeCache", float, resolution, 8.0, "Cell resolution. 
Higher values mean smaller cells."); + RTX_OPTION("rtx.neeCache", float, minRange, 400, "The range for lowest level cells."); RTX_OPTION("rtx.neeCache", float, emissiveTextureSampleFootprintScale, 1.0, "Emissive texture sample footprint scale."); RTX_OPTION("rtx.neeCache", float, ageCullingSpeed, 0.02, "This threshold determines culling speed of an old triangle. A triangle that is not detected for several frames will be deemed less important and culled quicker."); private: diff --git a/src/dxvk/rtx_render/rtx_nrd_settings.cpp b/src/dxvk/rtx_render/rtx_nrd_settings.cpp index a098403bb..4a5159246 100644 --- a/src/dxvk/rtx_render/rtx_nrd_settings.cpp +++ b/src/dxvk/rtx_render/rtx_nrd_settings.cpp @@ -27,7 +27,7 @@ namespace dxvk { ImGui::ComboWithKey methodCombo = ImGui::ComboWithKey( "Denoiser", - ImGui::ComboWithKey::ComboEntries{ { + ImGui::ComboWithKey::ComboEntries { { {nrd::Method::REBLUR_DIFFUSE_SPECULAR, "ReBLUR"}, {nrd::Method::RELAX_DIFFUSE_SPECULAR, "ReLAX"}, {nrd::Method::REFERENCE, "Reference"}, diff --git a/src/dxvk/rtx_render/rtx_opacity_micromap_manager.cpp b/src/dxvk/rtx_render/rtx_opacity_micromap_manager.cpp index cdd92877c..34cc340eb 100644 --- a/src/dxvk/rtx_render/rtx_opacity_micromap_manager.cpp +++ b/src/dxvk/rtx_render/rtx_opacity_micromap_manager.cpp @@ -1109,8 +1109,8 @@ namespace dxvk { *(ommIndex++) = i; } - ctx->updateBuffer(triangleArrayBuffer, 0, triangleArrayBufferSize, hostTriangleArrayBuffer.data(), true); - ctx->updateBuffer(triangleIndexBuffer, 0, triangleIndexBufferSize, hostTriangleIndexBuffer.data(), true); + ctx->writeToBuffer(triangleArrayBuffer, 0, triangleArrayBufferSize, hostTriangleArrayBuffer.data(), true); + ctx->writeToBuffer(triangleIndexBuffer, 0, triangleIndexBufferSize, hostTriangleIndexBuffer.data(), true); return OpacityMicromapManager::OmmResult::Success; } diff --git a/src/dxvk/rtx_render/rtx_option.cpp b/src/dxvk/rtx_render/rtx_option.cpp index 451153b78..8274b8bf2 100644 --- 
a/src/dxvk/rtx_render/rtx_option.cpp +++ b/src/dxvk/rtx_render/rtx_option.cpp @@ -186,46 +186,9 @@ namespace dxvk { std::string fullName = getFullName(); auto& value = valueList[(int) ValueType::Value]; - auto& defaultValue = valueList[(int) ValueType::DefaultValue]; if (changedOptionOnly) { - bool isChanged = false; - switch (type) { - case OptionType::Bool: - isChanged = (value.b != defaultValue.b); - break; - case OptionType::Int: - isChanged = (value.i != defaultValue.i); - break; - case OptionType::Float: - isChanged = (value.f != defaultValue.f); - break; - case OptionType::HashSet: - isChanged = (*value.hashSet != *defaultValue.hashSet); - break; - case OptionType::HashVector: - isChanged = (*value.hashVector != *defaultValue.hashVector); - break; - case OptionType::IntVector: - isChanged = (*value.intVector != *defaultValue.intVector); - break; - case OptionType::Vector2: - isChanged = (*value.v2 != *defaultValue.v2); - break; - case OptionType::Vector3: - isChanged = (*value.v3 != *defaultValue.v3); - break; - case OptionType::Vector2i: - isChanged = (*value.v2i != *defaultValue.v2i); - break; - case OptionType::String: - isChanged = (*value.string != *defaultValue.string); - break; - default: - break; - } - - if (!isChanged) { + if (isDefault()) { return; } } @@ -266,6 +229,48 @@ namespace dxvk { } } + bool RtxOptionImpl::isDefault() const { + auto& value = valueList[(int) ValueType::Value]; + auto& defaultValue = valueList[(int) ValueType::DefaultValue]; + + bool isDefault = false; + switch (type) { + case OptionType::Bool: + isDefault = (value.b == defaultValue.b); + break; + case OptionType::Int: + isDefault = (value.i == defaultValue.i); + break; + case OptionType::Float: + isDefault = (value.f == defaultValue.f); + break; + case OptionType::HashSet: + isDefault = (*value.hashSet == *defaultValue.hashSet); + break; + case OptionType::HashVector: + isDefault = (*value.hashVector == *defaultValue.hashVector); + break; + case OptionType::IntVector: + 
isDefault = (*value.intVector == *defaultValue.intVector); + break; + case OptionType::Vector2: + isDefault = (*value.v2 == *defaultValue.v2); + break; + case OptionType::Vector3: + isDefault = (*value.v3 == *defaultValue.v3); + break; + case OptionType::Vector2i: + isDefault = (*value.v2i == *defaultValue.v2i); + break; + case OptionType::String: + isDefault = (*value.string == *defaultValue.string); + break; + default: + break; + } + return isDefault; + } + void RtxOptionImpl::resetOption() { if (flags & (uint32_t) RtxOptionFlags::NoReset) return; diff --git a/src/dxvk/rtx_render/rtx_option.h b/src/dxvk/rtx_render/rtx_option.h index 9684ef362..66f247eda 100644 --- a/src/dxvk/rtx_render/rtx_option.h +++ b/src/dxvk/rtx_render/rtx_option.h @@ -104,6 +104,9 @@ namespace dxvk { void readOption(const Config& options, ValueType type); void writeOption(Config& options, bool changedOptionOnly); + + bool isDefault() const; + void resetOption(); static std::string getFullName(const std::string& category, const std::string& name) { diff --git a/src/dxvk/rtx_render/rtx_options.h b/src/dxvk/rtx_render/rtx_options.h index 81aa6d85a..6ab38999d 100644 --- a/src/dxvk/rtx_render/rtx_options.h +++ b/src/dxvk/rtx_render/rtx_options.h @@ -638,7 +638,8 @@ namespace dxvk { friend class RtxOptions; friend class ImGUI; - RTX_OPTION("rtx.subsurface", bool, enableThinOpaque, true, "Enable thin opaque material. The materials with th in opaque properties will fallback to normal opaque material."); + RTX_OPTION("rtx.subsurface", bool, enableThinOpaque, true, "Enable thin opaque material. The materials with thin opaque properties will fallback to normal opaque material."); + RTX_OPTION("rtx.subsurface", bool, enableTextureMaps, true, "Enable texture maps such as thickness map or scattering albedo map. 
The corresponding subsurface properties will fallback to per-material constants if this is disabled."); RTX_OPTION("rtx.subsurface", float, surfaceThicknessScale, 1.0f, "Scalar of the subsurface thickness."); }; diff --git a/src/dxvk/rtx_render/rtx_pathtracer_gbuffer.cpp b/src/dxvk/rtx_render/rtx_pathtracer_gbuffer.cpp index 1f349d380..a215614dc 100644 --- a/src/dxvk/rtx_render/rtx_pathtracer_gbuffer.cpp +++ b/src/dxvk/rtx_render/rtx_pathtracer_gbuffer.cpp @@ -61,6 +61,8 @@ namespace dxvk { BEGIN_PARAMETER() COMMON_RAYTRACING_BINDINGS + SAMPLER(GBUFFER_BINDING_LINEAR_WRAP_SAMPLER) + SAMPLER3D(GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT) RW_TEXTURE2D(GBUFFER_BINDING_SHARED_FLAGS_OUTPUT) @@ -70,8 +72,9 @@ namespace dxvk { RW_TEXTURE2D(GBUFFER_BINDING_SHARED_MATERIAL_DATA0_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_SHARED_MATERIAL_DATA1_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_OUTPUT) - RW_TEXTURE2D(GBUFFER_BINDING_DISPLACEMENT_TEXTURE_COORD_OUTPUT) + RW_TEXTURE2D(GBUFFER_BINDING_SHARED_TEXTURE_COORD_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_SHARED_SURFACE_INDEX_OUTPUT) + RW_TEXTURE2D(GBUFFER_BINDING_SHARED_SUBSURFACE_DATA_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_ATTENUATION_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_WORLD_SHADING_NORMAL_OUTPUT) @@ -89,6 +92,7 @@ namespace dxvk { RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_CONE_RADIUS_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_WORLD_POSITION_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_POSITION_ERROR_OUTPUT) + RW_TEXTURE2D(GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_SECONDARY_ATTENUATION_OUTPUT) RW_TEXTURE2D(GBUFFER_BINDING_SECONDARY_WORLD_SHADING_NORMAL_OUTPUT) @@ -173,17 +177,20 @@ namespace dxvk { ctx->bindCommonRayTracingResources(rtOutput); // Note: Clamp to edge used to avoid interpolation to black on the edges of the view. 
- Rc linearSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + Rc linearClampSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + Rc linearWrapSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_REPEAT); + + ctx->bindResourceSampler(GBUFFER_BINDING_LINEAR_WRAP_SAMPLER, linearWrapSampler); ctx->bindResourceView(GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, rtOutput.m_volumeFilteredRadiance.view, nullptr); - ctx->bindResourceSampler(GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, linearSampler); + ctx->bindResourceSampler(GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, linearClampSampler); ctx->bindResourceView(GBUFFER_BINDING_SKYMATTE, ctx->getResourceManager().getSkyMatte(ctx).view, nullptr); - ctx->bindResourceSampler(GBUFFER_BINDING_SKYMATTE, linearSampler); + ctx->bindResourceSampler(GBUFFER_BINDING_SKYMATTE, linearClampSampler); // Requires the probe too for PSRR/T miss ctx->bindResourceView(GBUFFER_BINDING_SKYPROBE, ctx->getResourceManager().getSkyProbe(ctx).view, nullptr); - ctx->bindResourceSampler(GBUFFER_BINDING_SKYPROBE, linearSampler); + ctx->bindResourceSampler(GBUFFER_BINDING_SKYPROBE, linearClampSampler); ctx->bindResourceView(GBUFFER_BINDING_SHARED_FLAGS_OUTPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SHARED_RADIANCE_RG_OUTPUT, rtOutput.m_sharedRadianceRG.view, nullptr); @@ -193,8 +200,9 @@ namespace dxvk { ctx->bindResourceView(GBUFFER_BINDING_SHARED_MATERIAL_DATA1_OUTPUT, rtOutput.m_sharedMaterialData1.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_OUTPUT, rtOutput.m_sharedMediumMaterialIndex.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SHARED_BIAS_CURRENT_COLOR_MASK_OUTPUT, 
rtOutput.m_sharedBiasCurrentColorMask.view(Resources::AccessType::Write), nullptr); - ctx->bindResourceView(GBUFFER_BINDING_DISPLACEMENT_TEXTURE_COORD_OUTPUT, rtOutput.m_displacementTextureCoord.view, nullptr); + ctx->bindResourceView(GBUFFER_BINDING_SHARED_TEXTURE_COORD_OUTPUT, rtOutput.m_sharedTextureCoord.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SHARED_SURFACE_INDEX_OUTPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(GBUFFER_BINDING_SHARED_SUBSURFACE_DATA_OUTPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_ATTENUATION_OUTPUT, rtOutput.m_primaryAttenuation.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_WORLD_SHADING_NORMAL_OUTPUT, rtOutput.m_primaryWorldShadingNormal.view, nullptr); @@ -215,6 +223,7 @@ namespace dxvk { ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_SURFACE_FLAGS_OUTPUT, rtOutput.m_primarySurfaceFlags.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_DISOCCLUSION_THRESHOLD_MIX_OUTPUT, rtOutput.m_primaryDisocclusionThresholdMix.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_DEPTH_OUTPUT, rtOutput.m_primaryDepth.view, nullptr); + ctx->bindResourceView(GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT, rtOutput.m_primaryObjectPicking.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SECONDARY_ATTENUATION_OUTPUT, rtOutput.m_secondaryAttenuation.view, nullptr); ctx->bindResourceView(GBUFFER_BINDING_SECONDARY_WORLD_SHADING_NORMAL_OUTPUT, rtOutput.m_secondaryWorldShadingNormal.view, nullptr); @@ -254,7 +263,7 @@ namespace dxvk { const bool serEnabled = RtxOptions::Get()->isShaderExecutionReorderingInPathtracerGbufferEnabled(); const bool ommEnabled = RtxOptions::Get()->getEnableOpacityMicromap(); - const bool includePortals = RtxOptions::Get()->rayPortalModelTextureHashes().size() > 0; + const bool includePortals = RtxOptions::Get()->rayPortalModelTextureHashes().size() > 0 || rtOutput.m_raytraceArgs.numActiveRayPortals > 0; 
GbufferPushConstants pushArgs = {}; pushArgs.isTransmissionPSR = 0; @@ -289,7 +298,6 @@ namespace dxvk { case RaytraceMode::RayQueryRayGen: { ScopedGpuProfileZone(ctx, "Primary Rays"); - const bool includePortals = RtxOptions::Get()->rayPortalModelTextureHashes().size() > 0; ctx->bindRaytracingPipelineShaders(getPipelineShaders(false, true, serEnabled, ommEnabled, includePortals)); ctx->traceRays(rayDims.width, rayDims.height, rayDims.depth); } diff --git a/src/dxvk/rtx_render/rtx_pathtracer_integrate_direct.cpp b/src/dxvk/rtx_render/rtx_pathtracer_integrate_direct.cpp index da5599b91..5262e66b9 100644 --- a/src/dxvk/rtx_render/rtx_pathtracer_integrate_direct.cpp +++ b/src/dxvk/rtx_render/rtx_pathtracer_integrate_direct.cpp @@ -50,8 +50,9 @@ namespace dxvk { TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_INTEGRATION_SURFACE_PDF_INPUT) TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA0_INPUT) TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA1_INPUT) - RW_TEXTURE2D(INTEGRATE_DIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) - RW_TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT) + TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_DIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT) TEXTURE2D(INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT) TEXTURE2D(INTEGRATE_DIRECT_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT) @@ -83,6 +84,7 @@ namespace dxvk { STRUCTURED_BUFFER(INTEGRATE_DIRECT_BINDING_NEE_CACHE) STRUCTURED_BUFFER(INTEGRATE_DIRECT_BINDING_NEE_CACHE_SAMPLE) + RW_STRUCTURED_BUFFER(INTEGRATE_DIRECT_BINDING_NEE_CACHE_TASK) RW_TEXTURE2D(INTEGRATE_DIRECT_BINDING_NEE_CACHE_THREAD_TASK) RW_TEXTURE2D(INTEGRATE_DIRECT_BINDING_INDIRECT_RAY_ORIGIN_DIRECTION_OUTPUT) @@ -123,8 +125,9 @@ namespace dxvk { ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_INTEGRATION_SURFACE_PDF_INPUT, rtOutput.m_sharedIntegrationSurfacePdf.view(Resources::AccessType::Read), 
nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA0_INPUT, rtOutput.m_sharedMaterialData0.view, nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA1_INPUT, rtOutput.m_sharedMaterialData1.view, nullptr); - ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT, rtOutput.m_displacementTextureCoord.view, nullptr); + ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT, rtOutput.m_sharedTextureCoord.view, nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT, rtOutput.m_primaryWorldShadingNormal.view, nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT, rtOutput.m_primaryPerceptualRoughness.view, nullptr); @@ -154,6 +157,7 @@ namespace dxvk { ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_PRIMARY_RTXDI_ILLUMINANCE_OUTPUT, rtOutput.getCurrentRtxdiIlluminance().view(Resources::AccessType::Write), nullptr); ctx->bindResourceBuffer(INTEGRATE_DIRECT_BINDING_NEE_CACHE, DxvkBufferSlice(rtOutput.m_neeCache, 0, rtOutput.m_neeCache->info().size)); ctx->bindResourceBuffer(INTEGRATE_DIRECT_BINDING_NEE_CACHE_SAMPLE, DxvkBufferSlice(rtOutput.m_neeCacheSample, 0, rtOutput.m_neeCacheSample->info().size)); + ctx->bindResourceBuffer(INTEGRATE_DIRECT_BINDING_NEE_CACHE_TASK, DxvkBufferSlice(rtOutput.m_neeCacheTask, 0, rtOutput.m_neeCacheTask->info().size)); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_NEE_CACHE_THREAD_TASK, rtOutput.m_neeCacheThreadTask.view, nullptr); ctx->bindResourceView(INTEGRATE_DIRECT_BINDING_INDIRECT_RAY_ORIGIN_DIRECTION_OUTPUT, rtOutput.m_indirectRayOriginDirection.view(Resources::AccessType::Write), nullptr); diff --git 
a/src/dxvk/rtx_render/rtx_pathtracer_integrate_indirect.cpp b/src/dxvk/rtx_render/rtx_pathtracer_integrate_indirect.cpp index 5824d92a8..9f7675155 100644 --- a/src/dxvk/rtx_render/rtx_pathtracer_integrate_indirect.cpp +++ b/src/dxvk/rtx_render/rtx_pathtracer_integrate_indirect.cpp @@ -65,12 +65,15 @@ namespace dxvk { BEGIN_PARAMETER() COMMON_RAYTRACING_BINDINGS + SAMPLER(INTEGRATE_BINDING_LINEAR_WRAP_SAMPLER) + SAMPLERCUBE(INTEGRATE_INDIRECT_BINDING_SKYPROBE) TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT) - RW_TEXTURE2D(INTEGRATE_INDIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) - RW_TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT) + TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT) TEXTURE2D(INTEGRATE_INDIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT) TEXTURE2D(INTEGRATE_INDIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT) @@ -99,6 +102,8 @@ namespace dxvk { STRUCTURED_BUFFER(INTEGRATE_INDIRECT_BINDING_NEE_CACHE) STRUCTURED_BUFFER(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_SAMPLE) + STRUCTURED_BUFFER(INTEGRATE_INDIRECT_BINDING_PRIMITIVE_ID_PREFIX_SUM) + RW_STRUCTURED_BUFFER(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_TASK) RW_TEXTURE2D(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_THREAD_TASK) RW_TEXTURE2D(INTEGRATE_INSTRUMENTATION) @@ -129,8 +134,9 @@ namespace dxvk { TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA0_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA1_INPUT) - RW_TEXTURE2D(INTEGRATE_NEE_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) - RW_TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_TEXTURE_COORD_INPUT) + TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT) + 
TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_SUBSURFACE_DATA_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT) @@ -157,6 +163,7 @@ namespace dxvk { RW_STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_NEE_CACHE_TASK) RW_STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_NEE_CACHE_SAMPLE) RW_TEXTURE2D(INTEGRATE_NEE_BINDING_NEE_CACHE_THREAD_TASK) + STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_PRIMITIVE_ID_PREFIX_SUM) END_PARAMETER() }; @@ -173,8 +180,9 @@ namespace dxvk { TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA0_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA1_INPUT) - RW_TEXTURE2D(INTEGRATE_NEE_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) + RW_TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_TEXTURE_COORD_INPUT) RW_TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(INTEGRATE_NEE_BINDING_SHARED_SUBSURFACE_DATA_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT) TEXTURE2D(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT) @@ -201,6 +209,7 @@ namespace dxvk { RW_STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_NEE_CACHE_TASK) RW_STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_NEE_CACHE_SAMPLE) RW_TEXTURE2D(INTEGRATE_NEE_BINDING_NEE_CACHE_THREAD_TASK) + STRUCTURED_BUFFER(INTEGRATE_NEE_BINDING_PRIMITIVE_ID_PREFIX_SUM) END_PARAMETER() }; @@ -243,17 +252,22 @@ namespace dxvk { // Bind resources // Note: Clamp to edge used to avoid interpolation to black on the edges of the view. 
- Rc linearSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + Rc linearClampSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE); + Rc linearWrapSampler = ctx->getResourceManager().getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_SAMPLER_ADDRESS_MODE_REPEAT); + Rc primitiveIDPrefixSumBuffer = ctx->getSceneManager().getCurrentFramePrimitiveIDPrefixSumBuffer(); ctx->bindCommonRayTracingResources(rtOutput); + ctx->bindResourceSampler(INTEGRATE_BINDING_LINEAR_WRAP_SAMPLER, linearWrapSampler); + ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SKYPROBE, ctx->getResourceManager().getSkyProbe(ctx).view, nullptr); - ctx->bindResourceSampler(INTEGRATE_INDIRECT_BINDING_SKYPROBE, linearSampler); + ctx->bindResourceSampler(INTEGRATE_INDIRECT_BINDING_SKYPROBE, linearClampSampler); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT, rtOutput.m_sharedMediumMaterialIndex.view, nullptr); - ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT, rtOutput.m_displacementTextureCoord.view, nullptr); + ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT, rtOutput.m_sharedTextureCoord.view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT, rtOutput.m_primaryConeRadius.view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT, rtOutput.m_secondaryConeRadius.view(Resources::AccessType::Read), nullptr); 
@@ -265,7 +279,7 @@ namespace dxvk { ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_PREV_WORLD_POSITION_INPUT, rtOutput.getPreviousPrimaryWorldPositionWorldTriangleNormal().view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, rtOutput.m_volumeFilteredRadiance.view, nullptr); - ctx->bindResourceSampler(INTEGRATE_INDIRECT_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, linearSampler); + ctx->bindResourceSampler(INTEGRATE_INDIRECT_BINDING_VOLUME_FILTERED_RADIANCE_INPUT, linearClampSampler); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_PRIMARY_HIT_DISTANCE_INPUT, rtOutput.m_primaryHitDistance.view, nullptr); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_SECONDARY_HIT_DISTANCE_INPUT, rtOutput.m_secondaryHitDistance.view, nullptr); @@ -285,6 +299,8 @@ namespace dxvk { ctx->bindResourceBuffer(INTEGRATE_INDIRECT_BINDING_NEE_CACHE, DxvkBufferSlice(rtOutput.m_neeCache, 0, rtOutput.m_neeCache->info().size)); ctx->bindResourceBuffer(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_SAMPLE, DxvkBufferSlice(rtOutput.m_neeCacheSample, 0, rtOutput.m_neeCacheSample->info().size)); + ctx->bindResourceBuffer(INTEGRATE_INDIRECT_BINDING_PRIMITIVE_ID_PREFIX_SUM, DxvkBufferSlice(primitiveIDPrefixSumBuffer, 0, primitiveIDPrefixSumBuffer->info().size)); + ctx->bindResourceBuffer(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_TASK, DxvkBufferSlice(rtOutput.m_neeCacheTask, 0, rtOutput.m_neeCacheTask->info().size)); ctx->bindResourceView(INTEGRATE_INDIRECT_BINDING_NEE_CACHE_THREAD_TASK, rtOutput.m_neeCacheThreadTask.view, nullptr); // Aliased resources @@ -300,7 +316,7 @@ namespace dxvk { const bool serEnabled = RtxOptions::Get()->isShaderExecutionReorderingInPathtracerIntegrateIndirectEnabled(); const bool ommEnabled = RtxOptions::Get()->getEnableOpacityMicromap(); - const bool includePortals = RtxOptions::Get()->rayPortalModelTextureHashes().size() > 0; + const bool includePortals = RtxOptions::Get()->rayPortalModelTextureHashes().size() > 0 || 
rtOutput.m_raytraceArgs.numActiveRayPortals > 0; const bool pomEnabled = rtOutput.m_raytraceArgs.pomMode != DisplacementMode::Off && RtxOptions::Displacement::enableIndirectHit(); // Trace indirect ray @@ -331,6 +347,7 @@ namespace dxvk { // Construct restir input sample const auto rayDims = rtOutput.m_compositeOutputExtent; VkExtent3D workgroups = util::computeBlockCount(rayDims, VkExtent3D { 16, 8, 1 }); + Rc primitiveIDPrefixSumBuffer = ctx->getSceneManager().getCurrentFramePrimitiveIDPrefixSumBuffer(); ScopedGpuProfileZone(ctx, "Integrate NEE"); ctx->bindCommonRayTracingResources(rtOutput); @@ -338,8 +355,9 @@ namespace dxvk { ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA0_INPUT, rtOutput.m_sharedMaterialData0.view, nullptr); ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA1_INPUT, rtOutput.m_sharedMaterialData1.view, nullptr); - ctx->bindResourceView(INTEGRATE_NEE_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT, rtOutput.m_displacementTextureCoord.view, nullptr); + ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_TEXTURE_COORD_INPUT, rtOutput.m_sharedTextureCoord.view, nullptr); ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(INTEGRATE_NEE_BINDING_SHARED_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT, rtOutput.m_primaryWorldShadingNormal.view, nullptr); ctx->bindResourceView(INTEGRATE_NEE_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT, rtOutput.m_primaryWorldInterpolatedNormal.view, nullptr); @@ -366,13 +384,15 @@ namespace dxvk { ctx->bindResourceBuffer(INTEGRATE_NEE_BINDING_NEE_CACHE_TASK, DxvkBufferSlice(rtOutput.m_neeCacheTask, 0, rtOutput.m_neeCacheTask->info().size)); 
ctx->bindResourceBuffer(INTEGRATE_NEE_BINDING_NEE_CACHE_SAMPLE, DxvkBufferSlice(rtOutput.m_neeCacheSample, 0, rtOutput.m_neeCacheSample->info().size)); ctx->bindResourceView(INTEGRATE_NEE_BINDING_NEE_CACHE_THREAD_TASK, rtOutput.m_neeCacheThreadTask.view, nullptr); + ctx->bindResourceBuffer(INTEGRATE_NEE_BINDING_PRIMITIVE_ID_PREFIX_SUM, DxvkBufferSlice(primitiveIDPrefixSumBuffer, 0, primitiveIDPrefixSumBuffer->info().size)); ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, IntegrateNEEShader::getShader()); ctx->dispatch(workgroups.width, workgroups.height, workgroups.depth); // Visualize the nee cache when debug view is chosen. uint32_t debugViewIndex = ctx->getCommonObjects()->metaDebugView().debugViewIdx(); - if (debugViewIndex == DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM || debugViewIndex == DEBUG_VIEW_NEE_CACHE_HISTOGRAM) + if (debugViewIndex == DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM || debugViewIndex == DEBUG_VIEW_NEE_CACHE_HISTOGRAM || + debugViewIndex == DEBUG_VIEW_NEE_CACHE_ACCUMULATE_MAP || debugViewIndex == DEBUG_VIEW_NEE_CACHE_HASH_MAP) { ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, VisualizeNEEShader::getShader()); ctx->dispatch(workgroups.width, workgroups.height, workgroups.depth); diff --git a/src/dxvk/rtx_render/rtx_ray_portal_manager.cpp b/src/dxvk/rtx_render/rtx_ray_portal_manager.cpp index 5c7e42520..6e8f88535 100644 --- a/src/dxvk/rtx_render/rtx_ray_portal_manager.cpp +++ b/src/dxvk/rtx_render/rtx_ray_portal_manager.cpp @@ -99,8 +99,8 @@ namespace dxvk { return; } - // TODO: ONLY HANDLING 16 bit indices - if (originalGeometryData.indexBuffer.indexType() != VK_INDEX_TYPE_UINT16) + // Portals must be simple plane like objects, and so have 6 or less indices (two triangles) + if (originalGeometryData.indexCount > 6) return; const GeometryBufferData bufferData(originalGeometryData); @@ -117,8 +117,6 @@ namespace dxvk { // Make sure that the geometry matches our expected pattern, which is 1 quad as a triangle strip // Note: Portal (at least our modified version 
of it) has 4 vertices for the Portal object, each of which represents a corner. constexpr uint32_t indicesPerQuad = 4; - if (originalGeometryData.indexCount != indicesPerQuad || originalGeometryData.topology != VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP) - return; // Calculate world space vertices of the Ray Portal @@ -127,8 +125,15 @@ namespace dxvk { Vector3 maxAbsVertexWorldCoords = Vector3(0.f); - for (size_t idx = 0; idx < indicesPerQuad; ++idx) { - const uint16_t currentIndex = bufferData.getIndex(idx); + const bool indices16bit = (originalGeometryData.indexBuffer.indexType() == VK_INDEX_TYPE_UINT16); + + std::unordered_set uniqueIndices; + for (size_t idx = 0; idx < originalGeometryData.indexCount; ++idx) { + const uint32_t currentIndex = indices16bit ? bufferData.getIndex(idx) : bufferData.getIndex32(idx); + if (uniqueIndices.find(currentIndex) != uniqueIndices.end()) { + continue; + } + // Note: This may not be "model" coordinates as many games like to pre-transform the positions into worldspace // to perhaps avoid needing a world matrix in legacy // API implementations where it may have had a more significant cost to apply. 
@@ -137,12 +142,18 @@ namespace dxvk { const Vector3 currentWorldPosition((objectToWorld * currentPosition).xyz()); centroid += currentWorldPosition; - worldVertices[idx] = currentWorldPosition; + worldVertices[uniqueIndices.size()] = currentWorldPosition; for (uint32_t i = 0; i < 3; i++) maxAbsVertexWorldCoords[i] = std::max(abs(currentWorldPosition[i]), maxAbsVertexWorldCoords[i]); + + uniqueIndices.insert(currentIndex); } + // Not enough unique vertices to extract a Portal + if (uniqueIndices.size() < 3) + return; + centroid /= static_cast(indicesPerQuad); // Todo: Calculate relevant projection axes in the future from the world space coordinates diff --git a/src/dxvk/rtx_render/rtx_remix_api.cpp b/src/dxvk/rtx_render/rtx_remix_api.cpp new file mode 100644 index 000000000..0ed474bf6 --- /dev/null +++ b/src/dxvk/rtx_render/rtx_remix_api.cpp @@ -0,0 +1,1127 @@ +/* +* Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +*/ + +#define RTX_REMIX_PNEXT_CHECK_STRUCTS + +#include "rtx_asset_data_manager.h" +#include "rtx_asset_replacer.h" +#include "rtx_light_manager.h" +#include "rtx_option.h" + +#include +#include "rtx_remix_pnext.h" + +#include "../dxvk_image.h" + +#include "../../util/util_math.h" + +#include "../../d3d9/d3d9_swapchain.h" + +#include + +namespace dxvk { + HRESULT CreateD3D9( + bool Extended, + IDirect3D9Ex** ppDirect3D9Ex, + bool WithExternalSwapchain); + + extern bool g_allowSrgbConversionForOutput; +} + +namespace dxvk { + // Because DrawCallState/LegacyMaterialData hide needed fields as private + struct RemixAPIPrivateAccessor { + static ExternalDrawState toRtDrawState(const remixapi_InstanceInfo& info); + }; +} + +namespace { + IDirect3D9Ex* s_dxvkD3D9 { nullptr }; + dxvk::D3D9DeviceEx* s_dxvkDevice { nullptr }; + dxvk::mutex s_mutex {}; + + + dxvk::D3D9DeviceEx* tryAsDxvk() { + return s_dxvkDevice; + } + + + // from rtx_mod_usd.cpp + XXH64_hash_t hack_getNextGeomHash() { + static uint64_t s_id = UINT64_MAX; + std::lock_guard lock { s_mutex }; + --s_id; + return XXH64(&s_id, sizeof(s_id), 0); + } + + + template + size_t sizeInBytes(const T* values, size_t count) { + return sizeof(T) * count; + } + + + void sanitizeConfigs() { + // Disable fallback light + const_cast(dxvk::LightManager::fallbackLightMode()) = dxvk::LightManager::FallbackLightMode::Never; + } + + namespace convert { + using namespace dxvk; + + std::string tostr(const remixapi_MaterialHandle& h) { + static_assert(sizeof h == sizeof uint64_t); + return std::to_string(reinterpret_cast(h)); + } + + Matrix4 tomat4(const remixapi_Transform& transform) { + const auto& m = transform.matrix; + return Matrix4 { + m[0][0], m[1][0], m[2][0], 
0.f, + m[0][1], m[1][1], m[2][1], 0.f, + m[0][2], m[1][2], m[2][2], 0.f, + m[0][3], m[1][3], m[2][3], 1.f + }; + } + + Vector3 tovec3(const remixapi_Float3D& v) { + return { v.x, v.y, v.z }; + } + + Vector3d tovec3d(const remixapi_Float3D& v) { + return { v.x, v.y, v.z }; + } + + constexpr bool tobool(remixapi_Bool b) { + return !!b; + } + + std::filesystem::path topath(remixapi_Path p) { + if (!p) { + return {}; + } + return p; + } + + // -- + + struct PreloadSource { + std::filesystem::path albedoTexture; + std::filesystem::path normalTexture; + std::filesystem::path tangentTexture; + std::filesystem::path emissiveTexture; + std::filesystem::path transmittanceTexture; + std::filesystem::path roughnessTexture; + std::filesystem::path metallicTexture; + std::filesystem::path heightTexture; + std::filesystem::path subsurfaceTransmittanceTexture; + std::filesystem::path subsurfaceThicknessTexture; + std::filesystem::path subsurfaceSingleScatteringAlbedoTexture; + }; + + PreloadSource makePreloadSource(const remixapi_MaterialInfo& info) { + // TODO: C++20 designated initializers + if (auto extOpaque = pnext::find(&info)) { + auto extSubsurface = pnext::find(&info); + return PreloadSource { + topath(info.albedoTexture), // albedoTexture; + topath(info.normalTexture), // normalTexture; + topath(info.tangentTexture), // tangentTexture; + topath(info.emissiveTexture), // emissiveTexture; + {}, // transmittanceTexture; + topath(extOpaque->roughnessTexture), // roughnessTexture; + topath(extOpaque->metallicTexture), // metallicTexture; + topath(extOpaque->heightTexture), // heightTexture; + topath(extSubsurface ? extSubsurface->subsurfaceTransmittanceTexture : nullptr), // subsurfaceTransmittanceTexture; + topath(extSubsurface ? extSubsurface->subsurfaceThicknessTexture : nullptr), // subsurfaceThicknessTexture; + topath(extSubsurface ?
extSubsurface->subsurfaceSingleScatteringAlbedoTexture : nullptr), // subsurfaceSingleScatteringAlbedoTexture; + }; + } + if (auto extTranslucent = pnext::find(&info)) { + return PreloadSource { + topath(info.albedoTexture), // albedoTexture; + topath(info.normalTexture), // normalTexture; + topath(info.tangentTexture), // tangentTexture; + topath(info.emissiveTexture), // emissiveTexture; + topath(extTranslucent->transmittanceTexture), // transmittanceTexture; + {}, // roughnessTexture; + {}, // metallicTexture; + {}, // heightTexture; + {}, // subsurfaceTransmittanceTexture; + {}, // subsurfaceThicknessTexture; + {}, // subsurfaceSingleScatteringAlbedoTexture; + }; + } + if (auto extPortal = pnext::find(&info)) { + return PreloadSource { + topath(info.albedoTexture), // albedoTexture; + topath(info.normalTexture), // normalTexture; + topath(info.tangentTexture), // tangentTexture; + topath(info.emissiveTexture), // emissiveTexture; + {}, // transmittanceTexture; + {}, // roughnessTexture; + {}, // metallicTexture; + {}, // heightTexture; + {}, // subsurfaceTransmittanceTexture; + {}, // subsurfaceThicknessTexture; + {}, // subsurfaceSingleScatteringAlbedoTexture; + }; + } + return {}; + } + + MaterialData toRtMaterialFinalized(dxvk::DxvkContext& ctx, const MaterialData& materialWithoutPreload, const PreloadSource& preload) { + auto preloadTexture = [&ctx](const std::filesystem::path& path)->TextureRef { + if (path.empty()) { + return {}; + } + auto assetData = AssetDataManager::get().findAsset(path.string().c_str()); + if (assetData == nullptr) { + return {}; + } + auto uploadedTexture = ctx.getCommonObjects()->getTextureManager() + .preloadTextureAsset(assetData, dxvk::ColorSpace::AUTO, &ctx, false); + return TextureRef { uploadedTexture }; + }; + + switch (materialWithoutPreload.getType()) { + case MaterialDataType::Opaque: + { + const auto& src = materialWithoutPreload.getOpaqueMaterialData(); + return MaterialData { OpaqueMaterialData{ +
preloadTexture(preload.albedoTexture), + preloadTexture(preload.normalTexture), + preloadTexture(preload.tangentTexture), + preloadTexture(preload.heightTexture), + preloadTexture(preload.roughnessTexture), + preloadTexture(preload.metallicTexture), + preloadTexture(preload.emissiveTexture), + preloadTexture(preload.subsurfaceTransmittanceTexture), + preloadTexture(preload.subsurfaceThicknessTexture), + preloadTexture(preload.subsurfaceSingleScatteringAlbedoTexture), + src.getAnisotropyConstant(), + src.getEmissiveIntensity(), + src.getAlbedoConstant(), + src.getOpacityConstant(), + src.getRoughnessConstant(), + src.getMetallicConstant(), + src.getEmissiveColorConstant(), + src.getEnableEmission(), + src.getSpriteSheetRows(), + src.getSpriteSheetCols(), + src.getSpriteSheetFPS(), + src.getEnableThinFilm(), + src.getAlphaIsThinFilmThickness(), + src.getThinFilmThicknessConstant(), + src.getUseLegacyAlphaState(), + src.getBlendEnabled(), + src.getBlendType(), + src.getInvertedBlend(), + src.getAlphaTestType(), + src.getAlphaTestReferenceValue(), + src.getDisplaceIn(), + src.getSubsurfaceTransmittanceColor(), + src.getSubsurfaceMeasurementDistance(), + src.getSubsurfaceSingleScatteringAlbedo(), + src.getSubsurfaceVolumetricAnisotropy(), + src.getFilterMode(), + src.getWrapModeU(), + src.getWrapModeV() + } }; + } + case MaterialDataType::Translucent: + { + const auto& src = materialWithoutPreload.getTranslucentMaterialData(); + return MaterialData { TranslucentMaterialData { + preloadTexture(preload.normalTexture), + preloadTexture(preload.transmittanceTexture), + preloadTexture(preload.emissiveTexture), + src.getRefractiveIndex(), + src.getTransmittanceColor(), + src.getTransmittanceMeasurementDistance(), + src.getEnableEmission(), + src.getEmissiveIntensity(), + src.getEmissiveColorConstant(), + src.getSpriteSheetRows(), + src.getSpriteSheetCols(), + src.getSpriteSheetFPS(), + src.getEnableThinWalled(), + src.getThinWallThickness(), + src.getEnableDiffuseLayer(), + 
src.getFilterMode(), + src.getWrapModeU(), + src.getWrapModeV() + } }; + } + case MaterialDataType::RayPortal: + { + const auto& src = materialWithoutPreload.getRayPortalMaterialData(); + return MaterialData { RayPortalMaterialData { + preloadTexture(preload.emissiveTexture), + {}, // unused + src.getRayPortalIndex(), + src.getSpriteSheetRows(), + src.getSpriteSheetCols(), + src.getSpriteSheetFPS(), + src.getRotationSpeed(), + src.getEnableEmission(), + src.getEmissiveIntensity(), + src.getFilterMode(), + src.getWrapModeU(), + src.getWrapModeV() + } }; + } + case MaterialDataType::Legacy: + default: assert(0); return materialWithoutPreload; + } + } + + MaterialData toRtMaterialWithoutTexturePreload(const remixapi_MaterialInfo& info) { + if (auto extOpaque = pnext::find(&info)) { + auto extSubsurface = pnext::find(&info); + return MaterialData { OpaqueMaterialData { + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + extOpaque->anisotropy, + info.emissiveIntensity, + tovec3(extOpaque->albedoConstant), + extOpaque->opacityConstant, + extOpaque->roughnessConstant, + extOpaque->metallicConstant, + tovec3(info.emissiveColorConstant), + info.emissiveIntensity > 0.f, + info.spriteSheetRow, + info.spriteSheetCol, + info.spriteSheetFps, + tobool(extOpaque->thinFilmThickness_hasvalue), + tobool(extOpaque->alphaIsThinFilmThickness), + extOpaque->thinFilmThickness_hasvalue ? extOpaque->thinFilmThickness_value : 200.f, // default OpaqueMaterial::ThinFilmThicknessConstant + tobool(extOpaque->useDrawCallAlphaState), // OpaqueMaterial::UseLegacyAlphaState + tobool(extOpaque->blendType_hasvalue), + extOpaque->blendType_hasvalue ? static_cast(extOpaque->blendType_value) : BlendType::kAlpha, // default OpaqueMaterial::BlendType + tobool(extOpaque->invertedBlend), + static_cast(extOpaque->alphaTestType), + extOpaque->alphaReferenceValue, + extOpaque->heightTextureStrength, // displaceIn + extSubsurface ? 
tovec3(extSubsurface->subsurfaceTransmittanceColor) : Vector3{ 0.5f, 0.5f, 0.5f }, + extSubsurface ? extSubsurface->subsurfaceMeasurementDistance : 0.f, + extSubsurface ? tovec3(extSubsurface->subsurfaceSingleScatteringAlbedo) : Vector3{ 0.5f, 0.5f, 0.5f }, + extSubsurface ? extSubsurface->subsurfaceVolumetricAnisotropy : 0.f, + info.filterMode, + info.wrapModeU, + info.wrapModeV, + } }; + } + if (auto extTranslucent = pnext::find(&info)) { + return MaterialData { TranslucentMaterialData { + {}, + {}, + {}, + extTranslucent->refractiveIndex, + tovec3(extTranslucent->transmittanceColor), + extTranslucent->transmittanceMeasurementDistance, + info.emissiveIntensity > 0.f, + info.emissiveIntensity, + tovec3(info.emissiveColorConstant), + info.spriteSheetRow, + info.spriteSheetCol, + info.spriteSheetFps, + tobool(extTranslucent->thinWallThickness_hasvalue), + extTranslucent->thinWallThickness_hasvalue ? extTranslucent->thinWallThickness_value : 0.001f, // default TranslucentMaterial::ThinWallThickness + tobool(extTranslucent->useDiffuseLayer), + info.filterMode, + info.wrapModeU, + info.wrapModeV, + } }; + } + if (auto extPortal = pnext::find(&info)) { + return MaterialData { RayPortalMaterialData { + {}, + {}, // unused + extPortal->rayPortalIndex, + info.spriteSheetRow, + info.spriteSheetCol, + info.spriteSheetFps, + extPortal->rotationSpeed, + info.emissiveIntensity > 0.f, + info.emissiveIntensity, + info.filterMode, + info.wrapModeU, + info.wrapModeV, + } }; + } + + assert(0); + return MaterialData { LegacyMaterialData {} }; + } + + // -- + CameraType::Enum toRtCameraType(remixapi_CameraType from) { + switch (from) { + case REMIXAPI_CAMERA_TYPE_WORLD: return CameraType::Main; + case REMIXAPI_CAMERA_TYPE_VIEW_MODEL: return CameraType::ViewModel; + case REMIXAPI_CAMERA_TYPE_SKY: return CameraType::Sky; + default: assert(0); return CameraType::Main; + } + } + + struct ExternalCameraInfo { + CameraType::Enum type {}; + Matrix4 worldToView {}; + Matrix4 viewToProjection 
{}; + }; + + ExternalCameraInfo toRtCamera(const remixapi_CameraInfo& info) { + if (auto params = pnext::find(&info)) { + auto result = ExternalCameraInfo { + toRtCameraType(info.type), + }; + { + const auto newViewToWorld = Matrix4d { + { normalize(tovec3d(params->right)), 0.0 }, + { normalize(tovec3d(params->up)), 0.0 }, + { normalize(tovec3d(params->forward)), 0.0 }, + { tovec3d(params->position), 1.0 }, + }; + result.worldToView = inverse(newViewToWorld); + } + { + constexpr bool isLhs = true; + auto proj = float4x4 {}; + proj.SetupByHalfFovy( + DegToRad(params->fovYInDegrees) / 2, + params->aspect, + params->nearPlane, + params->farPlane, + isLhs ? PROJ_LEFT_HANDED : 0); + static_assert(sizeof result.viewToProjection == sizeof proj); + memcpy(&result.viewToProjection, &proj, sizeof float4x4); + } + return result; + } + return ExternalCameraInfo { + toRtCameraType(info.type), + Matrix4 { info.view }, + Matrix4 { info.projection }, + }; + } + + // -- + + RtLightShaping toRtLightShaping(const remixapi_LightInfoLightShaping* info) { + if (info) { + return RtLightShaping { + true, + tovec3(info->primaryAxis), + std::cos(DegToRad(info->coneAngleDegrees)), + info->coneSoftness, + info->focusExponent, + }; + } + return RtLightShaping {}; + } + + RtLight toRtLight(const remixapi_LightInfo& info) { + if (auto src = pnext::find(&info)) { + return RtSphereLight { + tovec3(src->position), + tovec3(info.radiance), + src->radius, + toRtLightShaping(src->shaping_hasvalue ? &src->shaping_value : nullptr), + }; + } + if (auto src = pnext::find(&info)) { + return RtRectLight { + tovec3(src->position), + {src->xSize, src->ySize}, + tovec3(src->xAxis), + tovec3(src->yAxis), + tovec3(info.radiance), + toRtLightShaping(src->shaping_hasvalue ? 
&src->shaping_value : nullptr), + }; + } + if (auto src = pnext::find(&info)) { + return RtDiskLight { + tovec3(src->position), + {src->xRadius, src->yRadius}, + tovec3(src->xAxis), + tovec3(src->yAxis), + tovec3(info.radiance), + toRtLightShaping(src->shaping_hasvalue ? &src->shaping_value : nullptr), + }; + } + if (auto src = pnext::find(&info)) { + return RtCylinderLight { + tovec3(src->position), + src->radius, + tovec3(src->axis), + src->axisLength, + tovec3(info.radiance), + }; + } + if (auto src = pnext::find(&info)) { + return RtDistantLight { + tovec3(src->direction), + DegToRad(src->angularDiameterDegrees * 0.5f), + tovec3(info.radiance), + }; + } + return RtLight {}; + } + + // -- + + CameraType::Enum categoryToCameraType(remixapi_InstanceCategoryFlags flags) { + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_SKY) { + return CameraType::Sky; + } + return CameraType::Main; + } + + CategoryFlags toRtCategories(remixapi_InstanceCategoryFlags flags) { + CategoryFlags result { 0 }; + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_WORLD_UI ){ result.set(InstanceCategories::WorldUI ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_WORLD_MATTE ){ result.set(InstanceCategories::WorldMatte ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_SKY ){ result.set(InstanceCategories::Sky ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE ){ result.set(InstanceCategories::Ignore ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_LIGHTS ){ result.set(InstanceCategories::IgnoreLights ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_ANTI_CULLING ){ result.set(InstanceCategories::IgnoreAntiCulling ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_MOTION_BLUR ){ result.set(InstanceCategories::IgnoreMotionBlur ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_IGNORE_OPACITY_MICROMAP ){ result.set(InstanceCategories::IgnoreOpacityMicromap ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_HIDDEN ){ result.set(InstanceCategories::Hidden ); } + if (flags & 
REMIXAPI_INSTANCE_CATEGORY_BIT_PARTICLE ){ result.set(InstanceCategories::Particle ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_BEAM ){ result.set(InstanceCategories::Beam ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_STATIC ){ result.set(InstanceCategories::DecalStatic ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_DYNAMIC ){ result.set(InstanceCategories::DecalDynamic ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_SINGLE_OFFSET ){ result.set(InstanceCategories::DecalSingleOffset ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_DECAL_NO_OFFSET ){ result.set(InstanceCategories::DecalNoOffset ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_ALPHA_BLEND_TO_CUTOUT ){ result.set(InstanceCategories::AlphaBlendToCutout ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_TERRAIN ){ result.set(InstanceCategories::Terrain ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_ANIMATED_WATER ){ result.set(InstanceCategories::AnimatedWater ); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_THIRD_PERSON_PLAYER_MODEL){ result.set(InstanceCategories::ThirdPersonPlayerModel); } + if (flags & REMIXAPI_INSTANCE_CATEGORY_BIT_THIRD_PERSON_PLAYER_BODY ){ result.set(InstanceCategories::ThirdPersonPlayerBody ); } + return result; + } + + ExternalDrawState toRtDrawState(const remixapi_InstanceInfo& info) { + return RemixAPIPrivateAccessor::toRtDrawState(info); + } + } +} + +dxvk::ExternalDrawState dxvk::RemixAPIPrivateAccessor::toRtDrawState(const remixapi_InstanceInfo& info) +{ + auto prototype = DrawCallState {}; + { + prototype.cameraType = CameraType::Main; + prototype.transformData.objectToWorld = convert::tomat4(info.transform); + prototype.transformData.textureTransform = Matrix4 {}; + prototype.transformData.texgenMode = TexGenMode::None; + prototype.materialData.colorTextures[0] = TextureRef {}; + prototype.materialData.colorTextures[1] = TextureRef {}; + prototype.categories = convert::toRtCategories(info.categoryFlags); + } + + if (auto objectPicking 
= pnext::find(&info)) { + prototype.drawCallID = objectPicking->objectPickingValue; + } + + if (auto extBones = pnext::find(&info)) { + const uint32_t boneCount = + extBones->boneTransforms_count < REMIXAPI_INSTANCE_INFO_MAX_BONES_COUNT ? + extBones->boneTransforms_count : REMIXAPI_INSTANCE_INFO_MAX_BONES_COUNT; + prototype.skinningData.minBoneIndex = 0; + prototype.skinningData.numBones = boneCount; + prototype.skinningData.numBonesPerVertex = prototype.geometryData.numBonesPerVertex; + prototype.skinningData.pBoneMatrices.resize(boneCount); + for (uint32_t boneIdx = 0; boneIdx < boneCount; boneIdx++) { + prototype.skinningData.pBoneMatrices[boneIdx] = convert::tomat4(extBones->boneTransforms_values[boneIdx]); + } + } + + if (auto extBlend = pnext::find(&info)) { + prototype.materialData.alphaTestEnabled = extBlend->alphaTestEnabled; + prototype.materialData.alphaTestReferenceValue = extBlend->alphaTestReferenceValue; + prototype.materialData.alphaTestCompareOp = (VkCompareOp) extBlend->alphaTestCompareOp; + prototype.materialData.alphaBlendEnabled = extBlend->alphaBlendEnabled; + prototype.materialData.srcColorBlendFactor = (VkBlendFactor) extBlend->srcColorBlendFactor; + prototype.materialData.dstColorBlendFactor = (VkBlendFactor) extBlend->dstColorBlendFactor; + prototype.materialData.colorBlendOp = (VkBlendOp) extBlend->colorBlendOp; + prototype.materialData.textureColorOperation = (DxvkRtTextureOperation) extBlend->textureColorOperation; + prototype.materialData.textureColorArg1Source = (RtTextureArgSource) extBlend->textureColorArg1Source; + prototype.materialData.textureColorArg2Source = (RtTextureArgSource) extBlend->textureColorArg2Source; + prototype.materialData.textureAlphaOperation = (DxvkRtTextureOperation) extBlend->textureAlphaOperation; + prototype.materialData.textureAlphaArg1Source = (RtTextureArgSource) extBlend->textureAlphaArg1Source; + prototype.materialData.textureAlphaArg2Source = (RtTextureArgSource) extBlend->textureAlphaArg2Source; + 
prototype.materialData.tFactor = extBlend->tFactor; + prototype.materialData.isTextureFactorBlend = extBlend->isTextureFactorBlend; + } + + return ExternalDrawState { + prototype, + info.mesh, + convert::categoryToCameraType(info.categoryFlags), + convert::toRtCategories(info.categoryFlags), + convert::tobool(info.doubleSided) + }; +} + +namespace { + remixapi_ErrorCode REMIXAPI_CALL remixapi_Shutdown() { + // TODO: a proper check for shutdown + s_dxvkDevice = nullptr; + s_dxvkD3D9 = nullptr; + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_CreateMaterial( + const remixapi_MaterialInfo* info, + remixapi_MaterialHandle* out_handle) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!out_handle || !info || info->sType != REMIXAPI_STRUCT_TYPE_MATERIAL_INFO) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + static_assert(sizeof(remixapi_MaterialHandle) == sizeof(info->hash)); + auto handle = reinterpret_cast(info->hash); + if (!handle) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + // async load + std::lock_guard lock { s_mutex }; + remixDevice->EmitCs([cHandle = handle, + cMaterialData = convert::toRtMaterialWithoutTexturePreload(*info), + cPreloadSrc = convert::makePreloadSource(*info)](dxvk::DxvkContext* ctx) { + auto& assets = ctx->getCommonObjects()->getSceneManager().getAssetReplacer(); + assets->makeMaterialWithTexturePreload( + *ctx, + cHandle, + convert::toRtMaterialFinalized(*ctx, cMaterialData, cPreloadSrc)); + }); + + *out_handle = handle; + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_DestroyMaterial( + remixapi_MaterialHandle handle) { + if (auto remixDevice = tryAsDxvk()) { + std::lock_guard lock { s_mutex }; + remixDevice->EmitCs([cHandle = handle](dxvk::DxvkContext* ctx) { + auto& assets = ctx->getCommonObjects()->getSceneManager().getAssetReplacer(); + 
assets->destroyExternalMaterial(cHandle); + }); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_CreateMesh( + const remixapi_MeshInfo* info, + remixapi_MeshHandle* out_handle) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!out_handle || !info || info->sType != REMIXAPI_STRUCT_TYPE_MESH_INFO) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + static_assert(sizeof(remixapi_MeshHandle) == sizeof(info->hash)); + auto handle = reinterpret_cast(info->hash); + if (!handle) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + auto allocatedSurfaces = std::vector {}; + + for (size_t i = 0; i < info->surfaces_count; i++) { + const remixapi_MeshInfoSurfaceTriangles& src = info->surfaces_values[i]; + + const size_t vertexDataSize = sizeInBytes(src.vertices_values, src.vertices_count); + const size_t indexDataSize = sizeInBytes(src.indices_values, src.indices_count); + + auto allocBuffer = [](dxvk::D3D9DeviceEx* device, size_t sizeInBytes) -> dxvk::Rc { + auto bufferInfo = dxvk::DxvkBufferCreateInfo {}; + { + bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR; + bufferInfo.stages = VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR; + bufferInfo.access = VK_ACCESS_TRANSFER_WRITE_BIT; + bufferInfo.size = dxvk::align(sizeInBytes, dxvk::CACHE_LINE_SIZE); + } + return device->GetDXVKDevice()->createBuffer( + bufferInfo, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + dxvk::DxvkMemoryStats::Category::RTXBuffer); + }; + + dxvk::Rc vertexBuffer = allocBuffer(remixDevice, vertexDataSize); + dxvk::Rc 
indexBuffer = allocBuffer(remixDevice, indexDataSize); + dxvk::Rc skinningBuffer = nullptr; + + auto vertexSlice = dxvk::DxvkBufferSlice { vertexBuffer }; + memcpy(vertexSlice.mapPtr(0), src.vertices_values, vertexDataSize); + + auto indexSlice = dxvk::DxvkBufferSlice { indexBuffer }; + memcpy(indexSlice.mapPtr(0), src.indices_values, indexDataSize); + + auto blendWeightsSlice = dxvk::DxvkBufferSlice {}; + auto blendIndicesSlice = dxvk::DxvkBufferSlice {}; + if (src.skinning_hasvalue) { + size_t wordsPerCompressedTuple = dxvk::divCeil(src.skinning_value.bonesPerVertex, 4u); + size_t sizeInBytes_weights = sizeInBytes(src.skinning_value.blendWeights_values, src.skinning_value.blendWeights_count); + size_t sizeInBytes_indices = src.vertices_count * wordsPerCompressedTuple * sizeof(uint32_t); + + skinningBuffer = allocBuffer(remixDevice, sizeInBytes_weights + sizeInBytes_indices); + + // Encode bone indices into compressed byte form + auto compressedBlendIndices = std::vector {}; + compressedBlendIndices.resize(src.vertices_count * wordsPerCompressedTuple); + for (size_t vert = 0; vert < src.vertices_count; vert++) { + const uint32_t* dstCompressed = &compressedBlendIndices[vert * wordsPerCompressedTuple]; + const uint32_t* blendIndicesStorage = &src.skinning_value.blendIndices_values[vert * src.skinning_value.bonesPerVertex]; + + for (int j = 0; j < src.skinning_value.bonesPerVertex; j += 4) { + uint32_t vertIndices = 0; + for (int k = 0; k < 4 && j + k < src.skinning_value.bonesPerVertex; ++k) { + vertIndices |= blendIndicesStorage[j + k] << 8 * k; + } + *(uint32_t*) &dstCompressed[j / 4] = vertIndices; + } + } + + assert(sizeInBytes_indices == compressedBlendIndices.size() * sizeof(compressedBlendIndices[0])); + + blendWeightsSlice = dxvk::DxvkBufferSlice { skinningBuffer, 0, sizeInBytes_weights }; + blendIndicesSlice = dxvk::DxvkBufferSlice { skinningBuffer, sizeInBytes_weights, sizeInBytes_indices }; + + memcpy(blendWeightsSlice.mapPtr(0), 
src.skinning_value.blendWeights_values, sizeInBytes_weights); + memcpy(blendIndicesSlice.mapPtr(0), compressedBlendIndices.data(), sizeInBytes_indices); + } + + auto dst = dxvk::RasterGeometry {}; + { + dst.externalMaterial = src.material; + dst.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + dst.cullMode = VK_CULL_MODE_NONE; // this will be overwritten by the instance info at draw time + dst.frontFace = VK_FRONT_FACE_CLOCKWISE; + dst.vertexCount = src.vertices_count; assert(src.vertices_count < std::numeric_limits::max()); + dst.positionBuffer = dxvk::RasterBuffer { vertexSlice, offsetof(remixapi_HardcodedVertex, position), sizeof(remixapi_HardcodedVertex), VK_FORMAT_R32G32B32_SFLOAT }; + dst.normalBuffer = dxvk::RasterBuffer { vertexSlice, offsetof(remixapi_HardcodedVertex, normal), sizeof(remixapi_HardcodedVertex), VK_FORMAT_R32G32B32_SFLOAT }; + dst.texcoordBuffer = dxvk::RasterBuffer { vertexSlice, offsetof(remixapi_HardcodedVertex, texcoord), sizeof(remixapi_HardcodedVertex), VK_FORMAT_R32G32_SFLOAT }; + dst.color0Buffer = dxvk::RasterBuffer { vertexSlice, offsetof(remixapi_HardcodedVertex, color), sizeof(remixapi_HardcodedVertex), VK_FORMAT_B8G8R8A8_UNORM }; + if (src.skinning_hasvalue) { + dst.numBonesPerVertex = src.skinning_value.bonesPerVertex; + dst.blendWeightBuffer = dxvk::RasterBuffer { blendWeightsSlice, 0, sizeof(float), VK_FORMAT_R32_SFLOAT };; + dst.blendIndicesBuffer = dxvk::RasterBuffer { blendIndicesSlice, 0, sizeof(uint32_t), VK_FORMAT_R8G8B8A8_USCALED }; + } + + dst.indexCount = src.indices_count; + static_assert(sizeof(src.indices_values[0]) == 4); + dst.indexBuffer = dxvk::RasterBuffer { indexSlice, 0, sizeof(uint32_t), VK_INDEX_TYPE_UINT32 }; + // look comments in UsdMod::Impl::processMesh, rtx_mod_usd.cpp + dst.hashes[dxvk::HashComponents::Indices] = dst.hashes[dxvk::HashComponents::VertexPosition] = hack_getNextGeomHash(); + dst.hashes[dxvk::HashComponents::VertexTexcoord] = hack_getNextGeomHash(); + 
dst.hashes[dxvk::HashComponents::GeometryDescriptor] = hack_getNextGeomHash(); + dst.hashes[dxvk::HashComponents::VertexLayout] = hack_getNextGeomHash(); + dst.hashes.precombine(); + } + allocatedSurfaces.push_back(std::move(dst)); + } + std::lock_guard lock { s_mutex }; + + remixDevice->EmitCs([cHandle = handle, cSurfaces = std::move(allocatedSurfaces)](dxvk::DxvkContext* ctx) mutable { + auto& assets = ctx->getCommonObjects()->getSceneManager().getAssetReplacer(); + assets->registerExternalMesh(cHandle, std::move(cSurfaces)); + }); + + *out_handle = handle; + return REMIXAPI_ERROR_CODE_SUCCESS; + } +
+  // Submits, through EmitCs, a callback that asks the asset replacer to
+  // destroy the external mesh identified by 'handle'.
+  // Fails if no Remix device has been registered.
+  remixapi_ErrorCode REMIXAPI_CALL remixapi_DestroyMesh(
+    remixapi_MeshHandle handle) {
+    dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk();
+    if (!remixDevice) {
+      return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED;
+    }
+    std::lock_guard lock { s_mutex };
+    remixDevice->EmitCs([cHandle = handle](dxvk::DxvkContext* ctx) {
+      auto& assets = ctx->getCommonObjects()->getSceneManager().getAssetReplacer();
+      assets->destroyExternalMesh(cHandle);
+    });
+    return REMIXAPI_ERROR_CODE_SUCCESS;
+  }
+
+  // Converts 'info' to the renderer's camera representation on the calling
+  // thread and submits, through EmitCs, a callback that hands it to the
+  // scene manager. 'info' must be non-null and carry the camera sType.
+  remixapi_ErrorCode REMIXAPI_CALL remixapi_SetupCamera(
+    const remixapi_CameraInfo* info) {
+    dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk();
+    if (!remixDevice) {
+      return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED;
+    }
+    if (!info || info->sType != REMIXAPI_STRUCT_TYPE_CAMERA_INFO) {
+      return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS;
+    }
+    std::lock_guard lock { s_mutex };
+    remixDevice->EmitCs([cRtCamera = convert::toRtCamera(*info)](dxvk::DxvkContext* ctx) {
+      ctx->getCommonObjects()->getSceneManager()
+        .processExternalCamera(cRtCamera.type, cRtCamera.worldToView, cRtCamera.viewToProjection);
+    });
+    return REMIXAPI_ERROR_CODE_SUCCESS;
+  }
+
+  // Converts 'info' to an external draw state on the calling thread and
+  // submits, through EmitCs, a callback that commits it for ray tracing.
+  // Returns REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS when 'info' is null or does
+  // not carry REMIXAPI_STRUCT_TYPE_INSTANCE_INFO.
+  remixapi_ErrorCode REMIXAPI_CALL remixapi_DrawInstance(
+    const remixapi_InstanceInfo* info) {
+    dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk();
+    if (!remixDevice) {
+      return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED;
+    }
+    // 'info' is dereferenced below while building the capture, so it has to be
+    // validated first, the same way the other entry points validate theirs
+    if (!info || info->sType != REMIXAPI_STRUCT_TYPE_INSTANCE_INFO) {
+      return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS;
+    }
+    std::lock_guard lock { s_mutex };
+    remixDevice->EmitCs([cRtDrawState = convert::toRtDrawState(*info)](dxvk::DxvkContext* dxvkCtx) mutable {
+      // NOTE(review): cast target restored as dxvk::RtxContext*, matching the
+      // cast used by remixapi_dxvk_CopyRenderingOutput - confirm
+      auto* ctx = static_cast<dxvk::RtxContext*>(dxvkCtx);
+      ctx->commitExternalGeometryToRT(std::move(cRtDrawState));
+    });
+    return REMIXAPI_ERROR_CODE_SUCCESS;
+  }
+
+ remixapi_ErrorCode REMIXAPI_CALL remixapi_CreateLight( + const remixapi_LightInfo* info, + remixapi_LightHandle* out_handle) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!out_handle || !info || info->sType != REMIXAPI_STRUCT_TYPE_LIGHT_INFO) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + static_assert(sizeof(remixapi_LightHandle) == sizeof(info->hash)); + auto handle = reinterpret_cast(info->hash); + if (!handle) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + // async load + std::lock_guard lock { s_mutex }; + if (auto src = pnext::find(info)) { + // Special case for dome lights + remixDevice->EmitCs([cHandle = handle, + cRadiance = convert::tovec3(info->radiance), + cTransform = convert::tomat4(src->transform), + cTexturePath = convert::topath(src->colorTexture)] + (dxvk::DxvkContext* ctx) { + auto preloadTexture = [&ctx](const std::filesystem::path& path)->dxvk::TextureRef { + if (path.empty()) { + return {}; + } + auto assetData = dxvk::AssetDataManager::get().findAsset(path.string().c_str()); + if (assetData == nullptr) { + return {}; + } + auto uploadedTexture = ctx->getCommonObjects()->getTextureManager() + .preloadTextureAsset(assetData, dxvk::ColorSpace::AUTO, ctx, true); + return dxvk::TextureRef { uploadedTexture }; + }; + + dxvk::DomeLight domeLight; + domeLight.radiance = cRadiance; + domeLight.worldToLight = inverse(cTransform); + domeLight.texture = preloadTexture(cTexturePath); + + // Ensures a texture stays in VidMem + uint32_t unused; + ctx->getCommonObjects()->getSceneManager().trackTexture(ctx, domeLight.texture, unused, true, true); + + auto& 
lightMgr = ctx->getCommonObjects()->getSceneManager().getLightManager(); + lightMgr.addExternalDomeLight(cHandle, domeLight); + }); + } else { + // Regular analytical light handling + remixDevice->EmitCs([cHandle = handle, cRtLight = convert::toRtLight(*info)](dxvk::DxvkContext* ctx) { + auto& lightMgr = ctx->getCommonObjects()->getSceneManager().getLightManager(); + lightMgr.addExternalLight(cHandle, cRtLight); + }); + } + + *out_handle = handle; + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_DestroyLight( + remixapi_LightHandle handle) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + std::lock_guard lock { s_mutex }; + remixDevice->EmitCs([cHandle = handle](dxvk::DxvkContext* ctx) { + auto& lightMgr = ctx->getCommonObjects()->getSceneManager().getLightManager(); + lightMgr.removeExternalLight(cHandle); + }); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + + remixapi_ErrorCode REMIXAPI_CALL remixapi_DrawLightInstance( + remixapi_LightHandle lightHandle) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!lightHandle) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + // async load + std::lock_guard lock { s_mutex }; + remixDevice->EmitCs([lightHandle](dxvk::DxvkContext* ctx) { + auto& lightMgr = ctx->getCommonObjects()->getSceneManager().getLightManager(); + lightMgr.addExternalLightInstance(lightHandle); + }); + + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + + remixapi_ErrorCode REMIXAPI_CALL remixapi_SetConfigVariable( + const char* key, + const char* value) { + std::lock_guard lock { s_mutex }; + + if (!key || key[0] == '\0' || !value) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + auto& globalRtxOptions = dxvk::RtxOptionImpl::getGlobalRtxOptionMap(); + + auto found = globalRtxOptions.find(key); + if (found == 
globalRtxOptions.end()) { + return REMIXAPI_ERROR_CODE_GENERAL_FAILURE; + } + + dxvk::Config newSetting; + newSetting.setOption(key, std::string { value }); + found->second->readOption(newSetting, dxvk::RtxOptionImpl::ValueType::Value); + + // Make sure we dont step on required configs + sanitizeConfigs(); + return REMIXAPI_ERROR_CODE_SUCCESS; + } +
+  // Creates the IDirect3D9Ex object through dxvk::CreateD3D9, remembers it in
+  // s_dxvkD3D9 and returns it through 'out_pD3D9'.
+  // Returns REMIXAPI_ERROR_CODE_ALREADY_EXISTS if an object was created before,
+  // REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS if 'out_pD3D9' is null and
+  // REMIXAPI_ERROR_CODE_GENERAL_FAILURE if the creation itself fails.
+  // A non-zero 'disableSrgbConversionForOutput' clears
+  // dxvk::g_allowSrgbConversionForOutput.
+  remixapi_ErrorCode REMIXAPI_CALL remixapi_dxvk_CreateD3D9(
+    remixapi_Bool disableSrgbConversionForOutput,
+    IDirect3D9Ex** out_pD3D9) {
+    if (s_dxvkD3D9) {
+      return REMIXAPI_ERROR_CODE_ALREADY_EXISTS;
+    }
+    // Reject a null output pointer before anything is created: otherwise the
+    // object would be stored in s_dxvkD3D9 and then written through null
+    if (!out_pD3D9) {
+      return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS;
+    }
+    IDirect3D9Ex* d3d9ex = nullptr;
+    auto hr = dxvk::CreateD3D9(true, &d3d9ex, true);
+    if (FAILED(hr) || !d3d9ex) {
+      return REMIXAPI_ERROR_CODE_GENERAL_FAILURE;
+    }
+
+    sanitizeConfigs();
+    if (disableSrgbConversionForOutput) {
+      dxvk::g_allowSrgbConversionForOutput = false;
+    }
+
+    s_dxvkD3D9 = d3d9ex;
+    *out_pD3D9 = d3d9ex;
+    return REMIXAPI_ERROR_CODE_SUCCESS;
+  }
+
+ remixapi_ErrorCode REMIXAPI_CALL remixapi_dxvk_RegisterD3D9Device( + IDirect3DDevice9Ex* d3d9Device) { + s_dxvkDevice = dynamic_cast(d3d9Device); + if (d3d9Device && !s_dxvkDevice) { + return REMIXAPI_ERROR_CODE_REGISTERING_NON_REMIX_D3D9_DEVICE; + } + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + dxvk::Resources& resourceManager = remixDevice->GetDXVKDevice()->getCommon()->getResources(); + // request allocation of the images required for dxvk_CopyRenderingOutput(..) 
+ resourceManager.requestObjectPickingImages(true); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_dxvk_GetExternalSwapchain( + uint64_t* out_vkImage, + uint64_t* out_vkSemaphoreRenderingDone, + uint64_t* out_vkSemaphoreResumeSemaphore) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!out_vkImage || !out_vkSemaphoreRenderingDone || !out_vkSemaphoreResumeSemaphore) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + if (auto pres = remixDevice->GetExternalPresenter()) { + *out_vkImage = reinterpret_cast(pres->GetVkImage(0)); + *out_vkSemaphoreRenderingDone = reinterpret_cast(pres->GetFrameCompleteVkSemaphore()); + *out_vkSemaphoreResumeSemaphore = reinterpret_cast(pres->GetFrameResumeVkSemaphore()); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + return REMIXAPI_ERROR_CODE_GENERAL_FAILURE; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_dxvk_GetVkImage( + IDirect3DSurface9* source, + uint64_t* out_vkImage) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!source || !out_vkImage) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + dxvk::D3D9Surface* surface = static_cast(source); + dxvk::D3D9CommonTexture* texInfo = surface ? 
surface->GetCommonTexture() : nullptr; + if (texInfo) { + *out_vkImage = reinterpret_cast(texInfo->GetImage()->handle()); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + return REMIXAPI_ERROR_CODE_GENERAL_FAILURE; + } + + remixapi_ErrorCode REMIXAPI_CALL remixapi_dxvk_CopyRenderingOutput( + IDirect3DSurface9* destination, + remixapi_dxvk_CopyRenderingOutputType type) { + dxvk::D3D9DeviceEx* remixDevice = tryAsDxvk(); + if (!remixDevice) { + return REMIXAPI_ERROR_CODE_REMIX_DEVICE_WAS_NOT_REGISTERED; + } + if (!destination) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + dxvk::D3D9Surface* destSurface = static_cast(destination); + dxvk::D3D9CommonTexture* destTexInfo = destSurface ? destSurface->GetCommonTexture() : nullptr; + if (!destTexInfo) { + return REMIXAPI_ERROR_CODE_GENERAL_FAILURE; + } + + dxvk::Resources& resourceManager = remixDevice->GetDXVKDevice()->getCommon()->getResources(); + const dxvk::Resources::RaytracingOutput& rtOutput = resourceManager.getRaytracingOutput(); + + dxvk::Rc srcImage = nullptr; + switch (type) { + case REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_FINAL_COLOR: + srcImage = rtOutput.m_finalOutput.image; + break; + case REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_DEPTH: + srcImage = rtOutput.m_primaryDepth.image; + break; + case REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_NORMALS: + srcImage = rtOutput.m_primaryWorldShadingNormal.image; + break; + case REMIXAPI_DXVK_COPY_RENDERING_OUTPUT_TYPE_OBJECT_PICKING: + srcImage = rtOutput.m_primaryObjectPicking.image; + break; + default: + break; + } + + if (srcImage.ptr() == nullptr) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + + std::lock_guard lock { s_mutex }; + remixDevice->EmitCs([cDest = destTexInfo->GetImage(), cSrc = srcImage](dxvk::DxvkContext* dxvkCtx) { + auto* ctx = static_cast(dxvkCtx); + dxvk::RtxContext::blitImageHelper(ctx, cSrc, cDest, VkFilter::VK_FILTER_NEAREST); + }); + return REMIXAPI_ERROR_CODE_SUCCESS; + } + + bool isVersionCompatible(uint64_t userVersion) { + 
constexpr uint64_t compiledVersion = REMIXAPI_VERSION_MAKE(REMIXAPI_VERSION_MAJOR, REMIXAPI_VERSION_MINOR, REMIXAPI_VERSION_PATCH); + + bool isDevelopment = + REMIXAPI_VERSION_GET_MAJOR(userVersion) == 0 && + REMIXAPI_VERSION_GET_MAJOR(compiledVersion) == 0; + + if (isDevelopment) { + // each minor change is breaking + return REMIXAPI_VERSION_GET_MINOR(userVersion) == REMIXAPI_VERSION_GET_MINOR(compiledVersion); + } + + if (REMIXAPI_VERSION_GET_MAJOR(userVersion) == REMIXAPI_VERSION_GET_MAJOR(compiledVersion)) { + // user version must be before the currently compiled version: + // features that are requested by a user must be available in the current binary + if (REMIXAPI_VERSION_GET_MINOR(userVersion) <= REMIXAPI_VERSION_GET_MINOR(compiledVersion)) { + return true; + } + } + + return false; + } +} + +extern "C" +{ + REMIXAPI remixapi_ErrorCode REMIXAPI_CALL remixapi_InitializeLibrary(const remixapi_InitializeLibraryInfo* info, + remixapi_Interface* out_result) { + if (!info || info->sType != REMIXAPI_STRUCT_TYPE_INITIALIZE_LIBRARY_INFO) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + if (!out_result) { + return REMIXAPI_ERROR_CODE_WRONG_ARGUMENTS; + } + if (!isVersionCompatible(info->version)) { + return REMIXAPI_ERROR_CODE_INCOMPATIBLE_VERSION; + } + + auto interf = remixapi_Interface {}; + { + interf.Shutdown = remixapi_Shutdown; + interf.CreateMaterial = remixapi_CreateMaterial; + interf.DestroyMaterial = remixapi_DestroyMaterial; + interf.CreateMesh = remixapi_CreateMesh; + interf.DestroyMesh = remixapi_DestroyMesh; + interf.SetupCamera = remixapi_SetupCamera; + interf.DrawInstance = remixapi_DrawInstance; + interf.CreateLight = remixapi_CreateLight; + interf.DestroyLight = remixapi_DestroyLight; + interf.DrawLightInstance = remixapi_DrawLightInstance; + interf.SetConfigVariable = remixapi_SetConfigVariable; + interf.dxvk_CreateD3D9 = remixapi_dxvk_CreateD3D9; + interf.dxvk_RegisterD3D9Device = remixapi_dxvk_RegisterD3D9Device; + 
interf.dxvk_GetExternalSwapchain = remixapi_dxvk_GetExternalSwapchain; + interf.dxvk_GetVkImage = remixapi_dxvk_GetVkImage; + interf.dxvk_CopyRenderingOutput = remixapi_dxvk_CopyRenderingOutput; + } + + *out_result = interf; + return REMIXAPI_ERROR_CODE_SUCCESS; + } +} diff --git a/src/dxvk/rtx_render/rtx_remix_pnext.h b/src/dxvk/rtx_render/rtx_remix_pnext.h new file mode 100644 index 000000000..2ab81723e --- /dev/null +++ b/src/dxvk/rtx_render/rtx_remix_pnext.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include + +#include +#include +#include + +namespace pnext{ + namespace detail { + template< typename... 
Types > + struct TypeList { }; + + struct AnyInfoPrototype { + remixapi_StructType sType; + void* pNext; + }; + + template< typename T > + auto getStructType(const T* info) noexcept -> remixapi_StructType { + if (info) { + return reinterpret_cast< const AnyInfoPrototype* >(info)->sType; + } + return REMIXAPI_STRUCT_TYPE_NONE; + } + + // If SourcePtr is const, then add const to TargetPtr + template< typename TargetPtr, typename Source > // requires(std::is_pointer_v< TargetPtr > && !std::is_pointer_v< Source >) + using TryConst = std::conditional_t< std::is_const_v< Source >, + std::add_pointer_t< std::add_const_t< std::remove_pointer_t< TargetPtr > > >, + std::add_pointer_t< std::remove_const_t< std::remove_pointer_t< TargetPtr > > > >; + + template< typename T > + TryConst< void*, T > getPNext(T* info) noexcept { + if (info) { + return reinterpret_cast< TryConst< AnyInfoPrototype*, T > >(info)->pNext; + } + return nullptr; + } + + template< typename T > + T* non_const(const T* v) { + return const_cast< T* >(v); + } + } +} + +// User-defined specializations for remixapi types +#include "rtx_remix_specialization.inl" + +namespace pnext { + namespace detail { + template< typename T > + using Underlying = std::remove_pointer_t< std::remove_reference_t< std::remove_cv_t< T > > >; + + template< typename T > + using RootOf = typename Root< T >::Type; + + template< typename EXT, typename Base > + constexpr bool CanBeLinkedTo = std::is_same_v< RootOf< Underlying< EXT > >, + Underlying< Base > >; + +#ifdef RTX_REMIX_PNEXT_CHECK_STRUCTS + namespace helper { + template< typename T, typename = void, typename = void > + struct checkMembers_t { + static constexpr bool hasMembers = false; + }; + + template< typename T > + struct checkMembers_t< T, std::void_t< decltype( T::sType ) >, + std::void_t< decltype( T::pNext ) > > { + static constexpr bool hasMembers = + std::is_same_v< decltype( T::sType ), remixapi_StructType > && + std::is_same_v< decltype( T::pNext), void* > && + 
offsetof(AnyInfoPrototype, sType) == offsetof(T, sType) && sizeof(T::sType) == sizeof(AnyInfoPrototype::sType) && + offsetof(AnyInfoPrototype, pNext) == offsetof(T, pNext) && sizeof(T::pNext) == sizeof(AnyInfoPrototype::pNext); + }; + + template< typename T > + constexpr bool HasSTypePNext = checkMembers_t< T >::hasMembers; + + + template< typename T, typename... Types > + struct hasUniqueId_t : std::true_type { }; + + template< typename T, typename U, typename... Rest > + struct hasUniqueId_t< T, TypeList< U, Rest... > > + // check that StructType enum is unique for types + : std::conditional_t< !std::is_same_v< T, U > && ToEnum< T > == ToEnum< U >, + std::false_type, // found a duplicate + hasUniqueId_t< T, TypeList< Rest... > > > // continue + { }; + + template< typename T > + constexpr bool HasUniqueId = hasUniqueId_t< T, AllTypes >::value; + + + template< typename T > + constexpr void checkStruct() { + static_assert(ToEnum< T > != REMIXAPI_STRUCT_TYPE_NONE, "ToEnum must be specialized for this type"); + static_assert(std::is_same_v< decltype(ToEnum< T >), const remixapi_StructType >, "ToEnum must be remixapi_StructType"); + static_assert(HasSTypePNext< T >, "Struct must contain sType (remixapi_StructType) and pNext (void*)"); + static_assert(HasUniqueId< T >, "Please, recheck StructType enum for duplicates"); + } + + template < typename... Types > + constexpr bool checkAllTypes(TypeList< Types... 
>) + { + (checkStruct< Types >(), ...); + return true; + } + + static_assert(checkAllTypes(AllTypes {})); + } +#endif + } +
+  // Walks the pNext chain that starts at 'listStart' and returns the first
+  // struct whose sType equals ToEnum< T >, or nullptr if the chain holds none.
+  // Constness of 'Root' carries over to the returned pointer. The overload is
+  // only enabled when T has a ToEnum value other than REMIXAPI_STRUCT_TYPE_NONE
+  // and T can be linked to 'Root'.
+  template< typename T,
+            typename Root,
+            std::enable_if_t<
+              detail::ToEnum< T > != REMIXAPI_STRUCT_TYPE_NONE && detail::CanBeLinkedTo< T, Root >
+            , int > = 0 >
+  detail::TryConst< T*, Root > find(Root* listStart) noexcept {
+    // NOTE: if compilation fails here, please ensure the structure is defined in rules: rtx_remix_specialization.inl
+    using NodePtr   = detail::TryConst< void*, Root >;
+    using ResultPtr = detail::TryConst< T*, Root >;
+
+    for (NodePtr node = static_cast< NodePtr >(listStart); node; node = detail::getPNext(node)) {
+      const remixapi_StructType nodeType = detail::getStructType(node);
+      if (nodeType == detail::ToEnum< T >) {
+        return static_cast< ResultPtr >(node);
+      }
+      if (nodeType == REMIXAPI_STRUCT_TYPE_NONE) {
+        // a link without a struct type cannot be interpreted: stop walking
+        // debug::Error( "Found sType=REMIXAPI_STRUCT_TYPE_NONE on {:#x}", uint64_t( next ) );
+        assert(0);
+        break;
+      }
+    }
+
+    return nullptr;
+  }
+} // namespace pnext
diff --git a/src/dxvk/rtx_render/rtx_remix_specialization.inl b/src/dxvk/rtx_render/rtx_remix_specialization.inl new file mode 100644 index 000000000..eeb9cd9d2 --- /dev/null +++ b/src/dxvk/rtx_render/rtx_remix_specialization.inl @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +namespace pnext::detail { + + // NOTE: To add a new type: + // 1) Add the type to 'AllTypes'. + // 2) Add 'ToEnum' specifying the struct type and corresponding remixapi_StructType. + // 3) If the new type is an extension + // (i.e. can be put linked into 'pNext' chain of a parent type) + // (e.g. 'remixapi_LightInfoSphereEXT' is an extension of 'remixapi_LightInfo'): + // * Then add 'Root' specifying the extension type and root (parent) type. + + // clang-format off + using AllTypes = TypeList< + remixapi_MaterialInfo, + remixapi_MaterialInfoPortalEXT, + remixapi_MaterialInfoTranslucentEXT, + remixapi_MaterialInfoOpaqueEXT, + remixapi_MaterialInfoOpaqueSubsurfaceEXT, + remixapi_LightInfoSphereEXT, + remixapi_LightInfoRectEXT, + remixapi_LightInfoDiskEXT, + remixapi_LightInfoCylinderEXT, + remixapi_LightInfoDistantEXT, + remixapi_LightInfoDomeEXT, + remixapi_LightInfo, + remixapi_MeshInfo, + remixapi_InstanceInfo, + remixapi_InstanceInfoBoneTransformsEXT, + remixapi_InstanceInfoBlendEXT, + remixapi_InstanceInfoObjectPickingEXT, + remixapi_CameraInfo, + remixapi_CameraInfoParameterizedEXT + >; + + template< typename T > constexpr remixapi_StructType ToEnum = REMIXAPI_STRUCT_TYPE_NONE; + template<> constexpr auto ToEnum< remixapi_MaterialInfo > = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO; + template<> constexpr auto ToEnum< remixapi_MaterialInfoPortalEXT > = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_PORTAL_EXT; + template<> constexpr auto ToEnum< remixapi_MaterialInfoTranslucentEXT > = 
REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_TRANSLUCENT_EXT; + template<> constexpr auto ToEnum< remixapi_MaterialInfoOpaqueEXT > = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_EXT; + template<> constexpr auto ToEnum< remixapi_MaterialInfoOpaqueSubsurfaceEXT> = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_SUBSURFACE_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoSphereEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_SPHERE_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoRectEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_RECT_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoDiskEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISK_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoCylinderEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_CYLINDER_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoDistantEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISTANT_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfoDomeEXT > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DOME_EXT; + template<> constexpr auto ToEnum< remixapi_LightInfo > = REMIXAPI_STRUCT_TYPE_LIGHT_INFO; + template<> constexpr auto ToEnum< remixapi_MeshInfo > = REMIXAPI_STRUCT_TYPE_MESH_INFO; + template<> constexpr auto ToEnum< remixapi_InstanceInfo > = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO; + template<> constexpr auto ToEnum< remixapi_InstanceInfoBoneTransformsEXT > = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BONE_TRANSFORMS_EXT; + template<> constexpr auto ToEnum< remixapi_InstanceInfoBlendEXT > = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_BLEND_EXT; + template<> constexpr auto ToEnum< remixapi_InstanceInfoObjectPickingEXT > = REMIXAPI_STRUCT_TYPE_INSTANCE_INFO_OBJECT_PICKING_EXT; + template<> constexpr auto ToEnum< remixapi_CameraInfo > = REMIXAPI_STRUCT_TYPE_CAMERA_INFO; + template<> constexpr auto ToEnum< remixapi_CameraInfoParameterizedEXT > = REMIXAPI_STRUCT_TYPE_CAMERA_INFO_PARAMETERIZED_EXT; + + template< typename T > struct Root { using Type = T; /* by default, a root is self */ }; + template<> struct Root< 
remixapi_MaterialInfoPortalEXT >{ using Type = remixapi_MaterialInfo; }; + template<> struct Root< remixapi_MaterialInfoTranslucentEXT >{ using Type = remixapi_MaterialInfo; }; + template<> struct Root< remixapi_MaterialInfoOpaqueEXT >{ using Type = remixapi_MaterialInfo; }; + template<> struct Root< remixapi_MaterialInfoOpaqueSubsurfaceEXT>{ using Type = remixapi_MaterialInfo; }; + template<> struct Root< remixapi_LightInfoSphereEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_LightInfoRectEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_LightInfoDiskEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_LightInfoCylinderEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_LightInfoDistantEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_LightInfoDomeEXT >{ using Type = remixapi_LightInfo; }; + template<> struct Root< remixapi_InstanceInfoBoneTransformsEXT >{ using Type = remixapi_InstanceInfo; }; + template<> struct Root< remixapi_InstanceInfoBlendEXT >{ using Type = remixapi_InstanceInfo; }; + template<> struct Root< remixapi_InstanceInfoObjectPickingEXT >{ using Type = remixapi_InstanceInfo; }; + template<> struct Root< remixapi_CameraInfoParameterizedEXT >{ using Type = remixapi_CameraInfo; }; + // clang-format on +} diff --git a/src/dxvk/rtx_render/rtx_resources.cpp b/src/dxvk/rtx_render/rtx_resources.cpp index dc523cca8..915bc1a49 100644 --- a/src/dxvk/rtx_render/rtx_resources.cpp +++ b/src/dxvk/rtx_render/rtx_resources.cpp @@ -292,6 +292,8 @@ namespace dxvk { void Resources::createRaytracingOutput(Rc& ctx, const VkExtent3D& downscaledExtent, const VkExtent3D& targetExtent) { ScopedCpuProfileZone(); + assert(targetExtent.width > 0 && targetExtent.height > 0 && targetExtent.depth > 0); + if (m_downscaledExtent != downscaledExtent) { m_downscaledExtent = downscaledExtent; @@ -795,6 +797,10 @@ namespace dxvk { 
m_raytracingOutput.m_primaryRtxdiTemporalPosition = createImageResource(ctx, "primary rtxdi temporal position", m_downscaledExtent, VK_FORMAT_R32_UINT); m_raytracingOutput.m_primarySurfaceFlags = createImageResource(ctx, "primary surface flags", m_downscaledExtent, VK_FORMAT_R8_UINT); m_raytracingOutput.m_primaryDisocclusionThresholdMix = createImageResource(ctx, "primary disocclusion threshold mix", m_downscaledExtent, VK_FORMAT_R8_UNORM); + m_raytracingOutput.m_sharedSubsurfaceData = createImageResource(ctx, "primary subsurface material buffer", m_downscaledExtent, VK_FORMAT_R16G16B16A16_UINT); + if (m_objectPickingImagesRequired) { + m_raytracingOutput.m_primaryObjectPicking = createImageResource(ctx, "primary object picking", m_downscaledExtent, VK_FORMAT_R32_UINT); + } m_raytracingOutput.m_secondaryAttenuation = createImageResource(ctx, "secondary attenuation", m_downscaledExtent, VK_FORMAT_R32_UINT); m_raytracingOutput.m_secondaryWorldShadingNormal = createImageResource(ctx, "secondary world shading normal", m_downscaledExtent, VK_FORMAT_R32_UINT); @@ -893,7 +899,7 @@ namespace dxvk { m_raytracingOutput.m_neeCacheThreadTask = createImageResource(ctx, "radiance cache thread task", m_downscaledExtent, VK_FORMAT_R32G32_UINT); // Displacement - m_raytracingOutput.m_displacementTextureCoord = createImageResource(ctx, "displacement texture coordinate", m_downscaledExtent, VK_FORMAT_R32G32_SFLOAT); + m_raytracingOutput.m_sharedTextureCoord = createImageResource(ctx, "displacement texture coordinate", m_downscaledExtent, VK_FORMAT_R32G32_SFLOAT); // Post Effect motion blur prefilter intermediate textures m_raytracingOutput.m_primarySurfaceFlagsIntermediateTexture1 = AliasedResource(m_raytracingOutput.m_secondaryPerceptualRoughness, ctx, m_downscaledExtent, VK_FORMAT_R8_UINT, "Primary Surface Flags Intermediate Texture 1"); diff --git a/src/dxvk/rtx_render/rtx_resources.h b/src/dxvk/rtx_render/rtx_resources.h index eaa240324..f08c5fc53 100644 --- 
a/src/dxvk/rtx_render/rtx_resources.h +++ b/src/dxvk/rtx_render/rtx_resources.h @@ -243,6 +243,7 @@ namespace dxvk Resource m_sharedMediumMaterialIndex; AliasedResource m_sharedBiasCurrentColorMask; Resource m_sharedSurfaceIndex; + Resource m_sharedSubsurfaceData; Resource m_primaryAttenuation; Resource m_primaryWorldShadingNormal; @@ -268,7 +269,8 @@ namespace dxvk Resource m_primaryRtxdiTemporalPosition; Resource m_primarySurfaceFlags; Resource m_primaryDisocclusionThresholdMix; // for NRD - + Resource m_primaryObjectPicking; + Resource m_secondaryAttenuation; Resource m_secondaryWorldShadingNormal; AliasedResource m_secondaryPerceptualRoughness; @@ -335,8 +337,8 @@ namespace dxvk Rc m_neeCacheTask; Rc m_neeCacheSample; Resource m_neeCacheThreadTask; - - Resource m_displacementTextureCoord; + + Resource m_sharedTextureCoord; VkExtent3D m_froxelVolumeExtent; uint32_t m_numFroxelVolumes; @@ -416,6 +418,10 @@ namespace dxvk const VkExtent3D& getTargetDimensions() const { return m_targetExtent; } const VkExtent3D& getDownscaleDimensions() const { return m_downscaledExtent; } + void requestObjectPickingImages(bool enable) { + m_objectPickingImagesRequired = enable; + } + static const uint32_t kInvalidFormatCompatibilityCategoryIndex = UINT32_MAX; static uint32_t getFormatCompatibilityCategoryIndex(const VkFormat format); static bool areFormatsCompatible(const VkFormat format1, const VkFormat format2); @@ -452,6 +458,8 @@ namespace dxvk VkExtent3D m_downscaledExtent = { 0, 0, 0 }; VkExtent3D m_targetExtent = { 0, 0, 0 }; + bool m_objectPickingImagesRequired = false; + using ResizeEventList = std::vector>; using FrameBeginEventList = std::vector>; ResizeEventList m_onTargetResize; diff --git a/src/dxvk/rtx_render/rtx_restir_gi_rayquery.cpp b/src/dxvk/rtx_render/rtx_restir_gi_rayquery.cpp index f976aee3d..0a1809bec 100644 --- a/src/dxvk/rtx_render/rtx_restir_gi_rayquery.cpp +++ b/src/dxvk/rtx_render/rtx_restir_gi_rayquery.cpp @@ -76,6 +76,7 @@ namespace dxvk { 
TEXTURE2D(RESTIR_GI_REUSE_BINDING_POSITION_ERROR_INPUT) TEXTURE2D(RESTIR_GI_REUSE_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) RW_TEXTURE2D(RESTIR_GI_REUSE_BINDING_LAST_GBUFFER) END_PARAMETER() }; @@ -109,6 +110,7 @@ namespace dxvk { TEXTURE2D(RESTIR_GI_REUSE_BINDING_POSITION_ERROR_INPUT) TEXTURE2D(RESTIR_GI_REUSE_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) RW_TEXTURE2D(RESTIR_GI_REUSE_BINDING_LAST_GBUFFER) END_PARAMETER() }; @@ -126,8 +128,9 @@ namespace dxvk { TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_FLAGS_INPUT) TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA0_INPUT) TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA1_INPUT) - RW_TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) - RW_TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_TEXTURE_COORD_INPUT) + TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SUBSURFACE_DATA_INPUT) TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT) TEXTURE2D(RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT) @@ -221,6 +224,7 @@ namespace dxvk { ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_HIT_GEOMETRY_INPUT, rtOutput.m_restirGIHitGeometry.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_POSITION_ERROR_INPUT, rtOutput.m_primaryPositionError.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SHARED_FLAGS_INPUT, 
rtOutput.m_sharedFlags.view, nullptr); ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, ReSTIRGITemporalReuseShader::getShader()); @@ -246,6 +250,7 @@ namespace dxvk { ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_HIT_GEOMETRY_INPUT, rtOutput.m_restirGIHitGeometry.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_POSITION_ERROR_INPUT, rtOutput.m_primaryPositionError.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(RESTIR_GI_REUSE_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, ReSTIRGISpatialReuseShader::getShader()); @@ -260,8 +265,9 @@ namespace dxvk { ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA0_INPUT, rtOutput.m_sharedMaterialData0.view, nullptr); ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA1_INPUT, rtOutput.m_sharedMaterialData1.view, nullptr); - ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT, rtOutput.m_displacementTextureCoord.view, nullptr); + ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_TEXTURE_COORD_INPUT, rtOutput.m_sharedTextureCoord.view, nullptr); ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT, rtOutput.m_primaryWorldShadingNormal.view, nullptr); 
ctx->bindResourceView(RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT, rtOutput.m_primaryWorldInterpolatedNormal.view, nullptr); diff --git a/src/dxvk/rtx_render/rtx_rtxdi_rayquery.cpp b/src/dxvk/rtx_render/rtx_rtxdi_rayquery.cpp index 3f6c7dd6a..8799d9726 100644 --- a/src/dxvk/rtx_render/rtx_rtxdi_rayquery.cpp +++ b/src/dxvk/rtx_render/rtx_rtxdi_rayquery.cpp @@ -70,6 +70,7 @@ namespace dxvk { TEXTURE2D(RTXDI_REUSE_BINDING_SS_MVEC_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_POSITION_ERROR_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_SHARED_FLAGS_INPUT) RW_TEXTURE2D(RTXDI_REUSE_BINDING_LAST_GBUFFER) RW_TEXTURE2D(RTXDI_REUSE_BINDING_REPROJECTION_CONFIDENCE_OUTPUT) @@ -105,6 +106,7 @@ namespace dxvk { TEXTURE2D(RTXDI_REUSE_BINDING_SS_MVEC_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_POSITION_ERROR_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) + TEXTURE2D(RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) TEXTURE2D(RTXDI_REUSE_BINDING_SHARED_FLAGS_INPUT) RW_TEXTURE2D(RTXDI_REUSE_BINDING_LAST_GBUFFER) RW_TEXTURE2D(RTXDI_REUSE_BINDING_REPROJECTION_CONFIDENCE_OUTPUT) @@ -256,6 +258,7 @@ namespace dxvk { ctx->bindResourceView(RTXDI_REUSE_BINDING_LAST_GBUFFER, rtOutput.m_gbufferLast.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_REPROJECTION_CONFIDENCE_OUTPUT, rtOutput.m_reprojectionConfidence.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_BSDF_FACTOR_OUTPUT, rtOutput.m_bsdfFactor.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_TEMPORAL_POSITION_OUTPUT, 
rtOutput.m_primaryRtxdiTemporalPosition.view, nullptr); @@ -285,6 +288,7 @@ namespace dxvk { ctx->bindResourceView(RTXDI_REUSE_BINDING_LAST_GBUFFER, rtOutput.m_gbufferLast.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_REPROJECTION_CONFIDENCE_OUTPUT, rtOutput.m_reprojectionConfidence.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT, rtOutput.m_sharedSurfaceIndex.view, nullptr); + ctx->bindResourceView(RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT, rtOutput.m_sharedSubsurfaceData.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_SHARED_FLAGS_INPUT, rtOutput.m_sharedFlags.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_BSDF_FACTOR_OUTPUT, rtOutput.m_bsdfFactor.view, nullptr); ctx->bindResourceView(RTXDI_REUSE_BINDING_TEMPORAL_POSITION_OUTPUT, rtOutput.m_primaryRtxdiTemporalPosition.view, nullptr); diff --git a/src/dxvk/rtx_render/rtx_scene_manager.cpp b/src/dxvk/rtx_render/rtx_scene_manager.cpp index b5dbaa685..f487d1977 100644 --- a/src/dxvk/rtx_render/rtx_scene_manager.cpp +++ b/src/dxvk/rtx_render/rtx_scene_manager.cpp @@ -476,6 +476,8 @@ namespace dxvk { replacementMaterial.emplace(MaterialData(*pReplacementMaterial)); // merge in the input material from game replacementMaterial->mergeLegacyMaterial(input.getMaterialData()); + // mark material as replacement so we know how to handle sampler state + replacementMaterial->setReplacement(); // bind as a material override for this draw overrideMaterialData = &replacementMaterial.value(); } @@ -523,7 +525,7 @@ namespace dxvk { if (overrideMaterialData == nullptr) { // Note: Color texture used as mask texture for the Ray Portal - rayPortalMaterialData.emplace(RayPortalMaterialData { input.getMaterialData().getColorTexture(), texture2, static_cast(rayPortalTextureIndex), 1, 1, 0, 0.f,true, 1.f }); + rayPortalMaterialData.emplace(RayPortalMaterialData { input.getMaterialData().getColorTexture(), texture2, static_cast(rayPortalTextureIndex), 1, 1, 0, 0.f,true, 
1.f, 0, 0, 0 }); // Note: A bit dirty but since we use a pointer to the material data in processDrawCallState, we need a pointer to this locally created one on the // stack in a place that doesn't go out of scope without actually allocating any heap memory. @@ -537,8 +539,9 @@ namespace dxvk { const bool highlightUnsafeAnchor = RtxOptions::Get()->getHighlightUnsafeAnchorModeEnabled() && input.getGeometryData().indexBuffer.defined() && input.getGeometryData().vertexCount > input.getGeometryData().indexCount; if (highlightUnsafeAnchor) { - static MaterialData sHighlightMaterialData(OpaqueMaterialData(TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), - 0.f, 1.f, Vector3(0.2f, 0.2f, 0.2f), 1.0f, 0.1f, 0.1f, Vector3(0.46f, 0.26f, 0.31f), true, 1, 1, 0, false, false, 200.f, true, false, BlendType::kAlpha, false, AlphaTestType::kAlways, 0, 0.0f, Vector3(), 0.0f, Vector3(), 0.0f)); + static MaterialData sHighlightMaterialData(OpaqueMaterialData(TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), + 0.f, 1.f, Vector3(0.2f, 0.2f, 0.2f), 1.0f, 0.1f, 0.1f, Vector3(0.46f, 0.26f, 0.31f), true, 1, 1, 0, false, false, 200.f, true, false, BlendType::kAlpha, false, AlphaTestType::kAlways, 0, 0.0f, Vector3(), 0.0f, Vector3(), 0.0f, + lss::Mdl::Filter::Nearest, lss::Mdl::WrapMode::Repeat, lss::Mdl::WrapMode::Repeat)); overrideMaterialData = &sHighlightMaterialData; } @@ -640,8 +643,9 @@ namespace dxvk { overrideMaterialData = replacement.materialData; } if (highlightUnsafeReplacement) { - static MaterialData sHighlightMaterialData(OpaqueMaterialData(TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), - 0.f, 1.f, Vector3(0.2f, 0.2f, 0.2f), 1.f, 0.1f, 0.1f, Vector3(1.f, 0.f, 0.f), true, 1, 1, 0, false, false, 200.f, true, false, BlendType::kAlpha, false, AlphaTestType::kAlways, 0, 0.0f, Vector3(), 0.0f, 
Vector3(), 0.0f)); + static MaterialData sHighlightMaterialData(OpaqueMaterialData(TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), TextureRef(), + 0.f, 1.f, Vector3(0.2f, 0.2f, 0.2f), 1.f, 0.1f, 0.1f, Vector3(1.f, 0.f, 0.f), true, 1, 1, 0, false, false, 200.f, true, false, BlendType::kAlpha, false, AlphaTestType::kAlways, 0, 0.0f, Vector3(), 0.0f, Vector3(), 0.0f, + lss::Mdl::Filter::Nearest, lss::Mdl::WrapMode::Repeat, lss::Mdl::WrapMode::Repeat)); if (getGameTimeSinceStartMS() / 200 % 2 == 0) { overrideMaterialData = &sHighlightMaterialData; } @@ -806,7 +810,9 @@ namespace dxvk { uint64_t SceneManager::processDrawCallState(Rc ctx, const DrawCallState& drawCallState, const MaterialData* overrideMaterialData) { ScopedCpuProfileZone(); - const MaterialData& renderMaterialData = overrideMaterialData != nullptr ? *overrideMaterialData : drawCallState.getMaterialData(); + const bool usingOverrideMaterial = overrideMaterialData != nullptr; + const MaterialData& renderMaterialData = + usingOverrideMaterial ? *overrideMaterialData : drawCallState.getMaterialData(); if (renderMaterialData.getIgnored()) { return UINT64_MAX; } @@ -841,10 +847,24 @@ namespace dxvk { // Legacy and replacement materials should follow same filtering but due to lack of override capability per texture // legacy textures use original sampler to stay true to the original intent while replacements use more advanced filtering // for better quality by default. 
- uint32_t samplerIndex; - trackSampler(drawCallState.getMaterialData().getSampler(), (renderMaterialDataType != MaterialDataType::Legacy), samplerIndex); + Rc originalSampler = drawCallState.getMaterialData().getSampler(); // convenience variable for debug + Rc sampler = originalSampler; + const bool isLegacyMaterial = (renderMaterialDataType == MaterialDataType::Legacy); + // If the original sampler is valid and the new rendering material is not legacy type + // go ahead with patching and maybe merging the sampler states + if(originalSampler != nullptr && !isLegacyMaterial) { + DxvkSamplerCreateInfo samplerInfo = originalSampler->info(); // Use sampler create info struct as convenience + // Only merge prior to patching if this is a replacement material + if(renderMaterialData.isReplacement()) { + renderMaterialData.populateSamplerInfo(samplerInfo); + } + sampler = patchSampler(samplerInfo.magFilter, + samplerInfo.addressModeU, samplerInfo.addressModeV, samplerInfo.addressModeW, + samplerInfo.borderColor); + } + uint32_t samplerIndex = trackSampler(sampler); - if (renderMaterialDataType == MaterialDataType::Legacy || renderMaterialDataType == MaterialDataType::Opaque) { + if (isLegacyMaterial || renderMaterialDataType == MaterialDataType::Opaque) { uint32_t albedoOpacityTextureIndex = kSurfaceMaterialInvalidTextureIndex; uint32_t normalTextureIndex = kSurfaceMaterialInvalidTextureIndex; uint32_t tangentTextureIndex = kSurfaceMaterialInvalidTextureIndex; @@ -853,6 +873,9 @@ namespace dxvk { uint32_t metallicTextureIndex = kSurfaceMaterialInvalidTextureIndex; uint32_t emissiveColorTextureIndex = kSurfaceMaterialInvalidTextureIndex; uint32_t subsurfaceMaterialIndex = kSurfaceMaterialInvalidTextureIndex; + uint32_t subsurfaceTransmittanceTextureIndex = kSurfaceMaterialInvalidTextureIndex; + uint32_t subsurfaceThicknessTextureIndex = kSurfaceMaterialInvalidTextureIndex; + uint32_t subsurfaceSingleScatteringAlbedoTextureIndex = kSurfaceMaterialInvalidTextureIndex; float
anisotropy; float emissiveIntensity; @@ -950,13 +973,25 @@ namespace dxvk { ++m_activePOMCount; } - subsurfaceTransmittanceColor = opaqueMaterialData.getSubsurfaceTransmittanceColor(); subsurfaceMeasurementDistance = opaqueMaterialData.getSubsurfaceMeasurementDistance() * RtxOptions::SubsurfaceScattering::surfaceThicknessScale(); - subsurfaceSingleScatteringAlbedo = opaqueMaterialData.getSubsurfaceSingleScatteringAlbedo(); - subsurfaceVolumetricAnisotropy = opaqueMaterialData.getSubsurfaceVolumetricAnisotropy(); - if (RtxOptions::SubsurfaceScattering::enableThinOpaque() && subsurfaceMeasurementDistance > 0.0f) { + if (RtxOptions::SubsurfaceScattering::enableTextureMaps()) { + trackTexture(ctx, opaqueMaterialData.getSubsurfaceThicknessTexture(), subsurfaceThicknessTextureIndex, hasTexcoords); + } + + if (RtxOptions::SubsurfaceScattering::enableThinOpaque() && + (subsurfaceMeasurementDistance > 0.0f || subsurfaceTransmittanceTextureIndex != kSurfaceMaterialInvalidTextureIndex)) { + subsurfaceTransmittanceColor = opaqueMaterialData.getSubsurfaceTransmittanceColor(); + subsurfaceSingleScatteringAlbedo = opaqueMaterialData.getSubsurfaceSingleScatteringAlbedo(); + subsurfaceVolumetricAnisotropy = opaqueMaterialData.getSubsurfaceVolumetricAnisotropy(); + + if (RtxOptions::SubsurfaceScattering::enableTextureMaps()) { + trackTexture(ctx, opaqueMaterialData.getSubsurfaceTransmittanceTexture(), subsurfaceTransmittanceTextureIndex, hasTexcoords); + trackTexture(ctx, opaqueMaterialData.getSubsurfaceSingleScatteringAlbedoTexture(), subsurfaceSingleScatteringAlbedoTextureIndex, hasTexcoords); + } + const RtSubsurfaceMaterial subsurfaceMaterial( + subsurfaceTransmittanceTextureIndex, subsurfaceThicknessTextureIndex, subsurfaceSingleScatteringAlbedoTextureIndex, subsurfaceTransmittanceColor, subsurfaceMeasurementDistance, subsurfaceSingleScatteringAlbedo, subsurfaceVolumetricAnisotropy); subsurfaceMaterialIndex = m_surfaceMaterialExtensionCache.track(subsurfaceMaterial); } @@ 
-1014,8 +1049,7 @@ namespace dxvk { uint32_t maskTextureIndex2 = kSurfaceMaterialInvalidTextureIndex; trackTexture(ctx, rayPortalMaterialData.getMaskTexture2(), maskTextureIndex2, hasTexcoords, false); - uint32_t samplerIndex2; - trackSampler(drawCallState.getMaterialData().getSampler2(), false, samplerIndex2); + uint32_t samplerIndex2 = trackSampler(drawCallState.getMaterialData().getSampler2()); uint8_t rayPortalIndex = rayPortalMaterialData.getRayPortalIndex(); float rotationSpeed = rayPortalMaterialData.getRotationSpeed(); @@ -1090,21 +1124,35 @@ namespace dxvk { return m_findLegacyTexture->promise.get_future(); } - void SceneManager::trackSampler(Rc sampler, bool patchSampler, uint32_t& samplerIndex) { - samplerIndex = kSurfaceMaterialInvalidTextureIndex; - - if (sampler.ptr()) { - if (patchSampler) { - auto& resourceManager = m_device->getCommon()->getResources(); - const DxvkSamplerCreateInfo& originalInfo = sampler->info(); - - // Create a sampler to account for DLSS lod bias and any custom filtering overrides the user has set - // TODO: Note eventually we should support setting the filter mode (nearest/linear) for patched samplers based on material replacement data in USD. - sampler = resourceManager.getSampler(VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_LINEAR, originalInfo.addressModeU, originalInfo.addressModeV, originalInfo.addressModeW, originalInfo.borderColor, getTotalMipBias(), RtxOptions::Get()->getAnisotropicFilteringEnabled()); - } + SceneManager::SamplerIndex SceneManager::trackSampler(Rc sampler) { + if (sampler == nullptr) { + ONCE(Logger::warn("Found a null sampler. 
Fallback to linear-repeat")); + sampler = patchSampler( + VK_FILTER_LINEAR, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VkClearColorValue {}); + } + return m_samplerCache.track(sampler); + } - samplerIndex = m_samplerCache.track(sampler); - } + Rc SceneManager::patchSampler( const VkFilter filterMode, + const VkSamplerAddressMode addressModeU, + const VkSamplerAddressMode addressModeV, + const VkSamplerAddressMode addressModeW, + const VkClearColorValue borderColor) { + auto& resourceManager = m_device->getCommon()->getResources(); + // Create a sampler to account for DLSS lod bias and any custom filtering overrides the user has set + return resourceManager.getSampler( + filterMode, + VK_SAMPLER_MIPMAP_MODE_LINEAR, + addressModeU, + addressModeV, + addressModeW, + borderColor, + getTotalMipBias(), + RtxOptions::Get()->getAnisotropicFilteringEnabled()); } void SceneManager::addLight(const D3DLIGHT9& light) { @@ -1255,7 +1303,7 @@ namespace dxvk { assert(dataOffset == surfaceMaterialsGPUSize); assert(surfaceMaterialsGPUData.size() == surfaceMaterialsGPUSize); - ctx->updateBuffer(m_surfaceMaterialBuffer, 0, surfaceMaterialsGPUData.size(), surfaceMaterialsGPUData.data()); + ctx->writeToBuffer(m_surfaceMaterialBuffer, 0, surfaceMaterialsGPUData.size(), surfaceMaterialsGPUData.data()); } // Surface Material Extension Buffer @@ -1279,7 +1327,7 @@ namespace dxvk { assert(dataOffset == surfaceMaterialExtensionsGPUSize); assert(surfaceMaterialExtensionsGPUData.size() == surfaceMaterialExtensionsGPUSize); - ctx->updateBuffer(m_surfaceMaterialExtensionBuffer, 0, surfaceMaterialExtensionsGPUData.size(), surfaceMaterialExtensionsGPUData.data()); + ctx->writeToBuffer(m_surfaceMaterialExtensionBuffer, 0, surfaceMaterialExtensionsGPUData.size(), surfaceMaterialExtensionsGPUData.data()); } // Volume Material buffer @@ -1303,7 +1351,7 @@ namespace dxvk { assert(dataOffset == volumeMaterialsGPUSize); 
assert(volumeMaterialsGPUData.size() == volumeMaterialsGPUSize); - ctx->updateBuffer(m_volumeMaterialBuffer, 0, volumeMaterialsGPUData.size(), volumeMaterialsGPUData.data()); + ctx->writeToBuffer(m_volumeMaterialBuffer, 0, volumeMaterialsGPUData.size(), volumeMaterialsGPUData.data()); } } @@ -1357,4 +1405,52 @@ namespace dxvk { return {}; } + void SceneManager::submitExternalDraw(Rc ctx, ExternalDrawState&& state) { + if (m_externalSampler == nullptr) { + auto s = DxvkSamplerCreateInfo {}; + { + s.magFilter = VK_FILTER_LINEAR; + s.minFilter = VK_FILTER_LINEAR; + s.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + s.mipmapLodBias = 0.f; + s.mipmapLodMin = 0.f; + s.mipmapLodMax = 0.f; + s.useAnisotropy = VK_FALSE; + s.maxAnisotropy = 1.f; + s.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + s.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + s.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + s.compareToDepth = VK_FALSE; + s.compareOp = VK_COMPARE_OP_NEVER; + s.borderColor = VkClearColorValue {}; + s.usePixelCoord = VK_FALSE; + } + m_externalSampler = m_device->createSampler(s); + } + + { + state.drawCall.materialData.samplers[0] = m_externalSampler; + state.drawCall.materialData.samplers[1] = m_externalSampler; + } + { + const RtCamera& rtCamera = ctx->getCommonObjects()->getSceneManager().getCameraManager() + .getCamera(state.cameraType); + state.drawCall.transformData.worldToView = Matrix4 { rtCamera.getWorldToView() }; + state.drawCall.transformData.viewToProjection = Matrix4 { rtCamera.getViewToProjection() }; + state.drawCall.transformData.objectToView = state.drawCall.transformData.worldToView * state.drawCall.transformData.objectToWorld; + } + + for (const RasterGeometry& submesh : m_pReplacer->accessExternalMesh(state.mesh)) { + state.drawCall.geometryData = submesh; + state.drawCall.geometryData.cullMode = state.doubleSided ? 
VK_CULL_MODE_NONE : VK_CULL_MODE_BACK_BIT; + + const MaterialData* material = m_pReplacer->accessExternalMaterial(submesh.externalMaterial); + if (material != nullptr) { + state.drawCall.materialData.setHashOverride(material->getHash()); + } + + processDrawCallState(ctx, state.drawCall, material); + } + } + } // namespace nvvk diff --git a/src/dxvk/rtx_render/rtx_scene_manager.h b/src/dxvk/rtx_render/rtx_scene_manager.h index 76644b318..a63782875 100644 --- a/src/dxvk/rtx_render/rtx_scene_manager.h +++ b/src/dxvk/rtx_render/rtx_scene_manager.h @@ -113,6 +113,14 @@ class ResourceCache { SparseUniqueCache, SamplerHashFn, SamplerKeyEqual> m_samplerCache; }; +struct ExternalDrawState { + DrawCallState drawCall {}; + remixapi_MeshHandle mesh {}; + CameraType::Enum cameraType {}; + CategoryFlags categories {}; + bool doubleSided {}; +}; + // Scene manager is a super manager, it's the interface between rendering and world state // along with managing the operation of other caches, scene manager also manages the cache // directly for "SceneObject"'s - which are "unique meshes/geometry", which map 1-to-1 with @@ -130,6 +138,7 @@ class SceneManager : public CommonDeviceObject, public ResourceCache { void onDestroy(); void submitDrawState(Rc ctx, const DrawCallState& input, const MaterialData* overrideMaterialData); + void submitExternalDraw(Rc ctx, ExternalDrawState&& state); bool areReplacementsLoaded() const; bool areReplacementsLoading() const; @@ -142,7 +151,8 @@ class SceneManager : public CommonDeviceObject, public ResourceCache { Rc getVolumeMaterialBuffer() { return m_volumeMaterialBuffer; } Rc getSurfaceBuffer() const { return m_accelManager.getSurfaceBuffer(); } Rc getSurfaceMappingBuffer() const { return m_accelManager.getSurfaceMappingBuffer(); } - Rc getPrimitiveIDPrefixSumBuffer() const { return m_accelManager.getPrimitiveIDPrefixSumBuffer(); } + Rc getCurrentFramePrimitiveIDPrefixSumBuffer() const { return 
m_accelManager.getCurrentFramePrimitiveIDPrefixSumBuffer(); } + Rc getLastFramePrimitiveIDPrefixSumBuffer() const { return m_accelManager.getLastFramePrimitiveIDPrefixSumBuffer(); } Rc getBillboardsBuffer() const { return m_accelManager.getBillboardsBuffer(); } bool isPreviousFrameSceneAvailable() const { return m_previousFrameSceneAvailable && getSurfaceMappingBuffer().ptr() != nullptr; } @@ -176,6 +186,11 @@ class SceneManager : public CommonDeviceObject, public ResourceCache { CameraType::Enum processCameraData(const DrawCallState& input) { return m_cameraManager.processCameraData(input); } + void processExternalCamera(CameraType::Enum type, + const Matrix4& worldToView, + const Matrix4& viewToProjection) { + m_cameraManager.processExternalCamera(type, worldToView, viewToProjection); + } const CameraManager& getCameraManager() const { return m_cameraManager; } const RtCamera& getCamera() const { return m_cameraManager.getMainCamera(); } @@ -200,8 +215,10 @@ class SceneManager : public CommonDeviceObject, public ResourceCache { void triggerUsdCapture() const; bool isGameCapturerIdle() const; + using SamplerIndex = uint32_t; + void trackTexture(Rc ctx, TextureRef inputTexture, uint32_t& textureIndex, bool hasTexcoords, bool allowAsync = true); - void trackSampler(Rc sampler, bool patchSampler, uint32_t& samplerIndex); + [[nodiscard]] SamplerIndex trackSampler(Rc sampler); std::future findLegacyTextureHashBySurfaceMaterialIndex(uint32_t surfaceMaterialIndex); @@ -210,6 +227,12 @@ class SceneManager : public CommonDeviceObject, public ResourceCache { uint32_t frameId); std::optional> accessSurfaceMaterialIndexToHighlight(uint32_t frameId); + Rc patchSampler( const VkFilter filterMode, + const VkSamplerAddressMode addressModeU, + const VkSamplerAddressMode addressModeV, + const VkSamplerAddressMode addressModeW, + const VkClearColorValue borderColor); + private: enum class ObjectCacheState { @@ -288,6 +311,9 @@ class SceneManager : public CommonDeviceObject, public 
ResourceCache { }; std::optional m_findLegacyTexture {}; dxvk::mutex m_findLegacyTextureMutex{}; + + // TODO: expand to many different + Rc m_externalSampler = nullptr; }; } // namespace nvvk diff --git a/src/dxvk/rtx_render/rtx_terrain_baker.cpp b/src/dxvk/rtx_render/rtx_terrain_baker.cpp index 5de6c2b0d..2e1c9f37b 100644 --- a/src/dxvk/rtx_render/rtx_terrain_baker.cpp +++ b/src/dxvk/rtx_render/rtx_terrain_baker.cpp @@ -552,6 +552,7 @@ namespace dxvk { createTextureRef(ReplacementMaterialTextureType::Roughness), createTextureRef(ReplacementMaterialTextureType::Metallic), createTextureRef(ReplacementMaterialTextureType::Emissive), + TextureRef(), TextureRef(), TextureRef(), // SSS textures Material::Properties::roughnessAnisotropy(), Material::Properties::emissiveIntensity(), Vector3(1, 1, 1), // AlbedoConstant - unused since the AlbedoOpacity texture must be always present for baking @@ -575,7 +576,11 @@ namespace dxvk { Vector3(), // opaqueMaterialDefaults.subsurfaceTransmittanceColor 0.0f, // opaqueMaterialDefaults.subsurfaceMeasurementDistance Vector3(), // opaqueMaterialDefaults.subsurfaceSingleScatteringAlbedo - 0.0f)); // opaqueMaterialDefaults.subsurfaceVolumetricAnisotropy + 0.0f, // opaqueMaterialDefaults.subsurfaceVolumetricAnisotropy + lss::Mdl::Filter::Nearest, + lss::Mdl::WrapMode::Repeat, // U + lss::Mdl::WrapMode::Repeat // V + )); m_hasInitializedMaterialDataThisFrame = true; m_needsMaterialDataUpdate = false; diff --git a/src/dxvk/rtx_render/rtx_texture_manager.cpp b/src/dxvk/rtx_render/rtx_texture_manager.cpp index dd8c4c90a..06c1f3380 100644 --- a/src/dxvk/rtx_render/rtx_texture_manager.cpp +++ b/src/dxvk/rtx_render/rtx_texture_manager.cpp @@ -368,6 +368,21 @@ namespace dxvk { return it->second; } + { + VkFormatProperties properties = m_pDevice->adapter()->formatProperties(assetData->info().format); + + if (!(properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) || + !(properties.optimalTilingFeatures & 
VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) { + std::ostringstream formatStr; + formatStr << assetData->info().format; + + Logger::err(str::format( + "Ignoring replacement texture with unsupported format [", formatStr.str(), "]: ", + assetData->info().filename)); + return {}; + } + } + // Create managed texture auto texture = TextureUtils::createTexture(assetData, colorSpace); diff --git a/src/dxvk/rtx_render/rtx_types.h b/src/dxvk/rtx_render/rtx_types.h index ffa30a45c..b3fa299f4 100644 --- a/src/dxvk/rtx_render/rtx_types.h +++ b/src/dxvk/rtx_render/rtx_types.h @@ -33,6 +33,9 @@ #include #include +using remixapi_MaterialHandle = struct remixapi_MaterialHandle_T*; +using remixapi_MeshHandle = struct remixapi_MeshHandle_T*; + namespace dxvk { class RtCamera; @@ -167,6 +170,8 @@ struct RasterGeometry { AxisAlignedBoundingBox boundingBox; Future futureBoundingBox; + remixapi_MaterialHandle externalMaterial = nullptr; + template const XXH64_hash_t getHashForRule() const { return hashes.getHashForRule(); @@ -335,6 +340,10 @@ struct GeometryBufferData { return indexData[i * indexStride]; } + uint32_t getIndex32(uint32_t i) const { + return (uint32_t)indexData[i * indexStride]; + } + Vector3& getPosition(uint32_t index) const { return *(Vector3*) (positionData + index * positionStride); } @@ -485,6 +494,7 @@ struct DrawCallState { friend class SceneManager; friend struct D3D9Rtx; friend class TerrainBaker; + friend struct RemixAPIPrivateAccessor; bool finalizeGeometryHashes(); void finalizeGeometryBoundingBox(); diff --git a/src/dxvk/rtx_render/rtx_volume_manager.cpp b/src/dxvk/rtx_render/rtx_volume_manager.cpp index 4eb788748..4f5e9b5f2 100644 --- a/src/dxvk/rtx_render/rtx_volume_manager.cpp +++ b/src/dxvk/rtx_render/rtx_volume_manager.cpp @@ -72,6 +72,7 @@ namespace dxvk { // Note: Volumetric transmittance color option is in gamma space, so must be converted to linear for usage in the volumetric system. 
Vector3 volumetricTransmittanceColor{ sRGBGammaToLinear(RtxOptions::Get()->getVolumetricTransmittanceColor()) }; + // Note: Fall back to usual default in cases such as the "none" D3D fog mode, no fog remapping specified, or invalid values in the fog mode derivation // (such as dividing by zero). float volumetricTransmittanceMeasurementDistance = RtxOptions::Get()->getVolumetricTransmittanceMeasurementDistance(); @@ -127,7 +128,7 @@ namespace dxvk { if (fogState.density != 0.0f) { float const transmittanceColorLuminance{ sRGBLuminance(volumetricTransmittanceColor) }; - volumetricTransmittanceMeasurementDistance = -log(transmittanceColorLuminance) / fogState.density; + volumetricTransmittanceMeasurementDistance = -log(clamp(transmittanceColorLuminance, 0.f, MaxTransmittanceValue)) / fogState.density; // Todo: Scene scale stuff ignored for now because scene scale stuff is not actually functioning properly. Add back in if it's ever fixed. // Note: Convert transmittance measurement distance into our engine's units (from game-specific world units due to being derived // from the D3D9 side of things). This in effect is the same as dividing the density by the scene scale. 
@@ -142,6 +143,8 @@ namespace dxvk { // Calculate scattering and attenuation coefficients for the volume + volumetricTransmittanceColor = clamp(volumetricTransmittanceColor, Vector3(0.0f), Vector3(MaxTransmittanceValue)); + Vector3 const volumetricAttenuationCoefficient{ -log(volumetricTransmittanceColor.x) / volumetricTransmittanceMeasurementDistance, -log(volumetricTransmittanceColor.y) / volumetricTransmittanceMeasurementDistance, diff --git a/src/dxvk/rtx_render/rtx_volume_manager.h b/src/dxvk/rtx_render/rtx_volume_manager.h index 3d2824164..49e7cb018 100644 --- a/src/dxvk/rtx_render/rtx_volume_manager.h +++ b/src/dxvk/rtx_render/rtx_volume_manager.h @@ -36,6 +36,11 @@ class DxvkDevice; struct VolumeManager : public CommonDeviceObject { public: + // We'll be taking the log of transmittance, and so must protect against log(1) == 0, since this will be used in a division. + // Care must also be taken to not end up with a number that will break FP16 assumptions internal to volumetrics system. + // This number was found empirically. + static constexpr float MaxTransmittanceValue = 1.f - 1.f / 255.f; + VolumeManager(VolumeManager const&) = delete; VolumeManager& operator=(VolumeManager const&) = delete; diff --git a/src/dxvk/shaders/rtx/algorithm/geometry_resolver.slangh b/src/dxvk/shaders/rtx/algorithm/geometry_resolver.slangh index 5cabd8cb3..21c63d8c4 100644 --- a/src/dxvk/shaders/rtx/algorithm/geometry_resolver.slangh +++ b/src/dxvk/shaders/rtx/algorithm/geometry_resolver.slangh @@ -109,6 +109,7 @@ void geometryResolverOutputMiss( PrimarySurfaceFlags[pixelCoordinate] = 0; PrimaryDisocclusionThresholdMix[pixelCoordinate] = 0; PrimaryDepth[pixelCoordinate] = 0.f; + PrimaryObjectPicking[pixelCoordinate] = 0; // Note: Always write to stochastic buffers in the primary surface case. 
writeMissToGBuffer( @@ -289,7 +290,7 @@ void geometryResolverOutputSurface( PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryVirtualWorldShadingNormalPerceptualRoughness, PrimaryVirtualWorldShadingNormalPerceptualRoughnessDenoising, PrimaryAlbedo, PrimaryBaseReflectivity, - SharedMaterialData0, SharedMaterialData1, DisplacementTextureCoord, SharedSurfaceIndex); + SharedMaterialData0, SharedMaterialData1, SharedTextureCoord, SharedSurfaceIndex, SharedSubsurfaceData); } else { @@ -301,7 +302,7 @@ void geometryResolverOutputSurface( SecondaryWorldShadingNormal, SecondaryPerceptualRoughness, SecondaryVirtualWorldShadingNormalPerceptualRoughness, SecondaryVirtualWorldShadingNormalPerceptualRoughnessDenoising, SecondaryAlbedo, SecondaryBaseReflectivity, - SharedMaterialData0, SharedMaterialData1, DisplacementTextureCoord, SharedSurfaceIndex); + SharedMaterialData0, SharedMaterialData1, SharedTextureCoord, SharedSurfaceIndex, SharedSubsurfaceData); } if (cb.enableStochasticAlphaBlend && primarySurface) @@ -402,6 +403,12 @@ void geometryResolverVertexOutputDebugView( // textures which use a clamp to edge blend mode super well, but that is fine. 
storeInDebugView(pixelCoordinate, fract(surfaceInteraction.textureCoordinates)); + break; + case DEBUG_VIEW_TEXCOORDS_GRADIENT_X: + storeInDebugView(pixelCoordinate, surfaceInteraction.textureGradientX); + break; + case DEBUG_VIEW_TEXCOORDS_GRADIENT_Y: + storeInDebugView(pixelCoordinate, surfaceInteraction.textureGradientY); break; case DEBUG_VIEW_TEXCOORD_GENERATION_MODE: storeInDebugView(pixelCoordinate, uint(surface.texcoordGenerationMode)); @@ -592,8 +599,9 @@ void geometryResolverVertexOutputDebugView( { float lightObjectPdf = 1.0; vec3 baryCentrics = uintToBarycentrics(rayInteraction.barycentricCoordinates); + float triangleAreaUnused; LightSample lightSample = NEECacheUtils.calculateLightSampleFromTriangle( - rayInteraction.surfaceIndex, rayInteraction.primitiveIndex, baryCentrics.yz, lightObjectPdf, cameraGetWorldPosition(cb.camera), rayInteraction.coneRadius, ray.spreadAngle); + rayInteraction.surfaceIndex, rayInteraction.primitiveIndex, baryCentrics.yz, lightObjectPdf, cameraGetWorldPosition(cb.camera), rayInteraction.coneRadius, ray.spreadAngle, triangleAreaUnused); sampleRadiance = lightSample.radiance; } @@ -865,7 +873,7 @@ void geometryResolverVertexOutputDebugView( bool isThinOpaque = false; if (materialType == surfaceMaterialTypeOpaque) { - isThinOpaque = opaqueSurfaceMaterialInteractionCreate(polymorphicSurfaceMaterialInteraction).subsurfaceMaterialIndex != BINDING_INDEX_INVALID; + isThinOpaque = isSubsurfaceMaterial(opaqueSurfaceMaterialInteractionCreate(polymorphicSurfaceMaterialInteraction)); } // Green = thin opaque, Red = not thin opaque storeInDebugView(pixelCoordinate, isThinOpaque ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0)); @@ -1075,12 +1083,21 @@ void geometryResolverVertex( } else { - // Apply sky radiance on miss - // TODO: add jitter? 
- vec2 screenUV = cameraPixelCoordinateToScreenUV(cb.camera, geometryResolverState.pixelCoordinate); - emissiveRadiance += cb.skyBrightness * SkyMatte.Sample(screenUV); + if(cb.domeLightArgs.active) + { + emissiveRadiance += cb.domeLightArgs.radiance * sampleDomeLightTexture(LinearWrapSampler, geometryResolverState.direction, cb.domeLightArgs.textureIndex, cb.domeLightArgs.worldToLightTransform); + } + else + { + // Apply sky radiance on miss + // TODO: add jitter? + vec2 screenUV = cameraPixelCoordinateToScreenUV(cb.camera, geometryResolverState.pixelCoordinate); + emissiveRadiance += cb.skyBrightness * SkyMatte.Sample(screenUV); + } } + imageStore(PrimaryObjectPicking, geometryResolverState.pixelCoordinate, surface.objectPickingValue); + // Handle attenuation and emissive radiance from the Resolve function // Note: Emissive radiance handled first so objects with opacity do not attenuate themselves. @@ -1549,8 +1566,15 @@ void geometryPSRResolverVertex( } else { - // Output radiance from sky probe - emissiveRadiance += cb.skyBrightness * SkyProbe.Sample(ray.direction); + if(cb.domeLightArgs.active) + { + emissiveRadiance += cb.domeLightArgs.radiance * sampleDomeLightTexture(LinearWrapSampler, ray.direction, cb.domeLightArgs.textureIndex, cb.domeLightArgs.worldToLightTransform); + } + else + { + // Output radiance from sky probe + emissiveRadiance += cb.skyBrightness * SkyProbe.Sample(ray.direction); + } } // Note: Emissive radiance handled first so objects with opacity do not attenuate themselves. 
diff --git a/src/dxvk/shaders/rtx/algorithm/integrator.slangh b/src/dxvk/shaders/rtx/algorithm/integrator.slangh index b3a322ad3..60d0bacab 100644 --- a/src/dxvk/shaders/rtx/algorithm/integrator.slangh +++ b/src/dxvk/shaders/rtx/algorithm/integrator.slangh @@ -224,9 +224,7 @@ bool evalNEESecondary( uint8_t rayMask = OBJECT_MASK_OPAQUE | (objectMask & OBJECT_MASK_ALL_DYNAMIC); if (cb.enableIndirectTranslucentShadows) rayMask |= OBJECT_MASK_TRANSLUCENT; - // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubSurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); VisibilityResult visibility = traceVisibilityRay(minimalSurfaceInteraction, lightSample.position, rayMask, @@ -234,7 +232,7 @@ bool evalNEESecondary( sampledTransportPortalIndex, portalSpace, minimalRayInteraction.coneRadius, viewRay.spreadAngle, 0.98, isViewModelSurface, false, surfaceIndex, texcoords, - isSubSurface, opaqueSurfaceMaterialInteraction.shadingNormal); + isSubsurface, opaqueSurfaceMaterialInteraction.shadingNormal); if (visibility.hasOpaqueHit) { @@ -265,8 +263,7 @@ void evaluateUnshadowedLight( out vec3 specularLight) { if (dot(inputDirection, opaqueSurfaceMaterialInteraction.shadingNormal) < 0.0 && - (!cb.thinOpaqueEnable || - opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex == BINDING_INDEX_INVALID)) + (!cb.thinOpaqueEnable || !isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction))) { diffuseLight = specularLight = 0; return; diff --git a/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh b/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh index 18c8f7f0b..c8d5970fd 100644 --- a/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh +++ b/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh @@ -65,9 +65,7 @@ void evalNEEPrimary( const bool enablePOM = cb.pomEnableDirectLighting && 
opaqueSurfaceMaterialInteractionHasHeightTexture(opaqueSurfaceMaterialInteraction); const uint16_t primarySurfaceIndex = enablePOM ? uint16_t(SharedSurfaceIndex[pixelCoordinate]) : BINDING_INDEX_INVALID; - // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubSurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); const VisibilityResult visibility = traceVisibilityRay( minimalSurfaceInteraction, @@ -75,8 +73,8 @@ void evalNEEPrimary( visibilityModeAccurateHitDistance | visibilityModeEnableTranslucentMaterials, sampledTransportPortalIndex, portalSpace, minimalRayInteraction.coneRadius, viewRay.spreadAngle, 1.0, isViewModelSurface, false, - primarySurfaceIndex, DisplacementTextureCoord[pixelCoordinate], - isSubSurface, opaqueSurfaceMaterialInteraction.shadingNormal); + primarySurfaceIndex, SharedTextureCoord[pixelCoordinate], + isSubsurface, opaqueSurfaceMaterialInteraction.shadingNormal); hitDistance = visibility.hitDistance; hasOpaqueHit = visibility.hasOpaqueHit; @@ -344,9 +342,6 @@ void integrateDirectPath( lightSample, inverseSelectionPdf, lightIdx); } - SubsurfaceMaterial subSurfaceMaterial; - const bool sssEnable = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); - // Evaluate NEE if (rtxdiLightSampleValid || risLightSampleValid) { diff --git a/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh b/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh index 3e53556d5..7527e5ea1 100644 --- a/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh +++ b/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh @@ -156,19 +156,16 @@ bool sampleLightNeeCache(inout RAB_RandomSamplerState rtxdiRNG, inout RNG random out uint16_t lightIdx) { NEECell cell = NEECache.getCell(neeCellOffset); - int3 cellID = 
NEECache.offsetToCell(neeCellOffset); - vec3 cellCenter = NEECache.cellToCenterPoint(cellID); - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubsurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); // Note: We are NOT taking transmission into consideration, because calculating probability of transmission have some costs. // Also, it's approximate importance and no need to be exact, we just skip the calculation here for better performance. - float16_t specularRatio = calcBt709Luminance(opaqueSurfaceMaterialInteraction.baseReflectivity) / calcBt709Luminance(opaqueSurfaceMaterialInteraction.albedo + opaqueSurfaceMaterialInteraction.baseReflectivity); vec2 uniformRandomNumber = vec2(getNextSampleBlueNoise(randomState), getNextSampleBlueNoise(randomState)); - cell.sampleLightCandidate(rtxdiRNG, uniformRandomNumber, cellCenter, + + cell.sampleLightCandidate(rtxdiRNG, uniformRandomNumber, surfaceInteraction.position, viewDirection, opaqueSurfaceMaterialInteraction.shadingNormal, specularRatio, opaqueSurfaceMaterialInteraction.isotropicRoughness, isSubsurface, lightIdx, invSelectionPdf); @@ -322,7 +319,14 @@ void integratePathVertex( // Note: True misses going out into infinity will have no hit in the ray interaction and the continue resolving flag set to false. This is in contrast to misses // which may require further resolving which may be needed in cases of skipping specific pieces of geometry while still wishing to continue traversal. 
- emissiveRadiance += cb.skyBrightness * SkyProbe.Sample(pathState.direction); + if(cb.domeLightArgs.active) + { + emissiveRadiance += cb.domeLightArgs.radiance * sampleDomeLightTexture(LinearWrapSampler, pathState.direction, cb.domeLightArgs.textureIndex, cb.domeLightArgs.worldToLightTransform); + } + else + { + emissiveRadiance += cb.skyBrightness * SkyProbe.Sample(pathState.direction); + } } // Handle emissive radiance and attenuation from the Resolve function @@ -377,27 +381,20 @@ void integratePathVertex( if(materialType == surfaceMaterialTypeOpaque || materialType == surfaceMaterialTypeTranslucent) { #endif // SURFACE_MATERIAL_RESOLVE_TYPE_RAY_PORTAL - if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && any(emissiveLight) > 0) - { - // Create a NEE Cache task for emissive triangles. - // Both light task and emissive triangle task are using the same buffer, skip some pixels so that light tasks on them won't be overwritten by triangle tasks. - if (any((pathState.pixelCoordinate & 0x3) != 1)) - { - ThreadTask threadTask = ThreadTask.createFromTriangleTask(pathState.neeCacheCellOffset, rayInteraction.surfaceIndex, rayInteraction.primitiveIndex); - NEECache.storeThreadTask(pathState.pixelCoordinate, threadTask); - } - } - bool isNeeEnabledOnBounce = (pathState.bounceIteration == 1 && cb.neeCacheArgs.enableOnFirstBounce) || (pathState.bounceIteration > 1 && cb.neeCacheArgs.enableModeAfterFirstBounce != NeeEnableMode::None && pathState.enableNeeCacheAfterFirstBounce); if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && isNeeEnabledOnBounce && any(emissiveLight > 0) && pathState.neeCacheCellOffset != NEECell.s_invalidOffset) { uint cellOffset = pathState.neeCacheCellOffset; - int3 cellID = NEECache.offsetToCell(pathState.neeCacheCellOffset); - NEECell cell = NEECache.getCell(cellID); - uint2 task = uint2(rayInteraction.surfaceIndex, rayInteraction.primitiveIndex); + NEECell cell = NEECache.getCell(cellOffset); + // Insert new task + float16_t taskRadiance = 
calcBt709Luminance(emissiveLight * pathState.throughput); + int prefixTask = NEECacheUtils.convertIDToPrefixSumID(rayInteraction.surfaceIndex, rayInteraction.primitiveIndex, PrimitiveIDPrefixSum); + float16_t randomOffset = getNextSampleBlueNoise(randomState); + cell.insertSlotTask(prefixTask, taskRadiance, randomOffset, false); + uint2 task = uint2(rayInteraction.surfaceIndex, rayInteraction.primitiveIndex); float16_t lastSampleThreshold = 0; for (int i = 0; i < cell.getCandidateCount(); ++i) { @@ -533,14 +530,14 @@ void integratePathVertex( if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable) { rtxdiRNG = RAB_InitRandomSampler(pathState.pixelCoordinate, cb.frameIdx, pathState.bounceIteration); - vec3 jitterOffset = vec3(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG)); - pathState.neeCacheCellOffset = NEECache.pointToOffset(surfaceInteraction.position, true, jitterOffset); + uint jitter = sampleUniformIntRng(rtxdiRNG); + pathState.neeCacheCellOffset = NEECache.pointToOffset(surfaceInteraction.position, surfaceInteraction.triangleNormal, jitter); } if (!isWithinGbuffer) { // Select and sample a light for NEE via RIS - if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && cb.neeCacheArgs.enableAnalyticalLight && NEECache.isInsideCache(surfaceInteraction.position)) + if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && cb.neeCacheArgs.enableAnalyticalLight) { lightSampleValid = sampleLightNeeCache(rtxdiRNG, randomState, pathState.neeCacheCellOffset, surfaceInteraction, opaqueSurfaceMaterialInteraction, rayInteraction.viewDirection, @@ -560,13 +557,15 @@ void integratePathVertex( evaluateUnshadowedLight(lightSample, opaqueSurfaceMaterialInteraction, rayInteraction, normalize(lightSample.position - surfaceInteraction.position), diffuseLight, specularLight); + uint lightTask = lightIdx; if(NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && cb.neeCacheArgs.enableModeAfterFirstBounce != NeeEnableMode::None && pathState.enableNeeCacheAfterFirstBounce) 
{ NEECell cell = NEECache.getCell(pathState.neeCacheCellOffset); if (cell.isValid() && cell.getCandidateCount() > 0) { vec3 randomNumber = vec3(RAB_GetNextRandom(rtxdiRNG)); - LightSample lightSampleTriangle = cell.getLightSample(randomNumber, surfaceInteraction.position, rayInteraction.coneRadius, pathState.coneSpreadAngle, true); + uint triangleID; + LightSample lightSampleTriangle = cell.getLightSample(randomNumber, surfaceInteraction.position, rayInteraction.coneRadius, pathState.coneSpreadAngle, triangleID, true); // Calculate cached triangle NEE sample's MIS light contribution vec3 diffuseLightTriangle, specularLightTriangle; @@ -583,6 +582,7 @@ void integratePathVertex( if (r * (weightLight + weightTriangle) > weightLight) { lightIdx = kInvalidLightIdx; + lightTask = triangleID; lightSample = lightSampleTriangle; diffuseLight = diffuseLightTriangle; specularLight = specularLightTriangle; @@ -609,10 +609,12 @@ void integratePathVertex( // Create a NEE Cache task for lights with non-zero contributions. // Both light task and emissive triangle task are using the same buffer, skip some pixels so that triangle tasks on them won't be overwritten by light tasks. 
- if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && any(diffuseLight + specularLight > 0) && lightIdx != kInvalidLightIdx && any((pathState.pixelCoordinate & 0x3) != 0)) + if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable && any(diffuseLight + specularLight > 0) && all((pathState.pixelCoordinate & 0x3) != 1)) { - ThreadTask task = ThreadTask.createFromLightTask(pathState.neeCacheCellOffset, lightIdx); - NEECache.storeThreadTask(pathState.pixelCoordinate, task); + NEECell cell = NEECache.getCell(pathState.neeCacheCellOffset); + float16_t accumulateValue = calcBt709Luminance(diffuseLight + specularLight); + float16_t randomOffset = getNextSampleBlueNoise(randomState); + cell.insertSlotTask(lightTask, accumulateValue, randomOffset, lightIdx != kInvalidLightIdx); } vec3 neeLight = diffuseLight + specularLight; @@ -890,9 +892,9 @@ void integrateIndirectPath( if (NEE_CACHE_ENABLE && cb.neeCacheArgs.enable) { RAB_RandomSamplerState rtxdiRNG = RAB_InitRandomSampler(pathState.pixelCoordinate, cb.frameIdx, pathState.bounceIteration); - vec3 jitterOffset = vec3(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG)); - pathState.neeCacheCellOffset = NEECache.pointToOffset(rayOrigin, true, jitterOffset); - NEECache.storeThreadTask(pixelCoordinate, ThreadTask.createEmpty()); + uint jitter = sampleUniformIntRng(rtxdiRNG); + MinimalSurfaceInteraction gbuffer = minimalSurfaceInteractionReadFromGBuffer(pixelCoordinate, indirectPathTextures.PrimaryWorldPositionWorldTriangleNormal); + pathState.neeCacheCellOffset = NEECache.pointToOffset(rayOrigin, gbuffer.triangleNormal, jitter); } if (cb.enableReSTIRGI && geometryFlags.primarySelectedIntegrationSurface) @@ -931,7 +933,7 @@ void integrateIndirectPath( const float pomThroughput = opaqueSurfaceMaterialInteractionCalcHeightThroughput( minimalSurfaceInteraction, pathState.direction, opaqueSurfaceMaterial.heightTextureIndex, - opaqueSurfaceMaterial.samplerIndex, DisplacementTextureCoord[pixelCoordinate], 
opaqueSurfaceMaterial.displaceIn + opaqueSurfaceMaterial.samplerIndex, SharedTextureCoord[pixelCoordinate], opaqueSurfaceMaterial.displaceIn ); pathState.throughput = pathState.throughput * pomThroughput; diff --git a/src/dxvk/shaders/rtx/algorithm/lighting.slangh b/src/dxvk/shaders/rtx/algorithm/lighting.slangh index db2a5a4de..039785ced 100644 --- a/src/dxvk/shaders/rtx/algorithm/lighting.slangh +++ b/src/dxvk/shaders/rtx/algorithm/lighting.slangh @@ -90,9 +90,7 @@ bool sampleLightBasicRIS( float selectedLightTargetPdf = 0.0f; float weightSum = 0.0f; - // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubSurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); [unroll] for (uint lightType = 0; lightType < lightTypeCount; ++lightType) @@ -123,7 +121,7 @@ bool sampleLightBasicRIS( // can effectively unroll the loop and remove the switch from decodedPolymorphicLightCalcWeight DecodedPolymorphicLight decodedPolymorphicLight = decodePolymorphicLight(memoryPolymorphicLight); decodedPolymorphicLight.polymorphicType = lightType; - const float targetPdf = decodedPolymorphicLightCalcWeight(decodedPolymorphicLight, surfaceInteraction, isSubSurface); + const float targetPdf = decodedPolymorphicLightCalcWeight(decodedPolymorphicLight, surfaceInteraction, isSubsurface); // Accumulate the light's weight diff --git a/src/dxvk/shaders/rtx/algorithm/nee_cache.h b/src/dxvk/shaders/rtx/algorithm/nee_cache.h index b353b224a..504761b79 100644 --- a/src/dxvk/shaders/rtx/algorithm/nee_cache.h +++ b/src/dxvk/shaders/rtx/algorithm/nee_cache.h @@ -26,17 +26,11 @@ #include "rtx/algorithm/rtxdi/rtxdi.slangh" #ifdef UPDATE_NEE_CACHE -#define NEE_CACHE_READ_TASK 1 -#define NEE_CACHE_WRITE_TASK 1 #define NEE_CACHE_WRITE_SAMPLE 1 #define NEE_CACHE_WRITE_CANDIDATE 1 -#define NEE_CACHE_WRITE_THREAD_TASK 0 #else -#define 
NEE_CACHE_READ_TASK 0 -#define NEE_CACHE_WRITE_TASK 0 #define NEE_CACHE_WRITE_SAMPLE 0 #define NEE_CACHE_WRITE_CANDIDATE 0 -#define NEE_CACHE_WRITE_THREAD_TASK 1 #endif struct NEESample @@ -45,6 +39,7 @@ struct NEESample float pdf; f16vec3 normal; f16vec3 radiance; + uint triangleID; NeeCache_PackedSample pack() { @@ -54,6 +49,7 @@ struct NEESample packed.lightInfo.x = f32tof16(radiance.x) | (f32tof16(radiance.y) << 16); packed.lightInfo.y = f32tof16(radiance.z); packed.lightInfo.z = asuint(pdf); + packed.lightInfo.w = triangleID; return packed; } @@ -65,6 +61,7 @@ struct NEESample radiance.y = f16tof32(packed.lightInfo.x >> 16); radiance.z = f16tof32(packed.lightInfo.y & 0xffff); pdf = asfloat(packed.lightInfo.z); + triangleID = packed.lightInfo.w; } static NEESample createEmpty() @@ -74,6 +71,7 @@ struct NEESample sample.normal = float3(0.f, 0.f, -1.f); sample.radiance = float3(0.f); sample.pdf = 0.f; + sample.triangleID = 0; return sample; } @@ -101,44 +99,36 @@ struct NEELightCandidate static float getOffsetRange() { - const float rangeCellCount = 16; - return cb.neeCacheArgs.range * (rangeCellCount / NEE_CACHE_PROBE_RESOLUTION); - } - - static float getOffsetDelta() - { - return getOffsetRange() / 128.0; + // Normalization factor for offset, in order to adapt to different scene scale. + // The 0.1 factor is based on experiment. + return cb.neeCacheArgs.minRange * 0.1; } static uint encodeOffset(vec3 offset) { + // Encode light position offset to the camera center. + // Put the encoding/decoding functions here because it's specific to NEE Cache. 
float range = getOffsetRange(); - float maxOffset = max(max(abs(offset.x), abs(offset.y)), abs(offset.z)); - float scaleFactor = max(maxOffset, range); - offset /= scaleFactor; - vec3 uvw = offset * 0.5 + 0.5; - - uint encodedOffset = 0; - encodedOffset |= uint(uvw.x * 255 + 0.5); - encodedOffset <<= 8; - encodedOffset |= uint(uvw.y * 255 + 0.5); - encodedOffset <<= 8; - encodedOffset |= uint(uvw.z * 255 + 0.5); - return encodedOffset; + float offsetLength = length(offset); + offset /= (offsetLength + 1e-10); + vec2 uv = sphereDirectionToSignedOctahedral(offset); + uv = (uv + 1) * 0.5; + uint result = float16BitsToUint16(min(offsetLength / range, float16Max)); + result |= (uint(uv.x * 255.0) << 24); + result |= (uint(uv.y * 255.0) << 16); + return result; } static vec3 decodeOffset(uint encodedOffset) { + // Decode light position offset to the camera center. + // Put the encoding/decoding functions here because it's specific to NEE Cache. float range = getOffsetRange(); - vec3 uvw; - uvw.z = (encodedOffset & 0xff); - encodedOffset >>= 8; - uvw.y = (encodedOffset & 0xff); - encodedOffset >>= 8; - uvw.x = (encodedOffset & 0xff); - uvw /= 255.0; - uvw = uvw * 2.0 - 1.0; - return uvw * range; + vec2 uv = vec2(encodedOffset >> 24, (encodedOffset >> 16) & 0xff) / 255.0; + uv = uv * 2.0 - 1.0; + vec3 offset = signedOctahedralToSphereDirection(uv); + float offsetLength = float(uint16BitsToHalf(encodedOffset & 0xffff)) * range; + return offset * offsetLength; } bool isValid() @@ -179,19 +169,7 @@ struct NEELightCandidate [mutating] void setOffset(vec3 offset) { - uint encodedOffset = encodeOffset(offset); - m_data.y = (m_data.y & 0xff000000) | encodedOffset; - } - - uint getAge() - { - return (m_data.y >> 24) & 0xff; - } - - [mutating] void setAge(uint age) - { - age = min(age, 255); - m_data.y = (m_data.y & 0xffffff) | (age << 24); + m_data.y = encodeOffset(offset); } static NEELightCandidate create(uint lightIdx) @@ -199,7 +177,6 @@ struct NEELightCandidate 
NEELightCandidate nee; nee.m_data = 0; nee.setLightID(lightIdx); - nee.setAge(0); nee.setOffset(0.0); nee.setRadiance(0); return nee; @@ -305,7 +282,7 @@ struct NEECell uint getTaskBaseAddress() { - return m_offset * (NEE_CACHE_ELEMENTS * NEE_CACHE_ELEMENT_SIZE); + return m_offset * NEE_CACHE_TASK_COUNT * NEE_CACHE_TASK_SIZE; } uint getTaskAddress(uint idx) @@ -313,6 +290,11 @@ struct NEECell return getTaskBaseAddress() + idx * NEE_CACHE_TASK_SIZE; } + uint getHashTaskAddress(uint idx) + { + return getTaskAddress(idx) + NEE_CACHE_HASH_TASK_BASE; + } + uint getCandidateAddress(uint idx) { return getBaseAddress() + (idx + 1) * NEE_CACHE_ELEMENT_SIZE; @@ -330,113 +312,92 @@ struct NEECell bool isValid() { return m_offset != NEECell.s_invalidOffset; } -#if NEE_CACHE_READ_TASK - int getTaskCount() + static uint getTaskHash(uint task) { - int count = 0; - for (int i = 0; i < getMaxTaskCount(); ++i) - { - uint taskData = NeeCacheTask.Load(getTaskAddress(i)); - if (taskData != NEE_CACHE_EMPTY_TASK) - { - count++; - } - } - return count; + return task; } - uint getTask(int idx) + static uint getBinHash(uint x) { - int count = 0; - for (int i = 0; i < getMaxTaskCount(); ++i) - { - uint taskData = NeeCacheTask.Load(getTaskAddress(i)); - if (taskData == NEE_CACHE_EMPTY_TASK) - { - continue; - } - if (count == idx) - { - return taskData & 0xffffff; - } - count++; - } - return NEE_CACHE_EMPTY_TASK; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return x & 0xf; } - uint getTaskFromIdx(int idx) + static uint getSlotBinHash(uint x) { - uint taskData = NeeCacheTask.Load(getTaskAddress(idx)); - return taskData & 0xffffff; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = ((x >> 16) ^ x) * 0x45d9f3b; + x = (x >> 16) ^ x; + return x & 0x1f; } -#endif - static uint getTaskHash(uint task) + uint2 getSlotTaskValue(int index) { - return task; + int taskAddress = getTaskAddress(index); + return NeeCacheTask.Load2(taskAddress); } - static uint 
getBinHash(uint x) + void setSlotTaskValue(int index, uint2 value) { - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = ((x >> 16) ^ x) * 0x45d9f3b; - x = (x >> 16) ^ x; - return x & 0xf; + int taskAddress = getTaskAddress(index); + NeeCacheTask.Store2(taskAddress, value); } -#if NEE_CACHE_WRITE_TASK - void setTaskFromIdx(int idx, uint task, uint value) { - task |= (value << 24); - NeeCacheTask.Store(getTaskAddress(idx), task); + uint2 getLightSlotTaskValue(int index) + { + int taskAddress = getTaskAddress(index + 16); + return NeeCacheTask.Load2(taskAddress); } - void clearTasks() { - for (int i = 0; i < getMaxTaskCount(); ++i) { - NeeCacheTask.Store(getTaskAddress(i), NEE_CACHE_EMPTY_TASK); - } + void setLightSlotTaskValue(int index, uint2 value) + { + int taskAddress = getTaskAddress(index + 16); + NeeCacheTask.Store2(taskAddress, value); } - bool insertTask(uint task, uint value) + uint2 getHashSlotTaskValue(int index) { - uint index = getBinHash(task + cb.frameIdx); - int taskAddress = getTaskAddress(index); - task |= (value << 24); + int taskAddress = getHashTaskAddress(index); + return NeeCacheTask.Load2(taskAddress); + } - // Remove duplicated tasks - uint oldTask = NeeCacheTask.Load(taskAddress); - if (oldTask == task || (oldTask != NEE_CACHE_EMPTY_TASK && getTaskHash(oldTask) >= getTaskHash(task))) - { - return false; - } + void setHashSlotTaskValue(int index, uint2 value) + { + int taskAddress = getHashTaskAddress(index); + NeeCacheTask.Store2(taskAddress, value); + } - // Insert task with the largest hash value - uint expectTask = NEE_CACHE_EMPTY_TASK; - uint insertTask = task; - while(true) - { - uint originalTask; - NeeCacheTask.InterlockedCompareExchange(taskAddress, expectTask, insertTask, originalTask); - if (originalTask == expectTask) - { - // successfully inserted - return true; - } + static bool isLightTask(uint2 value) + { + return (value.x & (1 << 24)) != 0; + } - // Only insert a task when its hash is larger - // Because the value is in high bits, 
tasks with higher values will have higher priority - uint insertTaskHash = getTaskHash(insertTask); - uint originalTaskHash = getTaskHash(originalTask); - if (originalTaskHash >= insertTaskHash) - { - return false; - } + void insertSlotTask(uint task, float16_t accumulateValue, float16_t randomOffset, bool isLightTask) { + uint index = getSlotBinHash(task + cb.frameIdx); + int taskAddress = getHashTaskAddress(index); + uint sortValueI = firstbithigh(uint(min(accumulateValue, 50) / 0.001)); + task |= (sortValueI << 25) | (isLightTask ? (1 << 24) : 0); - // Prefare for next insertion - expectTask = originalTask; + // Clamp min/max value before accumulation to improve stability. + // The min value is required because floating point atomics is not supported on all platforms, + // so quantization is necessary to convert floating point values to integers. + // The max value is required to suppress the impact from fireflies, otherwise a firefly may inject a + // useless triangle / light to the cache. 
+ const float16_t minValue = 0.05; + const float16_t maxValue = 5000; + int accumulateValueI = clamp(accumulateValue, 0.0, maxValue) / minValue + randomOffset; + + if (accumulateValueI == 0) + { + return; } - return false; + + uint originalValue; + NeeCacheTask.InterlockedMax(taskAddress, task, originalValue); + NeeCacheTask.InterlockedAdd(taskAddress + 4, accumulateValueI, originalValue); } -#endif int getSampleAddress(int i) { @@ -487,19 +448,15 @@ struct NEECell float calculateLightCandidateWeight(NEELightCandidate candidate, vec3 cellCenter, vec3 surfacePoint, f16vec3 viewDirection, f16vec3 normal, float16_t specularRatio, float16_t roughness, bool isSubsurface) { - vec3 candidatePosition = candidate.getOffset() + cellCenter; - f16vec3 inputDirection = normalize(candidatePosition - surfacePoint + normal * NEELightCandidate.getOffsetDelta()); + vec3 candidateOffset = candidate.getOffset(); + f16vec3 inputDirection = normalize(candidateOffset + cellCenter + normal * length(candidateOffset) * 0.01 - surfacePoint); // Use a simplified GGX model to calculate light contribution - // Offset diffuse so that light at grazing angles will not be culled due to low precision input direction. - const float16_t cosOffset = 0.3h; - - float16_t ndoti = (dot(inputDirection, normal) + cosOffset) / (float16_t(1) + cosOffset); + float16_t ndoti = dot(inputDirection, normal); float16_t diffuseTerm = !isSubsurface ? 
(1.0 - specularRatio) / pi : (1.0 - specularRatio) / twoPi; float specularTerm = 0.0f; - if (!isSubsurface || ndoti > 0.0h) - { + if (!isSubsurface || ndoti > 0.0h) { ndoti = saturate(ndoti); // The specular term consists of there parts: D, G, F @@ -510,8 +467,7 @@ struct NEECell f16vec3 halfVector = normalize(inputDirection + viewDirection); float ndotm = saturate(dot(halfVector, normal)); specularTerm = specularRatio * evalGGXNormalDistributionIsotropic(roughness, ndotm) * cb.neeCacheArgs.specularFactor * 0.25; - } - else // isSubsurface && ndoti < 0 + } else // isSubsurface && ndoti < 0 { ndoti = -ndoti; } @@ -520,11 +476,13 @@ struct NEECell return radiance * (diffuseTerm + specularTerm) * ndoti; } - void calculateLightCandidateNormalizedWeight(int ithCandidate, vec3 cellCenter, vec3 surfacePoint, f16vec3 viewDirection, f16vec3 normal, float16_t specularRatio, float16_t roughness, bool isSubsurface, out float pdf) + void calculateLightCandidateNormalizedWeight(int ithCandidate, vec3 surfacePoint, f16vec3 viewDirection, f16vec3 normal, float16_t specularRatio, float16_t roughness, bool isSubsurface, out float pdf) { int count = getLightCandidateCount(); float totalWeight = 0; float chosenWeight = 0; + vec3 cellCenter = NEECache.getCenter(); + pdf = 0.0; for (int i = 0; i < count; ++i) { NEELightCandidate candidate = getLightCandidate(i); @@ -538,7 +496,7 @@ struct NEECell pdf = chosenWeight / totalWeight; } - void sampleLightCandidate(inout RAB_RandomSamplerState rtxdiRNG, vec2 uniformRandomNumber, vec3 cellCenter, vec3 surfacePoint, f16vec3 viewDirection, f16vec3 normal, float16_t specularRatio, float16_t roughness, bool isSubsurface, inout uint16_t lightIdx, out float invPdf) + void sampleLightCandidate(inout RAB_RandomSamplerState rtxdiRNG, vec2 uniformRandomNumber, vec3 surfacePoint, f16vec3 viewDirection, f16vec3 normal, float16_t specularRatio, float16_t roughness, bool isSubsurface, inout uint16_t lightIdx, out float invPdf) { int lightCount = 
cb.lightRanges[lightTypeCount-1].offset + cb.lightRanges[lightTypeCount-1].count; uint uniformLightIdx = clamp(uniformRandomNumber.y * lightCount, 0, lightCount-1); @@ -549,6 +507,7 @@ struct NEECell float totalWeight = 0; float chosenWeight = 0; float uniformWeight = 0; + vec3 cellCenter = NEECache.getCenter(); for (int i = 0; i < count; ++i) { NEELightCandidate candidate = getLightCandidate(i); @@ -647,7 +606,7 @@ struct NEECell return NEE_CACHE_ELEMENTS; } - LightSample getLightSample(vec3 randomNumber, vec3 position, float16_t coneRadius, float16_t coneSpreadAngle, bool useCachedSamples = true) + LightSample getLightSample(vec3 randomNumber, vec3 position, float16_t coneRadius, float16_t coneSpreadAngle, out uint triangleID, bool useCachedSamples = true) { LightSample lightSampleTriangle; if(useCachedSamples) @@ -656,6 +615,7 @@ struct NEECell NEESample sample = getSample(sampleIdx); lightSampleTriangle = sample.convertToLightSample(); lightSampleTriangle.solidAnglePdf *= NEECacheUtils.calculateLightSamplingSolidAnglePDF(1.0, lightSampleTriangle.position, lightSampleTriangle.normal, position); + triangleID = sample.triangleID; } else { @@ -664,197 +624,159 @@ struct NEECell NEECandidate candidate = sampleCandidate(randomNumber.x, lightObjectPdf); // Sample the selected triangle vec2 uv = vec2(randomNumber.y, randomNumber.z); + float area; lightSampleTriangle = NEECacheUtils.calculateLightSampleFromTriangle( - candidate.getSurfaceID(), candidate.getPrimitiveID(), uv, lightObjectPdf, position, coneRadius, coneSpreadAngle); + candidate.getSurfaceID(), candidate.getPrimitiveID(), uv, lightObjectPdf, position, coneRadius, coneSpreadAngle, area); + triangleID = -1; } return lightSampleTriangle; } } -struct ThreadTask -{ - uint2 m_data; - - static const uint s_lightOffset = (1 << 23); - static const uint s_surfaceMask = 0xffffff; - static const uint s_primitiveMask = 0xffffff; - static const uint s_invalidTask = 0xffffffff; - - bool isValid() - { - return any(m_data 
!= s_invalidTask); - } - - bool isTriangleTask() - { - return isValid() && (m_data.x & 0xffffff) < s_lightOffset; - } - - bool isLightTask() - { - return isValid() && (m_data.x & 0xffffff) >= s_lightOffset; - } - - bool getTriangleTask(out uint surfaceID, out uint primitiveID) - { - surfaceID = m_data.x & s_surfaceMask; - primitiveID = m_data.y & s_primitiveMask; - return surfaceID != s_surfaceMask && primitiveID != s_primitiveMask; - } - - uint getLightTask() - { - return (m_data.x & s_surfaceMask) - s_lightOffset; - } - - uint getCellOffset() - { - return ((m_data.y >> 24) << 8) | (m_data.x >> 24); - } - - [mutating] void packFromTriangleTask(uint cellOffset, uint surfaceID, uint primitiveID) - { - m_data.x = ((cellOffset & 0xff) << 24) | (surfaceID & s_surfaceMask); - m_data.y = ((cellOffset >> 8) << 24) | (primitiveID & s_primitiveMask); - } - - [mutating] void packFromLightTask(uint cellOffset, uint lightID) - { - lightID += s_lightOffset; - m_data.x = ((cellOffset & 0xff) << 24) | (lightID & s_surfaceMask); - m_data.y = ((cellOffset >> 8) << 24); - } - - static ThreadTask createFromTriangleTask(uint cellOffset, uint surfaceID, uint primitiveID) - { - ThreadTask task; - task.packFromTriangleTask(cellOffset, surfaceID, primitiveID); - return task; - } - - static ThreadTask createFromLightTask(uint cellOffset, uint lightID) - { - ThreadTask task; - task.packFromLightTask(cellOffset, lightID); - return task; - } - - static ThreadTask createEmpty() - { - ThreadTask task; - task.m_data = s_invalidTask; - return task; - } -} - struct NEECache { - static bool isInsideCache(vec3 position) - { - vec3 cameraPos = cameraGetWorldPosition(cb.camera); - vec3 offset = abs(position - cameraPos); - return all(offset < cb.neeCacheArgs.range * 0.5); - } - - static int cellToOffset(int3 cellID) - { - if (any(cellID == -1)) + static vec3 getCenter() + { + return cameraGetWorldPosition(cb.camera); + } + + // This is an advanced form from Johannes Jendersie (aligned log grid): + // 
https://confluence.nvidia.com/display/~jjendersie/Spatial+Cache+Placement + // It discretizes the transitions such that there are less degenerate cases at the LOD boundaries. + // triangleNormal: A normal to apply jittering in the tangential plane. If (0), jittering is disabled. + // TODO: it would be possible to use an unidirectional jitter in this case... e.g. for volumes + // jitterScale: 1.0 will jitter by the cell width while larger numbers will increase the blur and 0 will disable jittering. + #define HASH_GRID_MIN_LOG_LEVEL -127 // Exponent bias. See hash function below, uses 8 Bits for the level + static int4 computeLogGridPos( + float3 samplePos, + const float3 cameraPos, + const float distance /*Euclidean*/, + const float base, + const float baseLog, + const float resolution, + const f16vec3 triangleNormal, + uint jitterRnd, // 32 bit random number used for jittering + const float jitterScale) + { + // Compute the initial level for the hit point. + float lvlRnd = 0.0; + if (jitterScale != 0) { - return NEECell.s_invalidOffset; + // Jittering the level helps when moving around. Caches from the next level are then discovered early and + // can be populated with new information, before they get the major contributor for the current queries. + // const float lvlRnd = ((jitterRnd & 0xFF) / float(0xFF)) - 0.5; // Linear interpolation + // More focussed on the central level than linear interpolation: + lvlRnd = ((jitterRnd & 0x7F) / float(0x7F)); + lvlRnd = lvlRnd * lvlRnd * 0.5; + if (jitterRnd & 0x80) + lvlRnd = -lvlRnd; } - - int idx = - cellID.z * NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION + - cellID.y * NEE_CACHE_PROBE_RESOLUTION + - cellID.x; - return idx; - } - - static int3 offsetToCell(int offset) - { - if (offset == NEECell.s_invalidOffset) + int lvl = floor(log(max(1e-30f, distance)) / baseLog + lvlRnd); + lvl = max(lvl, HASH_GRID_MIN_LOG_LEVEL); // Safetynet when log() returns something too small. 
+ // Get the distance to where the level begins and derive a voxel size from it. + // (If we would use exp(lvl+1) we would get the distance to the end, but the resolution parameter is somewhat + // arbitrary anyways. However, we need this minimum distance below for the alignment. + float levelDist = exp(lvl * baseLog); + float voxelSize = levelDist / resolution; + + // Jittering to reduce grid artifacts. + // Note that the current version is not working for volumes (which would be simple to add by sampling a general + // direction on the sphere). + if (jitterScale != 0 && dot(triangleNormal, triangleNormal) > 0.f) { - return int3(-1); + // Add a translation in the geometric tangential plane to avoid jumping away from surfaces. + f16vec3 b0 = 0; + f16vec3 b1 = 0; + calcOrthonormalBasis(triangleNormal, b0, b1); + float continousSize = jitterScale * distance / resolution; + // We use 8 random bits per dimension which is enough for a cosmetic jittering + const float u0 = ((jitterRnd >> 8) & 0xFF) / float(0xFF); + const float u1 = ((jitterRnd >> 16) & 0xFF) / float(0xFF); + const float u2 = ((jitterRnd >> 24) & 0xFF) / float(0xFF); + samplePos += (vec3(u0, u1, u2) - 0.5) * 1.0 * continousSize; } - int3 cellID; - const int zSize = NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION; - const int ySize = NEE_CACHE_PROBE_RESOLUTION; - - cellID.z = offset / zSize; - offset -= cellID.z * zSize; - cellID.y = offset / ySize; - offset -= cellID.y * ySize; - - cellID.x = offset; - return cellID; + // Add an irrational number as an offset to avoid that objects in the 0-planes will lie on the boundary + // between two voxels. 
+ const float3 offPos = samplePos + 1.6180339887; + const float3 offCam = cameraPos + 1.6180339887; + const int3 gridPos = floor(offPos / voxelSize); + return int4(gridPos, lvl - HASH_GRID_MIN_LOG_LEVEL); } - static int3 pointToCell(vec3 position, bool jittered, vec3 jitteredNumber) + static uint computeDirectionalHash(f16vec3 normal) { - float extend = cb.neeCacheArgs.range; - vec3 cameraPos = cameraGetWorldPosition(cb.camera); - vec3 origin = cameraPos - extend * 0.5; - vec3 UVW = (position - origin) / extend; - vec3 UVWi = UVW * NEE_CACHE_PROBE_RESOLUTION; - - // jitter cell ID - if(jittered) - { - vec3 fracUVWi = fract(UVWi); - ivec3 cellID = ivec3(UVWi); - cellID.x += jitteredNumber.x > fracUVWi.x ? 0 : 1; - cellID.y += jitteredNumber.y > fracUVWi.y ? 0 : 1; - cellID.z += jitteredNumber.z > fracUVWi.z ? 0 : 1; - - if (any(cellID < 0) || any(cellID > NEE_CACHE_PROBE_RESOLUTION-1)) - { - return int3(-1); - } - return cellID; - } - else - { - UVWi += 0.5; - if (any(UVWi < 0) || any(UVWi > NEE_CACHE_PROBE_RESOLUTION-1)) - { - return int3(-1); - } - return UVWi; - } +#if 1 + return 0; +#else + f16vec3 absNormal = abs(normal); + float16_t maxAbsNormal = max(absNormal.x, max(absNormal.y, absNormal.z)); + uint result = 0; + result |= (maxAbsNormal == absNormal.x) ? 1 : 0; + result |= (maxAbsNormal == absNormal.y) ? 2 : 0; + result |= (maxAbsNormal == absNormal.z) ? 4 : 0; + maxAbsNormal = -maxAbsNormal; + result |= (maxAbsNormal == absNormal.x) ? 8 : 0; + result |= (maxAbsNormal == absNormal.y) ? 16 : 0; + result |= (maxAbsNormal == absNormal.z) ? 
32 : 0; + return result; +#endif } - static int pointToOffset(vec3 position, bool jittered, vec3 jitteredNumber) + static uint getSpatialHashValue(float3 position, f16vec3 normal, uint jitterRnd) { - int3 cellID = pointToCell(position, jittered, jitteredNumber); - return cellToOffset(cellID); - } + float resolution = cb.neeCacheArgs.resolution; + float minDistance = cb.neeCacheArgs.minRange; + int4 spatialHash = computeLogGridPos(position, + getCenter(), + max(minDistance, length(position - getCenter())),// const float distance /*Euclidean*/, + 0.0, //const float base, + 1.0, //const float baseLog, + resolution, //const float resolution, + normal, + jitterRnd, // 32 bit random number used for jittering + 1.0 //const float jitterScale + ); - static float getCellSize() - { - return cb.neeCacheArgs.range / NEE_CACHE_PROBE_RESOLUTION; + uint hashDir8Bit = computeDirectionalHash(normal); + + // + // 64 bit shading key: + // 16 bits x + // 16 bits y + // 16 bits z + // 8 bits logGridLevel + // 8 bits normal + // + // 16 bits per component are more than enough! + // It allows a resolution parameter of >20000 for base 1.5 or >16000 for base 2. 
+ const uint shadingKey0 = ((spatialHash.x & 0xFFFF) << 16) + | ((spatialHash.y & 0xFFFF)); + const uint shadingKey1 = ((spatialHash.z & 0xFFFF) << 16) + | ((spatialHash.w & 0xFF) << 8) + | ((hashDir8Bit & 0xFF)); + uint2 hashKey2 = uint2(shadingKey0, shadingKey1); + uint hashKey = prospectorHash(hashKey2.x) ^ prospectorHash(hashKey2.y); + return hashKey & (NEE_CACHE_TOTAL_PROBE - 1); } - static float getVolumeSize() + static uint getHashValue(int3 positionI) { - return cb.neeCacheArgs.range; + uint hash = 0; + hash ^= hashJenkins(positionI.x); + hash ^= hashJenkins(positionI.y); + hash ^= hashJenkins(positionI.z); + return hash & (NEE_CACHE_TOTAL_PROBE - 1); } - static vec3 cellToCenterPoint(ivec3 cellID) + static uint getAddressJittered(vec3 position, f16vec3 normal, uint jitter) { - float extend = cb.neeCacheArgs.range; - vec3 cameraPos = cameraGetWorldPosition(cb.camera); - vec3 origin = cameraPos - extend * 0.5; - vec3 UVW = vec3(cellID) / NEE_CACHE_PROBE_RESOLUTION; - vec3 position = UVW * extend + origin; - return position; + return getSpatialHashValue(position, normal, jitter); } - static NEECell getCell(int3 cellID) + static int pointToOffset(vec3 position, f16vec3 normal, uint jitteredNumber) { - NEECell cell = {}; - cell.m_offset = cellToOffset(cellID); - return cell; + return getAddressJittered(position, normal, jitteredNumber); } static NEECell getCell(int offset) { @@ -863,11 +785,6 @@ struct NEECache return cell; } - static NEECell findCell(vec3 point, bool jittered, vec3 jitteredNumber) - { - return getCell(pointToCell(point, jittered, jitteredNumber)); - } - static const uint s_analyticalLightStartIdx = 0xff0000; static bool isAnalyticalLight(uint idx) @@ -885,39 +802,6 @@ struct NEECache return idx - s_analyticalLightStartIdx; } - static void loadThreadTask(int2 pixel, out uint cellOffset, out uint surfaceID, out uint primitiveID) - { - uint2 data = NeeCacheThreadTask[pixel]; - surfaceID = data.x & 0xffffff; - primitiveID = data.y & 0xffffff; - if 
(surfaceID == 0xffffff || primitiveID == 0xffffff) - { - surfaceID = primitiveID = 0xffffffff; - } - cellOffset = (data.x >> 24) | ((data.y & 0xff000000) >> 16); - } - -#if NEE_CACHE_WRITE_THREAD_TASK - static void storeThreadTask(int2 pixel, uint cellOffset, uint surfaceID, uint primitiveID) { - uint2 data = uint2(surfaceID, primitiveID) & 0xffffff; - data.x = data.x | ((cellOffset & 0xff) << 24); - data.y = data.y | ((cellOffset & 0xff00) << 16); - NeeCacheThreadTask[pixel] = data; - } - - static void storeThreadTask(int2 pixel, ThreadTask task) - { - NeeCacheThreadTask[pixel] = task.m_data; - } -#endif - - static ThreadTask loadThreadTask(int2 pixel) - { - ThreadTask task; - task.m_data = NeeCacheThreadTask[pixel]; - return task; - } - static bool shouldUseHigherBounceNeeCache(bool isSpecularLobe, float16_t isotropicRoughness) { // ReSTIR GI can handle diffuse rays quite well, but not for highly specular surfaces. diff --git a/src/dxvk/shaders/rtx/algorithm/nee_cache_data.h b/src/dxvk/shaders/rtx/algorithm/nee_cache_data.h index 15ef6f77d..e5702fbfe 100644 --- a/src/dxvk/shaders/rtx/algorithm/nee_cache_data.h +++ b/src/dxvk/shaders/rtx/algorithm/nee_cache_data.h @@ -22,16 +22,20 @@ #pragma once #define NEE_CACHE_PROBE_RESOLUTION 32 +#define NEE_CACHE_TOTAL_PROBE (NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION) #define NEE_CACHE_ELEMENTS 16 // Element size in bytes #define NEE_CACHE_ELEMENT_SIZE 4 * 2 -#define NEE_CACHE_TASK_SIZE 4 +#define NEE_CACHE_TASK_SIZE 4 * 2 +#define NEE_CACHE_TASK_COUNT 32 +#define NEE_CACHE_HASH_TASK_COUNT 32 +#define NEE_CACHE_HASH_TASK_BASE NEE_CACHE_TOTAL_PROBE * NEE_CACHE_TASK_SIZE * NEE_CACHE_TASK_COUNT #define NEE_CACHE_EMPTY_TASK 0xffffffff #define NEE_CACHE_SAMPLES 16 #define NEE_CACHE_LIGHT_ELEMENTS 16 #define NEE_CACHE_LIGHT_ELEMENT_SIZE 4 * 2 #define NEE_CACHE_CELL_CANDIDATE_TOTAL_SIZE (NEE_CACHE_ELEMENTS * NEE_CACHE_ELEMENT_SIZE + NEE_CACHE_LIGHT_ELEMENTS * NEE_CACHE_LIGHT_ELEMENT_SIZE) 
-#define NEE_CACHE_CELL_TASK_TOTAL_SIZE NEE_CACHE_TASK_SIZE * NEE_CACHE_ELEMENTS * 2 +#define NEE_CACHE_CELL_TASK_TOTAL_SIZE NEE_CACHE_TASK_SIZE * (NEE_CACHE_TASK_COUNT + NEE_CACHE_HASH_TASK_COUNT) struct NeeCache_PackedSample { diff --git a/src/dxvk/shaders/rtx/algorithm/nee_cache_light.slangh b/src/dxvk/shaders/rtx/algorithm/nee_cache_light.slangh index 3b8e90e24..e918c4ec7 100644 --- a/src/dxvk/shaders/rtx/algorithm/nee_cache_light.slangh +++ b/src/dxvk/shaders/rtx/algorithm/nee_cache_light.slangh @@ -23,7 +23,7 @@ #include "rtx/concept/light/light_helper.slangh" #include "rtx/concept/light/light.slangh" -#define NEE_CACHE_INVALID_ID -1 +#define NEE_CACHE_INVALID_ID 0xffffff struct NEECacheUtils { @@ -69,7 +69,8 @@ struct NEECacheUtils { if (cb.surfaceCount == 0 || prefixSumID == -1) { - surfaceID = primitiveID = NEE_CACHE_INVALID_ID; + surfaceID = NEE_CACHE_INVALID_ID; + primitiveID = NEE_CACHE_INVALID_ID; return false; } @@ -94,7 +95,7 @@ struct NEECacheUtils return true; } - static LightSample calculateLightSampleFromTriangle(int surfaceIndex, int primitiveIndex, vec2 uv, float trianglePdf, vec3 shadingPosition, float16_t coneRadius, float16_t spreadAngle, bool useSolidAnglePdf = true, int footprintMode = kFootprintFromRayOrigin) + static LightSample calculateLightSampleFromTriangle(int surfaceIndex, int primitiveIndex, vec2 uv, float trianglePdf, vec3 shadingPosition, float16_t coneRadius, float16_t spreadAngle, out float triangleArea, bool useSolidAnglePdf = true, int footprintMode = kFootprintFromRayOrigin) { #define USE_SIMIPLIFIED_MODEL 1 @@ -147,6 +148,7 @@ struct NEECacheUtils lightSample.position = surfaceInteraction.position; lightSample.normal = surfaceInteraction.interpolatedNormal; float lightPdf = trianglePdf; + triangleArea = surfaceInteraction.triangleArea; if(useSolidAnglePdf) { lightPdf *= calculateLightSamplingSolidAnglePDF( diff --git a/src/dxvk/shaders/rtx/algorithm/rtxdi/RtxdiApplicationBridge.slangh 
b/src/dxvk/shaders/rtx/algorithm/rtxdi/RtxdiApplicationBridge.slangh index 89a3d79cb..5188d9aa7 100644 --- a/src/dxvk/shaders/rtx/algorithm/rtxdi/RtxdiApplicationBridge.slangh +++ b/src/dxvk/shaders/rtx/algorithm/rtxdi/RtxdiApplicationBridge.slangh @@ -102,6 +102,11 @@ float sampleUniformRng(inout RAB_RandomSamplerState r) return uintBitsToFloat((mask & v) | one) - 1.f; } +uint sampleUniformIntRng(inout RAB_RandomSamplerState r) +{ + return murmur3(r); +} + // Depending on the surface type, the position/normal/viewDirection members are in different coordinate systems. // For a surface reflected in a mirror, they are in the virtual world space. @@ -186,7 +191,10 @@ RAB_Surface RAB_EmptySurface() surface.opaqueSurfaceMaterialInteraction.baseReflectivity = f16vec3(0, 0, 1); surface.opaqueSurfaceMaterialInteraction.isotropicRoughness = float16_t(1); surface.opaqueSurfaceMaterialInteraction.anisotropicRoughness = f16vec2(1, 1); - surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex = BINDING_INDEX_INVALID; + surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedTransmittanceColor = 0; + surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.measurementDistance = 0.0h; + surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = 0; + surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.volumetricAnisotropy = 0.0h; surface.hitDistance = RAB_SURFACE_BACKGROUND_HIT_DISTANCE; surface.portalSpace = PORTAL_SPACE_NONE; surface.isViewModel = false; @@ -285,7 +293,10 @@ RAB_Surface RAB_GetGBufferSurface(ivec2 pixel, bool previousFrame) surface.opaqueSurfaceMaterialInteraction.anisotropicRoughness); const uint16_t primarySurfaceIndex = uint16_t(SharedSurfaceIndex[pixel]); - surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex = getSubsurfaceMaterialIndex(surfaceMaterials[primarySurfaceIndex]); + uint16_t samplerIndex = 0; + const uint16_t 
subsurfaceMaterialIndex = getSubsurfaceMaterialIndex(surfaceMaterials[primarySurfaceIndex], samplerIndex); + + surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction = subSurfaceMaterialReadHelper(SharedSubsurfaceData[pixel]); GeometryFlags geometryFlags = geometryFlagsReadFromGBuffer(pixel, SharedFlags); surface.portalSpace = geometryFlags.portalSpace; @@ -344,7 +355,7 @@ float RAB_GetLightSampleTargetPdfForSurface(RAB_LightSample lightSample, RAB_Sur // Need to compute the transmission pdf if the ray interact with SSS surface if (dot(L, surface.minimalSurfaceInteraction.triangleNormal) <= float16_t(0) && - (!cb.thinOpaqueEnable || surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex == BINDING_INDEX_INVALID)) + (!cb.thinOpaqueEnable || !isSubsurfaceMaterial(surface.opaqueSurfaceMaterialInteraction))) { return 0; } @@ -374,7 +385,7 @@ SurfaceMaterialInteractionSplitWeight RAB_CalculateBRDF(vec3 L, RAB_Surface surf weight.diffuseTransmissionWeight = f16vec3(0); if (dot(L, surface.minimalSurfaceInteraction.triangleNormal) <= float16_t(0) && - (!cb.thinOpaqueEnable || surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex == BINDING_INDEX_INVALID)) + (!cb.thinOpaqueEnable || !isSubsurfaceMaterial(surface.opaqueSurfaceMaterialInteraction))) { return weight; } @@ -390,8 +401,7 @@ float RAB_GetGITargetPdfForSurface(vec3 radiance, vec3 position, RAB_Surface sur if ((dot(L, surface.minimalSurfaceInteraction.triangleNormal) <= float16_t(0) || dot(L, surface.opaqueSurfaceMaterialInteraction.shadingNormal) <= float16_t(0)) && - (!cb.thinOpaqueEnable || - surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex == BINDING_INDEX_INVALID)) + (!cb.thinOpaqueEnable || !isSubsurfaceMaterial(surface.opaqueSurfaceMaterialInteraction))) { return 0; } @@ -701,8 +711,7 @@ bool RAB_TraceLightSampleVisibility(RAB_Surface surface, RAB_LightSample lightSa uint8_t rayMask = OBJECT_MASK_OPAQUE | (surface.objectMask & OBJECT_MASK_ALL_DYNAMIC); if 
(cb.enableDirectTranslucentShadows) rayMask |= OBJECT_MASK_TRANSLUCENT; - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubSurface = subSurfaceMaterialReadHelper(surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(surface.opaqueSurfaceMaterialInteraction); const VisibilityResult visibility = traceVisibilityRay( surface.minimalSurfaceInteraction, samplePos, @@ -712,7 +721,7 @@ bool RAB_TraceLightSampleVisibility(RAB_Surface surface, RAB_LightSample lightSa surface.isViewModel, cb.enablePreviousTLAS && usePreviousTLAS, BINDING_INDEX_INVALID, f16vec2(0.0h), - isSubSurface, surface.opaqueSurfaceMaterialInteraction.shadingNormal); + isSubsurface, surface.opaqueSurfaceMaterialInteraction.shadingNormal); attenuation = visibility.attenuation; diff --git a/src/dxvk/shaders/rtx/algorithm/visibility.slangh b/src/dxvk/shaders/rtx/algorithm/visibility.slangh index 90638de21..e45a7699e 100644 --- a/src/dxvk/shaders/rtx/algorithm/visibility.slangh +++ b/src/dxvk/shaders/rtx/algorithm/visibility.slangh @@ -78,10 +78,19 @@ f16vec3 handleVisibilityVertex(Ray ray, RayHitInfo rayHitInfo, uint8_t visibilit const OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteraction = opaqueSurfaceMaterialInteractionCreate(opaqueSurfaceMaterial, surface, surfaceInteraction, rayInteraction); - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubsurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); - - if (!isSubsurface) + const SubsurfaceMaterial subsurfaceMaterial = subsurfaceMaterialCreate(opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); + + const bool handleStochasticAlphaBlend = + cb.enableStochasticAlphaBlend && + opaqueSurfaceMaterialInteraction.opacity > cb.resolveTransparencyThreshold && + 
opaqueSurfaceMaterialInteraction.opacity <= cb.resolveStochasticAlphaBlendThreshold; + + // We need to calculate the attenuation with opaque surface opacity in these 2 cases: + // 1. The surface doesn't have subsurface (do traditional alpha-blending) + // 2. The surface has subsurface, but the surface point need to be discarded because it fails the alpha-test. + // This is necessary for subsurface material or the alpha-test will be ignored. + if (!isSubsurface || handleStochasticAlphaBlend || opaqueSurfaceMaterialInteraction.opacity <= cb.resolveTransparencyThreshold) { return 1.0h - opaqueSurfaceMaterialInteraction.opacity; } @@ -100,16 +109,16 @@ f16vec3 handleVisibilityVertex(Ray ray, RayHitInfo rayHitInfo, uint8_t visibilit // Calculate path integral throughput/weight const float16_t outsideFresnel = evalTranslucentSchlickFresnel(baseReflectivity, normalDotOutputDirection); const float16_t insideFresnel = evalTranslucentSchlickFresnelTIR(baseReflectivity, insideRelativeRefractiveIndex, normalDotOutputDirection); - const float16_t attenuationDistance = subSurfaceMaterial.measurementDistance * subSurfaceMaterial.measurementDistance; - const f16vec3 attenuation = evalBeerLambertAttenuation(subSurfaceMaterial.volumetricAttenuationCoefficient, attenuationDistance * rcp(float(normalDotOutputDirection))); + const float16_t attenuationDistance = subsurfaceMaterial.measurementDistance * subsurfaceMaterial.measurementDistance; + const f16vec3 attenuation = evalBeerLambertAttenuation(subsurfaceMaterial.volumetricAttenuationCoefficient, attenuationDistance * rcp(float(normalDotOutputDirection))); if (insideFresnel >= 1.0h) { return 0.0h; } - return subSurfaceMaterial.singleScatteringAlbedo * (1.0f - outsideFresnel) * (1.0f - insideFresnel) * - safePositiveDivide((subSurfaceMaterial.volumetricAttenuationCoefficient * attenuationDistance), + return subsurfaceMaterial.singleScatteringAlbedo * (1.0f - outsideFresnel) * (1.0f - insideFresnel) * + 
safePositiveDivide((subsurfaceMaterial.volumetricAttenuationCoefficient * attenuationDistance), normalDotOutputDirection, materialEpsilon) * rcp(fourPi) * attenuation; } diff --git a/src/dxvk/shaders/rtx/concept/light/disk_light.slangh b/src/dxvk/shaders/rtx/concept/light/disk_light.slangh index 1a1a0aa55..acc34b6ae 100644 --- a/src/dxvk/shaders/rtx/concept/light/disk_light.slangh +++ b/src/dxvk/shaders/rtx/concept/light/disk_light.slangh @@ -39,11 +39,11 @@ float diskLightCalcPositionalWeight(DiskLight diskLight, vec3 position, inout ve ); } -bool diskLightCalcVisibilityWeight(DiskLight diskLight, f16vec3 triangleNormal, vec3 lightVector, float16_t maxDiskRadius, const bool isSubSurface) +bool diskLightCalcVisibilityWeight(DiskLight diskLight, f16vec3 triangleNormal, vec3 lightVector, float16_t maxDiskRadius, const bool isSubsurface) { // Cull the disks that are completely hidden below the surface or the surface has subsurface material, // and the disks that are oriented away from the surface. - const bool frontSideDisk = (isSubSurface || dot(lightVector, vec3(triangleNormal)) > float(-maxDiskRadius)) && + const bool frontSideDisk = (isSubsurface || dot(lightVector, vec3(triangleNormal)) > float(-maxDiskRadius)) && (dot(lightVector, vec3(diskLight.normal)) < 0.0f); return frontSideDisk; diff --git a/src/dxvk/shaders/rtx/concept/light/light_helper.slangh b/src/dxvk/shaders/rtx/concept/light/light_helper.slangh index 2c6110de0..458cb7cdb 100644 --- a/src/dxvk/shaders/rtx/concept/light/light_helper.slangh +++ b/src/dxvk/shaders/rtx/concept/light/light_helper.slangh @@ -36,3 +36,15 @@ struct LightSample // Note: 32 bit floating point used to avoid precision issues with some kinds of sampling on lights. 
float solidAnglePdf; }; + +float3 sampleDomeLightTexture(SamplerState sampler, float3 worldDirection, uint32_t domeLightTextureIndex, float4x4 worldToDomeLightTransform) +{ + if(domeLightTextureIndex == BINDING_INDEX_INVALID) + { + return 1..xxx; + } + + float3 domeSampleDirection = mul(worldToDomeLightTransform, float4(worldDirection, 0.0f)).xyz; + float2 sampleUV = cartesianDirectionToLatLongSphere(domeSampleDirection); + return textures[nonuniformEXT(uint(domeLightTextureIndex))].SampleLevel(sampler, sampleUV, 0).xyz; +} \ No newline at end of file diff --git a/src/dxvk/shaders/rtx/concept/light/polymorphic_light.slangh b/src/dxvk/shaders/rtx/concept/light/polymorphic_light.slangh index b237c6d2f..0b72b3f57 100644 --- a/src/dxvk/shaders/rtx/concept/light/polymorphic_light.slangh +++ b/src/dxvk/shaders/rtx/concept/light/polymorphic_light.slangh @@ -270,3 +270,50 @@ float decodedPolymorphicLightCalcWeight( return 0; } } + +// Returns the position member of the decoded sphere, rect, disk or cylinder light; for a distant light, returns a point 100000 units along the negated light direction. +// Light types that are compiled out via the ENABLE_*_LIGHTS switches, or that are not recognized, fall to the default case and return zero. +vec3 decodedPolymorphicLightGetPosition(DecodedPolymorphicLight decodedPolymorphicLight) +{ + switch (uint(decodedPolymorphicLight.polymorphicType)) + { +#if ENABLE_SPHERE_LIGHTS + case uint(lightTypeSphere): + const SphereLight sphereLight = sphereLightCreate(decodedPolymorphicLight); + return sphereLight.position; +#endif + +#if ENABLE_RECT_LIGHTS + case uint(lightTypeRect): + const RectLight rectLight = rectLightCreate(decodedPolymorphicLight); + return rectLight.position; +#endif + +#if ENABLE_DISK_LIGHTS + case uint(lightTypeDisk): + const DiskLight diskLight = diskLightCreate(decodedPolymorphicLight); + return diskLight.position; +#endif + +#if ENABLE_CYLINDER_LIGHTS + case uint(lightTypeCylinder): + const CylinderLight cylinderLight = cylinderLightCreate(decodedPolymorphicLight); + return cylinderLight.position; +#endif + +#if ENABLE_DISTANT_LIGHTS + case uint(lightTypeDistant): + const DistantLight distantLight = distantLightCreate(decodedPolymorphicLight); + return vec3(distantLight.direction) * -100000.0; +#endif + + default: + return 0.0; + 
} +} + +vec3 memoryPolymorphicLightGetPosition(MemoryPolymorphicLight memoryPolymorphicLight) +{ + const DecodedPolymorphicLight decodedPolymorphicLight = decodePolymorphicLight(memoryPolymorphicLight); + return decodedPolymorphicLightGetPosition(decodedPolymorphicLight); +} diff --git a/src/dxvk/shaders/rtx/concept/light/rect_light.slangh b/src/dxvk/shaders/rtx/concept/light/rect_light.slangh index db730abdc..00f810026 100644 --- a/src/dxvk/shaders/rtx/concept/light/rect_light.slangh +++ b/src/dxvk/shaders/rtx/concept/light/rect_light.slangh @@ -59,11 +59,11 @@ float rectLightCalcPositionalWeight(RectLight rectLight, vec3 position, inout ve ); } -bool rectLightCalcVisibilityWeight(RectLight rectLight, f16vec3 triangleNormal, vec3 lightVector, float16_t maxRectRadius, const bool isSubSurface) +bool rectLightCalcVisibilityWeight(RectLight rectLight, f16vec3 triangleNormal, vec3 lightVector, float16_t maxRectRadius, const bool isSubsurface) { // Cull the rects that are completely hidden below the surface or the surface has subsurface material, // and the rects that are oriented away from the surface. - const bool frontSideRect = (isSubSurface || dot(lightVector, vec3(triangleNormal)) > float(-maxRectRadius)) && + const bool frontSideRect = (isSubsurface || dot(lightVector, vec3(triangleNormal)) > float(-maxRectRadius)) && (dot(lightVector, vec3(rectLight.normal)) < 0.0f); return frontSideRect; diff --git a/src/dxvk/shaders/rtx/concept/surface/surface.h b/src/dxvk/shaders/rtx/concept/surface/surface.h index d12ae3bd2..37af7ee4d 100644 --- a/src/dxvk/shaders/rtx/concept/surface/surface.h +++ b/src/dxvk/shaders/rtx/concept/surface/surface.h @@ -123,6 +123,8 @@ struct Surface uint8_t spriteSheetRows; uint8_t spriteSheetCols; uint8_t spriteSheetFPS; + + uint32_t objectPickingValue; }; // Note: Minimal version of typical Surface Interaction for transmission across passes. 
diff --git a/src/dxvk/shaders/rtx/concept/surface/surface_create.slangh b/src/dxvk/shaders/rtx/concept/surface/surface_create.slangh index 946d775b7..f1352e631 100644 --- a/src/dxvk/shaders/rtx/concept/surface/surface_create.slangh +++ b/src/dxvk/shaders/rtx/concept/surface/surface_create.slangh @@ -147,5 +147,7 @@ Surface surfaceCreate(MemorySurface memorySurface) surface.spriteSheetFPS = uint8_t((textureSpritesheetData >> 16) & 0xFF); // Note: 8 bits free here in the texture spritesheet data. + surface.objectPickingValue = memorySurface.data1.y; + return surface; } diff --git a/src/dxvk/shaders/rtx/concept/surface/surface_interaction.slangh b/src/dxvk/shaders/rtx/concept/surface/surface_interaction.slangh index b54f8c720..7dc6e8a31 100644 --- a/src/dxvk/shaders/rtx/concept/surface/surface_interaction.slangh +++ b/src/dxvk/shaders/rtx/concept/surface/surface_interaction.slangh @@ -33,6 +33,7 @@ static const uint kFootprintFromRayOrigin = 0u; static const uint kFootprintFromRayDirection = 1u; static const uint kFootprintFromTextureCoordDiff = 2u; +static const uint kFootprintFromRayOriginClamped = 3u; // Surface Interaction Helper Functions @@ -501,8 +502,19 @@ SurfaceInteraction surfaceInteractionCreate( computeAnisotropicEllipseAxes( hitPosition, triangleNormal, worldTwoTriangleArea, direction, - rayInteraction.coneRadius, worldPositions, texcoords, surfaceInteraction.textureCoordinates, + coneRadius, worldPositions, texcoords, surfaceInteraction.textureCoordinates, surfaceInteraction.textureGradientX, surfaceInteraction.textureGradientY); + + if (footprintMode == kFootprintFromRayOriginClamped) + { + vec2 dUV1 = texcoords[1] - texcoords[0]; + vec2 dUV2 = texcoords[2] - texcoords[0]; + float dUVCross = abs(dUV1.x * dUV2.y - dUV2.x * dUV1.y); + float radius = dUVCross / (length(dUV1) + length(dUV2) + length(dUV2 - dUV1)); + float maxFootprint = radius * 1.5; + surfaceInteraction.textureGradientX *= min(1.0, maxFootprint / 
length(surfaceInteraction.textureGradientX)); + surfaceInteraction.textureGradientY *= min(1.0, maxFootprint / length(surfaceInteraction.textureGradientY)); + } } else { diff --git a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material.slangh b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material.slangh index 56d510ca2..9345744d9 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material.slangh +++ b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material.slangh @@ -104,19 +104,64 @@ SubsurfaceMaterial subsurfaceMaterialCreate(MemoryPolymorphicSurfaceMaterial mem const u16vec2 data01 = unpack16(memoryPolymorphicSurfaceMaterial.data0.y); const u16vec2 data02 = unpack16(memoryPolymorphicSurfaceMaterial.data0.z); const u16vec2 data03 = unpack16(memoryPolymorphicSurfaceMaterial.data0.w); + const u16vec2 data10 = unpack16(memoryPolymorphicSurfaceMaterial.data1.x); + const u16vec2 data13 = unpack16(memoryPolymorphicSurfaceMaterial.data1.w); + + const uint8_t flags = (data13.x & 0xFF); + + if (flags & SUBSURFACE_MATERIAL_FLAG_HAS_TRANSMITTANCE_TEXTURE) + { + subsurfaceMaterial.subsurfaceTransmittanceTextureIndex = data00.x; + } + else + { + subsurfaceMaterial.subsurfaceTransmittanceTextureIndex = BINDING_INDEX_INVALID; + } + + if (flags & SUBSURFACE_MATERIAL_FLAG_HAS_THICKNESS_TEXTURE) + { + subsurfaceMaterial.subsurfaceThicknessTextureIndex = data00.y; + } + else + { + subsurfaceMaterial.subsurfaceThicknessTextureIndex = BINDING_INDEX_INVALID; + subsurfaceMaterial.measurementDistance = uint16BitsToHalf(data00.y); + } + + if (flags & SUBSURFACE_MATERIAL_FLAG_HAS_SINGLE_SCATTERING_ALBEDO_TEXTURE) + { + subsurfaceMaterial.subsurfaceSingleScatteringAlbedoTextureIndex = data01.x; + } + else + { + subsurfaceMaterial.subsurfaceSingleScatteringAlbedoTextureIndex = BINDING_INDEX_INVALID; + } - subsurfaceMaterial.volumetricAttenuationCoefficient = f16vec3(uint16BitsToHalf(data00.x), uint16BitsToHalf(data00.y), 
uint16BitsToHalf(data01.x)); - subsurfaceMaterial.measurementDistance = uint16BitsToHalf(data01.y); - subsurfaceMaterial.singleScatteringAlbedo = f16vec3(uint16BitsToHalf(data02.x), uint16BitsToHalf(data02.y), uint16BitsToHalf(data03.x)); - subsurfaceMaterial.volumetricAnisotropy = uint16BitsToHalf(data03.y); + subsurfaceMaterial.volumetricAttenuationCoefficient = f16vec3(uint16BitsToHalf(data01.y), uint16BitsToHalf(data02.x), uint16BitsToHalf(data02.y)); + subsurfaceMaterial.singleScatteringAlbedo = f16vec3(uint16BitsToHalf(data03.x), uint16BitsToHalf(data03.y), uint16BitsToHalf(data10.x)); + subsurfaceMaterial.volumetricAnisotropy = uint16BitsToHalf(data10.y); return subsurfaceMaterial; } -uint16_t getSubsurfaceMaterialIndex(const MemoryPolymorphicSurfaceMaterial memoryPolymorphicSurfaceMaterial) +SubsurfaceMaterial subsurfaceMaterialCreate(const SubsurfaceMaterialInteraction subsurfaceMaterialInteraction) { + SubsurfaceMaterial subsurfaceMaterial; + subsurfaceMaterial.volumetricAttenuationCoefficient = transmittanceToAttenuationCoefficient(r5g6b5ToColor(subsurfaceMaterialInteraction.packedTransmittanceColor), subsurfaceMaterialInteraction.measurementDistance); + subsurfaceMaterial.measurementDistance = subsurfaceMaterialInteraction.measurementDistance; + subsurfaceMaterial.singleScatteringAlbedo = r5g6b5ToColor(subsurfaceMaterialInteraction.packedSingleScatteringAlbedo); + subsurfaceMaterial.volumetricAnisotropy = snorm8ToF16(subsurfaceMaterialInteraction.volumetricAnisotropy); + + return subsurfaceMaterial; +} + +uint16_t getSubsurfaceMaterialIndex(const MemoryPolymorphicSurfaceMaterial memoryPolymorphicSurfaceMaterial, inout uint16_t samplerIndex) +{ + const u16vec2 data10 = unpack16(memoryPolymorphicSurfaceMaterial.data1.x); const u16vec2 data12 = unpack16(memoryPolymorphicSurfaceMaterial.data1.z); const u16vec2 data13 = unpack16(memoryPolymorphicSurfaceMaterial.data1.w); + samplerIndex = data10.y; + const uint8_t flags = (data13.x & 0xFF); if (flags & 
OPAQUE_SURFACE_MATERIAL_FLAG_HAS_SUBSURFACE_MATERIAL) { diff --git a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh index e8d584c35..aed01ce12 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh +++ b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh @@ -333,6 +333,74 @@ float opaqueSurfaceMaterialInteractionCalcHeightThroughput( } #endif +SubsurfaceMaterialInteraction subsurfaceMaterialInteractionCreate( + SubsurfaceMaterial subsurfaceMaterial, + SurfaceInteraction surfaceInteraction, + uint16_t samplerIndex) +{ + f16vec4 transmittanceColor; + f16vec4 subsurfaceMeasurementDistance; + f16vec4 subsurfaceSingleScatteringAlbedo; + + bool measurementDistanceLoaded = surfaceMaterialInteractionTextureReadHelper(subsurfaceMaterial.subsurfaceThicknessTextureIndex, samplerIndex, surfaceInteraction, subsurfaceMeasurementDistance); + bool transmittanceColorLoaded = surfaceMaterialInteractionTextureReadHelper(subsurfaceMaterial.subsurfaceTransmittanceTextureIndex, samplerIndex, surfaceInteraction, transmittanceColor); + bool singleScatteringAlbedoLoaded = surfaceMaterialInteractionTextureReadHelper(subsurfaceMaterial.subsurfaceSingleScatteringAlbedoTextureIndex, samplerIndex, surfaceInteraction, subsurfaceSingleScatteringAlbedo); + + SubsurfaceMaterialInteraction subsurfaceMaterialInteraction; + + if (measurementDistanceLoaded) + { + subsurfaceMaterialInteraction.measurementDistance = subsurfaceMeasurementDistance.r; + } + else + { + subsurfaceMaterialInteraction.measurementDistance = subsurfaceMaterial.measurementDistance; + } + + // volumetricAttenuationCoefficient must be read behind measurementDistance, because if using sss textures, the volumetricAttenuationCoefficient need to be calculated base on measurementDistance + if (transmittanceColorLoaded) + { 
+ subsurfaceMaterialInteraction.packedTransmittanceColor = colorToR5G6B5(transmittanceColor.rgb); + } + else + { + subsurfaceMaterialInteraction.packedTransmittanceColor = colorToR5G6B5(evalBeerLambertAttenuation(subsurfaceMaterial.volumetricAttenuationCoefficient, subsurfaceMaterial.measurementDistance)); + } + + if (singleScatteringAlbedoLoaded) + { + subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = colorToR5G6B5(subsurfaceSingleScatteringAlbedo.rgb); + } + else + { + subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = colorToR5G6B5(subsurfaceMaterial.singleScatteringAlbedo); + } + + subsurfaceMaterialInteraction.volumetricAnisotropy = f16ToSnorm8(subsurfaceMaterial.volumetricAnisotropy); + + return subsurfaceMaterialInteraction; +} + +void storeColorOverlayedWithTextureResolutionCheckersInDebugView(vec3 colorValue, uint textureIndex, SurfaceInteraction surfaceInteraction) +{ + if (textureIndex != BINDING_INDEX_INVALID) + { + uint2 texDims; + textures[nonuniformEXT(uint(textureIndex))].GetDimensions(texDims.x, texDims.y); + + colorValue = + tintColorValueWithTextureResolutionCheckers( + colorValue, + surfaceInteraction.textureCoordinates, + surfaceInteraction.textureGradientX * cb.upscaleFactor, + surfaceInteraction.textureGradientY * cb.upscaleFactor, + texDims, + cb.debugKnob.x, + cb.debugKnob.y); + storeInDebugView(DispatchRaysIndex().xy, colorValue); + } +} + // Opaque Surface Material Interaction Functions OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteractionCreate( @@ -452,6 +520,11 @@ OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteractionCreate( { storeInDebugView(DispatchRaysIndex().xy, albedo.xyz); } + + if (cb.debugView == DEBUG_VIEW_OPAQUE_RAW_ALBEDO_RESOLUTION_CHECKERS) + { + storeColorOverlayedWithTextureResolutionCheckersInDebugView(albedo.xyz, opaqueSurfaceMaterial.albedoOpacityTextureIndex, surfaceInteraction); + } #endif f16vec4 tFactor = 0.h; @@ -570,7 +643,13 @@ OpaqueSurfaceMaterialInteraction 
opaqueSurfaceMaterialInteractionCreate( normal = normalBlendRNM(normal2, normal); } } - + +#if defined(RAY_TRACING_PRIMARY_RAY) && defined(RAY_PIPELINE) + if (cb.debugView == DEBUG_VIEW_OPAQUE_NORMAL_RESOLUTION_CHECKERS) + { + storeColorOverlayedWithTextureResolutionCheckersInDebugView(normal, opaqueSurfaceMaterial.normalTextureIndex, surfaceInteraction); + } +#endif // Load Tangent // Todo @@ -584,6 +663,12 @@ OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteractionCreate( roughness = roughnessSample.x; } +#if defined(RAY_TRACING_PRIMARY_RAY) && defined(RAY_PIPELINE) + if (cb.debugView == DEBUG_VIEW_OPAQUE_ROUGHNESS_RESOLUTION_CHECKERS) + { + storeColorOverlayedWithTextureResolutionCheckersInDebugView(roughness.xxx, opaqueSurfaceMaterial.roughnessTextureIndex, surfaceInteraction); + } +#endif // Apply material modifiers roughness = saturate(roughness * cb.opaqueMaterialArgs.roughnessScale + cb.opaqueMaterialArgs.roughnessBias); @@ -662,8 +747,8 @@ OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteractionCreate( opaqueSurfaceMaterialInteraction.emissiveRadiance = derivedEmissiveColor * derivedEmissiveIntensity; - // Subsurface Material Index - opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex = opaqueSurfaceMaterial.subsurfaceMaterialIndex; + opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction = + subSurfaceMaterialReadHelper(opaqueSurfaceMaterial.subsurfaceMaterialIndex, surfaceInteraction, opaqueSurfaceMaterial.samplerIndex); return opaqueSurfaceMaterialInteraction; } @@ -695,9 +780,11 @@ OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteractionCreate(Polymorp opaqueSurfaceMaterialInteraction.anisotropicRoughness.x = polymorphicSurfaceMaterialInteraction.fdata2; opaqueSurfaceMaterialInteraction.anisotropicRoughness.y = polymorphicSurfaceMaterialInteraction.fdata3; opaqueSurfaceMaterialInteraction.normalDetail = polymorphicSurfaceMaterialInteraction.fdata4; + 
opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.measurementDistance = polymorphicSurfaceMaterialInteraction.fdata5; + opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.volumetricAnisotropy = polymorphicSurfaceMaterialInteraction.bdata1; + opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedTransmittanceColor = polymorphicSurfaceMaterialInteraction.idata0; + opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = polymorphicSurfaceMaterialInteraction.idata1; - opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex = polymorphicSurfaceMaterialInteraction.idata0; - opaqueSurfaceMaterialInteraction.flags = polymorphicSurfaceMaterialInteraction.bdata0; return opaqueSurfaceMaterialInteraction; @@ -783,8 +870,7 @@ bool opaqueSurfaceMaterialInteractionCalcLobeProbability( { // Calculate dot products used for evaluation - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubSurface = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); // Note: Normal dot output direction used as a "best" approximation in this case, usually aligned with the actual sampled // microfacet normal on surfaces with low roughness (identical at 0), but will become more and more potentially inaccurate @@ -807,7 +893,7 @@ bool opaqueSurfaceMaterialInteractionCalcLobeProbability( diffuseReflectionProbability = albedoLuminance; specularReflectionProbability = fresnelLuminance; opacityTransmissionProbability = 1.0h - opaqueSurfaceMaterialInteraction.opacity; - diffuseTransmissionProbability = isSubSurface ? 1.0h - fresnelLuminance : 0.0h; + diffuseTransmissionProbability = isSubsurface ? 1.0h - fresnelLuminance : 0.0h; // Todo: Pass raytraceArgs in in the future if accessing these constants poses a problem later. 
adjustProbabilityValue( @@ -971,11 +1057,12 @@ SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcDiffuseRefl // Calculate the throughput of the sample - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubsurfaceMaterial = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const SubsurfaceMaterial subsurfaceMaterial = subsurfaceMaterialCreate(opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); + // If we have lambert diffuse transmission lobe, we are actually doing lambert sampling on the whole sphere, so the diffuse reflection weight should be weighted with 1/2pi for energy conservation. // The original hammon diffuse does sampling on hemisphere, so we can just normalize the weight again with 1/2. - const f16vec3 lambertTransmissionWeight = (isSubsurfaceMaterial && subSurfaceMaterial.measurementDistance <= 0.05f) ? 0.5h : 1.0h; + const f16vec3 lambertTransmissionWeight = (isSubsurface && subsurfaceMaterial.measurementDistance <= 0.05f) ? 
0.5h : 1.0h; const f16vec3 weight = evalHammonDiffuse(opaqueSurfaceMaterialInteraction.albedo, opaqueSurfaceMaterialInteraction.isotropicRoughness, @@ -1192,7 +1279,7 @@ SurfaceMaterialInteractionSample thinOpaqueSurfaceMaterialInteractionCalcDiffuse inout RNG randomState, f16vec4 tangentToWorldSpaceQuaternion, MinimalRayInteraction minimalRayInteraction, - SubsurfaceMaterial subSurfaceMaterial, + SubsurfaceMaterial subsurfaceMaterial, inout bool insideMedium, inout bool penetrateSurface) { const f16vec4 worldToTangentSpaceQuaternion = quaternionInverse(tangentToWorldSpaceQuaternion); @@ -1212,8 +1299,8 @@ SurfaceMaterialInteractionSample thinOpaqueSurfaceMaterialInteractionCalcDiffuse const f16vec3 weight = evalHanrahanSingleScatteringDiffuseTransmission( opaqueSurfaceMaterialInteraction.baseReflectivity, - subSurfaceMaterial.volumetricAttenuationCoefficient, subSurfaceMaterial.measurementDistance, - subSurfaceMaterial.singleScatteringAlbedo, subSurfaceMaterial.volumetricAnisotropy, + subsurfaceMaterial.volumetricAttenuationCoefficient, subsurfaceMaterial.measurementDistance, + subsurfaceMaterial.singleScatteringAlbedo, subsurfaceMaterial.volumetricAnisotropy, normalDotOutputDirection, transmissionNormalDotInputDirection, inputDirectionDotOutputDirection); @@ -1313,15 +1400,14 @@ void opaqueSurfaceMaterialInteractionCalcSample( break; case uint(opaqueLobeTypeDiffuseTransmission): // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const SubsurfaceMaterial subsurfaceMaterial = subsurfaceMaterialCreate(opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction); // When the thickness of surface is thinner than this experimental threshold, we don't need to consider the volume of the surface. // Instead, we use a cheaper 2-side sampling method with traditional LambertBTDF to simulate the effect. 
- if (subSurfaceMaterial.measurementDistance > 0.05f) + if (subsurfaceMaterial.measurementDistance > 0.05f) { surfaceMaterialInteractionSample = thinOpaqueSurfaceMaterialInteractionCalcDiffuseTransmissionSample( - opaqueSurfaceMaterialInteraction, randomState, tangentToWorldSpaceQuaternion, minimalRayInteraction, subSurfaceMaterial, insideMedium, penetrateSurface); + opaqueSurfaceMaterialInteraction, randomState, tangentToWorldSpaceQuaternion, minimalRayInteraction, subsurfaceMaterial, insideMedium, penetrateSurface); } else { @@ -1603,13 +1689,13 @@ SurfaceMaterialInteractionSplitWeight opaqueSurfaceMaterialInteractionCalcApprox const float16_t /* n.v */ normalDotOutputDirection = dot(minimalRayInteraction.viewDirection, opaqueSurfaceMaterialInteraction.shadingNormal); // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - const bool sssEnabled = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const SubsurfaceMaterial subsurfaceMaterial = subsurfaceMaterialCreate(opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); f16vec3 diffuseTransmissionWeight = f16vec3(0.0, 0.0, 0.0); - if (sssEnabled) + if (isSubsurface) { - diffuseTransmissionWeight = subSurfaceMaterial.singleScatteringAlbedo; + diffuseTransmissionWeight = subsurfaceMaterial.singleScatteringAlbedo; } if (normalDotOutputDirection <= float16_t(0.0) || normalDotInputDirection <= float16_t(0.0)) @@ -1682,15 +1768,15 @@ SurfaceMaterialInteractionSplitWeight opaqueSurfaceMaterialInteractionCalcProjec const float16_t /*-n.l */ transmissionNormalDotInputDirection = -tangentInputDirection.z; // Load Subsurface Material - SubsurfaceMaterial subSurfaceMaterial; - const bool sssEnabled = subSurfaceMaterialReadHelper(opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const SubsurfaceMaterial subsurfaceMaterial = 
subsurfaceMaterialCreate(opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction); + const bool isSubsurface = isSubsurfaceMaterial(opaqueSurfaceMaterialInteraction); f16vec3 diffuseTransmissionWeight = f16vec3(0.0h, 0.0h, 0.0h); - if (sssEnabled && normalDotOutputDirection > 0.0h) { + if (isSubsurface && normalDotOutputDirection > 0.0h) { diffuseTransmissionWeight = evalHanrahanSingleScatteringDiffuseTransmission( opaqueSurfaceMaterialInteraction.baseReflectivity, - subSurfaceMaterial.volumetricAttenuationCoefficient, subSurfaceMaterial.measurementDistance, - subSurfaceMaterial.singleScatteringAlbedo, subSurfaceMaterial.volumetricAnisotropy, + subsurfaceMaterial.volumetricAttenuationCoefficient, subsurfaceMaterial.measurementDistance, + subsurfaceMaterial.singleScatteringAlbedo, subsurfaceMaterial.volumetricAnisotropy, normalDotOutputDirection, transmissionNormalDotInputDirection, inputDirectionDotOutputDirection); diff --git a/src/dxvk/shaders/rtx/concept/surface_material/polymorphic_surface_material_interaction.slangh b/src/dxvk/shaders/rtx/concept/surface_material/polymorphic_surface_material_interaction.slangh index db8aafcb5..ad899954f 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/polymorphic_surface_material_interaction.slangh +++ b/src/dxvk/shaders/rtx/concept/surface_material/polymorphic_surface_material_interaction.slangh @@ -43,8 +43,11 @@ PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteractionCreat polymorphicSurfaceMaterialInteraction.fdata2 = opaqueSurfaceMaterialInteraction.anisotropicRoughness.x; polymorphicSurfaceMaterialInteraction.fdata3 = opaqueSurfaceMaterialInteraction.anisotropicRoughness.y; polymorphicSurfaceMaterialInteraction.fdata4 = opaqueSurfaceMaterialInteraction.normalDetail; - polymorphicSurfaceMaterialInteraction.idata0 = opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex; + polymorphicSurfaceMaterialInteraction.fdata5 = 
opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.measurementDistance; + polymorphicSurfaceMaterialInteraction.idata0 = opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedTransmittanceColor; + polymorphicSurfaceMaterialInteraction.idata1 = opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.packedSingleScatteringAlbedo; polymorphicSurfaceMaterialInteraction.bdata0 = opaqueSurfaceMaterialInteraction.flags; + polymorphicSurfaceMaterialInteraction.bdata1 = opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.volumetricAnisotropy; polymorphicSurfaceMaterialInteraction.type = surfaceMaterialTypeOpaque; @@ -510,3 +513,10 @@ bool polymorphicSurfaceMaterialInteractionHasHeightTexture( polymorphicSurfaceMaterialInteraction.bdata0 & OPAQUE_SURFACE_MATERIAL_INTERACTION_FLAG_HAS_HEIGHT_TEXTURE : false; } + +bool polymorphicSurfaceMaterialInteractionHasSubsurface( + PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteraction) +{ + const uint8_t polymorphicType = polymorphicSurfaceMaterialInteractionGetTypeHelper(polymorphicSurfaceMaterialInteraction); + return polymorphicType == uint(surfaceMaterialTypeOpaque) && polymorphicSurfaceMaterialInteraction.fdata5 > 0.0h; +} diff --git a/src/dxvk/shaders/rtx/concept/surface_material/ray_portal_surface_material_interaction.slangh b/src/dxvk/shaders/rtx/concept/surface_material/ray_portal_surface_material_interaction.slangh index c7962977d..39cd69c1e 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/ray_portal_surface_material_interaction.slangh +++ b/src/dxvk/shaders/rtx/concept/surface_material/ray_portal_surface_material_interaction.slangh @@ -147,6 +147,12 @@ RayPortalSurfaceMaterialInteraction rayPortalSurfaceMaterialInteractionCreate( { mask2 = maskSample2.r; } + else + { + // Approximation for the Portal mask texture + float2 texcoord = surfaceInteraction.textureCoordinates * 2.f - 1.f; + mask2 = float16_t(1.f - saturate(0.6f * dot(texcoord, texcoord))); 
+ } // Note: Portal mask is linear data and has no need to be gamma corrected. diff --git a/src/dxvk/shaders/rtx/concept/surface_material/surface_material.h b/src/dxvk/shaders/rtx/concept/surface_material/surface_material.h index 28146974b..aa6a77d5c 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/surface_material.h +++ b/src/dxvk/shaders/rtx/concept/surface_material/surface_material.h @@ -121,12 +121,24 @@ struct RayPortalSurfaceMaterial struct SubsurfaceMaterial { + uint16_t subsurfaceTransmittanceTextureIndex; + uint16_t subsurfaceThicknessTextureIndex; + uint16_t subsurfaceSingleScatteringAlbedoTextureIndex; + f16vec3 volumetricAttenuationCoefficient; float16_t measurementDistance; f16vec3 singleScatteringAlbedo; float16_t volumetricAnisotropy; }; +struct SubsurfaceMaterialInteraction +{ + uint16_t packedTransmittanceColor; // Pack with R5G6B5 + float16_t measurementDistance; + uint16_t packedSingleScatteringAlbedo; // Pack with R5G6B5 + uint8_t volumetricAnisotropy; +}; + struct OpaqueSurfaceMaterialInteraction { f16vec3 shadingNormal; @@ -139,7 +151,7 @@ struct OpaqueSurfaceMaterialInteraction f16vec2 anisotropicRoughness; // Note: fp16 may not be sufficient here for high radiance values, potentially change if clamping f16vec3 emissiveRadiance; - uint16_t subsurfaceMaterialIndex; + SubsurfaceMaterialInteraction subsurfaceMaterialInteraction; // Note: A value of 0 in the thin film thickness indicates the thin film is disabled. 
float16_t thinFilmThickness; uint8_t flags; @@ -229,11 +241,13 @@ struct PolymorphicSurfaceMaterialInteraction float16_t fdata2; float16_t fdata3; float16_t fdata4; + float16_t fdata5; uint16_t idata0; uint16_t idata1; uint8_t bdata0; + uint8_t bdata1; uint8_t type; }; diff --git a/src/dxvk/shaders/rtx/concept/surface_material/surface_material_helper.slangh b/src/dxvk/shaders/rtx/concept/surface_material/surface_material_helper.slangh index 9a6e41ad1..edb0425eb 100644 --- a/src/dxvk/shaders/rtx/concept/surface_material/surface_material_helper.slangh +++ b/src/dxvk/shaders/rtx/concept/surface_material/surface_material_helper.slangh @@ -117,17 +117,38 @@ void adjustProbabilityValue(inout float16_t value, float16_t zeroThreshold, floa value = max(value, minProbability); } -bool subSurfaceMaterialReadHelper( +bool isSubsurfaceMaterial(OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteraction) +{ + return opaqueSurfaceMaterialInteraction.subsurfaceMaterialInteraction.measurementDistance > 0.0h; +} + +SubsurfaceMaterialInteraction subSurfaceMaterialReadHelper( uint16_t subsurfaceMaterialIndex, - inout SubsurfaceMaterial subSurfaceMaterial) + SurfaceInteraction surfaceInteraction, + uint16_t samplerIndex) { if (!cb.thinOpaqueEnable || subsurfaceMaterialIndex == BINDING_INDEX_INVALID) { - return false; - } + SubsurfaceMaterialInteraction subsurfaceMaterialInteraction; + subsurfaceMaterialInteraction.packedTransmittanceColor = uint16_t(0); + subsurfaceMaterialInteraction.measurementDistance = 0.0h; + subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = uint16_t(0); + subsurfaceMaterialInteraction.volumetricAnisotropy = uint8_t(0); - const MemoryPolymorphicSurfaceMaterial memorySubSurfaceMaterial = surfaceMaterialExtensions[subsurfaceMaterialIndex]; - subSurfaceMaterial = subsurfaceMaterialCreate(memorySubSurfaceMaterial); + return subsurfaceMaterialInteraction; + } - return subSurfaceMaterial.measurementDistance > 0.0h; + const 
MemoryPolymorphicSurfaceMaterial memorySubsurfaceMaterial = surfaceMaterialExtensions[subsurfaceMaterialIndex]; + SubsurfaceMaterial subsurfaceMaterial = subsurfaceMaterialCreate(memorySubsurfaceMaterial); + return subsurfaceMaterialInteractionCreate(subsurfaceMaterial, surfaceInteraction, samplerIndex); } + +SubsurfaceMaterialInteraction subSurfaceMaterialReadHelper(uint4 subsurfaceData) +{ + SubsurfaceMaterialInteraction subsurfaceMaterialInteraction; + subsurfaceMaterialInteraction.packedTransmittanceColor = uint16_t(subsurfaceData.x); + subsurfaceMaterialInteraction.measurementDistance = uint16BitsToHalf(uint16_t(subsurfaceData.y)); + subsurfaceMaterialInteraction.packedSingleScatteringAlbedo = uint16_t(subsurfaceData.z); + subsurfaceMaterialInteraction.volumetricAnisotropy = uint8_t(subsurfaceData.w); + return subsurfaceMaterialInteraction; +} \ No newline at end of file diff --git a/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_binding_indices.h b/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_binding_indices.h index b9a026f63..17c8fbf2a 100644 --- a/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_binding_indices.h @@ -28,6 +28,7 @@ #define USE_32BIT_RAY_DIRECTION 1 // Inputs +#define GBUFFER_BINDING_LINEAR_WRAP_SAMPLER 37 #define GBUFFER_BINDING_SKYPROBE 38 #define GBUFFER_BINDING_SKYMATTE 39 #define GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT 40 @@ -84,20 +85,22 @@ #define GBUFFER_BINDING_TRANSMISSION_PSR_DATA_STORAGE_3 87 #define GBUFFER_BINDING_ALPHA_BLEND_GBUFFER_OUTPUT 88 -#define GBUFFER_BINDING_DISPLACEMENT_TEXTURE_COORD_OUTPUT 89 +#define GBUFFER_BINDING_SHARED_TEXTURE_COORD_OUTPUT 89 #define GBUFFER_BINDING_SHARED_SURFACE_INDEX_OUTPUT 90 +#define GBUFFER_BINDING_SHARED_SUBSURFACE_DATA_OUTPUT 91 -#define GBUFFER_BINDING_ALIASED_DATA_0 91 +#define GBUFFER_BINDING_ALIASED_DATA_0 92 #define GBUFFER_BINDING_DECAL_MATERIAL_STORAGE (GBUFFER_BINDING_ALIASED_DATA_0 + 0) #define 
GBUFFER_BINDING_REFLECTION_PSR_DATA_STORAGE_0 (GBUFFER_BINDING_ALIASED_DATA_0 + 1) -#define GBUFFER_BINDING_ALIASED_DATA_1 93 +#define GBUFFER_BINDING_ALIASED_DATA_1 94 #define GBUFFER_BINDING_DECAL_EMISSIVE_RADIANCE_STORAGE (GBUFFER_BINDING_ALIASED_DATA_1 + 0) #define GBUFFER_BINDING_REFLECTION_PSR_DATA_STORAGE_1 (GBUFFER_BINDING_ALIASED_DATA_1 + 1) +#define GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT 96 #define GBUFFER_MIN_BINDING GBUFFER_BINDING_SKYMATTE -#define GBUFFER_MAX_BINDING GBUFFER_BINDING_REFLECTION_PSR_DATA_STORAGE_1 +#define GBUFFER_MAX_BINDING GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT #if GBUFFER_MIN_BINDING <= COMMON_MAX_BINDING #error "Increase the base index of G-buffer bindings to avoid overlap with common bindings!" diff --git a/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_bindings.slangh b/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_bindings.slangh index 86f44efd6..1542f2f0a 100644 --- a/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/gbuffer/gbuffer_bindings.slangh @@ -27,6 +27,9 @@ // Inputs +layout(binding = GBUFFER_BINDING_LINEAR_WRAP_SAMPLER) +SamplerState LinearWrapSampler; + layout(binding = GBUFFER_BINDING_VOLUME_FILTERED_RADIANCE_INPUT) Sampler3D VolumeFilteredRadiance; @@ -178,12 +181,15 @@ RWTexture2D SharedBiasCurrentColorMask; layout(rgba32ui, binding = GBUFFER_BINDING_ALPHA_BLEND_GBUFFER_OUTPUT) RWTexture2D AlphaBlendGBuffer; -layout(rg32f, binding = GBUFFER_BINDING_DISPLACEMENT_TEXTURE_COORD_OUTPUT) -RWTexture2D DisplacementTextureCoord; +layout(rg32f, binding = GBUFFER_BINDING_SHARED_TEXTURE_COORD_OUTPUT) +RWTexture2D SharedTextureCoord; layout(r16ui, binding = GBUFFER_BINDING_SHARED_SURFACE_INDEX_OUTPUT) RWTexture2D SharedSurfaceIndex; +layout(rgba16ui, binding = GBUFFER_BINDING_SHARED_SUBSURFACE_DATA_OUTPUT) +RWTexture2D SharedSubsurfaceData; + layout(push_constant) ConstantBuffer push; @@ -215,3 +221,6 @@ RWTexture2D TransmissionPSRData2; layout(rg32ui, binding = 
GBUFFER_BINDING_TRANSMISSION_PSR_DATA_STORAGE_3) RWTexture2D TransmissionPSRData3; + +layout(r32ui, binding = GBUFFER_BINDING_PRIMARY_OBJECT_PICKING_OUTPUT) +RWTexture2D PrimaryObjectPicking; diff --git a/src/dxvk/shaders/rtx/pass/image_utils/cube_to_latlong.comp.slang b/src/dxvk/shaders/rtx/pass/image_utils/cube_to_latlong.comp.slang index 3b139e57c..8e0ea089c 100644 --- a/src/dxvk/shaders/rtx/pass/image_utils/cube_to_latlong.comp.slang +++ b/src/dxvk/shaders/rtx/pass/image_utils/cube_to_latlong.comp.slang @@ -45,12 +45,8 @@ void main(uint2 idx : SV_DispatchThreadID) { } float3 dir = float3(cos(latlong.y) * cos(latlong.x), - sin(latlong.y), - cos(latlong.y) * sin(latlong.x)); - - if (cb.transform == LatLongTransform::ZUpToOpenEXR) { - dir = dir.xzy; - } + cos(latlong.y) * sin(latlong.x), + sin(latlong.y)); LatLong[idx] = Cube.Sample(dir); } diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct.slangh b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct.slangh index 4f8c1bb93..9327cf719 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct.slangh +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct.slangh @@ -52,15 +52,7 @@ void deserializeInteractions( polymorphicSurfaceMaterialInteraction = polymorphicSurfaceMaterialInteractionReadFromGBuffer( pixelCoordinate, PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryAlbedo, PrimaryBaseReflectivity, - SharedMaterialData0, SharedMaterialData1); - - if (cb.thinOpaqueEnable) - { - const uint16_t primarySurfaceIndex = uint16_t(SharedSurfaceIndex[pixelCoordinate]); - const MemoryPolymorphicSurfaceMaterial memoryPolymorphicSurfaceMaterial = surfaceMaterials[primarySurfaceIndex]; - const OpaqueSurfaceMaterial opaqueSurfaceMaterial = opaqueSurfaceMaterialCreate(primarySurfaceIndex, memoryPolymorphicSurfaceMaterial); - polymorphicSurfaceMaterialInteraction.idata0 = opaqueSurfaceMaterial.subsurfaceMaterialIndex; - } + SharedMaterialData0, SharedMaterialData1, SharedSurfaceIndex, 
SharedSubsurfaceData); } else { @@ -72,15 +64,7 @@ void deserializeInteractions( polymorphicSurfaceMaterialInteraction = polymorphicSurfaceMaterialInteractionReadFromGBuffer( pixelCoordinate, SecondaryWorldShadingNormal, aliasedData1.SecondaryPerceptualRoughness, SecondaryAlbedo, SecondaryBaseReflectivity, - SharedMaterialData0, SharedMaterialData1); - - if (cb.thinOpaqueEnable) - { - const uint16_t primarySurfaceIndex = uint16_t(SharedSurfaceIndex[pixelCoordinate]); - const MemoryPolymorphicSurfaceMaterial memoryPolymorphicSurfaceMaterial = surfaceMaterials[primarySurfaceIndex]; - const OpaqueSurfaceMaterial opaqueSurfaceMaterial = opaqueSurfaceMaterialCreate(primarySurfaceIndex, memoryPolymorphicSurfaceMaterial); - polymorphicSurfaceMaterialInteraction.idata0 = opaqueSurfaceMaterial.subsurfaceMaterialIndex; - } + SharedMaterialData0, SharedMaterialData1, SharedSurfaceIndex, SharedSubsurfaceData); } } diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_binding_indices.h b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_binding_indices.h index 371fe95a1..01c27b950 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_binding_indices.h @@ -29,45 +29,47 @@ #define INTEGRATE_DIRECT_BINDING_SHARED_INTEGRATION_SURFACE_PDF_INPUT 40 #define INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA0_INPUT 41 #define INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA1_INPUT 42 -#define INTEGRATE_DIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT 43 -#define INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT 44 - -#define INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 45 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 46 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_ALBEDO_INPUT 47 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 48 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT 49 -#define 
INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_POSITION_WORLD_TRIANGLE_NORMAL_INPUT 50 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_POSITION_ERROR_INPUT 51 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_RTXDI_RESERVOIR 52 - -#define INTEGRATE_DIRECT_BINDING_SECONDARY_WORLD_SHADING_NORMAL_INPUT 53 -#define INTEGRATE_DIRECT_BINDING_SECONDARY_ALBEDO_INPUT 54 -#define INTEGRATE_DIRECT_BINDING_SECONDARY_VIEW_DIRECTION_INPUT 55 -#define INTEGRATE_DIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT 56 - -#define INTEGRATE_DIRECT_BINDING_NEE_CACHE 57 -#define INTEGRATE_DIRECT_BINDING_NEE_CACHE_THREAD_TASK 58 +#define INTEGRATE_DIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT 43 +#define INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT 44 +#define INTEGRATE_DIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT 45 + +#define INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 46 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 47 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_ALBEDO_INPUT 48 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 49 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT 50 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_WORLD_POSITION_WORLD_TRIANGLE_NORMAL_INPUT 51 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_POSITION_ERROR_INPUT 52 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_RTXDI_RESERVOIR 53 + +#define INTEGRATE_DIRECT_BINDING_SECONDARY_WORLD_SHADING_NORMAL_INPUT 54 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_ALBEDO_INPUT 55 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_VIEW_DIRECTION_INPUT 56 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT 57 + +#define INTEGRATE_DIRECT_BINDING_NEE_CACHE 58 +#define INTEGRATE_DIRECT_BINDING_NEE_CACHE_THREAD_TASK 59 +#define INTEGRATE_DIRECT_BINDING_NEE_CACHE_TASK 60 // Inputs/Outputs -#define INTEGRATE_DIRECT_BINDING_SHARED_FLAGS_INPUT_OUTPUT 60 -#define INTEGRATE_DIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT_OUTPUT 61 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_BASE_REFLECTIVITY_INPUT_OUTPUT 62 -#define 
INTEGRATE_DIRECT_BINDING_SECONDARY_BASE_REFLECTIVITY_INPUT_OUTPUT 63 +#define INTEGRATE_DIRECT_BINDING_SHARED_FLAGS_INPUT_OUTPUT 61 +#define INTEGRATE_DIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT_OUTPUT 62 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_BASE_REFLECTIVITY_INPUT_OUTPUT 63 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_BASE_REFLECTIVITY_INPUT_OUTPUT 64 // Outputs -#define INTEGRATE_DIRECT_BINDING_PRIMARY_DIRECT_DIFFUSE_LOBE_RADIANCE_OUTPUT 64 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_DIRECT_SPECULAR_LOBE_RADIANCE_OUTPUT 65 -#define INTEGRATE_DIRECT_BINDING_SECONDARY_COMBINED_DIFFUSE_LOBE_RADIANCE_OUTPUT 66 -#define INTEGRATE_DIRECT_BINDING_SECONDARY_COMBINED_SPECULAR_LOBE_RADIANCE_OUTPUT 67 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_DIRECT_DIFFUSE_LOBE_RADIANCE_OUTPUT 65 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_DIRECT_SPECULAR_LOBE_RADIANCE_OUTPUT 66 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_COMBINED_DIFFUSE_LOBE_RADIANCE_OUTPUT 67 +#define INTEGRATE_DIRECT_BINDING_SECONDARY_COMBINED_SPECULAR_LOBE_RADIANCE_OUTPUT 68 -#define INTEGRATE_DIRECT_BINDING_PRIMARY_RTXDI_ILLUMINANCE_OUTPUT 68 +#define INTEGRATE_DIRECT_BINDING_PRIMARY_RTXDI_ILLUMINANCE_OUTPUT 69 -#define INTEGRATE_DIRECT_BINDING_INDIRECT_THROUGHPUT_CONE_RADIUS_OUTPUT 69 +#define INTEGRATE_DIRECT_BINDING_INDIRECT_THROUGHPUT_CONE_RADIUS_OUTPUT 70 -#define INTEGRATE_DIRECT_BINDING_NEE_CACHE_SAMPLE 70 +#define INTEGRATE_DIRECT_BINDING_NEE_CACHE_SAMPLE 71 // Aliased Inputs/Outputs diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_bindings.slangh b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_bindings.slangh index e3bf80c5e..0869ff72d 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_direct_bindings.slangh @@ -38,11 +38,14 @@ Texture2D SharedMaterialData0; layout(r32ui, binding = INTEGRATE_DIRECT_BINDING_SHARED_MATERIAL_DATA1_INPUT) Texture2D SharedMaterialData1; -layout(rg32f, binding = 
INTEGRATE_DIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) -RWTexture2D DisplacementTextureCoord; +layout(rg32f, binding = INTEGRATE_DIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT) +Texture2D SharedTextureCoord; layout(r16ui, binding = INTEGRATE_DIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) -RWTexture2D SharedSurfaceIndex; +Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = INTEGRATE_DIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; // Shared Inputs/Outputs @@ -87,6 +90,9 @@ ByteAddressBuffer NeeCache; layout(binding = INTEGRATE_DIRECT_BINDING_NEE_CACHE_SAMPLE) StructuredBuffer NeeCacheSample; +layout(binding = INTEGRATE_DIRECT_BINDING_NEE_CACHE_TASK) +RWByteAddressBuffer NeeCacheTask; + layout(rg32ui, binding = INTEGRATE_DIRECT_BINDING_NEE_CACHE_THREAD_TASK) RWTexture2D NeeCacheThreadTask; diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_binding_indices.h b/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_binding_indices.h index dd8a5c3e2..51ceaa7a7 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_binding_indices.h @@ -24,37 +24,41 @@ #include "rtx/pass/common_binding_indices.h" // Inputs -#define INTEGRATE_INDIRECT_BINDING_SKYPROBE 39 +#define INTEGRATE_BINDING_LINEAR_WRAP_SAMPLER 38 +#define INTEGRATE_INDIRECT_BINDING_SKYPROBE 39 -#define INTEGRATE_INDIRECT_BINDING_SHARED_FLAGS_INPUT 40 -#define INTEGRATE_INDIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT 41 -#define INTEGRATE_INDIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT 42 -#define INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT 43 +#define INTEGRATE_INDIRECT_BINDING_SHARED_FLAGS_INPUT 40 +#define INTEGRATE_INDIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT 41 +#define INTEGRATE_INDIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT 42 +#define INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT 43 +#define 
INTEGRATE_INDIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT 44 // Todo: Remove, temporary but needed for now for the miss flag encoded in these values. -#define INTEGRATE_INDIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT 44 -#define INTEGRATE_INDIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT 45 -#define INTEGRATE_INDIRECT_BINDING_PRIMARY_WORLD_POSITION_INPUT 46 -#define INTEGRATE_INDIRECT_BINDING_PRIMARY_RTXDI_RESERVOIR 47 - -#define INTEGRATE_INDIRECT_BINDING_RAY_ORIGIN_DIRECTION_INPUT 48 -#define INTEGRATE_INDIRECT_BINDING_FIRST_HIT_PERCEPTUAL_ROUGHNESS_INPUT 49 - -#define INTEGRATE_INDIRECT_BINDING_LAST_GBUFFER_INPUT 50 -#define INTEGRATE_INDIRECT_BINDING_PREV_WORLD_POSITION_INPUT 51 -#define INTEGRATE_INDIRECT_BINDING_VOLUME_FILTERED_RADIANCE_INPUT 52 -#define INTEGRATE_INDIRECT_BINDING_PRIMARY_HIT_DISTANCE_INPUT 53 -#define INTEGRATE_INDIRECT_BINDING_SECONDARY_HIT_DISTANCE_INPUT 54 -#define INTEGRATE_INDIRECT_BINDING_LAST_COMPOSITE_INPUT 55 -#define INTEGRATE_INDIRECT_BINDING_FIRST_SAMPLED_LOBE_DATA_INPUT 56 - -#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE 57 -#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE_SAMPLE 58 -#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE_THREAD_TASK 59 +#define INTEGRATE_INDIRECT_BINDING_PRIMARY_CONE_RADIUS_INPUT 45 +#define INTEGRATE_INDIRECT_BINDING_SECONDARY_CONE_RADIUS_INPUT 46 +#define INTEGRATE_INDIRECT_BINDING_PRIMARY_WORLD_POSITION_INPUT 47 +#define INTEGRATE_INDIRECT_BINDING_PRIMARY_RTXDI_RESERVOIR 48 + +#define INTEGRATE_INDIRECT_BINDING_RAY_ORIGIN_DIRECTION_INPUT 49 +#define INTEGRATE_INDIRECT_BINDING_FIRST_HIT_PERCEPTUAL_ROUGHNESS_INPUT 50 + +#define INTEGRATE_INDIRECT_BINDING_LAST_GBUFFER_INPUT 51 +#define INTEGRATE_INDIRECT_BINDING_PREV_WORLD_POSITION_INPUT 52 +#define INTEGRATE_INDIRECT_BINDING_VOLUME_FILTERED_RADIANCE_INPUT 53 +#define INTEGRATE_INDIRECT_BINDING_PRIMARY_HIT_DISTANCE_INPUT 54 +#define INTEGRATE_INDIRECT_BINDING_SECONDARY_HIT_DISTANCE_INPUT 55 +#define INTEGRATE_INDIRECT_BINDING_LAST_COMPOSITE_INPUT 56 +#define 
INTEGRATE_INDIRECT_BINDING_FIRST_SAMPLED_LOBE_DATA_INPUT 57 + +#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE 58 +#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE_SAMPLE 59 +#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE_THREAD_TASK 60 +#define INTEGRATE_INDIRECT_BINDING_NEE_CACHE_TASK 61 +#define INTEGRATE_INDIRECT_BINDING_PRIMITIVE_ID_PREFIX_SUM 62 // Storage -#define INTEGRATE_INDIRECT_BINDING_DECAL_MATERIAL_STORAGE 61 +#define INTEGRATE_INDIRECT_BINDING_DECAL_MATERIAL_STORAGE 63 // Outputs diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_bindings.slangh b/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_bindings.slangh index c9bd6a387..97e726306 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_indirect_bindings.slangh @@ -28,6 +28,9 @@ // Shared Inputs +layout(binding = INTEGRATE_BINDING_LINEAR_WRAP_SAMPLER) +SamplerState LinearWrapSampler; + layout(binding = INTEGRATE_INDIRECT_BINDING_SKYPROBE) SamplerCube SkyProbe; @@ -37,11 +40,14 @@ Texture2D SharedFlags; layout(r16ui, binding = INTEGRATE_INDIRECT_BINDING_SHARED_MEDIUM_MATERIAL_INDEX_INPUT) Texture2D SharedMediumMaterialIndex; -layout(rg32f, binding = INTEGRATE_INDIRECT_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) -RWTexture2D DisplacementTextureCoord; +layout(rg32f, binding = INTEGRATE_INDIRECT_BINDING_SHARED_TEXTURE_COORD_INPUT) +Texture2D SharedTextureCoord; layout(r16ui, binding = INTEGRATE_INDIRECT_BINDING_SHARED_SURFACE_INDEX_INPUT) -RWTexture2D SharedSurfaceIndex; +Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = INTEGRATE_INDIRECT_BINDING_SHARED_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; // Inputs @@ -108,9 +114,15 @@ ByteAddressBuffer NeeCache; layout(binding = INTEGRATE_INDIRECT_BINDING_NEE_CACHE_SAMPLE) StructuredBuffer NeeCacheSample; +layout(binding = INTEGRATE_INDIRECT_BINDING_NEE_CACHE_TASK) +RWByteAddressBuffer NeeCacheTask; + layout(rg32ui, binding = 
INTEGRATE_INDIRECT_BINDING_NEE_CACHE_THREAD_TASK) RWTexture2D NeeCacheThreadTask; +layout(binding = INTEGRATE_INDIRECT_BINDING_PRIMITIVE_ID_PREFIX_SUM) +StructuredBuffer PrimitiveIDPrefixSum; + // Aliased resources layout(rgba16f, binding = INTEGRATE_INDIRECT_BINDING_ALIASED_DATA_0) diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee.comp.slang b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee.comp.slang index b7010964f..9cd74b367 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee.comp.slang +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee.comp.slang @@ -32,6 +32,16 @@ #include "rtx/algorithm/nee_cache.h" #include "rtx/algorithm/integrator.slangh" +float16_t getLobeAngleFromMaterial(OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteraction) +{ + const float perceptualRoughnessSquared = opaqueSurfaceMaterialInteraction.isotropicRoughness; + const float lobeAngleNorm = 2.0 * perceptualRoughnessSquared / (1.0 + perceptualRoughnessSquared); + const float specularRatio = calcBt709Luminance(opaqueSurfaceMaterialInteraction.baseReflectivity) + / calcBt709Luminance(opaqueSurfaceMaterialInteraction.albedo + opaqueSurfaceMaterialInteraction.baseReflectivity); + const float finalLobeAngleNorm = lerp(1.0, lobeAngleNorm, specularRatio); + return pi * 0.25 * finalLobeAngleNorm; +} + [shader("compute")] [numthreads(16, 8, 1)] void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupThreadID) @@ -99,7 +109,8 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh // Deserialize the Surface Material Interaction PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteraction = polymorphicSurfaceMaterialInteractionReadFromGBuffer( - threadIndex, PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryAlbedo, PrimaryBaseReflectivity, SharedMaterialData0, SharedMaterialData1); + threadIndex, PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryAlbedo, 
PrimaryBaseReflectivity, + SharedMaterialData0, SharedMaterialData1, SharedSurfaceIndex, SharedSubsurfaceData); // Perform next event estimation on cached emissive triangle list // Also add NEE sample to sample reservoir @@ -107,8 +118,8 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh if (cb.neeCacheArgs.enable && cb.neeCacheArgs.enableOnFirstBounce && polymorphicSurfaceMaterialInteractionGetTypeHelper(polymorphicSurfaceMaterialInteraction) == surfaceMaterialTypeOpaque) { - vec3 jitterOffset = vec3(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG)); - NEECell cell = NEECache.findCell(minimalSurfaceInteraction.position, true, jitterOffset); + uint jitter = sampleUniformIntRng(rtxdiRNG); + NEECell cell = NEECache.getCell(NEECache.pointToOffset(minimalSurfaceInteraction.position, minimalSurfaceInteraction.triangleNormal, jitter)); if (cell.isValid()) { int candidateCount = cell.getCandidateCount(); @@ -121,8 +132,11 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh NEECandidate candidate = cell.sampleCandidate(RAB_GetNextRandom(rtxdiRNG), lightObjectPdf); int primitiveIndex = candidate.getPrimitiveID(); vec2 uv = vec2(RAB_GetNextRandom(rtxdiRNG), RAB_GetNextRandom(rtxdiRNG)); + float triangleArea; + float16_t spreadAngle = getLobeAngleFromMaterial(opaqueSurfaceMaterialInteraction); lightSample = NEECacheUtils.calculateLightSampleFromTriangle( - candidate.getSurfaceID(), candidate.getPrimitiveID(), uv, lightObjectPdf, minimalSurfaceInteraction.position, minimalRayInteraction.coneRadius, primaryRay.spreadAngle); + candidate.getSurfaceID(), candidate.getPrimitiveID(), uv, lightObjectPdf, minimalSurfaceInteraction.position, + minimalRayInteraction.coneRadius, spreadAngle, triangleArea, true, kFootprintFromRayOriginClamped); bool isVisible = false; vec3 diffuseLight = 0; @@ -142,7 +156,16 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : 
SV_GroupTh lightSample, invalidRayPortalIndex, surface.portalSpace, rayMask, surface.isViewModel, primaryRay, minimalRayInteraction, minimalSurfaceInteraction, opaqueSurfaceMaterialInteraction, - diffuseLight, specularLight, surfaceIndex, DisplacementTextureCoord[threadIndex]); + diffuseLight, specularLight, surfaceIndex, SharedTextureCoord[threadIndex]); + + // Skip some pixels to reduce memory traffic, especially when large screen area is occupied by a single cell. + if (any(diffuseLight + specularLight) > 0 && (threadIndex.x + threadIndex.y) % 4 == 0) + { + float16_t accumulateValue = calcBt709Luminance(diffuseLight + specularLight); + int prefixTask = NEECacheUtils.convertIDToPrefixSumID(surfaceIndex, primitiveIndex, PrimitiveIDPrefixSum); + float16_t randomOffset = RAB_GetNextRandom(rtxdiRNG); + cell.insertSlotTask(prefixTask, accumulateValue, randomOffset, false); + } indirectDiffuseOutput.xyz += diffuseLight; indirectSpecularOutput.xyz += specularLight; diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_binding_indices.h b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_binding_indices.h index ac19d6cea..72dca8111 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_binding_indices.h @@ -28,28 +28,30 @@ #define INTEGRATE_NEE_BINDING_SHARED_FLAGS_INPUT 40 #define INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA0_INPUT 41 #define INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA1_INPUT 42 -#define INTEGRATE_NEE_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT 43 +#define INTEGRATE_NEE_BINDING_SHARED_TEXTURE_COORD_INPUT 43 #define INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT 44 +#define INTEGRATE_NEE_BINDING_SHARED_SUBSURFACE_DATA_INPUT 45 -#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 45 -#define INTEGRATE_NEE_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 46 -#define INTEGRATE_NEE_BINDING_PRIMARY_ALBEDO_INPUT 47 -#define 
INTEGRATE_NEE_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 48 -#define INTEGRATE_NEE_BINDING_PRIMARY_CONE_RADIUS_INPUT 49 -#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_POSITION_INPUT 50 +#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 46 +#define INTEGRATE_NEE_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 47 +#define INTEGRATE_NEE_BINDING_PRIMARY_ALBEDO_INPUT 48 +#define INTEGRATE_NEE_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 49 +#define INTEGRATE_NEE_BINDING_PRIMARY_CONE_RADIUS_INPUT 50 +#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_POSITION_INPUT 51 -#define INTEGRATE_NEE_BINDING_PRIMARY_POSITION_ERROR_INPUT 51 -#define INTEGRATE_NEE_BINDING_PRIMARY_HIT_DISTANCE_INPUT 52 -#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT 53 -#define INTEGRATE_NEE_BINDING_INDIRECT_RADIANCE_HIT_DISTANCE_INPUT 54 +#define INTEGRATE_NEE_BINDING_PRIMARY_POSITION_ERROR_INPUT 52 +#define INTEGRATE_NEE_BINDING_PRIMARY_HIT_DISTANCE_INPUT 53 +#define INTEGRATE_NEE_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT 54 +#define INTEGRATE_NEE_BINDING_INDIRECT_RADIANCE_HIT_DISTANCE_INPUT 55 #define INTEGRATE_NEE_BINDING_NEE_CACHE 56 #define INTEGRATE_NEE_BINDING_NEE_CACHE_TASK 57 #define INTEGRATE_NEE_BINDING_NEE_CACHE_SAMPLE 58 #define INTEGRATE_NEE_BINDING_NEE_CACHE_THREAD_TASK 59 +#define INTEGRATE_NEE_BINDING_PRIMITIVE_ID_PREFIX_SUM 60 -#define INTEGRATE_NEE_BINDING_RADIANCE_INPUT 60 -#define INTEGRATE_NEE_BINDING_HIT_GEOMETRY_INPUT 61 +#define INTEGRATE_NEE_BINDING_RADIANCE_INPUT 61 +#define INTEGRATE_NEE_BINDING_HIT_GEOMETRY_INPUT 62 // Inputs/Outputs diff --git a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_bindings.slangh b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_bindings.slangh index 4ff939815..3c1c02217 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/integrate/integrate_nee_bindings.slangh @@ -36,11 +36,14 @@ Texture2D SharedMaterialData0; layout(r32ui, binding = 
INTEGRATE_NEE_BINDING_SHARED_MATERIAL_DATA1_INPUT) Texture2D SharedMaterialData1; -layout(rg32f, binding = INTEGRATE_NEE_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) -RWTexture2D DisplacementTextureCoord; +layout(rg32f, binding = INTEGRATE_NEE_BINDING_SHARED_TEXTURE_COORD_INPUT) +Texture2D SharedTextureCoord; layout(r16ui, binding = INTEGRATE_NEE_BINDING_SHARED_SURFACE_INDEX_INPUT) -RWTexture2D SharedSurfaceIndex; +Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = INTEGRATE_NEE_BINDING_SHARED_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; // Primary Inputs @@ -111,3 +114,6 @@ RWStructuredBuffer NeeCacheSample; layout(rg32ui, binding = INTEGRATE_NEE_BINDING_NEE_CACHE_THREAD_TASK) RWTexture2D NeeCacheThreadTask; +layout(binding = INTEGRATE_NEE_BINDING_PRIMITIVE_ID_PREFIX_SUM) +StructuredBuffer PrimitiveIDPrefixSum; + diff --git a/src/dxvk/shaders/rtx/pass/integrate/visualize_nee.comp.slang b/src/dxvk/shaders/rtx/pass/integrate/visualize_nee.comp.slang index 5af588b99..b8d3eecaa 100644 --- a/src/dxvk/shaders/rtx/pass/integrate/visualize_nee.comp.slang +++ b/src/dxvk/shaders/rtx/pass/integrate/visualize_nee.comp.slang @@ -148,7 +148,7 @@ struct Canvas } } -void getTriangleCandidateInfo(NEECell cell, int binIndex, out float probability, out int age, out vec3 lightIntensity) +void getTriangleCandidateInfo(NEECell cell, int binIndex, out float probability, out vec3 lightIntensity) { NEECandidate candidiate = cell.getCandidate(binIndex); float sampleThreshold = candidiate.getSampleThreshold(); @@ -159,42 +159,71 @@ void getTriangleCandidateInfo(NEECell cell, int binIndex, out float probability, lastSampleThreshold = candidiate.getSampleThreshold(); } probability = sampleThreshold - lastSampleThreshold; - age = candidiate.getAge(); // Triangle color vec3 triangleCenter; f16vec3 triangleNormal; NEECacheUtils.calculateTriangleLightIntensity(candidiate.getSurfaceID(), candidiate.getPrimitiveID(), triangleCenter, triangleNormal, lightIntensity); + // Multiply 
0.1 to show brighter lights + lightIntensity = log(calcBt709Luminance(lightIntensity) + 1) * 0.1; } -void getLightCandidateInfo(NEECell cell, int binIndex, RAB_Surface surface, f16vec3 viewDirection, out float probability, out int age, out vec3 radiance, out vec3 direction) +vec3 logarithmicColor(float value) +{ + if (value < 10) + { + return vec3(1,0,0); + } + else if(value < 100.0) + { + return vec3(1,0.5,0); + } + else if(value < 1000.0) + { + return vec3(1,1,0); + } + else if(value < 10000.0) + { + return vec3(0,1,0); + } + else if(value < 100000.0) + { + return vec3(0,1,1); + } + else if(value < 1000000.0) + { + return vec3(0,0,1); + } + else + { + return vec3(1); + } +} + +void getLightCandidateInfo(NEECell cell, int binIndex, RAB_Surface surface, f16vec3 viewDirection, out float probability, out vec3 radiance, out vec3 direction) { NEELightCandidate lightCandidate = cell.getLightCandidate(binIndex); - int3 cellID = NEECache.offsetToCell(cell.m_offset); - vec3 cellCenter = NEECache.cellToCenterPoint(cellID); float16_t specularRatio = calcBt709Luminance(surface.opaqueSurfaceMaterialInteraction.baseReflectivity) / calcBt709Luminance(surface.opaqueSurfaceMaterialInteraction.albedo + surface.opaqueSurfaceMaterialInteraction.baseReflectivity); - SubsurfaceMaterial subSurfaceMaterial; - const bool isSubsurface = subSurfaceMaterialReadHelper(surface.opaqueSurfaceMaterialInteraction.subsurfaceMaterialIndex, subSurfaceMaterial); + const bool isSubsurface = isSubsurfaceMaterial(surface.opaqueSurfaceMaterialInteraction); - cell.calculateLightCandidateNormalizedWeight(binIndex, cellCenter, - surface.minimalSurfaceInteraction.position, - viewDirection, - surface.opaqueSurfaceMaterialInteraction.shadingNormal, - specularRatio, - surface.opaqueSurfaceMaterialInteraction.isotropicRoughness, - isSubsurface, - probability); - age = lightCandidate.getAge(); + cell.calculateLightCandidateNormalizedWeight(binIndex, + surface.minimalSurfaceInteraction.position, + viewDirection, + 
surface.opaqueSurfaceMaterialInteraction.shadingNormal, + specularRatio, + surface.opaqueSurfaceMaterialInteraction.isotropicRoughness, + isSubsurface, + probability); MemoryPolymorphicLight memoryPolymorphicLight = lights[lightCandidate.getLightID()]; DecodedPolymorphicLight decodedPolymorphicLight = decodePolymorphicLight(memoryPolymorphicLight); radiance = decodedPolymorphicLight.radiance; float luminance = calcBt709Luminance(radiance); radiance = radiance / (1e-5 + luminance) * log(1 + luminance) * 0.1; - direction = abs(normalize(lightCandidate.getOffset())); + direction = abs(normalize(NEECache.getCenter() + lightCandidate.getOffset() - surface.minimalSurfaceInteraction.position)); } [shader("compute")] @@ -220,9 +249,9 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh RAB_Surface surface = RAB_GetGBufferSurface(threadIndex, false); vec3 diffuseLight = PrimaryIndirectDiffuseLobeRadianceHitDistance[threadIndex].xyz; vec3 specularLight = PrimaryIndirectSpecularLobeRadianceHitDistance[threadIndex].xyz; - vec3 indirectLight = (diffuseLight + specularLight); + vec3 indirectLight = linearToGamma(diffuseLight + specularLight); - Canvas canvas = Canvas.create(threadIndex, 3); + Canvas canvas = Canvas.create(threadIndex, 2); ivec2 samplePixel = canvas.getSamplePixel(); RAB_Surface sampleSurface = RAB_GetGBufferSurface(samplePixel, false); @@ -237,17 +266,20 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh { case DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM: case DEBUG_VIEW_NEE_CACHE_HISTOGRAM: + case DEBUG_VIEW_NEE_CACHE_ACCUMULATE_MAP: + case DEBUG_VIEW_NEE_CACHE_HASH_MAP: if (geometryFlags.primarySelectedIntegrationSurface) { - NEECell cell = NEECache.findCell(surface.minimalSurfaceInteraction.position, false, vec3(0)); - ivec3 cellID = NEECache.pointToCell(surface.minimalSurfaceInteraction.position, false, vec3(0)); + RAB_RandomSamplerState rtxdiRNG = RAB_InitRandomSampler(threadIndex, cb.frameIdx, 0); + 
uint jitter = 0; + int cellOffset = NEECache.pointToOffset(surface.minimalSurfaceInteraction.position, f16vec3(0), jitter); + NEECell cell = NEECache.getCell(cellOffset); canvas.drawBackground(indirectLight); - if (all(cellID != -1)) - { - canvas.m_backgroundColor = lerp(canvas.m_backgroundColor, (dot(cellID, 1) & 0x1) ? vec3(abs(surface.opaqueSurfaceMaterialInteraction.shadingNormal)) : vec3(0), 0.2); - } - NEECell sampleCell = NEECache.findCell(sampleSurface.minimalSurfaceInteraction.position, false, vec3(0)); + vec3 cellColor = vec3(uint3(cellOffset >> 10, cellOffset >> 5, cellOffset) & 31) / 31.0; + canvas.m_backgroundColor = lerp(canvas.m_backgroundColor, cellColor, 0.5); + + NEECell sampleCell = NEECache.getCell(NEECache.pointToOffset(sampleSurface.minimalSurfaceInteraction.position, f16vec3(0), jitter)); if (sampleCell.isValid()) { if(!canvas.isPadding()) @@ -257,18 +289,34 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh bool isBinValid = true; float probability; - int age; vec3 lightIntensity, direction; bool isLightHistogram = cb.debugView == DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM; + vec3 histogramColor = vec3(1,0.5,0); if (isLightHistogram) { isBinValid = binIndex < sampleCell.getLightCandidateCount(); - getLightCandidateInfo(sampleCell, binIndex, sampleSurface, minimalRayInteraction.viewDirection, probability, age, lightIntensity, direction); + getLightCandidateInfo(sampleCell, binIndex, sampleSurface, minimalRayInteraction.viewDirection, probability, lightIntensity, direction); } else { isBinValid = binIndex < sampleCell.getCandidateCount(); - getTriangleCandidateInfo(sampleCell, binIndex, probability, age, lightIntensity); + getTriangleCandidateInfo(sampleCell, binIndex, probability, lightIntensity); + } + + int slotBinIndex = canvas.getBin(32); + if (cb.debugView == DEBUG_VIEW_NEE_CACHE_ACCUMULATE_MAP) + { + isBinValid = true; + uint2 thisValue = sampleCell.getSlotTaskValue(slotBinIndex); + probability = 
log(float(thisValue.y)) / (32); + histogramColor = logarithmicColor(thisValue.y); + } + else if (cb.debugView == DEBUG_VIEW_NEE_CACHE_HASH_MAP) + { + isBinValid = true; + uint2 thisValue = sampleCell.getHashSlotTaskValue(slotBinIndex); + probability = log(float(thisValue.y)) / (32); + histogramColor = logarithmicColor(thisValue.y); } // Calculate histogram @@ -276,27 +324,23 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh { canvas.drawForeground(vec3(1,0,0)); } - if(canvas.isLegend(2)) + if(canvas.isLegend(1)) { // Show samples int binIndex = canvas.getBin(NEE_CACHE_SAMPLES); NEESample sample = sampleCell.getSample(binIndex); - vec3 color = isLightHistogram ? direction : sample.radiance / (calcBt709Luminance(sample.radiance) + 1e-5) * 0.7; + vec3 color = isLightHistogram ? (isBinValid ? direction : vec3(0.0)) : sample.radiance / (calcBt709Luminance(sample.radiance) + 1e-5) * 0.7; float alpha = isLightHistogram ? (isBinValid ? 1.0 : 0.0) : 1.0; canvas.drawForeground(color, alpha); } - else if(canvas.isLegend(1)) - { - canvas.drawForeground(lightIntensity, isBinValid ? 1.0 : 0.0); - } else if(canvas.isLegend(0)) { - canvas.drawForeground(lerp(vec3(0,1,0), vec3(82,66,44)/255.0, saturate(age / 255.0)), isBinValid ? 1.0 : 0.0); + canvas.drawForeground(lightIntensity, isBinValid ? 1.0 : 0.0); } else if(canvas.isHistogram() && canvas.isHistogramBar(probability)) { // Histogram - canvas.drawForeground(vec3(1,0.5,0), isBinValid ? 1.0 : 0.0); + canvas.drawForeground(histogramColor, isBinValid ? 
1.0 : 0.0); } } else diff --git a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache.comp.slang b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache.comp.slang index fa3fc1677..066b54127 100644 --- a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache.comp.slang +++ b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache.comp.slang @@ -44,155 +44,227 @@ #define UPDATE_NEE_CACHE 1 #include "rtx/algorithm/nee_cache.h" -groupshared uint2 s_srcTaskList[NEE_CACHE_ELEMENTS][8]; -groupshared uint2 s_candidateList[NEE_CACHE_ELEMENTS * 2][8]; -groupshared float s_candidateLight[NEE_CACHE_ELEMENTS * 2][8]; -groupshared int s_candidateCount[8]; +#define INVALID_ID 0xffffff -uint2 mapIDData(uint2 data) +#define CANDIDATE_LENGTH NEE_CACHE_ELEMENTS * 2 +#define CANDIDATE_GROUP_SIZE 16 +#define BATCH_SIZE 8 +groupshared uint2 s_candidateList[CANDIDATE_LENGTH][BATCH_SIZE]; +groupshared float s_candidateLight[CANDIDATE_LENGTH][BATCH_SIZE]; +groupshared int s_candidateCount[BATCH_SIZE]; + +int convertToThisFramePrefixSumID(int lastID, out int surfaceID, out int primitiveID) { - uint2 newData = data; - newData.x = surfaceMapping[newData.x]; - return newData; + surfaceID = NEE_CACHE_INVALID_ID; + primitiveID = NEE_CACHE_INVALID_ID; + if (lastID == NEE_CACHE_INVALID_ID) + { + return NEE_CACHE_INVALID_ID; + } + int surfaceIDLast, primitiveIDLast; + if(!NEECacheUtils.convertPrefixSumIDToID(lastID, LastPrimitiveIDPrefixSum, surfaceIDLast, primitiveIDLast)) + return NEE_CACHE_INVALID_ID; + + int surfaceIDThis = NEECacheUtils.convertToCurrentSurfaceID(surfaceIDLast); + surfaceID = surfaceIDThis; + primitiveID = primitiveIDLast; + return NEECacheUtils.convertIDToPrefixSumID(surfaceIDThis, primitiveIDLast, PrimitiveIDPrefixSum); } -[shader("compute")] -[numthreads(NEE_CACHE_ELEMENTS, 8, 1)] -void main(uint3 threadIndex : SV_DispatchThreadID, uint2 localIndex : SV_GroupThreadID) +int convertLightIndex(int lightIndex) { - int3 cellID = threadIndex / int3(NEE_CACHE_ELEMENTS,1,1); - NEECell 
cell = NEECache.getCell(cellID); - vec3 cellCenter = NEECache.cellToCenterPoint(cellID); + if (lightIndex == NEE_CACHE_INVALID_ID) + return NEE_CACHE_INVALID_ID; - RNG rng = createRNG(uvec2(threadIndex.xy + threadIndex.z * NEE_CACHE_ELEMENTS * NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION), cb.frameIdx, threadIndex.z); + bool currentToPrevious = false; + return lightMapping[lightIndex + (currentToPrevious ? 0 : cb.lightCount)]; +} - if (localIndex.x == 0) +void sortTask(uint batchIndex) +{ + int listCount = min(32, s_candidateCount[batchIndex]); + int totalCount = 0; + for (int i = 0; i < listCount; ++i) { - s_candidateCount[localIndex.y] = 0; + bool sorted = true; + for (int j = i+1; j < listCount; ++j) + { + if (sorted && s_candidateList[j-1][batchIndex].y < s_candidateList[j][batchIndex].y) + { + sorted = false; + } + + if (s_candidateList[i][batchIndex].y < s_candidateList[j][batchIndex].y) + { + uint2 tempCandidate = s_candidateList[i][batchIndex]; + s_candidateList[i][batchIndex] = s_candidateList[j][batchIndex]; + s_candidateList[j][batchIndex] = tempCandidate; + } + } + + if (s_candidateList[i][batchIndex].y == 0 || sorted) + { + break; + } } - GroupMemoryBarrierWithGroupSync(); +} - // load new id to shared memory - int taskCount = cell.getTaskCount(); - int taskID = localIndex.x; - int encodedTask = taskID < taskCount ? cell.getTask(taskID) : -1; - int2 task = -1; - bool isLightSample = NEECache.isAnalyticalLight(encodedTask); - if (!isLightSample && !NEECacheUtils.convertPrefixSumIDToID(encodedTask, PrimitiveIDPrefixSum, task.x, task.y)) +// Clear task list in shared memory +void clearTask(uint2 localIndex) +{ + for (int i = localIndex.x; i < CANDIDATE_LENGTH; i += CANDIDATE_GROUP_SIZE) { - task = -1; + s_candidateList[i][localIndex.y] = 0; } - s_srcTaskList[taskID][localIndex.y] = task; - // load old id to shared memory - int oldIDCount = cb.neeCacheArgs.clearCache ? 
0 : cell.getCandidateCount(); - if (localIndex.x < oldIDCount) + if (localIndex.x == 0) { - int oldCount = 0; - NEECandidate candidate = cell.getCandidate(localIndex.x); - uint newSurfaceID = NEECacheUtils.convertToCurrentSurfaceID(candidate.getSurfaceID()); - candidate.setSurfaceID(newSurfaceID); - candidate.setAge(candidate.getAge() + 1); - s_candidateList[localIndex.x][localIndex.y] = candidate.m_data; + s_candidateCount[localIndex.y] = 0; } +} - if (localIndex.x == 0) +// Put a new task to shared memory. +void insertTask(uint2 value, uint batchIndex) +{ + int oldCount = 0; + InterlockedAdd(s_candidateCount[batchIndex], 1, oldCount); + oldCount = min(oldCount, CANDIDATE_LENGTH - 1); + s_candidateList[oldCount][batchIndex] = value; +} + +// Insert this frame's new tasks, merge them if there is an existing one. +void mergeTask(uint2 value, uint existingCount, uint batchIndex) +{ + if (value.x == NEE_CACHE_INVALID_ID || value.y == 0) { - s_candidateCount[localIndex.y] = oldIDCount; + return; } - GroupMemoryBarrierWithGroupSync(); - // merge tasks - bool insertTask = false; - if (taskID < taskCount && all(task != -1)) + for (int i = 0; i < existingCount; ++i) { - bool isNewTask = true; - for (int i = 0; i < s_candidateCount[localIndex.y]; ++i) + if (s_candidateList[i][batchIndex].x == value.x) { - NEECandidate candidate = NEECandidate.create(s_candidateList[i][localIndex.y]); - if (all(candidate.getIDData() == task)) - { - candidate.setAge(0); - s_candidateList[i][localIndex.y] = candidate.m_data; - isNewTask = false; - } + s_candidateList[i][batchIndex].y += value.y; + return; } - insertTask = isNewTask; } - GroupMemoryBarrierWithGroupSync(); - if (insertTask) + insertTask(value, batchIndex); +} + +// Remove unimportant tasks and calculate normalized light contribution. 
+int finalizeTask(uint batchIndex) +{ + // Calculate total light contribution + float totalLight = 0; + int count = s_candidateCount[batchIndex]; + for (int i = 0; i < count; ++i) { - int oldCount = 0; - InterlockedAdd(s_candidateCount[localIndex.y], 1, oldCount); - NEECandidate newCandidate = NEECandidate.create(task.x, task.y); - s_candidateList[oldCount][localIndex.y] = newCandidate.m_data; + totalLight += float(s_candidateList[i][batchIndex].y); } - GroupMemoryBarrierWithGroupSync(); - // get triangle light - float neeCellSize = NEECache.getCellSize(); - int ithTriangle = localIndex.x; - if (ithTriangle < s_candidateCount[localIndex.y]) + // Cull unimportant tasks + int validCount = 0; + float validTotalLight = 0; + for (int i = 0; i < count; ++i) { - NEECandidate candidate = NEECandidate.create(s_candidateList[ithTriangle][localIndex.y]); - s_candidateLight[ithTriangle][localIndex.y] = NEECacheUtils.calculateTriangleLightToCell(candidate.getIDData(), candidate.getAge(), cellCenter, neeCellSize); + if (s_candidateList[i][batchIndex].y > totalLight * cb.neeCacheArgs.cullingThreshold) + { + validTotalLight += float(s_candidateList[i][batchIndex].y); + validCount++; + } } - ithTriangle = localIndex.x + NEE_CACHE_ELEMENTS; - if (ithTriangle < s_candidateCount[localIndex.y]) + s_candidateCount[batchIndex] = validCount; + + // Calculate light + for (int i = 0; i < validCount; ++i) { - NEECandidate candidate = NEECandidate.create(s_candidateList[ithTriangle][localIndex.y]); - s_candidateLight[ithTriangle][localIndex.y] = NEECacheUtils.calculateTriangleLightToCell(candidate.getIDData(), candidate.getAge(), cellCenter, neeCellSize); + s_candidateLight[i][batchIndex] = validTotalLight > 0 ? 
saturate(float(s_candidateList[i][batchIndex].y) / validTotalLight) : 0.0; } + return validCount; +} + +void updateTriangleTask(NEECell cell, uint2 localIndex) +{ + clearTask(localIndex); + GroupMemoryBarrierWithGroupSync(); - // sort tasks - if (localIndex.x == 0) + // load old id to shared memory + if (cb.neeCacheArgs.clearCache == 0) { - for (int i = 0; i < s_candidateCount[localIndex.y]-1; ++i) + int taskID = localIndex.x; + uint2 value = cell.getSlotTaskValue(taskID); + int surfaceID, primitiveID; + value.x = convertToThisFramePrefixSumID(value.x, surfaceID, primitiveID); + int delta = max(value.y * cb.neeCacheArgs.learningRate, 1); + value.y = clamp(int(value.y) - delta, 0, 1 << 25); + if (value.x != NEE_CACHE_INVALID_ID && value.y > 0) { - for (int j = i+1; j < s_candidateCount[localIndex.y]; ++j) - { - if (s_candidateLight[i][localIndex.y] < s_candidateLight[j][localIndex.y]) - { - float lightI = s_candidateLight[i][localIndex.y]; - s_candidateLight[i][localIndex.y] = s_candidateLight[j][localIndex.y]; - s_candidateLight[j][localIndex.y] = lightI; - - uint2 tempCandidate = s_candidateList[i][localIndex.y]; - s_candidateList[i][localIndex.y] = s_candidateList[j][localIndex.y]; - s_candidateList[j][localIndex.y] = tempCandidate; - } - } + insertTask(value, localIndex.y); } + } - s_candidateCount[localIndex.y] = cb.neeCacheArgs.enable != 0 ? 
min(s_candidateCount[localIndex.y], NEECell.getMaxCandidateCount()) : 0; - cell.setCandidateCount(s_candidateCount[localIndex.y]); + GroupMemoryBarrierWithGroupSync(); + + int maxDeltaValue = max(100, s_candidateList[0][localIndex.y].y); + int existingCount = s_candidateCount[localIndex.y]; - float totalLight = 0; - for (int i = 0; i < s_candidateCount[localIndex.y]; ++i) + // insert new task + for (int i = localIndex.x; i < NEE_CACHE_HASH_TASK_COUNT; i += CANDIDATE_GROUP_SIZE) + { + uint2 value = cell.getHashSlotTaskValue(i); + if (cb.neeCacheArgs.clearCache != 0 || cell.isLightTask(value)) { - totalLight += s_candidateLight[i][localIndex.y]; + continue; } - for (int i = 0; i < s_candidateCount[localIndex.y]; ++i) + value.x &= 0xffffff; + int surfaceID, primitiveID; + value.x = convertToThisFramePrefixSumID(value.x, surfaceID, primitiveID); + value.y = min(maxDeltaValue, value.y); + + // Remove non-emissive triangles, these triangles may be in the candidate list because of large mipmap footprint caused by diffuse ray + if (value.x != NEE_CACHE_INVALID_ID) { - if (cb.neeCacheArgs.enableImportanceSampling > 0) + vec3 triangleCenter, lightIntensity; + f16vec3 triangleNormal; + NEECacheUtils.calculateTriangleLightIntensity(surfaceID, primitiveID, triangleCenter, triangleNormal, lightIntensity); + if (all(lightIntensity == 0)) { - s_candidateLight[i][localIndex.y] = saturate(s_candidateLight[i][localIndex.y] / totalLight); - } - else - { - s_candidateLight[i][localIndex.y] = 1.0 / s_candidateCount[localIndex.y]; + value.x = NEE_CACHE_INVALID_ID; } } + + mergeTask(value, existingCount, localIndex.y); + } + + GroupMemoryBarrierWithGroupSync(); + + // sort tasks + if (localIndex.x == 0) + { + sortTask(localIndex.y); + s_candidateCount[localIndex.y] = cb.neeCacheArgs.enable != 0 ? 
min(s_candidateCount[localIndex.y], NEECell.getMaxCandidateCount()) : 0; + } + GroupMemoryBarrierWithGroupSync(); + + cell.setSlotTaskValue(localIndex.x, s_candidateList[localIndex.x][localIndex.y]); + + if (localIndex.x == 0) + { + int validCount = finalizeTask(localIndex.y); + cell.setCandidateCount(validCount); } GroupMemoryBarrierWithGroupSync(); - // update nee cache + // Update candidate if (localIndex.x < s_candidateCount[localIndex.y]) { int candidateID = localIndex.x; - NEECandidate candidate = NEECandidate.create(s_candidateList[candidateID][localIndex.y]); + uint2 task = s_candidateList[candidateID][localIndex.y]; + int surfaceID, primitiveID; + NEECacheUtils.convertPrefixSumIDToID(task.x & 0xffffff, PrimitiveIDPrefixSum, surfaceID, primitiveID); + NEECandidate candidate = NEECandidate.create(surfaceID, primitiveID); float sampleThreshold = 0; for (int i = 0; i <= candidateID; ++i) @@ -203,10 +275,13 @@ void main(uint3 threadIndex : SV_DispatchThreadID, uint2 localIndex : SV_GroupTh cell.setCandidate(candidateID, candidate); } +} - GroupMemoryBarrierWithGroupSync(); - +void updateTriangleSample(NEECell cell, uint2 localIndex, uint3 threadIndex) +{ + // Update sample vec3 cameraPosition = cameraGetWorldPosition(cb.camera); + RNG rng = createRNG(uvec2(threadIndex.xy + threadIndex.z * NEE_CACHE_ELEMENTS * NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION), cb.frameIdx, threadIndex.z); for (int i = localIndex.x; i < NEE_CACHE_SAMPLES; i += NEE_CACHE_ELEMENTS) { float lightObjectPdf = 0; @@ -216,180 +291,114 @@ void main(uint3 threadIndex : SV_DispatchThreadID, uint2 localIndex : SV_GroupTh if (candidate.isValid()) { float16_t spreadAngle = float16_t(cb.screenSpacePixelSpreadHalfAngle); - float16_t coneRadius = spreadAngle * length(cellCenter - cameraPosition); + float16_t coneRadius = 0.0; vec2 uv = vec2(getNextSampleBlueNoise(rng), getNextSampleBlueNoise(rng)); + float area; LightSample lightSample = NEECacheUtils.calculateLightSampleFromTriangle( 
candidate.getSurfaceID(), candidate.getPrimitiveID(), uv, lightObjectPdf, - cellCenter, coneRadius, spreadAngle, false, kFootprintFromTextureCoordDiff); + /*cellCenter*/ 0, coneRadius, spreadAngle, area, false, kFootprintFromTextureCoordDiff); sample.position = lightSample.position; sample.normal = lightSample.normal; sample.pdf = lightSample.solidAnglePdf; sample.radiance = lightSample.radiance; + sample.triangleID = NEECacheUtils.convertIDToPrefixSumID(candidate.getSurfaceID(), candidate.getPrimitiveID(), PrimitiveIDPrefixSum); } cell.setSample(i, sample); } + GroupMemoryBarrierWithGroupSync(); +} - // update light candidate - // load new id to shared memory - task = -1; - if (isLightSample) - { - task = NEECache.decodeAnalyticalLight(encodedTask); - task = RAB_TranslateLightIndex(task.x, false); - if (task.x == RTXDI_INVALID_LIGHT_INDEX) - { - task = -1; - } - } - taskID = localIndex.x; - s_srcTaskList[taskID][localIndex.y] = task; +void updateLightTask(NEECell cell, uint2 localIndex) +{ + // Update light candidate + clearTask(localIndex); - // load old id to shared memory - oldIDCount = cb.neeCacheArgs.clearCache ? 
0 : cell.getLightCandidateCount(); - if (localIndex.x < oldIDCount) + GroupMemoryBarrierWithGroupSync(); + + if(cb.neeCacheArgs.clearCache == 0) { - int oldCount = 0; - NEELightCandidate candidate = cell.getLightCandidate(localIndex.x); - int lightID = candidate.getLightID(); - lightID = RAB_TranslateLightIndex(lightID, false); - if (lightID == RTXDI_INVALID_LIGHT_INDEX) + uint2 value = cell.getLightSlotTaskValue(localIndex.x); + value.x = convertLightIndex(value.x); + int delta = max(value.y * cb.neeCacheArgs.learningRate, 1); + value.y = clamp(int(value.y) - delta, 0, 1 << 25); + + if (value.x != NEE_CACHE_INVALID_ID && value.y > 0) { - lightID = -1; + insertTask(value, localIndex.y); } - candidate.setLightID(lightID); - candidate.setAge(candidate.getAge() + 1); - s_candidateList[localIndex.x][localIndex.y] = candidate.m_data; } - if (localIndex.x == 0) - { - s_candidateCount[localIndex.y] = oldIDCount; - } GroupMemoryBarrierWithGroupSync(); + int maxDeltaValue = max(100, s_candidateList[0][localIndex.y].y); + int existingCount = s_candidateCount[localIndex.y]; - // merge tasks - insertTask = false; - if (taskID < taskCount && all(task != -1)) + // insert new light task + for (int i = localIndex.x; i < NEE_CACHE_HASH_TASK_COUNT; i += CANDIDATE_GROUP_SIZE) { - bool isNewTask = true; - for (int i = 0; i < taskID -1; ++i) + uint2 value = cell.getHashSlotTaskValue(i); + if (cb.neeCacheArgs.clearCache != 0 || !cell.isLightTask(value)) { - if (all(s_srcTaskList[i][localIndex.y] == task)) - { - isNewTask = false; - } + continue; } - for (int i = 0; i < s_candidateCount[localIndex.y]; ++i) - { - NEELightCandidate candidate = NEELightCandidate.createFromPacked(s_candidateList[i][localIndex.y]); - if (all(candidate.getLightID() == task.x)) - { - candidate.setAge(0); - s_candidateList[i][localIndex.y] = candidate.m_data; - isNewTask = false; - } - } - insertTask = isNewTask; + value.x &= 0xffffff; + value.x = convertLightIndex(value.x); + value.y = min(maxDeltaValue, 
value.y); + + mergeTask(value, existingCount, localIndex.y); } + GroupMemoryBarrierWithGroupSync(); - if (insertTask) + // sort tasks + if (localIndex.x == 0) { - int oldCount = 0; - InterlockedAdd(s_candidateCount[localIndex.y], 1, oldCount); - NEELightCandidate newCandidate = NEELightCandidate.create(task.x); - s_candidateList[oldCount][localIndex.y] = newCandidate.m_data; + sortTask(localIndex.y); + s_candidateCount[localIndex.y] = cb.neeCacheArgs.enable != 0 ? min(s_candidateCount[localIndex.y], NEECell.getMaxCandidateCount()) : 0; } GroupMemoryBarrierWithGroupSync(); - // get triangle light - float cellSize = NEECache.getCellSize(); - uint triangleID = localIndex.x; - if (triangleID < s_candidateCount[localIndex.y]) + cell.setLightSlotTaskValue(localIndex.x, s_candidateList[localIndex.x][localIndex.y]); + for (int i = localIndex.x; i < NEE_CACHE_HASH_TASK_COUNT; i += CANDIDATE_GROUP_SIZE) { - NEELightCandidate candidate = NEELightCandidate.createFromPacked(s_candidateList[triangleID][localIndex.y]); - - float16_t radiance; - vec3 offset; - NEECacheUtils.calculateLightSampleInfo(candidate.getLightID(), vec2(0.5), candidate.getAge(), cellCenter, cellSize, radiance, offset); - candidate.setRadiance(radiance); - candidate.setOffset(offset); - - s_candidateList[triangleID][localIndex.y] = candidate.m_data; - s_candidateLight[triangleID][localIndex.y] = radiance; + cell.setHashSlotTaskValue(i, 0); } - triangleID = localIndex.x + NEE_CACHE_ELEMENTS; - if (triangleID < s_candidateCount[localIndex.y]) - { - NEELightCandidate candidate = NEELightCandidate.createFromPacked(s_candidateList[triangleID][localIndex.y]); - - float16_t radiance; - vec3 offset; - NEECacheUtils.calculateLightSampleInfo(candidate.getLightID(), vec2(0.5), candidate.getAge(), cellCenter, cellSize, radiance, offset); - candidate.setRadiance(radiance); - candidate.setOffset(offset); - s_candidateList[triangleID][localIndex.y] = candidate.m_data; - s_candidateLight[triangleID][localIndex.y] = 
radiance; - } - GroupMemoryBarrierWithGroupSync(); - - // sort tasks if (localIndex.x == 0) { - for (int i = 0; i < s_candidateCount[localIndex.y]-1; ++i) - { - for (int j = i+1; j < s_candidateCount[localIndex.y]; ++j) - { - if (s_candidateLight[i][localIndex.y] < s_candidateLight[j][localIndex.y]) - { - float lightI = s_candidateLight[i][localIndex.y]; - s_candidateLight[i][localIndex.y] = s_candidateLight[j][localIndex.y]; - s_candidateLight[j][localIndex.y] = lightI; - - uint2 tempCandidate = s_candidateList[i][localIndex.y]; - s_candidateList[i][localIndex.y] = s_candidateList[j][localIndex.y]; - s_candidateList[j][localIndex.y] = tempCandidate; - } - } - } - - int totalLightCount = cb.neeCacheArgs.enable != 0 ? min(s_candidateCount[localIndex.y], NEECell.getMaxLightCandidateCount()) : 0; - float totalLight = 0; - for (int i = 0; i < totalLightCount; ++i) - { - totalLight += s_candidateLight[i][localIndex.y]; - } - - float cullingThreshold = totalLight * cb.neeCacheArgs.cullingThreshold; - int validLightCount = 0; - float validTotalLight = 0; - for (int i = 0; i < totalLightCount; ++i) - { - if (s_candidateLight[i][localIndex.y] > cullingThreshold) - { - validTotalLight += s_candidateLight[i][localIndex.y]; - validLightCount++; - } - } - totalLight = validTotalLight; - totalLightCount = validLightCount; - s_candidateCount[localIndex.y] = totalLightCount; - cell.setLightCandidateCount(totalLightCount); + int validCount = finalizeTask(localIndex.y); + cell.setLightCandidateCount(validCount); } GroupMemoryBarrierWithGroupSync(); - // update nee cache + // update light candidate if (localIndex.x < s_candidateCount[localIndex.y]) { int candidateID = localIndex.x; - NEELightCandidate candidate = NEELightCandidate.createFromPacked(s_candidateList[candidateID][localIndex.y]); - cell.setLightCandidate(candidateID, candidate); - } + uint2 task = s_candidateList[candidateID][localIndex.y]; + NEELightCandidate candidate = NEELightCandidate.create(task.x & 0xffffff); - if 
(localIndex.x == 0) - { - cell.clearTasks(); + MemoryPolymorphicLight memoryPolymorphicLight = lights[candidate.getLightID()]; + candidate.setOffset(memoryPolymorphicLightGetPosition(memoryPolymorphicLight) - NEECache.getCenter()); + candidate.setRadiance(s_candidateLight[localIndex.x][localIndex.y]); + + cell.setLightCandidate(candidateID, candidate); } } + +[shader("compute")] +[numthreads(CANDIDATE_GROUP_SIZE, BATCH_SIZE, 1)] +void main(uint3 threadIndex : SV_DispatchThreadID, uint2 localIndex : SV_GroupThreadID) +{ + int3 cellID = threadIndex / int3(NEE_CACHE_ELEMENTS, 1, 1); + int cellOffset = cellID.z * NEE_CACHE_PROBE_RESOLUTION * NEE_CACHE_PROBE_RESOLUTION + + cellID.y * NEE_CACHE_PROBE_RESOLUTION + + cellID.x; + NEECell cell = NEECache.getCell(cellOffset); + + updateTriangleTask(cell, localIndex); + + updateTriangleSample(cell, localIndex, threadIndex); + + updateLightTask(cell, localIndex); +} \ No newline at end of file diff --git a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_binding_indices.h b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_binding_indices.h index 24c856f0d..f03ce9bec 100644 --- a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_binding_indices.h @@ -28,6 +28,7 @@ #define UPDATE_NEE_CACHE_BINDING_NEE_CACHE_SAMPLE 19 #define UPDATE_NEE_CACHE_BINDING_NEE_CACHE_THREAD_TASK 20 #define UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM 21 +#define UPDATE_NEE_CACHE_BINDING_LAST_PRIMITIVE_ID_PREFIX_SUM 22 #define UPDATE_NEE_CACHE_MIN_BINDING UPDATE_NEE_CACHE_BINDING_NEE_CACHE diff --git a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_bindings.slangh b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_bindings.slangh index f2a87bcf5..d2af6a33b 100644 --- a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_cache_bindings.slangh @@ -37,3 +37,6 @@ Texture2D 
NeeCacheThreadTask; layout(binding = UPDATE_NEE_CACHE_BINDING_PRIMITIVE_ID_PREFIX_SUM) StructuredBuffer PrimitiveIDPrefixSum; + +layout(binding = UPDATE_NEE_CACHE_BINDING_LAST_PRIMITIVE_ID_PREFIX_SUM) +StructuredBuffer LastPrimitiveIDPrefixSum; diff --git a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_task.comp.slang b/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_task.comp.slang deleted file mode 100644 index 95edda86f..000000000 --- a/src/dxvk/shaders/rtx/pass/nee_cache/update_nee_task.comp.slang +++ /dev/null @@ -1,156 +0,0 @@ -/* -* Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. -* -* Permission is hereby granted, free of charge, to any person obtaining a -* copy of this software and associated documentation files (the "Software"), -* to deal in the Software without restriction, including without limitation -* the rights to use, copy, modify, merge, publish, distribute, sublicense, -* and/or sell copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -* DEALINGS IN THE SOFTWARE. 
-*/ -#include "rtx/pass/common_bindings.slangh" -#include "rtx/pass/nee_cache/update_nee_cache_bindings.slangh" -#include "rtx/pass/raytrace_args.h" - -#include "rtx/utility/math.slangh" -#include "rtx/utility/color.slangh" -#include "rtx/utility/noise.slangh" -#include "rtx/utility/common.slangh" -#include "rtx/utility/brdf.slangh" -#include "rtx/utility/packing.slangh" -#include "rtx/utility/geometry_flags.slangh" -#include "rtx/utility/demodulate_helpers.slangh" -#include "rtx/utility/debug_view_helpers.slangh" -#include "rtx/utility/buffer_helpers.slangh" -#include "rtx/concept/camera/camera.slangh" -#include "rtx/concept/ray/ray.slangh" -#include "rtx/concept/ray/ray_helper.slangh" -#include "rtx/concept/surface/surface.slangh" -#include "rtx/concept/surface_material/surface_material.slangh" -#include "rtx/algorithm/integrator_helpers.slangh" -#include "rtx/external/nrd.slangh" - -#define UPDATE_NEE_CACHE 1 -#include "rtx/algorithm/nee_cache.h" - -groupshared ThreadTask s_task[16*8]; -groupshared int s_tarTaskCount; - -[shader("compute")] -[numthreads(16, 8, 1)] -void main(int2 threadIndex : SV_DispatchThreadID, uint2 localIndex : SV_GroupThreadID) -{ - // Clear target task list - if (localIndex.x == 0 && localIndex.y == 0) - { - s_tarTaskCount = 0; - } - GroupMemoryBarrierWithGroupSync(); - - // Load recorded task for each pixel - ThreadTask task = NEECache.loadThreadTask(threadIndex); - if (!cb.neeCacheArgs.clearCache && task.isValid()) - { - int oldCount; - InterlockedAdd(s_tarTaskCount, 1, oldCount); - s_task[oldCount] = task; - } - GroupMemoryBarrierWithGroupSync(); - - // Skip repeated tasks - bool shouldBeAdded = false; - int localTaskID = localIndex.y * 16 + localIndex.x; - ThreadTask localTask = ThreadTask.createEmpty(); - if (localTaskID < s_tarTaskCount) - { - localTask = s_task[localTaskID]; - bool isNew = true; - for (int i = 0; i < localTaskID-1; ++i) - { - if (all(s_task[i].m_data == localTask.m_data)) - { - isNew = false; - } - } - 
shouldBeAdded = isNew; - } - - // Add remaining tasks to the pixel's corresponding cell - uint lightToCellI = -1; - if (shouldBeAdded) - { - int3 cellID = NEECache.offsetToCell(localTask.getCellOffset()); - if (all(cellID != -1)) - { - NEECell cell = NEECache.getCell(cellID); - if (cell.isValid()) - { - vec3 cellCenter = NEECache.cellToCenterPoint(cellID); - float cellSize = NEECache.getCellSize(); - float lightToCell = 0; - uint encodedTask = 0; - - if (localTask.isLightTask()) - { - uint lightIdx = localTask.getLightTask(); - encodedTask = NEECache.encodeAnalyticalLight(lightIdx); - lightToCell = NEECacheUtils.calculateAnalyticLightToCell(lightIdx, 0, cellCenter, cellSize); - } - else - { - uint surfaceID, primitiveID; - if(localTask.getTriangleTask(surfaceID, primitiveID)) - { - surfaceID = NEECacheUtils.convertToCurrentSurfaceID(surfaceID); - if (surfaceID != 0xffffffff) - { - int prefixTask = NEECacheUtils.convertIDToPrefixSumID(surfaceID, primitiveID, PrimitiveIDPrefixSum); - if (prefixTask != -1) - { - lightToCell = NEECacheUtils.calculateTriangleLightToCell(uint2(surfaceID, primitiveID), 0, cellCenter, cellSize); - encodedTask = prefixTask; - } - } - } - } - - if (lightToCell > 0) - { - lightToCellI = clamp(log(lightToCell + 1e-10) + 10, 0, 255); - cell.insertTask(encodedTask, lightToCellI); - } - } - } - } - - switch(cb.debugView) - { - case DEBUG_VIEW_NEE_CACHE_TASK: - { - vec3 color = 0; - - if (task.isLightTask()) - { - color = vec3(0,0,1); - } - else if(task.isTriangleTask()) - { - color = vec3(1,0,0); - } - - storeInDebugView(ivec2(threadIndex), color); - break; - } - } -} diff --git a/src/dxvk/shaders/rtx/pass/raytrace_args.h b/src/dxvk/shaders/rtx/pass/raytrace_args.h index be12cf468..73c683810 100644 --- a/src/dxvk/shaders/rtx/pass/raytrace_args.h +++ b/src/dxvk/shaders/rtx/pass/raytrace_args.h @@ -70,13 +70,24 @@ struct NeeCacheArgs { NeeEnableMode enableModeAfterFirstBounce; float ageCullingSpeed; - float range; float 
emissiveTextureSampleFootprintScale; + float padding; - uint3 pad0; + float resolution; + float minRange; + float learningRate; uint clearCache; }; +struct DomeLightArgs { + mat4 worldToLightTransform; + + vec3 radiance; + uint active; + + uint textureIndex; +}; + // Constant buffer struct RaytraceArgs { Camera camera; @@ -291,8 +302,10 @@ struct RaytraceArgs { uint pomEnableReSTIRGI; uint pomEnablePSR; uint pomMaxIterations; - uint thinOpaqueEnable; - float totalMipBias; + + DomeLightArgs domeLightArgs; + + float2 upscaleFactor; // Displayed(upscaled) / RT resolution }; diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading.comp.slang b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading.comp.slang index 7e5c3e7ab..1cb02fa35 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading.comp.slang +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading.comp.slang @@ -100,7 +100,8 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh // Deserialize the Surface Material Interaction PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteraction = polymorphicSurfaceMaterialInteractionReadFromGBuffer( - threadIndex, PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryAlbedo, PrimaryBaseReflectivity, SharedMaterialData0, SharedMaterialData1); + threadIndex, PrimaryWorldShadingNormal, PrimaryPerceptualRoughness, PrimaryAlbedo, PrimaryBaseReflectivity, + SharedMaterialData0, SharedMaterialData1, SharedSurfaceIndex, SharedSubsurfaceData); ReSTIRGI_Reservoir spatialReservoir = RAB_LoadGIReservoir(threadIndex, ReSTIRGI_GetSpatialOutputPage()); @@ -122,7 +123,7 @@ void main(uint2 threadIndex : SV_DispatchThreadID, uint2 LocalIndex : SV_GroupTh VisibilityResult visibility = traceVisibilityRay(minimalSurfaceInteraction, dstPosition, rayMask, visibilityModeNone, portalID, geometryFlags.portalSpace, 0, 0, RAB_RESTIR_GI_VISIBILITY_SHORTENING, geometryFlags.isViewModel, - false, primarySurfaceIndex, 
DisplacementTextureCoord[threadIndex]); + false, primarySurfaceIndex, SharedTextureCoord[threadIndex]); filterColor = visibility.attenuation; } else diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_binding_indices.h b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_binding_indices.h index 8dc5b68f6..873ded666 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_binding_indices.h @@ -28,19 +28,20 @@ #define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_FLAGS_INPUT 40 #define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA0_INPUT 41 #define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA1_INPUT 42 -#define RESTIR_GI_FINAL_SHADING_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT 43 -#define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT 44 - -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 45 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 46 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_ALBEDO_INPUT 47 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 48 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_CONE_RADIUS_INPUT 49 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_POSITION_INPUT 50 - -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_POSITION_ERROR_INPUT 51 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_HIT_DISTANCE_INPUT 52 -#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT 53 +#define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_TEXTURE_COORD_INPUT 43 +#define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT 44 +#define RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SUBSURFACE_DATA_INPUT 45 + +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_SHADING_NORMAL_INPUT 46 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_PERCEPTUAL_ROUGHNESS_INPUT 47 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_ALBEDO_INPUT 48 +#define 
RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_VIEW_DIRECTION_INPUT 49 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_CONE_RADIUS_INPUT 50 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_POSITION_INPUT 51 + +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_POSITION_ERROR_INPUT 52 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_HIT_DISTANCE_INPUT 53 +#define RESTIR_GI_FINAL_SHADING_BINDING_PRIMARY_WORLD_INTERPOLATED_NORMAL_INPUT 54 // Inputs/Outputs diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_bindings.slangh b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_bindings.slangh index 2b144cc55..28048d38d 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_final_shading_bindings.slangh @@ -36,11 +36,14 @@ Texture2D SharedMaterialData0; layout(r32ui, binding = RESTIR_GI_FINAL_SHADING_BINDING_SHARED_MATERIAL_DATA1_INPUT) Texture2D SharedMaterialData1; -layout(rg32f, binding = RESTIR_GI_FINAL_SHADING_BINDING_DISPLACEMENT_TEXTURE_COORD_INPUT) -RWTexture2D DisplacementTextureCoord; +layout(rg32f, binding = RESTIR_GI_FINAL_SHADING_BINDING_SHARED_TEXTURE_COORD_INPUT) +Texture2D SharedTextureCoord; layout(r16ui, binding = RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SURFACE_INDEX_INPUT) -RWTexture2D SharedSurfaceIndex; +Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = RESTIR_GI_FINAL_SHADING_BINDING_SHARED_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; // Primary Inputs diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_binding_indices.h b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_binding_indices.h index d263557a4..ede8cdcee 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_binding_indices.h @@ -44,3 +44,4 @@ #define RESTIR_GI_REUSE_BINDING_SHARED_FLAGS_INPUT 34 #define RESTIR_GI_REUSE_BINDING_PREV_WORLD_POSITION_INPUT 35 #define 
RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT 36 +#define RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT 37 diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_bindings.slangh b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_bindings.slangh index a32ec7ab6..8f5d43c00 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_reuse_bindings.slangh @@ -74,3 +74,6 @@ Texture2D PreviousWorldPosition_WorldTriangleNormal; layout(r16ui, binding = RESTIR_GI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = RESTIR_GI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_spatial_reuse.comp.slang b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_spatial_reuse.comp.slang index d51fa2091..c2027d442 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_spatial_reuse.comp.slang +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_spatial_reuse.comp.slang @@ -71,7 +71,9 @@ void main(int2 thread_id : SV_DispatchThreadID) sparams.resolution = camera.resolution; sparams.sourceBufferIndex = ReSTIRGI_GetSpatialInputPage(); sparams.pairwiseMISCentralWeight = cb.reSTIRGIMISModePairwiseMISCentralWeight; - + // Use large search radius to ensure diffuse quality and suppress boiling, small radius to reduce noise. Values are based on experiment. + sparams.initialSearchRadius = spatialReservoir.M < cb.temporalHistoryLength || (cb.frameIdx + thread_id.x / 16 + thread_id.y / 8) % 2 == 0 ? 200.f : 85.f; + sparams.numSamples = spatialReservoir.M < cb.temporalHistoryLength ? 
4 : 1; ReSTIRGI_Reservoir resultReservoir = ReSTIRGI_SpatialResampling(spatialReservoir, thread_id, surface, rtxdiRNG, sparams); if (cb.debugView == DEBUG_VIEW_RESTIR_GI_SPATIAL_REUSE) diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_temporal_reuse.comp.slang b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_temporal_reuse.comp.slang index c80b60a9c..769ca522e 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_temporal_reuse.comp.slang +++ b/src/dxvk/shaders/rtx/pass/rtxdi/restir_gi_temporal_reuse.comp.slang @@ -212,7 +212,7 @@ void main(int2 thread_id : SV_DispatchThreadID) tparams.prevPixelCenter = prevPixelCenter; tparams.prevBackupPixelCenter = prevBackupPixelCenter; tparams.resolution = cb.camera.resolution; - tparams.expectedPrevHitDistance = expectedPrevHitDistance; + tparams.expectedPrevHitDistance = vec2(expectedPrevHitDistance); tparams.normalThreshold = normalThreshold; tparams.depthThreshold = depthThreshold; tparams.virtualMotionVector = virtualMotionVector; @@ -222,6 +222,7 @@ void main(int2 thread_id : SV_DispatchThreadID) tparams.teleportationPortalIndex = cb.teleportationPortalIndex; tparams.discardEnlargedPixels = discardEnlargedPixels; tparams.sourceBufferIndex = ReSTIRGI_GetTemporalInputPage(); + tparams.initialSearchRadius = 0; bool isGBufferSimilar; bool isInitialSample; diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_binding_indices.h b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_binding_indices.h index 6dd4b9c3e..2a8851589 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_binding_indices.h +++ b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_binding_indices.h @@ -46,5 +46,6 @@ #define RTXDI_REUSE_BINDING_TEMPORAL_POSITION_OUTPUT 37 #define RTXDI_REUSE_BINDING_BEST_LIGHTS_INPUT 38 #define RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT 39 +#define RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT 40 #endif diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_bindings.slangh 
b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_bindings.slangh index 29ee08faa..14d73efde 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_bindings.slangh +++ b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_reuse_bindings.slangh @@ -83,3 +83,6 @@ Texture2D RtxdiBestLights; layout(r16ui, binding = RTXDI_REUSE_BINDING_SHARED_SURFACE_INDEX_INPUT) Texture2D SharedSurfaceIndex; + +layout(rgba16ui, binding = RTXDI_REUSE_BINDING_SUBSURFACE_DATA_INPUT) +Texture2D SharedSubsurfaceData; diff --git a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_spatial_reuse.comp.slang b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_spatial_reuse.comp.slang index ca6e7d8ca..3eaaff190 100644 --- a/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_spatial_reuse.comp.slang +++ b/src/dxvk/shaders/rtx/pass/rtxdi/rtxdi_spatial_reuse.comp.slang @@ -114,7 +114,8 @@ void main(int2 thread_id : SV_DispatchThreadID) const float neighborNormalThreshold = 0.998f; const float brdfClampMin = 0.001f; const float brdfClampMax = 1000.0f; - factor = RTXDI_GetDlssEnhancementDirectFactor(thread_id, finalSample.position, cameraPosition, neighborDepthThreshold, neighborNormalThreshold, brdfClampMin, brdfClampMax); + float3 finalSampleDirection = normalize(finalSample.position - surface.minimalSurfaceInteraction.position); + factor = RTXDI_GetDlssEnhancementDirectFactor(thread_id, finalSample.position, finalSampleDirection, false, cameraPosition, neighborDepthThreshold, neighborNormalThreshold, brdfClampMin, brdfClampMax); factor = pow(factor, float16_t(cb.enhanceBSDFDirectLightPower)); factor = clamp(factor, f16vec2(0), f16vec2(cb.enhanceBSDFDirectLightMaxValue)); diff --git a/src/dxvk/shaders/rtx/utility/debug_view_helpers.slangh b/src/dxvk/shaders/rtx/utility/debug_view_helpers.slangh index d42fbb5d7..1528765ee 100644 --- a/src/dxvk/shaders/rtx/utility/debug_view_helpers.slangh +++ b/src/dxvk/shaders/rtx/utility/debug_view_helpers.slangh @@ -120,3 +120,106 @@ vec3 colorCodeIntoBGRexclusive(uint value, uint maxValue) // Reswizzle to show 
lowest values in Blue and highest in Red return color.bgr; } + +// Analytically integrated checkerboard (box filter). +float filteredCheckers( + vec2 uv, vec2 duvdx, vec2 duvdy, vec2 rcpUvToBoxSize, + out float numPixelsInBox) +{ + uv *= rcpUvToBoxSize; + duvdx *= rcpUvToBoxSize; + duvdy *= rcpUvToBoxSize; + + // Box filtered checkerboard + // Ref: http://iquilezles.org/www/articles/filterableprocedurals/filterableprocedurals.htm + float epsilon = 1e-6f; + vec2 w = max(max(abs(duvdx), abs(duvdy)), epsilon); + vec2 i = 2.0 * (abs(fract((uv - 0.5 * w) * 0.5) - 0.5)- + abs(fract((uv + 0.5 * w) * 0.5 ) - 0.5)) / w; + + numPixelsInBox = 1 / max(w.x, w.y); + + float checkers = 0.5 - 0.5 * i.x * i.y; + + return checkers; +} + +float calculateNormalizedRelativeValue(float x, float minValue, float maxValue) +{ + return saturate((x - minValue) / (maxValue - minValue)); +} + +// Calculates a filtered checkers scale based on textures resolution. +// boxSizeNumTexels: used only if >= 1. Otherwise, default is used +// checkersStrength: used only if > 0. Otherwise, default is used +// checkersValue: scale value for checkers with boxSizeNumTexels per box +// texelCheckersStrength: scale value for checkers with 1 texel per box +void calculateTextureResolutionCheckers( + vec2 uv, vec2 duvdx, vec2 duvdy, vec2 texDims, + uint boxSizeNumTexels, float checkersStrength, + out float checkersValue, + out float texelCheckersValue, + out float texelCheckersStrength) +{ + boxSizeNumTexels = boxSizeNumTexels >= 1 ? 
min(boxSizeNumTexels, 32 * 1024) : 64; + vec2 rcpUvToBoxSize = texDims / (float) boxSizeNumTexels; + + // Calculate checkers value for multi-texel box + float numPixelsInMultiTexelBox; + checkersValue = filteredCheckers(uv, duvdx, duvdy, rcpUvToBoxSize, numPixelsInMultiTexelBox); + + // Calculate checkers value for texel box + float numPixelsInTexelBox; + boxSizeNumTexels = 1; + rcpUvToBoxSize = texDims / (float) boxSizeNumTexels; + texelCheckersValue = filteredCheckers(uv, duvdx, duvdy, rcpUvToBoxSize, numPixelsInTexelBox); + + // Modulate texel checkers value since it can be very high frequency. + // Start displaying per texel checkers when both the checker and texel checker boxes are large enough to avoid too much noise. + // The reason for taking the checker box size into consideration is to limit texel checker box strength when a checker box shows only a few texels + const vec2 numPixelsPerMultiTexelBoxBounds = vec2(40, 120); + const vec2 numPixelsPerTexelBoxBounds = vec2(4, 20); + texelCheckersStrength = + calculateNormalizedRelativeValue(numPixelsInMultiTexelBox, numPixelsPerMultiTexelBoxBounds.x, numPixelsPerMultiTexelBoxBounds.y) * + calculateNormalizedRelativeValue(numPixelsInTexelBox, numPixelsPerTexelBoxBounds.x, numPixelsPerTexelBoxBounds.y); + + // Scale calculated checkers value with the input checkers strength + const float maxTexelCheckersStrength = 0.5; + checkersStrength = checkersStrength > 0 ? 
min(checkersStrength, 1) : 0.5; + texelCheckersStrength = min(texelCheckersStrength, min(maxTexelCheckersStrength, checkersStrength)); + checkersValue = mix(1, checkersValue, checkersStrength); + texelCheckersValue = mix(1, texelCheckersValue, texelCheckersStrength); +} + +vec3 tintColorValueWithTextureResolutionCheckers( + vec3 colorValue, vec2 uv, vec2 duvdx, vec2 duvdy, + vec2 texDims, uint boxSizeNumTexels, float checkersStrength) { + + // Ensure dealing with <0,1> color values so as to avoid dealing with negatives and large inputs + // when tinting the color + colorValue = saturate(colorValue); + + // Calculate checkers values + float checkersValue; + float texelCheckersValue; + float texelCheckersStrength; + calculateTextureResolutionCheckers( + uv, duvdx, duvdy, texDims, boxSizeNumTexels, checkersStrength, + checkersValue, texelCheckersValue, texelCheckersStrength); + + const float checkersColorWeight = 0.3; + const vec3 checkersTintColor = checkersColorWeight * vec3(0.6, 0.76, 0.93); + + // Add a tint to checkers for very dark input colors so the checkers are always visible + const float luminance = calcBt709Luminance(colorValue); + colorValue = colorValue + pow((1 - luminance), 4) * checkersTintColor; + + // Use 1 - color for texel checkers so that they always appear a bit different from the base checker using original color + const float texelCheckersColorWeight = 0.5; + const vec3 texelCheckersTintColor = texelCheckersColorWeight * (1 - (colorValue * checkersValue)); + + // Mix input color value with checkers' values + return colorValue * checkersValue * texelCheckersValue + // Add a tint to texel checker areas + + texelCheckersTintColor * texelCheckersStrength; +} \ No newline at end of file diff --git a/src/dxvk/shaders/rtx/utility/debug_view_indices.h b/src/dxvk/shaders/rtx/utility/debug_view_indices.h index c8bcba605..60262ee46 100644 --- a/src/dxvk/shaders/rtx/utility/debug_view_indices.h +++ b/src/dxvk/shaders/rtx/utility/debug_view_indices.h @@ 
-59,17 +59,23 @@ #define DEBUG_VIEW_EMISSIVE_RADIANCE 29 #define DEBUG_VIEW_THIN_FILM_THICKNESS 30 #define DEBUG_VIEW_RAW_ALBEDO 31 +#define DEBUG_VIEW_OPAQUE_RAW_ALBEDO_RESOLUTION_CHECKERS 32 +#define DEBUG_VIEW_OPAQUE_NORMAL_RESOLUTION_CHECKERS 33 +#define DEBUG_VIEW_OPAQUE_ROUGHNESS_RESOLUTION_CHECKERS 34 -#define DEBUG_VIEW_VIRTUAL_HIT_DISTANCE 32 -#define DEBUG_VIEW_TEXCOORD_GENERATION_MODE 33 -#define DEBUG_VIEW_EMISSIVE_TRIANGLE_INTENSITY 34 -#define DEBUG_VIEW_SURFACE_AREA 35 +#define DEBUG_VIEW_VIRTUAL_HIT_DISTANCE 35 +#define DEBUG_VIEW_TEXCOORD_GENERATION_MODE 36 +#define DEBUG_VIEW_EMISSIVE_TRIANGLE_INTENSITY 37 +#define DEBUG_VIEW_SURFACE_AREA 38 #define DEBUG_VIEW_BLUE_NOISE 41 #define DEBUG_VIEW_PIXEL_CHECKERBOARD 42 #define DEBUG_VIEW_VOLUME_RADIANCE_DEPTH_LAYERS 43 #define DEBUG_VIEW_SURFACE_VOLUME_RADIANCE 44 +#define DEBUG_VIEW_TEXCOORDS_GRADIENT_X 50 +#define DEBUG_VIEW_TEXCOORDS_GRADIENT_Y 51 + #define DEBUG_VIEW_PRIMARY_RAY_INTERACTIONS 60 #define DEBUG_VIEW_SECONDARY_RAY_INTERACTIONS 61 #define DEBUG_VIEW_PRIMARY_RAY_BOUNCES 62 @@ -123,11 +129,6 @@ #define DEBUG_VIEW_RESTIR_GI_FINAL_SHADING 133 #define DEBUG_VIEW_RESTIR_GI_VIRTUAL_HIT_T 134 -#define DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM 135 -#define DEBUG_VIEW_NEE_CACHE_HISTOGRAM 136 -#define DEBUG_VIEW_NEE_CACHE_SAMPLE_RADIANCE 137 -#define DEBUG_VIEW_NEE_CACHE_TASK 138 - #define DEBUG_VIEW_EXPOSURE_HISTOGRAM 139 #define DEBUG_VIEW_NOISY_SECONDARY_DIRECT_DIFFUSE_RADIANCE 140 @@ -149,6 +150,13 @@ #define DEBUG_VIEW_STOCHASTIC_ALPHA_BLEND_GEOMETRY_HASH 162 #define DEBUG_VIEW_STOCHASTIC_ALPHA_BLEND_BACKGROUND_TRANSPARENCY 163 +#define DEBUG_VIEW_NEE_CACHE_LIGHT_HISTOGRAM 170 +#define DEBUG_VIEW_NEE_CACHE_HISTOGRAM 171 +#define DEBUG_VIEW_NEE_CACHE_HASH_MAP 172 +#define DEBUG_VIEW_NEE_CACHE_ACCUMULATE_MAP 173 +#define DEBUG_VIEW_NEE_CACHE_SAMPLE_RADIANCE 174 +#define DEBUG_VIEW_NEE_CACHE_TASK 175 + #define DEBUG_VIEW_PRE_TONEMAP_OUTPUT 200 #define DEBUG_VIEW_POST_TONEMAP_OUTPUT 201 #define 
DEBUG_VIEW_COMPOSITE_OUTPUT 202 diff --git a/src/dxvk/shaders/rtx/utility/gbuffer_helpers.slangh b/src/dxvk/shaders/rtx/utility/gbuffer_helpers.slangh index 8775853b4..e8a7b19ec 100644 --- a/src/dxvk/shaders/rtx/utility/gbuffer_helpers.slangh +++ b/src/dxvk/shaders/rtx/utility/gbuffer_helpers.slangh @@ -104,7 +104,9 @@ PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteractionReadF Texture2D albedoTexture, RWTexture2D baseReflectivityTexture, Texture2D data0Texture, - Texture2D data1Texture) + Texture2D data1Texture, + Texture2D surfaceIndexTexture, + Texture2D subsurfaceData) { GBufferMemoryPolymorphicSurfaceMaterialInteraction memory; @@ -115,7 +117,28 @@ PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteractionReadF memory.data0 = data0Texture[position].x; memory.data1 = data1Texture[position].x; - return polymorphicSurfaceMaterialInteractionCreate(memory); + PolymorphicSurfaceMaterialInteraction polymorphicSurfaceMaterialInteraction = polymorphicSurfaceMaterialInteractionCreate(memory); + if (cb.thinOpaqueEnable) + { + const uint16_t primarySurfaceIndex = uint16_t(surfaceIndexTexture[position]); + const MemoryPolymorphicSurfaceMaterial memoryPolymorphicSurfaceMaterial = surfaceMaterials[primarySurfaceIndex]; + const OpaqueSurfaceMaterial opaqueSurfaceMaterial = opaqueSurfaceMaterialCreate(primarySurfaceIndex, memoryPolymorphicSurfaceMaterial); + const SubsurfaceMaterialInteraction subsurfaceMaterialInteraction = subSurfaceMaterialReadHelper(subsurfaceData[position]); + + polymorphicSurfaceMaterialInteraction.fdata5 = subsurfaceMaterialInteraction.measurementDistance; + polymorphicSurfaceMaterialInteraction.idata0 = subsurfaceMaterialInteraction.packedTransmittanceColor; + polymorphicSurfaceMaterialInteraction.idata1 = subsurfaceMaterialInteraction.packedSingleScatteringAlbedo; + polymorphicSurfaceMaterialInteraction.bdata1 = subsurfaceMaterialInteraction.volumetricAnisotropy; + } + else + { + 
polymorphicSurfaceMaterialInteraction.fdata5 = 0.0h; + polymorphicSurfaceMaterialInteraction.idata0 = 0; + polymorphicSurfaceMaterialInteraction.idata1 = 0; + polymorphicSurfaceMaterialInteraction.bdata1 = 0; + } + + return polymorphicSurfaceMaterialInteraction; } void polymorphicSurfaceMaterialInteractionWriteToGBuffer( @@ -134,8 +157,9 @@ void polymorphicSurfaceMaterialInteractionWriteToGBuffer( RWTexture2D baseReflectivityTexture, RWTexture2D data0Texture, RWTexture2D data1Texture, - RWTexture2D pomTextureCoords, - RWTexture2D surfaceIndexTexture) + RWTexture2D sharedTextureCoords, + RWTexture2D surfaceIndexTexture, + RWTexture2D subsurfaceDataTexture) { const GBufferMemoryPolymorphicSurfaceMaterialInteraction gBufferMemoryPolymorphicSurfaceMaterialInteraction = gBufferMemoryPolymorphicSurfaceMaterialInteractionCreate(polymorphicSurfaceMaterialInteraction); @@ -186,9 +210,23 @@ void polymorphicSurfaceMaterialInteractionWriteToGBuffer( surfaceIndexTexture[position] = uint32_t(surface.surfaceMaterialIndex); - // Only write pomTextureCoords if the material has a height texture. 
+ // Only write sharedTextureCoords if the material has a height texture, or the material has subsurface texture(s) if (polymorphicSurfaceMaterialInteractionHasHeightTexture(polymorphicSurfaceMaterialInteraction)) { - pomTextureCoords[position] = surfaceInteraction.textureCoordinates; + sharedTextureCoords[position] = surfaceInteraction.textureCoordinates; + } + + if (polymorphicSurfaceMaterialInteractionHasSubsurface(polymorphicSurfaceMaterialInteraction)) + { + subsurfaceDataTexture[position] = uint4( + polymorphicSurfaceMaterialInteraction.idata0, // transmittanceColor + float16BitsToUint16(polymorphicSurfaceMaterialInteraction.fdata5), // measurementDistance + polymorphicSurfaceMaterialInteraction.idata1, // singleScatteringAlbedo + uint16_t(polymorphicSurfaceMaterialInteraction.bdata1) // volumetricAnisotropy + ); + } + else + { + subsurfaceDataTexture[position] = uint4(0, 0, 0, 0); } } } diff --git a/src/dxvk/shaders/rtx/utility/math.slangh b/src/dxvk/shaders/rtx/utility/math.slangh index 6cd7f9217..65571fb51 100644 --- a/src/dxvk/shaders/rtx/utility/math.slangh +++ b/src/dxvk/shaders/rtx/utility/math.slangh @@ -156,6 +156,13 @@ GENERIC_SQUARE_LENGTH(float, vec2) GENERIC_SQUARE_LENGTH(float, vec3) GENERIC_SQUARE_LENGTH(float, vec4) +float2 cartesianDirectionToLatLongSphere(float3 direction) +{ + float theta = acos(direction.z); + float phi = atan2(direction.x, direction.y); + return float2(0.5 + phi / twoPi, theta / pi); +} + // Calculates an orthonormal basis from a specified normal vector // [Duff et al. 
17, Building An Orthonormal Basis, Revisited] void calcOrthonormalBasis(f16vec3 normal, out f16vec3 tangent, out f16vec3 bitangent) diff --git a/src/dxvk/shaders/rtx/utility/noise.slangh b/src/dxvk/shaders/rtx/utility/noise.slangh index a337917d1..333b07d5e 100644 --- a/src/dxvk/shaders/rtx/utility/noise.slangh +++ b/src/dxvk/shaders/rtx/utility/noise.slangh @@ -44,6 +44,18 @@ uint hashJenkinsOneAtATime(uint x) { return x; } +// Alternative from https://nullprogram.com/blog/2018/07/31/ +uint prospectorHash(uint x) { + x ^= x >> 17; + x *= 0xed5ad4bbU; + x ^= x >> 11; + x *= 0xac4c1b51U; + x ^= x >> 15; + x *= 0x31848babU; + x ^= x >> 14; + return x; +} + vec2 R2(uint index) { // Generalized golden ratio to 2d. // Solution to x^3 = x + 1 diff --git a/src/dxvk/shaders/rtx/utility/shared_constants.h b/src/dxvk/shaders/rtx/utility/shared_constants.h index 463fcab4d..b5c5fe1a5 100644 --- a/src/dxvk/shaders/rtx/utility/shared_constants.h +++ b/src/dxvk/shaders/rtx/utility/shared_constants.h @@ -45,4 +45,9 @@ // If set, then the texture bound to transmittanceOrDiffuseTextureIndex is an albedo map for the diffuse layer #define TRANSLUCENT_SURFACE_MATERIAL_FLAG_USE_DIFFUSE_LAYER (1 << 0) +// bits for flags field in SubsurfaceMaterial +#define SUBSURFACE_MATERIAL_FLAG_HAS_TRANSMITTANCE_TEXTURE (1 << 0) +#define SUBSURFACE_MATERIAL_FLAG_HAS_THICKNESS_TEXTURE (1 << 1) +#define SUBSURFACE_MATERIAL_FLAG_HAS_SINGLE_SCATTERING_ALBEDO_TEXTURE (1 << 2) + #endif // ifndef SHARED_CONSTANTS_H diff --git a/src/lssusd/game_exporter.cpp b/src/lssusd/game_exporter.cpp index 9f06435ac..66e5ecd41 100644 --- a/src/lssusd/game_exporter.cpp +++ b/src/lssusd/game_exporter.cpp @@ -21,6 +21,7 @@ */ #include "game_exporter.h" #include "game_exporter_common.h" +#include "mdl_helpers.h" #include "../util/log/log.h" #include "usd_include_begin.h" @@ -53,8 +54,11 @@ #include #include #include +#include +#include #include #include +#include #include #include #include @@ -74,8 +78,15 @@ #include #include 
#include +#include #include "../util/util_env.h" +#ifndef NDEBUG +#define ASSERT_OR_EXECUTE(BODY) assert((BODY)) +#else +#define ASSERT_OR_EXECUTE(BODY) (BODY) +#endif + namespace { inline pxr::GfMatrix4d ToRHS(const pxr::GfMatrix4d& xform) { static pxr::GfMatrix4d XYflip(pxr::GfVec4d(1.0, 1.0, -1.0, 1.0)); @@ -93,7 +104,6 @@ pxr::VtMatrix4dArray sanitizeBoneXforms(const pxr::VtMatrix4dArray& xforms, sanitizedXforms.resize(numBones); pxr::GfMatrix4d worldFromRoot(1); - if (numBones > 0) { const pxr::GfMatrix4d rootFromWorld = bindPose[0] * xforms[0]; worldFromRoot = rootFromWorld.GetInverse(); @@ -108,10 +118,10 @@ pxr::VtMatrix4dArray sanitizeBoneXforms(const pxr::VtMatrix4dArray& xforms, } lss::Skeleton generateSkeleton(const size_t numBones, - const size_t bonesPerVertex, - const lss::PositionBuffer& points, - const lss::BlendWeightBuffer* weights, - const lss::BlendIndicesBuffer* indices) { + const size_t bonesPerVertex, + const lss::Buf& points, + const lss::Buf* weights, + const lss::Buf* indices) { lss::Skeleton output; output.bindPose.resize(numBones); output.restPose.resize(numBones); @@ -161,6 +171,16 @@ lss::Skeleton generateSkeleton(const size_t numBones, } return output; } + +void setStageOffsetXform(pxr::UsdGeomXform& parentXformSchema, const pxr::GfVec3d translate, const bool bDoLhsToRhs) { + pxr::GfMatrix4d moveToOrigin{1.0}; + moveToOrigin.SetTranslateOnly(translate); + moveToOrigin = bDoLhsToRhs ? 
ToRHS(moveToOrigin) : moveToOrigin; + moveToOrigin = moveToOrigin.GetInverse(); + auto transformOp = parentXformSchema.AddTransformOp(); + assert(transformOp); + transformOp.Set(moveToOrigin); +} } namespace lss { @@ -234,14 +254,16 @@ pxr::UsdStageRefPtr GameExporter::createInstanceStage(const Export& exportData) const auto rootPrim = instanceStage->DefinePrim(gRootNodePath); assert(rootPrim); instanceStage->SetDefaultPrim(rootPrim); - const auto rootLightsPrim = instanceStage->DefinePrim(gRootLightsPath); + const auto rootLightsPrim = pxr::UsdGeomXform::Define(instanceStage,gRootLightsPath); assert(rootLightsPrim); const auto rootMeshesPrim = instanceStage->DefinePrim(gRootMeshesPath); assert(rootMeshesPrim); const auto rootMaterialsPrim = instanceStage->DefinePrim(gRootMaterialsPath); assert(rootMaterialsPrim); - const auto rootInstancesPrim = instanceStage->DefinePrim(gRootInstancesPath); + const auto rootInstancesPrim = pxr::UsdGeomXform::Define(instanceStage,gRootInstancesPath); assert(rootInstancesPrim); + const auto rootCameraPrim = pxr::UsdGeomXform::Define(instanceStage, gRootLightCamera); + assert(rootCameraPrim); dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "] Creating instance stage"); // capture meta data @@ -292,13 +314,67 @@ void GameExporter::createApertureMdls(const std::string& baseExportPath) { writeFile(materialsDirPath + "AperturePBR_Translucent.mdl", ___AperturePBR_Translucent); writeFile(materialsDirPath + "AperturePBR_Model.mdl", ___AperturePBR_Model); writeFile(materialsDirPath + "AperturePBR_Normal.mdl", ___AperturePBR_Normal); + writeFile(materialsDirPath + "AperturePBR_SpriteSheet.mdl", ___AperturePBR_SpriteSheet); +} + +namespace{ +struct AttrDesc { + pxr::TfToken attrName; + pxr::SdfValueTypeName typeName; + bool custom; + pxr::SdfVariability sdfVariability; +}; +#define AttrDescMapEntry(attrEnum, typeName, custom, sdfVariability) \ +{ \ + attrEnum, \ + AttrDesc{pxr::TfToken(attrNames[attrEnum]), \ + 
pxr::SdfValueTypeNames->##typeName, \ + custom, \ + pxr::SdfVariability##sdfVariability} \ +} + +namespace ShaderAttr { +enum Enum { + OutputsOut, + DiffuseTex, + ImplSrc, + MdlSrcAsset, + MdlSrcAssetSubId, + Opacity, + FilterMode, + WrapModeU, + WrapModeV, +}; +static std::unordered_map attrNames { + {OutputsOut, "outputs:out"}, + {DiffuseTex, "inputs:diffuse_texture"}, + {ImplSrc, "info:implementationSource"}, + {MdlSrcAsset, "info:mdl:sourceAsset"}, + {MdlSrcAssetSubId, "info:mdl:sourceAsset:subIdentifier"}, + {Opacity, "enable_opacity"}, + {FilterMode, "filter_mode"}, + {WrapModeU, "wrap_mode_u"}, + {WrapModeV, "wrap_mode_v"}, +}; +static std::unordered_map attrDescs{ + AttrDescMapEntry(OutputsOut, Token, false, Varying), + AttrDescMapEntry(DiffuseTex, Asset, false, Varying), + AttrDescMapEntry(ImplSrc, Token, false, Uniform), + AttrDescMapEntry(MdlSrcAsset, Asset, false, Uniform), + AttrDescMapEntry(MdlSrcAssetSubId, Token, false, Uniform), + AttrDescMapEntry(Opacity, Bool, false, Uniform), + AttrDescMapEntry(FilterMode, UInt, false, Uniform), + AttrDescMapEntry(WrapModeU, UInt, false, Uniform), + AttrDescMapEntry(WrapModeV, UInt, false, Uniform), +}; +} } void GameExporter::exportMaterials(const Export& exportData, ExportContext& ctx) { dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "][exportMaterials] Begin"); const std::string matDirPath = exportData.baseExportPath + "/" + commonDirName::matDir; const std::string fullMaterialBasePath = computeLocalPath(matDirPath); - + dxvk::env::createDirectory(matDirPath); for(const auto& [matId, matData] : exportData.materials) { // Build material stage @@ -329,73 +405,42 @@ void GameExporter::exportMaterials(const Export& exportData, ExportContext& ctx) const auto shaderPrim = shader.GetPrim(); assert(shaderPrim); - // Create shader prim outputs attr - static const pxr::TfToken kTokOutputsOutput("outputs:out"); - const auto outputsOutAttr = - shaderPrim.CreateAttribute(kTokOutputsOutput, 
pxr::SdfValueTypeNames->Token, false, pxr::SdfVariabilityVarying); + std::unordered_map shaderAttrs; + for(const auto& [attrEnum, desc] : ShaderAttr::attrDescs) { + shaderAttrs[attrEnum] = + shaderPrim.CreateAttribute(desc.attrName, desc.typeName, desc.custom, desc.sdfVariability); + // Cannot assert. Attr "outputs:out" asserts false, but authoring + Setting works just fine. + // assert(shaderAttrs[attrEnum]); + } // Create and connect material outputs to shader outputs static const pxr::TfToken kTokOutputsMdlSurface("outputs:mdl:surface"); const auto outputsMdlSurfaceAttr = matPrim.CreateAttribute(kTokOutputsMdlSurface, pxr::SdfValueTypeNames->Token, false, pxr::SdfVariabilityVarying); - outputsMdlSurfaceAttr.AddConnection(outputsOutAttr.GetPath(), pxr::UsdListPositionFrontOfAppendList); + outputsMdlSurfaceAttr.AddConnection(shaderAttrs[ShaderAttr::OutputsOut].GetPath(), pxr::UsdListPositionFrontOfAppendList); // Set shader "Kind" static const pxr::TfToken kTokMaterial("Material"); pxr::UsdModelAPI(shader).SetKind(kTokMaterial); // Create and set textures asset paths on material - static const auto setTextureAttr = - [](const pxr::UsdPrim& shaderPrim, const pxr::TfToken attrName, const std::string& relTexPath, const std::string& fullMaterialBasePath) - { - const auto attr = shaderPrim.CreateAttribute(pxr::TfToken(attrName), pxr::SdfValueTypeNames->Asset, false, pxr::SdfVariabilityVarying); - assert(attr); - const auto fullTexturePath = computeLocalPath(relTexPath); - const auto relToMaterialsTexPath = std::filesystem::relative(fullTexturePath,fullMaterialBasePath).string(); - const bool bSetSuccessful = attr.Set(pxr::SdfAssetPath(relToMaterialsTexPath)); - assert(bSetSuccessful); - static const pxr::TfToken kTokColorSpaceAuto("auto"); - attr.SetColorSpace(kTokColorSpaceAuto); - return true; - }; - static const pxr::TfToken kTokenInputsDiffuseTex("inputs:diffuse_texture"); - - // Try to use an updated texture, if that doesn't work, try to use an old one - 
setTextureAttr(shaderPrim, kTokenInputsDiffuseTex, matData.albedoTexPath, fullMaterialBasePath); + const auto relToMaterialsTexPath = + std::filesystem::relative(computeLocalPath(matData.albedoTexPath), fullMaterialBasePath).string(); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::DiffuseTex].Set(pxr::SdfAssetPath(relToMaterialsTexPath))); + shaderAttrs[ShaderAttr::DiffuseTex].SetColorSpace(pxr::TfToken("auto")); // Create and set OmniPBR MDL boilerplate attributes on shader - static const pxr::TfToken kTokInfoImplSource("info:implementationSource"); - static const pxr::TfToken kTokSourceAsset("sourceAsset"); - const auto infoImplSourceAttr = - shaderPrim.CreateAttribute(kTokInfoImplSource, pxr::SdfValueTypeNames->Token, false, pxr::SdfVariabilityUniform); - assert(infoImplSourceAttr); - const bool bSetInfoImplSourceAttr = infoImplSourceAttr.Set(kTokSourceAsset); - assert(bSetInfoImplSourceAttr); - - static const pxr::TfToken kTokInfoMdlSourceAsset("info:mdl:sourceAsset"); - - static const pxr::SdfAssetPath kSdfAssetPathOmniPBR("./AperturePBR_Opacity.mdl"); - const auto infoMdlSourceAsset = - shaderPrim.CreateAttribute(kTokInfoMdlSourceAsset, pxr::SdfValueTypeNames->Asset, false, pxr::SdfVariabilityUniform); - assert(infoMdlSourceAsset); - const bool bSetInfoMdlSourceAsset = infoMdlSourceAsset.Set(kSdfAssetPathOmniPBR); - assert(bSetInfoMdlSourceAsset); - - static const pxr::TfToken kTokInfoMdlSourceAssetSubId("info:mdl:sourceAsset:subIdentifier"); - static const pxr::TfToken kTokOmniPBR("AperturePBR_Opacity"); - const auto infoImplSourceSubIdAttr = - shaderPrim.CreateAttribute(kTokInfoMdlSourceAssetSubId, pxr::SdfValueTypeNames->Token, false, pxr::SdfVariabilityUniform); - assert(infoImplSourceSubIdAttr); - const bool bSetInfoMdlSourceAssetSubId = infoImplSourceSubIdAttr.Set(kTokOmniPBR); - assert(bSetInfoMdlSourceAssetSubId); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::ImplSrc].Set(pxr::TfToken("sourceAsset"))); + 
ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::MdlSrcAsset].Set(pxr::SdfAssetPath("./AperturePBR_Opacity.mdl"))); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::MdlSrcAssetSubId].Set(pxr::TfToken("AperturePBR_Opacity"))); // Mark whether to enable varying opacity - static const pxr::TfToken kTokEnableOpacity("enable_opacity"); - const auto enableOpacityAttr = - shaderPrim.CreateAttribute(kTokEnableOpacity, pxr::SdfValueTypeNames->Bool, false, pxr::SdfVariabilityUniform); - assert(enableOpacityAttr); - const bool bSetEnableOpacityAttr = enableOpacityAttr.Set(matData.enableOpacity); - assert(bSetEnableOpacityAttr); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::Opacity].Set(matData.enableOpacity)); + + // Sampler State + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::FilterMode].Set((uint32_t)lss::Mdl::Filter::vkToMdl(matData.sampler.filter))); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::WrapModeU].Set((uint32_t)lss::Mdl::WrapMode::vkToMdl(matData.sampler.addrModeU))); + ASSERT_OR_EXECUTE(shaderAttrs[ShaderAttr::WrapModeV].Set((uint32_t)lss::Mdl::WrapMode::vkToMdl(matData.sampler.addrModeV))); matStage->Save(); @@ -538,6 +583,29 @@ void GameExporter::exportMeshes(const Export& exportData, ExportContext& ctx) { assert(meshXformVisibilityAttr); meshXformVisibilityAttr.Set(gVisibilityInherited); + // Visually undo world transform if it's baked into mesh vertices + if(exportData.meta.bCorrectBakedTransforms) { + auto meshXformOp = meshXformSchema.AddTransformOp(); + assert(meshXformOp); + pxr::GfMatrix4d xform{1.0}; + // Note: Don't call flipXForm for flipping here, because we need a post-flipping but SetScale function will clean-up the translation + if (!exportData.camera.bFlipMeshes) { + xform.SetTranslateOnly(pxr::GfVec3d(mesh.origin)); + xform = xform.GetInverse(); + } else { + if (!exportData.meta.isZUp) { + xform.SetTranslateOnly(pxr::GfVec3d(mesh.origin[0], -mesh.origin[1], mesh.origin[2])); + xform = xform.GetInverse(); + xform[1][1] *= -1.0; + } else { + 
xform.SetTranslateOnly(pxr::GfVec3d(mesh.origin[0], mesh.origin[1], -mesh.origin[2])); + xform = xform.GetInverse(); + xform[2][2] *= -1.0; + } + } + meshXformOp.Set(xform); + } + // Build mesh geometry prim under above xform const auto meshSchemaSdfPath = meshXformSdfPath.AppendChild(gTokMesh); pxr::UsdGeomMesh meshSchema = pxr::UsdGeomMesh::Define(meshStage, meshSchemaSdfPath); @@ -573,15 +641,15 @@ void GameExporter::exportMeshes(const Export& exportData, ExportContext& ctx) { // Indices const bool reduce = exportData.meta.bReduceMeshBuffers; ReducedIdxBufSet reducedIdxBufSet = reduce ? reduceIdxBufferSet(mesh.buffers.idxBufs) : ReducedIdxBufSet(); - const std::map& idxBufSet = - reduce ? reducedIdxBufSet.bufSet : mesh.buffers.idxBufs; + const BufSet& idxBufSet = reduce ? reducedIdxBufSet.bufSet : mesh.buffers.idxBufs; auto indexAttr = meshSchema.CreateFaceVertexIndicesAttr(); assert(indexAttr); exportBufferSet(idxBufSet, indexAttr); // Vertices + const auto& posBufs = mesh.buffers.positionBufs; auto pointsAttr = meshSchema.CreatePointsAttr(); assert(pointsAttr); - exportBufferSet(reduce ? reduceBufferSet(mesh.buffers.positionBufs, reducedIdxBufSet) : mesh.buffers.positionBufs, pointsAttr); + exportBufferSet(reduce ? 
reduceBufferSet(posBufs, reducedIdxBufSet) : posBufs, pointsAttr); // Normals auto normalsAttr = meshSchema.CreateNormalsAttr(); assert(normalsAttr); @@ -674,7 +742,7 @@ void GameExporter::exportMeshes(const Export& exportData, ExportContext& ctx) { meshInstanceXformSchema = pxr::UsdGeomXform::Define(ctx.instanceStage, meshInstanceXformSdfPath); } assert(meshInstanceXformSchema); - + const std::string relMeshStagePath = relMeshDirPath + meshName + ctx.extension; auto meshInstanceUsdReferences = meshInstanceXformSchema.GetPrim().GetReferences(); meshInstanceUsdReferences.AddReference(relMeshStagePath); @@ -697,7 +765,7 @@ void GameExporter::exportMeshes(const Export& exportData, ExportContext& ctx) { dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "][exportMeshes] End"); } -GameExporter::ReducedIdxBufSet GameExporter::reduceIdxBufferSet(const std::map& idxBufSet) { +GameExporter::ReducedIdxBufSet GameExporter::reduceIdxBufferSet(const BufSet& idxBufSet) { ReducedIdxBufSet reducedIdxBufSet; for(const auto& [timeCode, idxBuf] : idxBufSet) { const std::set orderedIndices(idxBuf.cbegin(), idxBuf.cend()); @@ -717,10 +785,8 @@ GameExporter::ReducedIdxBufSet GameExporter::reduceIdxBufferSet(const std::map -std::map> GameExporter::reduceBufferSet(const std::map>& bufSet, - const ReducedIdxBufSet& reducedIdxBufSet, - size_t elemsPerIdx) { - std::map> reducedBufSet; +BufSet GameExporter::reduceBufferSet(const BufSet& bufSet, const ReducedIdxBufSet& reducedIdxBufSet, size_t elemsPerIdx) { + BufSet reducedBufSet; for(const auto& [timeCode, buf] : bufSet) { // There may not be a 1:1 mapping in timecodes b/w index buffers and other buffers float idxBufTimeCode = -1.f; @@ -756,8 +822,7 @@ std::map> GameExporter::reduceBufferSet(const std::map -void GameExporter::exportBufferSet(const std::map>& bufSet, - pxr::UsdAttribute attr) { +void GameExporter::exportBufferSet(const BufSet& bufSet, pxr::UsdAttribute attr) { if(bufSet.size() == 1) { 
attr.Set(bufSet.cbegin()->second); } else { @@ -768,13 +833,12 @@ void GameExporter::exportBufferSet(const std::map>& bufSet } } -template -void GameExporter::exportColorOpacityBufferSet(const std::map>& bufSet, pxr::UsdAttribute color, pxr::UsdAttribute opacity) { +void GameExporter::exportColorOpacityBufferSet(const BufSet& bufSet, pxr::UsdAttribute color, pxr::UsdAttribute opacity) { pxr::VtArray colorArray; pxr::VtArray opacityArray; if (bufSet.size() == 1) { - for (const T& element : bufSet.cbegin()->second) { + for (const Color& element : bufSet.cbegin()->second) { colorArray.emplace_back(pxr::GfVec3f(element[0], element[1], element[2])); opacityArray.emplace_back(element[3]); } @@ -782,7 +846,7 @@ void GameExporter::exportColorOpacityBufferSet(const std::map(ctx.instanceStage, fullInstancePath, instanceData.firstTime, instanceData.finalTime, instanceData.xforms, exportData.meta); - setVisibilityTimeSpan(ctx.instanceStage, fullInstancePath, instanceData.firstTime, instanceData.finalTime, exportData.meta.numFramesCaptured); + const auto& mesh = exportData.meshes.at(instanceData.meshId); + // Move instance back to its original positions by undoing the visual correction. + // a.k.a. Invert the invert done in exportMeshes + pxr::GfMatrix4d commonXform{1.0}; + if (exportData.meta.bCorrectBakedTransforms) { + if (!exportData.camera.bFlipMeshes) { + commonXform.SetTranslateOnly(pxr::GfVec3d(mesh.origin)); + } else { + // This translation actually combines 2 steps of translation corrections for bake-corrected + flipped meshes: + // 1. Move instance back to it's original position inside root by undoing the flipped visual correction (-mesh.origin[1] or -mesh.origin[2]) + // 2. 
Cancel out the root translation on the flipped dimension (Y or Z), then move exactly the same distance to the other side of flip axis: + // P y/z P' + // |--------->|--------->| + // stageOrigin[1/2] * 2 + if (!exportData.meta.isZUp) { + commonXform.SetTranslateOnly(pxr::GfVec3d(mesh.origin[0], -mesh.origin[1] + exportData.stageOrigin[1] * 2, mesh.origin[2])); + } else { + commonXform.SetTranslateOnly(pxr::GfVec3d(mesh.origin[0], mesh.origin[1], -mesh.origin[2] + exportData.stageOrigin[2] * 2)); + } + } + } else { + flipXForm(exportData, commonXform); + } + + setTimeSampledXforms(ctx.instanceStage, instancePath, + instanceData.firstTime, instanceData.finalTime, instanceData.xforms, + exportData.meta, commonXform); + setVisibilityTimeSpan(ctx.instanceStage, instancePath, instanceData.firstTime, instanceData.finalTime, exportData.meta.numFramesCaptured); } dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "][exportInstances] End"); } void GameExporter::exportCamera(const Export& exportData, ExportContext& ctx) { dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "][exportCamera] Begin"); + + auto gRootCamerasPath = gRootNodePath.AppendChild(kTokCameras); + static const pxr::TfToken kTokCamera("Camera"); - const pxr::SdfPath cameraSdfPath = gRootNodePath.AppendChild(kTokCamera); + const pxr::SdfPath cameraSdfPath = gRootCamerasPath.AppendChild(kTokCamera); auto geomCamera = pxr::UsdGeomCamera::Define(ctx.instanceStage, cameraSdfPath); + if (exportData.camera.bFlipMeshes) { + auto rootCamerasXformSchema = pxr::UsdGeomXform::Get(ctx.instanceStage, gRootCamerasPath); + assert(rootCamerasXformSchema); + setStageOffsetXform(rootCamerasXformSchema, pxr::GfVec3f { 0.f,0.f,0.f }, exportData.meta.isLHS); + } + // Create Gf Camera which will convert FOV + Aspect Ratio -> Usd Camera Attributes pxr::GfCamera simpleCam; simpleCam.SetPerspectiveFromAspectRatioAndFieldOfView( @@ -905,14 +1010,6 @@ void GameExporter::exportCamera(const Export& exportData, 
ExportContext& ctx) { auto horizontalAperture = geomCamera.CreateHorizontalApertureAttr(); horizontalAperture.Set(simpleCam.GetHorizontalAperture()); - // Set Vertical aperture - auto verticalAperture = geomCamera.CreateVerticalApertureAttr(); - float verticalApertureVal = simpleCam.GetVerticalAperture(); - if(exportData.camera.bFlipVertAperture) { - verticalApertureVal *= (-1.f); - } - verticalAperture.Set(verticalApertureVal); - // Set focal length auto focalLength = geomCamera.CreateFocalLengthAttr(); focalLength.Set(simpleCam.GetFocalLength()); @@ -920,8 +1017,21 @@ void GameExporter::exportCamera(const Export& exportData, ExportContext& ctx) { // Set clipping range auto clippingPlane = geomCamera.CreateClippingRangeAttr(); clippingPlane.Set(pxr::GfVec2f(exportData.camera.nearPlane, exportData.camera.farPlane)); - - setTimeSampledXforms(ctx.instanceStage, cameraSdfPath, exportData.camera.firstTime, exportData.camera.finalTime, exportData.camera.xforms, exportData.meta); + + // Camera position needs to be adjusted if we're visually correcting baked transforms + pxr::GfMatrix4d commonXform{1.0}; + if(exportData.meta.bCorrectBakedTransforms) { + pxr::GfVec3f stageOrigin = exportData.stageOrigin; + flipXForm(exportData, commonXform); + commonXform.SetTranslateOnly(pxr::GfVec3d(stageOrigin)); + commonXform = commonXform.GetInverse(); + } else { + flipXForm(exportData, commonXform); + } + + setCameraTimeSampledXforms(ctx.instanceStage, cameraSdfPath, + exportData.camera.firstTime, exportData.camera.finalTime, exportData.camera.xforms[0], + exportData.meta, commonXform); // Must modify here, since there may be existing data set earlier pxr::VtDictionary customLayerData = ctx.instanceStage->GetRootLayer()->GetCustomLayerData(); @@ -936,6 +1046,11 @@ void GameExporter::exportSphereLights(const Export& exportData, ExportContext& c const std::string relLightDirPath = commonDirName::lightDir + "/"; const std::string lightDirPath = exportData.baseExportPath + "/" + 
relLightDirPath; const std::string fullLightStagePath = computeLocalPath(lightDirPath); + if(exportData.meta.bCorrectBakedTransforms) { + auto rootLightsXformSchema = pxr::UsdGeomXform::Get(ctx.instanceStage,gRootLightsPath); + assert(rootLightsXformSchema); + setStageOffsetXform(rootLightsXformSchema, exportData.stageOrigin, exportData.meta.isLHS); + } dxvk::Logger::debug("[GameExporter][" + exportData.debugId + "][exportSphereLights] Begin"); for(const auto& [id,sphereLightData] : exportData.sphereLights) { // Build light stage @@ -980,7 +1095,13 @@ void GameExporter::exportSphereLights(const Export& exportData, ExportContext& c shaping.Apply(sphereLight.GetPrim()); } - setTimeSampledXforms(lightStage, lightAssetSdfPath, sphereLightData.firstTime, sphereLightData.finalTime, sphereLightData.xforms, exportData.meta); + // Sphere light position needs to be adjusted if we're visually flipping back upside down issue + pxr::GfMatrix4d commonXform { 1.0 }; + flipXForm(exportData, commonXform); + + setTimeSampledXforms(lightStage, lightAssetSdfPath, + sphereLightData.firstTime, sphereLightData.finalTime, sphereLightData.xforms, + exportData.meta, commonXform); pxr::UsdLuxLightAPI lightAPI(sphereLight.GetPrim()); setLightIntensityOnTimeSpan(lightAPI, sphereLightData.intensity, sphereLightData.firstTime, sphereLightData.finalTime, exportData.meta.numFramesCaptured); @@ -1027,7 +1148,15 @@ void GameExporter::exportDistantLights(const Export& exportData, ExportContext& angleAttr.Set(distantLightData.angle); static const pxr::GfVec3d distantLightDefault(0.0,0.0,-1.0); - const auto directionQuatF = pxr::GfQuatf{pxr::GfRotation(distantLightDefault, distantLightData.direction).GetQuat()}; + pxr::GfVec3f distantLightDirection = distantLightData.direction; + if (exportData.camera.bFlipMeshes) { + if (!exportData.meta.isZUp) { + distantLightDirection[1] *= -1.0f; + } else { + distantLightDirection[2] *= -1.0f; + } + } + const auto directionQuatF = 
pxr::GfQuatf{pxr::GfRotation(distantLightDefault, distantLightDirection).GetQuat()}; auto orientAttr = distantLightSchema.AddOrientOp(); assert(orientAttr); orientAttr.Set(directionQuatF); @@ -1149,26 +1278,26 @@ void GameExporter::setTimeSampledXforms(const pxr::UsdStageRefPtr stage, const float finalTime, const SampledXforms& xforms, const Export::Meta& meta, - const bool teleportAway) { + const pxr::GfMatrix4d& commonXform) { assert(stage); assert(sdfPath != pxr::SdfPath()); assert(xforms.size() > 0); - auto geomXform = pxr::UsdGeomXformCommonAPI::Get(stage, sdfPath); - assert(geomXform); const bool isSingleFrame = meta.numFramesCaptured == 1; - // [TODO]: make this game-programmable via RTX settings - static const pxr::GfVec3d defaultXform(-10000.0, -10000.0, -10000.0); - // If the first time this object is seen is not at t=0, we need it to be not visible - if(teleportAway && (firstTime > 0.0)) { - geomXform.SetTranslate(defaultXform, isSingleFrame ? pxr::UsdTimeCode::Default() : pxr::UsdTimeCode(0.0)); - } + auto geomXformable = pxr::UsdGeomXformable::Get(stage, sdfPath); + auto translateOp = geomXformable.AddTranslateOp(); + assert(translateOp); + auto rotateOp = geomXformable.AddRotateZYXOp(); + assert(rotateOp); + auto scaleOp = geomXformable.AddScaleOp(); + assert(scaleOp); for(const auto& sampledXform : xforms) { const pxr::UsdTimeCode timeCode = isSingleFrame ? pxr::UsdTimeCode::Default() : pxr::UsdTimeCode(sampledXform.time); - const auto xform = meta.isLHS ? ToRHS(sampledXform.xform) : sampledXform.xform; - + auto xform = sampledXform.xform; + xform *= commonXform; + xform = meta.isLHS ? 
ToRHS(xform) : xform; const pxr::GfVec3d translation = xform.ExtractTranslation(); pxr::GfVec3f scale(xform.GetRow3(0).GetLength(), xform.GetRow3(1).GetLength(), xform.GetRow3(2).GetLength()); @@ -1227,13 +1356,9 @@ void GameExporter::setTimeSampledXforms(const pxr::UsdStageRefPtr stage, } } - geomXform.SetTranslate(translation, timeCode); - geomXform.SetRotate(rotation, pxr::UsdGeomXformCommonAPI::RotationOrderZYX, timeCode); - geomXform.SetScale(scale, timeCode); - } - // If the entity stops existing midway through capture, move it to where it's invisible - if(teleportAway) { - geomXform.SetTranslate(defaultXform, isSingleFrame ? pxr::UsdTimeCode::Default() : pxr::UsdTimeCode(std::nextafter(finalTime, finalTime + 1.0))); + translateOp.Set(translation, timeCode); + rotateOp.Set(rotation, timeCode); + scaleOp.Set(scale, timeCode); } } @@ -1285,6 +1410,59 @@ void GameExporter::setLightIntensityOnTimeSpan(const pxr::UsdLuxLightAPI& luxLig } } +void GameExporter::setCameraTimeSampledXforms(const pxr::UsdStageRefPtr stage, + const pxr::SdfPath sdfPath, + const float firstTime, + const float finalTime, + const SampledXform& cameraXform, + const Export::Meta& meta, + const pxr::GfMatrix4d& commonXform) { + assert(stage); + assert(sdfPath != pxr::SdfPath()); + + const bool isSingleFrame = meta.numFramesCaptured == 1; + const pxr::UsdTimeCode timeCode = isSingleFrame ? pxr::UsdTimeCode::Default() : pxr::UsdTimeCode(cameraXform.time); + + auto xform = cameraXform.xform * commonXform; + xform = meta.isLHS ? ToRHS(xform) : xform; + const pxr::GfVec3d translation = xform.ExtractTranslation(); + pxr::GfVec3f scale(xform.GetRow3(0).GetLength(), xform.GetRow3(1).GetLength(), xform.GetRow3(2).GetLength()); + + pxr::GfVec3f rotation; + const auto r = xform.GetOrthonormalized().ExtractRotationMatrix(); + + if (r.GetHandedness() > 0) { + // Proper pure rotation - easy case. 
+ rotation = ToEuler(r); + } else { + // Doing Improper Rotation for flipped matrix + ExtractEulerImproper(rotation, r, pxr::GfVec3d(-1.0, -1.0, -1.0)); + // Rotate camera around camera Up axis with 180 degrees + if (!meta.isZUp) { + rotation[0] -= 180.0f; + rotation[1] = -rotation[1]; + rotation[2] = 180.0f - rotation[2]; + } else { + rotation[0] -= 180.0f; + rotation[1] = -rotation[1]; + rotation[2] = -rotation[2] - 180.0f; + } + } + + // Apply transformation OPs + auto geomXformable = pxr::UsdGeomXformable::Get(stage, sdfPath); + auto translateOp = geomXformable.AddTranslateOp(); + assert(translateOp); + auto rotateOp = geomXformable.AddRotateZYXOp(); + assert(rotateOp); + auto scaleOp = geomXformable.AddScaleOp(); + assert(scaleOp); + + translateOp.Set(translation, timeCode); + rotateOp.Set(rotation, timeCode); + scaleOp.Set(scale, timeCode); +} + pxr::UsdStageRefPtr GameExporter::findOpenOrCreateStage(const std::string path, const bool bClearIfExists) { const bool bLayerAlreadyExists = pxr::TfIsFile(path); pxr::SdfLayerRefPtr alreadyExistentLayer; @@ -1300,4 +1478,15 @@ pxr::UsdStageRefPtr GameExporter::findOpenOrCreateStage(const std::string path, return stage; } +void GameExporter::flipXForm(const Export& exportData, + pxr::GfMatrix4d& commonXform) { + if (exportData.camera.bFlipMeshes) { + if (!exportData.meta.isZUp) { + commonXform.SetScale(pxr::GfVec3d(1.0, -1.0, 1.0)); + } else { + commonXform.SetScale(pxr::GfVec3d(1.0, 1.0, -1.0)); + } + } +} + } diff --git a/src/lssusd/game_exporter.h b/src/lssusd/game_exporter.h index cdc6a75d8..ae8f63ca0 100644 --- a/src/lssusd/game_exporter.h +++ b/src/lssusd/game_exporter.h @@ -63,22 +63,23 @@ class GameExporter static void createApertureMdls(const std::string& baseExportPath); static void exportMaterials(const Export& exportData, ExportContext& ctx); static void exportMeshes(const Export& exportData, ExportContext& ctx); - static void exportSkeletons(const Export& exportData, ExportContext& ctx); struct 
ReducedIdxBufSet { - std::map bufSet; + BufSet bufSet; // Per-timecode idx mapping using IdxMap = std::unordered_map; std::map redToOgSet; }; - static ReducedIdxBufSet reduceIdxBufferSet(const std::map& idxBufSet); - template - static std::map> reduceBufferSet(const std::map>& bufSet, - const ReducedIdxBufSet& reducedIdxBufSet, - size_t elemsPerIdx = 1); - template - static void exportBufferSet(const std::map>& bufSet, pxr::UsdAttribute attr); + static ReducedIdxBufSet reduceIdxBufferSet(const BufSet& idxBufSet); template - static void exportColorOpacityBufferSet(const std::map>& bufSet, pxr::UsdAttribute color, pxr::UsdAttribute opacity); + static BufSet reduceBufferSet(const BufSet& bufSet, + const ReducedIdxBufSet& reducedIdxBufSet, + size_t elemsPerIdx = 1); + template + static void exportBufferSet(const BufSet& bufSet, pxr::UsdAttribute attr); + static void exportColorOpacityBufferSet(const BufSet& bufSet, + pxr::UsdAttribute color, + pxr::UsdAttribute opacity); + static void exportSkeletons(const Export& exportData, ExportContext& ctx); static void exportInstances(const Export& exportData, ExportContext& ctx); static void exportCamera(const Export& exportData, ExportContext& ctx); static void exportSphereLights(const Export& exportData, ExportContext& ctx); @@ -91,7 +92,7 @@ class GameExporter const float finalTime, const SampledXforms& xforms, const Export::Meta& meta, - const bool teleportAway = false); + const pxr::GfMatrix4d& commonXform = pxr::GfMatrix4d { 1.0 }); static void setVisibilityTimeSpan(const pxr::UsdStageRefPtr stage, const pxr::SdfPath sdfPath, const double firstTime, @@ -102,9 +103,19 @@ class GameExporter const double firstTime, const double finalTime, const size_t numFramesCaptured); + static void setCameraTimeSampledXforms(const pxr::UsdStageRefPtr stage, + const pxr::SdfPath sdfPath, + const float firstTime, + const float finalTime, + const SampledXform& cameraXform, + const Export::Meta& meta, + const pxr::GfMatrix4d& commonXform); 
static pxr::UsdStageRefPtr findOpenOrCreateStage(const std::string path, const bool bClearIfExists = false); + static void flipXForm(const Export& exportData, + pxr::GfMatrix4d& commonXform); + static bool s_bMultiThreadSafety; static std::mutex s_mutex; }; diff --git a/src/lssusd/game_exporter_common.h b/src/lssusd/game_exporter_common.h index 8f4046390..763a5717e 100644 --- a/src/lssusd/game_exporter_common.h +++ b/src/lssusd/game_exporter_common.h @@ -35,6 +35,7 @@ const pxr::TfToken gTokLooks("Looks"); const pxr::TfToken gTokRemixSettings("remix_settings"); const pxr::TfToken gTokScope("Scope"); const pxr::TfToken gTokInstances("instances"); +const pxr::TfToken kTokCameras("cameras"); const pxr::TfToken gVisibilityInherited("inherited"); const pxr::TfToken gVisibilityInvisible("invisible"); const pxr::TfToken gZ("Z"); @@ -46,4 +47,5 @@ const pxr::SdfPath gRootLightsPath = gRootNodePath.AppendChild(gTokLights); const pxr::SdfPath gRootMeshesPath = gRootNodePath.AppendChild(gTokMeshes); const pxr::SdfPath gRootMaterialsPath = gRootNodePath.AppendChild(gTokLooks); const pxr::SdfPath gRootInstancesPath = gRootNodePath.AppendChild(gTokInstances); +const pxr::SdfPath gRootLightCamera = gRootNodePath.AppendChild(kTokCameras); } \ No newline at end of file diff --git a/src/lssusd/game_exporter_types.h b/src/lssusd/game_exporter_types.h index 681919d4f..434735693 100644 --- a/src/lssusd/game_exporter_types.h +++ b/src/lssusd/game_exporter_types.h @@ -21,16 +21,19 @@ */ #pragma once +#include "../dxvk/rtx_render/rtx_hashing.h" +#include + #include "usd_include_begin.h" #include #include #include #include "usd_include_end.h" + #include #include #include -#include "../dxvk/rtx_render/rtx_hashing.h" static_assert(std::numeric_limits::is_iec559); static_assert(std::numeric_limits::is_iec559); @@ -70,7 +73,7 @@ struct Camera { float finalTime = NAN; bool isLHS = false; bool isReverseZ = false; - bool bFlipVertAperture = false; // WAR until able to expect flipped meshes + bool 
bFlipMeshes = false; SampledXforms xforms; }; @@ -102,25 +105,35 @@ struct Material { std::string matName; std::string albedoTexPath; bool enableOpacity = false; + struct Sampler { + VkSamplerAddressMode addrModeU; + VkSamplerAddressMode addrModeV; + VkFilter filter; + VkClearColorValue borderColor; + } sampler; // TODO: std::string normalTexPath; // TODO: etc... }; -using IndexBuffer = pxr::VtArray; -using PositionBuffer = pxr::VtArray; -using NormalBuffer = pxr::VtArray; -using TexcoordBuffer = pxr::VtArray; -using ColorBuffer = pxr::VtArray; -using BlendWeightBuffer = pxr::VtArray; -using BlendIndicesBuffer = pxr::VtArray; +using Index = int; +using Pos = pxr::GfVec3f; +using Norm = pxr::GfVec3f; +using Texcoord = pxr::GfVec2f; +using Color = pxr::GfVec4f; +using BlendWeight = float; +using BlendIdx = int; +template +using Buf = pxr::VtArray ; +template +using BufSet = std::map>; struct MeshBuffers { - std::map idxBufs; - std::map positionBufs; - std::map normalBufs; - std::map texcoordBufs; - std::map colorBufs; - std::map blendWeightBufs; - std::map blendIndicesBufs; + BufSet idxBufs; + BufSet positionBufs; + BufSet normalBufs; + BufSet texcoordBufs; + BufSet colorBufs; + BufSet blendWeightBufs; + BufSet blendIndicesBufs; }; struct RenderingMetaData { @@ -145,13 +158,14 @@ struct Mesh { std::string meshName; std::unordered_map componentHashes; std::unordered_map categoryFlags; - uint32_t numVertices = 0; - uint32_t numIndices = 0; - bool isDoubleSided = false; - Id matId = kInvalidId; - MeshBuffers buffers; - uint32_t numBones = 0; - uint32_t bonesPerVertex = 0; + uint32_t numVertices = 0; + uint32_t numIndices = 0; + bool isDoubleSided = false; + Id matId = kInvalidId; + MeshBuffers buffers; + pxr::GfVec3f origin = pxr::GfVec3f{0.f,0.f,0.f}; + uint32_t numBones = 0; + uint32_t bonesPerVertex = 0; pxr::VtMatrix4dArray boneXForms; }; @@ -186,6 +200,7 @@ struct Export { bool isZUp; bool isLHS; std::unordered_map renderingSettingsDict; + bool 
bCorrectBakedTransforms; } meta; std::string baseExportPath; bool bExportInstanceStage; @@ -198,6 +213,7 @@ struct Export { Camera camera; IdMap sphereLights; IdMap distantLights; + pxr::GfVec3f stageOrigin = pxr::GfVec3f{0.f,0.f,0.f}; }; } \ No newline at end of file diff --git a/src/lssusd/mdl_helpers.h b/src/lssusd/mdl_helpers.h new file mode 100644 index 000000000..65e60f5ec --- /dev/null +++ b/src/lssusd/mdl_helpers.h @@ -0,0 +1,83 @@ +/* +* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +*/ +#pragma once + +#include "vulkan/vulkan_core.h" + +#include + +namespace lss { +namespace Mdl { +namespace Filter { +enum { + Nearest = 0, + Linear = 1 +}; +static inline uint8_t vkToMdl(const VkFilter vkFilter) { + return (vkFilter > VK_FILTER_LINEAR) ? 
Nearest : (uint8_t)vkFilter; +} +static inline VkFilter mdlToVk(const uint8_t mdlFilter) { + return (VkFilter)mdlFilter; +} +} + +namespace WrapMode { +// https://raytracing-docs.nvidia.com/mdl/api/group__mi__neuray__mdl__compiler.html#ga852d194e585ada01cc272e85e367ca9b +enum { + Clamp = 0, + Repeat = 1, + Mirrored_Repeat = 2, + Clip = 3 // Clamp to border, where border always black +}; +// pBorderColor is an optional convenience parameter to easily set border to black of "Clip" wrap mode is used +static inline uint8_t vkToMdl(const VkSamplerAddressMode vkAddrMode, VkClearColorValue* const pBorderColor = nullptr) { + switch(vkAddrMode) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: return Repeat; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return Mirrored_Repeat; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return Clamp; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: + if(pBorderColor) { + memset(pBorderColor, 0x0, sizeof(VkClearColorValue)); + } + return Clip; // Maybe don't support? + default: return Repeat; + }; +} +// pBorderColor is an optional convenience parameter to easily set border to black of "Clip" wrap mode is used +static inline VkSamplerAddressMode mdlToVk(const uint8_t mdlWrapMode, VkClearColorValue* const pBorderColor = nullptr) { + switch(mdlWrapMode) { + case Clamp: return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + case Repeat: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + case Mirrored_Repeat: return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + case Clip: + if(pBorderColor) { + memset(pBorderColor, 0x0, sizeof(VkClearColorValue)); + } + return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; // Maybe don't support? 
+ default: return VK_SAMPLER_ADDRESS_MODE_REPEAT; + }; +} +} + + +} +} diff --git a/src/lssusd/meson.build b/src/lssusd/meson.build index 279854484..c7dc99dfb 100644 --- a/src/lssusd/meson.build +++ b/src/lssusd/meson.build @@ -5,6 +5,7 @@ lssUsd_src = files([ 'usd_mesh_samplers.h', 'hd/usd_mesh_util.cpp', 'hd/usd_mesh_util.h', + 'mdl_helpers.h' ]) if not dxvk_is_msvc @@ -19,13 +20,14 @@ libusd_embedded_data_files = files([ '../../external/omni_core_materials/Base/AperturePBR_Translucent.mdl', '../../external/omni_core_materials/Base/AperturePBR_Model.mdl', '../../external/omni_core_materials/Base/AperturePBR_Normal.mdl', + '../../external/omni_core_materials/Base/AperturePBR_SpriteSheet.mdl', ]) libusd_generated_embedded_files = embedding_compiler.process(libusd_embedded_data_files) lssUsd_lib = static_library('lssUsd', lssUsd_src,[libusd_generated_embedded_files], dependencies : [ usd_dep, embedded_dep ], - include_directories : [ usd_include_paths ], + include_directories : [ usd_include_paths, lssusd_include_paths ], override_options : ['cpp_std='+dxvk_cpp_std], cpp_args : lss_Usd_args) diff --git a/src/util/config/config.cpp b/src/util/config/config.cpp index c1e0ea021..1645b5de8 100644 --- a/src/util/config/config.cpp +++ b/src/util/config/config.cpp @@ -161,14 +161,6 @@ namespace dxvk { { R"(\\SaintsRowTheThird_DX11\.exe$)", {{ { "d3d11.constantBufferRangeCheck", "True" }, }} }, - /* Metal Gear Solid 5 */ - { R"(\\mgsvtpp\.exe$)", {{ - { "dxvk.enableOpenVR", "False" }, - }} }, - /* Raft */ - { R"(\\Raft\.exe$)", {{ - { "dxvk.enableOpenVR", "False" }, - }} }, /* Crysis 3 - slower if it notices AMD card * * Apitrace mode helps massively in cpu bound * * game parts */ @@ -217,10 +209,6 @@ namespace dxvk { { R"(\\F1_20(1[89]|[2-9][0-9])\.exe$)", {{ { "d3d11.forceTgsmBarriers", "True" }, }} }, - /* Subnautica */ - { R"(\\Subnautica\.exe$)", {{ - { "dxvk.enableOpenVR", "False" }, - }} }, /* Blue Reflection */ { R"(\\BLUE_REFLECTION\.exe$)", {{ { 
"d3d11.constantBufferRangeCheck", "True" }, @@ -279,6 +267,17 @@ namespace dxvk { { R"(\\ffxiv_dx11\.exe$)", {{ { "dxvk.shrinkNvidiaHvvHeap", "True" }, }} }, + /* God of War - relies on NVAPI/AMDAGS for * + * barrier stuff, needs nvapi for DLSS */ + { R"(\\GoW\.exe$)", {{ + { "d3d11.ignoreGraphicsBarriers", "True" }, + { "d3d11.relaxedBarriers", "True" }, + { "dxgi.nvapiHack", "False" }, + }} }, + /* AoE 2 DE - runs poorly for some users */ + { R"(\\AoE2DE_s\.exe$)", {{ + { "d3d11.apitraceMode", "True" }, + }} }, /**********************************************/ /* D3D9 GAMES */ diff --git a/src/util/config/config.h b/src/util/config/config.h index 028e11aca..5d260b098 100644 --- a/src/util/config/config.h +++ b/src/util/config/config.h @@ -153,6 +153,13 @@ namespace dxvk { const Tristate& value); // NV-DXVK end + // NV-DXVK start: setOption type safety + template< typename T > + void setOption( + const std::string& key, + const T& value) = delete; + // NV-DXVK end + /** * \brief Sets an option * diff --git a/submodules/rtxdi b/submodules/rtxdi index 5decbea15..88f9530e4 160000 --- a/submodules/rtxdi +++ b/submodules/rtxdi @@ -1 +1 @@ -Subproject commit 5decbea15df837323e376ff4326e28effcee02c6 +Subproject commit 88f9530e4cefa6011a1ab1b67b280b961e13ec63 diff --git a/tests/rtx/unit/meson.build b/tests/rtx/unit/meson.build index 8d9a89ef8..71c567629 100644 --- a/tests/rtx/unit/meson.build +++ b/tests/rtx/unit/meson.build @@ -45,4 +45,8 @@ exe = executable('test_intersection_helper_sat', files('test_intersection_helpe test('test_intersection_helper_sat', exe, env: nomalloc) tests += exe +exe = executable('test_pnext', files('test_pnext.cpp'), include_directories : remix_api_include_path, dependencies : test_unit_deps, install : true, win_subsystem : 'console', override_options: ['cpp_std='+dxvk_cpp_std]) +test('test_pnext', exe, env: nomalloc) +tests += exe + alias_target('unit_tests', tests) \ No newline at end of file diff --git a/tests/rtx/unit/test_pnext.cpp 
b/tests/rtx/unit/test_pnext.cpp new file mode 100644 index 000000000..1d60193dd --- /dev/null +++ b/tests/rtx/unit/test_pnext.cpp @@ -0,0 +1,202 @@ +/* +* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. +*/ + +#define RTX_REMIX_PNEXT_CHECK_STRUCTS + +#include "../../../src/dxvk/rtx_render/rtx_remix_pnext.h" + +// Note: including C++ wrapper to check consistency with C API +#include + +#include "../../test_utils.h" + +#ifdef _MSC_VER +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) +#define ERROR_INTRO __FUNCTION__ "(" STRINGIFY(__LINE__) "): " +#else +#define ERROR_INTRO "" +#endif + +namespace dxvk { + // Note: Logger needed by some shared code used in this Unit Test. 
+  Logger Logger::s_instance("test_pnext.log");
+}
+
+namespace pnext_test_app {
+  using namespace pnext::detail;
+
+  void test_find() { // pnext::find must return &info for the head type and &ext for the chained extension type; throws dxvk::DxvkError otherwise
+    auto ext = remixapi_MaterialInfoOpaqueEXT {};
+    {
+      ext.sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_EXT;
+      ext.pNext = nullptr;
+    }
+    auto info = remixapi_MaterialInfo {};
+    {
+      info.sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO;
+      info.pNext = &ext;
+    }
+
+    if (auto f = pnext::find< remixapi_MaterialInfo >(&info)) { // looking up the head type itself
+      if (f != &info) {
+        throw dxvk::DxvkError { ERROR_INTRO
+          "Result of pnext::find< remixapi_MaterialInfo >( remixapi_MaterialInfo{..} ) "
+          "must match the address of \'info\' variable" };
+      }
+    } else {
+      throw dxvk::DxvkError { ERROR_INTRO
+        "pnext::find< remixapi_MaterialInfo >( remixapi_MaterialInfo{..} ) failed" };
+    }
+
+    if (auto f = pnext::find< remixapi_MaterialInfoOpaqueEXT >(&info)) { // looking up the extension linked through info.pNext
+      if (f != &ext) {
+        throw dxvk::DxvkError { ERROR_INTRO
+          "Result of pnext::find< remixapi_MaterialInfoOpaqueEXT >( remixapi_MaterialInfo{..} ) "
+          "must match the address of \'ext\' variable" };
+      }
+    } else {
+      throw dxvk::DxvkError { ERROR_INTRO
+        "pnext::find< remixapi_MaterialInfoOpaqueEXT >( remixapi_MaterialInfo{..} ) must return non-null" };
+    }
+
+    // must be un-compilable and output a short compilation error
+    {
+      // pnext::find< remixapi_LightInfo >(&info);
+      // pnext::find< remixapi_LightInfoSphereEXT >(&info);
+    }
+  }
+
+  void test_const() { // getPNext / pnext::find must yield pointer-to-const exactly when given a pointer-to-const root (static_asserts below)
+    auto ext = remixapi_MaterialInfoOpaqueEXT {};
+    {
+      ext.sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO_OPAQUE_EXT;
+      ext.pNext = nullptr;
+    }
+    auto info = remixapi_MaterialInfo {};
+    {
+      info.sType = REMIXAPI_STRUCT_TYPE_MATERIAL_INFO;
+      info.pNext = &ext;
+    }
+    const auto constInfo = remixapi_MaterialInfo { info }; // const copy of info; its pNext still points at ext
+
+    {
+      auto pNext = getPNext(&info);
+      static_assert(!std::is_const_v< std::remove_pointer_t< decltype(pNext) > >);
+
+      auto constPNext = getPNext(&constInfo);
+      static_assert(std::is_const_v< std::remove_pointer_t< decltype(constPNext) > >);
+    }
+    {
+      auto f = pnext::find< remixapi_MaterialInfoOpaqueEXT >(&info);
+      if (f != &ext) {
+        throw dxvk::DxvkError{ ERROR_INTRO
+          "Result of pnext::find< remixapi_MaterialInfoOpaqueEXT >( remixapi_MaterialInfo{..} ) "
+          "must match the address of \'ext\' variable" };
+      }
+
+      static_assert(!std::is_const_v< std::remove_pointer_t< decltype(f) > >);
+    }
+    {
+      auto cf = pnext::find< remixapi_MaterialInfoOpaqueEXT >(&constInfo);
+      if (cf != &ext) {
+        throw dxvk::DxvkError { ERROR_INTRO
+          "Result of pnext::find< remixapi_MaterialInfoOpaqueEXT >( remixapi_MaterialInfo{..} ) "
+          "must match the address of \'ext\' variable" };
+      }
+
+      static_assert(std::is_const_v< std::remove_pointer_t< decltype(cf) > >);
+    }
+  }
+
+  void test_getPNext() { // getPNext(&s) must compare equal to s.pNext for both the head struct and the extension; throws dxvk::DxvkError otherwise
+    auto ext = remixapi_LightInfoDistantEXT {};
+    {
+      ext.sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO_DISTANT_EXT;
+      ext.pNext = nullptr;
+    }
+    auto info = remixapi_LightInfo {};
+    {
+      info.sType = REMIXAPI_STRUCT_TYPE_LIGHT_INFO;
+      info.pNext = &ext;
+    }
+
+    if (info.pNext != &ext) { // sanity check of the fixture itself before exercising getPNext
+      throw dxvk::DxvkError { ERROR_INTRO "Unexpected pNext mismatch: info.pNext != &ext" };
+    }
+    if (getPNext(&info) != info.pNext || getPNext(&ext) != ext.pNext) {
+      throw dxvk::DxvkError { ERROR_INTRO "getPNext test fail: result is not equal to .pNext" };
+    }
+  }
+
+  void test_memberDetection() { // compile-time only: helper::HasSTypePNext must reject every BadType_N and accept GoodType
+    struct BadType_0 { // sType member has the wrong name
+      int sType_none;
+      void* pNext;
+    };
+    struct BadType_1 { // pNext member has the wrong name
+      int sType;
+      void* pNext_none;
+    };
+    struct BadType_2 { // no pNext member
+      int sType;
+    };
+    struct BadType_3 { // no sType member
+      void* pNext;
+    };
+    struct BadType_4 { // sType is the enum's underlying integer type, not remixapi_StructType
+      std::underlying_type_t< remixapi_StructType > sType;
+      void* pNext;
+    };
+    struct BadType_5 { // pNext is an integer, not a pointer
+      remixapi_StructType sType;
+      uint64_t pNext;
+    };
+    static_assert(!helper::HasSTypePNext< BadType_0 >);
+    static_assert(!helper::HasSTypePNext< BadType_1 >);
+    static_assert(!helper::HasSTypePNext< BadType_2 >);
+    static_assert(!helper::HasSTypePNext< BadType_3 >);
+    static_assert(!helper::HasSTypePNext< BadType_4 >);
+    static_assert(!helper::HasSTypePNext< BadType_5 >);
+
+    struct GoodType { // remixapi_StructType sType plus void* pNext: the accepted shape
+      remixapi_StructType sType;
+      void* pNext;
+    };
+    static_assert(helper::HasSTypePNext< GoodType >);
+  }
+}
+
+int main() { // runs every test; returns 0 on success, -1 after printing the message of the first dxvk::DxvkError thrown
+  try {
+    pnext_test_app::test_find();
+    pnext_test_app::test_const();
+    pnext_test_app::test_getPNext();
+    pnext_test_app::test_memberDetection();
+  }
+  catch (const dxvk::DxvkError& error) {
+    std::cerr << error.message() << std::endl;
+    return -1;
+  }
+
+  return 0;
+}
\ No newline at end of file
diff --git a/vsgen/dxvk_project.py b/vsgen/dxvk_project.py index ea654d07a..c0fa409f8 100644 --- a/vsgen/dxvk_project.py +++ b/vsgen/dxvk_project.py @@ -16,7 +16,8 @@ def generate_dxvk_project(output_root_path, dxvk_cpp_defines): vcxproj_file_references = [] vcxproj_include_paths = { "../include": 1, - "../include/vulkan/include": 1 + "../include/vulkan/include": 1, + "../public/include": 1, } def add_file(dirpath, filename):