From bcefbc4dbb900f1cb9120392ec8971548a8d6d54 Mon Sep 17 00:00:00 2001 From: Try Date: Sat, 21 Sep 2024 16:43:47 +0200 Subject: [PATCH] vsm: update page clump heuristics #681 --- game/graphics/renderer.cpp | 39 +++-- game/graphics/renderer.h | 7 +- game/graphics/shaders.cpp | 6 +- game/graphics/shaders.h | 5 +- shader/CMakeLists.txt | 6 +- shader/virtual_shadow/vsm_alloc_pages.comp | 161 +++++++++++++++++++++ shader/virtual_shadow/vsm_clump_pages.comp | 125 ++++++++-------- shader/virtual_shadow/vsm_list_pages.comp | 45 ------ shader/virtual_shadow/vsm_sort_pages.comp | 143 ------------------ 9 files changed, 246 insertions(+), 291 deletions(-) create mode 100644 shader/virtual_shadow/vsm_alloc_pages.comp delete mode 100644 shader/virtual_shadow/vsm_list_pages.comp delete mode 100644 shader/virtual_shadow/vsm_sort_pages.comp diff --git a/game/graphics/renderer.cpp b/game/graphics/renderer.cpp index a6fcc57a8..5e367bfd1 100644 --- a/game/graphics/renderer.cpp +++ b/game/graphics/renderer.cpp @@ -188,16 +188,13 @@ void Renderer::resetSwapchain() { cmaa2.defferedColorApplyUbo = device.descriptors(*cmaa2.defferedColorApply); if(settings.vsmEnabled) { - vsm.pagesClearPso = &Shaders::inst().vsmClear; - vsm.uboClear = device.descriptors(*vsm.pagesClearPso); + vsm.uboClear = device.descriptors(Shaders::inst().vsmClear); if(!vsm.pageDataCs.isEmpty()) vsm.uboClearPages = device.descriptors(Shaders::inst().vsmClearPages); - vsm.pagesMarkPso = &Shaders::inst().vsmMarkPages; - vsm.uboPages = device.descriptors(*vsm.pagesMarkPso); - - vsm.pagesListPso = &Shaders::inst().vsmListPages; - vsm.uboList = device.descriptors(*vsm.pagesListPso); + vsm.uboPages = device.descriptors(Shaders::inst().vsmMarkPages ); + vsm.uboClump = device.descriptors(Shaders::inst().vsmClumpPages); + vsm.uboAlloc = device.descriptors(Shaders::inst().vsmAllocPages); vsm.directLightPso = &Shaders::inst().vsmDirectLight; vsm.uboLight = device.descriptors(*vsm.directLightPso); @@ -205,6 +202,7 @@ void Renderer::resetSwapchain() { vsm.pagesDbgPso = &Shaders::inst().vsmDbg; vsm.uboDbg = device.descriptors(*vsm.pagesDbgPso); + vsm.pageDbg = device.image2d(TextureFormat::R32U, 32, 32); vsm.pageTbl = device.image3d(TextureFormat::R32U, 32, 32, 16); // vsm.pageDataCs = device.image2d(TextureFormat::R32U, 4096, 4096); vsm.pageData = device.zbuffer(shadowFormat, 4096, 4096); @@ -463,8 +461,12 @@ void Renderer::prepareUniforms() { vsm.uboPages.set(3, zbuffer, Sampler::nearest()); vsm.uboPages.set(4, vsm.pageTbl); - vsm.uboList.set(0, vsm.pageList); - vsm.uboList.set(1, vsm.pageTbl); + vsm.uboClump.set(0, vsm.pageList); + vsm.uboClump.set(1, vsm.pageTbl); + + vsm.uboAlloc.set(0, vsm.pageList); + vsm.uboAlloc.set(1, vsm.pageTbl); + vsm.uboAlloc.set(2, vsm.pageDbg); vsm.uboLight.set(0, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]); vsm.uboLight.set(1, gbufDiffuse, Sampler::nearest()); @@ -856,35 +858,32 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI if(!settings.vsmEnabled) return; + auto& shaders = Shaders::inst(); cmd.setFramebuffer({}); cmd.setDebugMarker("VSM-pages"); - cmd.setUniforms(*vsm.pagesClearPso, vsm.uboClear); + cmd.setUniforms(shaders.vsmClear, vsm.uboClear); cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d())); - cmd.setUniforms(*vsm.pagesMarkPso, vsm.uboPages); + cmd.setUniforms(shaders.vsmMarkPages, vsm.uboPages); cmd.dispatchThreads(zbuffer.size()); if(vsm.pageDataCs.isEmpty()) { //TODO: trimming - //cmd.setUniforms(Shaders::inst().vsmClumpPages0, vsm.uboList); + //cmd.setUniforms(shaders.vsmClumpPages0, vsm.uboClump); //cmd.dispatch(1); // clump - cmd.setUniforms(Shaders::inst().vsmClumpPages1, vsm.uboList); + cmd.setUniforms(shaders.vsmClumpPages, vsm.uboClump); cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d())); } if(!vsm.pageDataCs.isEmpty()) { - cmd.setUniforms(Shaders::inst().vsmClearPages, vsm.uboClearPages); + cmd.setUniforms(shaders.vsmClearPages, vsm.uboClearPages); cmd.dispatchThreads(size_t(vsm.pageDataCs.w()), size_t(vsm.pageDataCs.h())); } - cmd.setUniforms(*vsm.pagesListPso, vsm.uboList); - cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d())); - - // sort for debug purpose only - cmd.setDebugMarker("VSM-sort(debug)"); - cmd.setUniforms(Shaders::inst().vsmSortPages, vsm.uboList); + // alloc + cmd.setUniforms(shaders.vsmAllocPages, vsm.uboAlloc); cmd.dispatch(1); cmd.setDebugMarker("VSM-visibility"); diff --git a/game/graphics/renderer.h b/game/graphics/renderer.h index 1e513319d..f61c4e57d 100644 --- a/game/graphics/renderer.h +++ b/game/graphics/renderer.h @@ -211,15 +211,11 @@ class Renderer final { } gi; struct { - Tempest::ComputePipeline* pagesClearPso = nullptr; Tempest::DescriptorSet uboClear; Tempest::DescriptorSet uboClearPages; - - Tempest::ComputePipeline* pagesMarkPso = nullptr; Tempest::DescriptorSet uboPages; - Tempest::ComputePipeline* pagesListPso = nullptr; - Tempest::DescriptorSet uboList; + Tempest::DescriptorSet uboClump, uboAlloc; Tempest::RenderPipeline* directLightPso = nullptr; Tempest::DescriptorSet uboLight; @@ -227,6 +223,7 @@ class Renderer final { Tempest::RenderPipeline* pagesDbgPso = nullptr; Tempest::DescriptorSet uboDbg; + Tempest::StorageImage pageDbg; Tempest::StorageImage pageTbl; Tempest::StorageImage pageDataCs; Tempest::ZBuffer pageData; diff --git a/game/graphics/shaders.cpp b/game/graphics/shaders.cpp index 39f92fbf1..10c4d84ab 100644 --- a/game/graphics/shaders.cpp +++ b/game/graphics/shaders.cpp @@ -199,10 +199,8 @@ Shaders::Shaders() { vsmClear = computeShader("vsm_clear.comp.sprv"); vsmClearPages = computeShader("vsm_clear_pages.comp.sprv"); vsmMarkPages = computeShader("vsm_mark_pages.comp.sprv"); - vsmClumpPages0 = computeShader("vsm_clump_pages0.comp.sprv"); - vsmClumpPages1 = computeShader("vsm_clump_pages1.comp.sprv"); - vsmListPages = computeShader("vsm_list_pages.comp.sprv"); - vsmSortPages = computeShader("vsm_sort_pages.comp.sprv"); + vsmClumpPages = computeShader("vsm_clump_pages.comp.sprv"); + vsmAllocPages = computeShader("vsm_alloc_pages.comp.sprv"); vsmPackDraw0 = computeShader("vsm_pack_draws0.comp.sprv"); vsmPackDraw1 = computeShader("vsm_pack_draws1.comp.sprv"); vsmDirectLight = postEffect("copy", "direct_light_vsm", RenderState::ZTestMode::NoEqual); diff --git a/game/graphics/shaders.h b/game/graphics/shaders.h index 811ee3e18..670d8cee1 100644 --- a/game/graphics/shaders.h +++ b/game/graphics/shaders.h @@ -76,11 +76,10 @@ class Shaders { // Virtual shadow Tempest::ComputePipeline vsmClusterTask; - Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages, vsmListPages; - Tempest::ComputePipeline vsmClumpPages0, vsmClumpPages1; + Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages; + Tempest::ComputePipeline vsmClumpPages, vsmAllocPages; Tempest::ComputePipeline vsmPackDraw0, vsmPackDraw1; Tempest::RenderPipeline vsmDirectLight; - Tempest::ComputePipeline vsmSortPages; Tempest::RenderPipeline vsmDbg; Tempest::ComputePipeline vsmRendering; diff --git a/shader/CMakeLists.txt b/shader/CMakeLists.txt index 013548c75..7d8bd967f 100644 --- a/shader/CMakeLists.txt +++ b/shader/CMakeLists.txt @@ -303,10 +303,8 @@ add_shader(vsm_dbg.frag virtual_shadow/vsm_mark_pages.comp -DDEBUG add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp -DMARK_PAGES -DLWC) add_shader(vsm_clear.comp virtual_shadow/vsm_clear.comp) add_shader(vsm_clear_pages.comp virtual_shadow/vsm_clear_pages.comp) -add_shader(vsm_clump_pages0.comp virtual_shadow/vsm_clump_pages.comp -DPASS0) -add_shader(vsm_clump_pages1.comp virtual_shadow/vsm_clump_pages.comp -DPASS1) -add_shader(vsm_list_pages.comp virtual_shadow/vsm_list_pages.comp) -add_shader(vsm_sort_pages.comp virtual_shadow/vsm_sort_pages.comp) +add_shader(vsm_clump_pages.comp virtual_shadow/vsm_clump_pages.comp) +add_shader(vsm_alloc_pages.comp virtual_shadow/vsm_alloc_pages.comp) add_shader(vsm_pack_draws0.comp virtual_shadow/vsm_pack_draws.comp -DPASS0) add_shader(vsm_pack_draws1.comp virtual_shadow/vsm_pack_draws.comp -DPASS1) add_shader(vsm_cluster_task.comp virtual_shadow/vsm_cluster_task.comp -DVIRTUAL_SHADOW) diff --git a/shader/virtual_shadow/vsm_alloc_pages.comp b/shader/virtual_shadow/vsm_alloc_pages.comp new file mode 100644 index 000000000..859be3cb6 --- /dev/null +++ b/shader/virtual_shadow/vsm_alloc_pages.comp @@ -0,0 +1,161 @@ +#version 450 + +#extension GL_GOOGLE_include_directive : enable +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_samplerless_texture_functions : enable + +#include "virtual_shadow/vsm_common.glsl" +#include "scene.glsl" +#include "common.glsl" + +layout(local_size_x = 32, local_size_y = 32) in; + +layout(binding = 0, std430) buffer Pages { VsmHeader header; uint pageList[]; } vsm; +layout(binding = 1, r32ui) uniform uimage3D pageTbl; +layout(binding = 2, r32ui) uniform uimage2D dbg; + +const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; + +shared uint pageListSize; +shared uint pageList[VSM_MAX_PAGES]; + +shared uint bits[(NumThreads+31)/32]; + +shared uint ladder [VSM_PAGE_TBL_SIZE]; +shared uint ladderWr[VSM_PAGE_TBL_SIZE]; + +uint loadPageSize(ivec3 at) { + uint a = imageLoad(pageTbl, at).r; + return a >> 1; + } + +void storePage(ivec2 pId, ivec3 at, ivec2 tile, uint size) { + const uint pageId = pId.x + pId.y*VSM_PAGE_PER_ROW; + + vsm.pageList[pageId] = packVsmPageInfo(at.xyz, ivec2(size)); + atomicMax(vsm.header.pageCount, pageId+1); + imageStore(pageTbl, at+ivec3(tile,0), uvec4(pageId,0,0,0)); + + imageStore(dbg, pId, uvec4(size)); + } + +uint bitCountShared(uint till) { + uint n = till/32; + uint f = till%32; + uint r = 0; + for(uint i=0; i0) + atomicOr(bits[lane/32], 1u<<(lane%32)); + barrier(); + + if(frag>0) { + uint id = pageListSize + bitCountShared(lane); + pageList[id] = packVsmPageInfo(ivec3(at.xy, i), ivec2(frag)); + } + barrier(); + + if(frag>0) + atomicAdd(pageListSize, 1); + barrier(); + } + } + +ivec2 findRowCol(in uint page, const uint size) { + //const uint perRow = 32/size; + //const uint row = (page/perRow)*size; + //const uint col = (page%perRow)*size; + //return ivec2(col,row); + + for(uint i=0; i<32; i+=size) { + uint space = (32-ladder[i])/size; + if(page> 1; + } + void main() { const ivec3 size = imageSize(pageTbl); const ivec3 at = ivec3(gl_GlobalInvocationID); @@ -30,9 +50,16 @@ void main() { minY = 0xFFFF; barrier(); - const uint frag = imageLoad(pageTbl, at).r; - if(frag>0) + const uint frag = imageLoad(pageTbl, at).r; + const bool prime = frag>0; + if(frag>0) { atomicMin(minY, at.y); + + atomicMin(vsm.header.pageBbox[at.z].x, at.x); + atomicMin(vsm.header.pageBbox[at.z].y, at.y); + atomicMax(vsm.header.pageBbox[at.z].z, at.x+1); + atomicMax(vsm.header.pageBbox[at.z].w, at.y+1); + } barrier(); const uint stripe = (at.y-minY)/2; @@ -55,26 +82,43 @@ void main() { } barrier(); - bool quad = false; + bool quad = false; if(at.y>=minY && (at.y-minY)%2==0 && offsetX%2==0 && offsetX= 4*2) { - imageStore(pageTbl, at+ivec3(0,0,0), uvec4(4)); - imageStore(pageTbl, at+ivec3(2,0,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(0,2,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(2,2,0), uvec4(0)); + const int mergeQ = (at.z>4 ? 2 : 3)*2; + uint a = loadPageSize(at+ivec3(0,0,0)); + uint b = loadPageSize(at+ivec3(2,0,0)); + uint c = loadPageSize(at+ivec3(2,2,0)); + uint d = loadPageSize(at+ivec3(0,2,0)); + if(a+b+c+d >= mergeQ) { + storePage(at+ivec3(0,0,0), 4, prime); + storePage(at+ivec3(2,0,0), 0, prime); + storePage(at+ivec3(0,2,0), 0, prime); + storePage(at+ivec3(2,2,0), 0, prime); + } + } + memoryBarrierImage(); + barrier(); + + if(true && quad) { + uint s = loadPageSize(at); + uint a = loadPagePrimeBit(at+ivec3(0,0,0)); + uint b = loadPagePrimeBit(at+ivec3(1,0,0)); + uint c = loadPagePrimeBit(at+ivec3(1,1,0)); + uint d = loadPagePrimeBit(at+ivec3(0,1,0)); + if(s==2 && a+b+c+d <= 2) { + storePage(at+ivec3(0,0,0), a, a); + storePage(at+ivec3(1,0,0), b, b); + storePage(at+ivec3(1,1,0), c, c); + storePage(at+ivec3(0,1,0), d, d); } } } @@ -106,57 +150,4 @@ void trimMip(int mip) { } } } - -void mainTrim() { - const ivec3 size = imageSize(pageTbl); - // NOTE: need only about 5 mips - for(int i=1; i= 2) { - imageStore(pageTbl, at+ivec3(0,0,0), uvec4(2)); - imageStore(pageTbl, at+ivec3(1,0,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(0,1,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(1,1,0), uvec4(0)); - } - } - groupMemoryBarrier(); - barrier(); - - if(true && at.x%4==0 && at.y%4==0) { - uint a = imageLoad(pageTbl, at+ivec3(0,0,0)).r >=2 ? 1 : 0; - uint b = imageLoad(pageTbl, at+ivec3(2,0,0)).r >=2 ? 1 : 0; - uint c = imageLoad(pageTbl, at+ivec3(2,2,0)).r >=2 ? 1 : 0; - uint d = imageLoad(pageTbl, at+ivec3(0,2,0)).r >=2 ? 1 : 0; - if(a+b+c+d >= 2) { - imageStore(pageTbl, at+ivec3(0,0,0), uvec4(4)); - imageStore(pageTbl, at+ivec3(2,0,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(0,2,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(2,2,0), uvec4(0)); - } - } - } - -void main() { -#if defined(PASS0) - mainTrim(); -#elif defined(PASS1) - mainGroups(); -#else -#error "invalid pass-id" -#endif - } */ diff --git a/shader/virtual_shadow/vsm_list_pages.comp b/shader/virtual_shadow/vsm_list_pages.comp deleted file mode 100644 index 0ab526e23..000000000 --- a/shader/virtual_shadow/vsm_list_pages.comp +++ /dev/null @@ -1,45 +0,0 @@ -#version 450 - -#extension GL_GOOGLE_include_directive : enable -#extension GL_ARB_separate_shader_objects : enable -#extension GL_EXT_samplerless_texture_functions : enable - -#include "virtual_shadow/vsm_common.glsl" -#include "scene.glsl" -#include "common.glsl" - -layout(local_size_x = 8, local_size_y = 8) in; - -layout(binding = 0, std430) buffer CB0 { - VsmHeader header; - uint pageList[]; - }; -layout(binding = 1, r32ui) uniform uimage3D pageTbl; - -void main() { - const ivec3 size = imageSize(pageTbl); - const ivec3 at = ivec3(gl_GlobalInvocationID); - - if(any(greaterThanEqual(at, size))) - return; - - const uint alloc = imageLoad(pageTbl, at).r; - if(alloc==0) { - imageStore(pageTbl, at, uvec4(0xFFFFFFFF)); - return; - } - - const uint pageId = atomicAdd(header.pageCount, 1); - if(pageId>=pageList.length()) { - imageStore(pageTbl, at, uvec4(0xFFFFFFFF)); - return; - } - - atomicMin(header.pageBbox[at.z].x, at.x); - atomicMin(header.pageBbox[at.z].y, at.y); - atomicMax(header.pageBbox[at.z].z, at.x+1); - atomicMax(header.pageBbox[at.z].w, at.y+1); - - pageList[pageId] = packVsmPageInfo(at.xyz, ivec2(alloc)); - imageStore(pageTbl, at, uvec4(pageId,0,0,0)); - } diff --git a/shader/virtual_shadow/vsm_sort_pages.comp b/shader/virtual_shadow/vsm_sort_pages.comp deleted file mode 100644 index 663f70e1b..000000000 --- a/shader/virtual_shadow/vsm_sort_pages.comp +++ /dev/null @@ -1,143 +0,0 @@ -#version 450 - -#extension GL_GOOGLE_include_directive : enable -#extension GL_ARB_separate_shader_objects : enable -#extension GL_EXT_samplerless_texture_functions : enable - -#include "virtual_shadow/vsm_common.glsl" -#include "scene.glsl" -#include "common.glsl" - -// NOTE: 1024 is too much invocations in some gpu's -layout(local_size_x = VSM_MAX_PAGES) in; - -layout(binding = 0, std430) buffer CB0 { - VsmHeader header; - uint pageList[]; - } vsm; -layout(binding = 1, r32ui) uniform uimage3D pageTbl; - -uint pageCount = 0; - -shared uint minPId; -shared uint pageList[1024]; - -void bubleSort() { - const uint id = gl_LocalInvocationIndex; - - //uint v = page; - uint v = id