From 2f7f9ab1529c66d1d8295d0b8eded20c860cad76 Mon Sep 17 00:00:00 2001 From: Try Date: Sun, 10 Nov 2024 22:30:30 +0100 Subject: [PATCH] Vsm optimizations (#692) * split page listing and page allocation #681 * fixup * 8k vsm pool * build * new fog pages #681 * fixup vsm cluster culling #681 * some tuning for vsm fog #681 * Update vsm_cluster_task.comp * epipolar fog initial * vsm: epipolar fog in progress #681 * fixup * fixup * fixup * epipolar fog * CI * CI --- game/gothic.cpp | 5 +- game/gothic.h | 1 + game/graphics/drawcommands.cpp | 9 +- game/graphics/instancestorage.h | 1 + game/graphics/lightsource.h | 1 - game/graphics/renderer.cpp | 86 ++++++-- game/graphics/renderer.h | 7 +- game/graphics/sceneglobals.cpp | 8 +- game/graphics/sceneglobals.h | 7 +- game/graphics/shaders.cpp | 44 ++-- game/graphics/shaders.h | 6 +- game/graphics/sky/sky.cpp | 52 +++-- game/graphics/sky/sky.h | 3 +- game/graphics/worldview.cpp | 8 +- game/graphics/worldview.h | 3 +- lib/Tempest | 2 +- shader/CMakeLists.txt | 38 ++-- shader/sky/fog.frag | 96 +-------- shader/virtual_shadow/vsm_alloc_pages.comp | 56 ++--- shader/virtual_shadow/vsm_clear.comp | 6 +- shader/virtual_shadow/vsm_clump_pages.comp | 2 + shader/virtual_shadow/vsm_cluster_task.comp | 183 ++++++++-------- shader/virtual_shadow/vsm_common.glsl | 29 ++- shader/virtual_shadow/vsm_direct_light.frag | 9 +- shader/virtual_shadow/vsm_fog_epipolar.comp | 193 +++++++++++++++++ shader/virtual_shadow/vsm_fog_sample.comp | 200 ++++++++++++++++++ shader/virtual_shadow/vsm_fog_shadow.comp | 60 ++++++ shader/virtual_shadow/vsm_list_pages.comp | 72 +++++-- shader/virtual_shadow/vsm_mark_fog_pages.comp | 110 ++++++++++ shader/virtual_shadow/vsm_mark_pages.comp | 35 +-- 30 files changed, 952 insertions(+), 380 deletions(-) create mode 100644 shader/virtual_shadow/vsm_fog_epipolar.comp create mode 100644 shader/virtual_shadow/vsm_fog_sample.comp create mode 100644 shader/virtual_shadow/vsm_fog_shadow.comp create mode 100644 
shader/virtual_shadow/vsm_mark_fog_pages.comp diff --git a/game/gothic.cpp b/game/gothic.cpp index 9da8ae7e6..a760519ac 100644 --- a/game/gothic.cpp +++ b/game/gothic.cpp @@ -88,9 +88,10 @@ Gothic::Gothic() { opts.doBindless = CommandLine::inst().isBindless(); } - if(hasBindless() && gpu.compute.maxInvocations>=1024) { - //TODO: productize + if(gpu.compute.maxInvocations>=1024 && gpu.render.maxClipCullDistances>=4 && + gpu.render.maxViewportSize.w>=8192 && gpu.render.maxViewportSize.h>=8192) { opts.doVirtualShadow = CommandLine::inst().isVirtualShadow(); + opts.doVirtualFog = opts.doVirtualShadow; } opts.aaPreset = CommandLine::inst().aaPreset(); diff --git a/game/gothic.h b/game/gothic.h index 6b0dbb198..96a1f606b 100644 --- a/game/gothic.h +++ b/game/gothic.h @@ -47,6 +47,7 @@ class Gothic final { bool doMeshShading = false; bool doBindless = false; bool doVirtualShadow = false; + bool doVirtualFog = false; uint32_t swRenderingPreset = 0; uint32_t aaPreset = 0; diff --git a/game/graphics/drawcommands.cpp b/game/graphics/drawcommands.cpp index fa6b78dcf..8b6056c3c 100644 --- a/game/graphics/drawcommands.cpp +++ b/game/graphics/drawcommands.cpp @@ -238,15 +238,14 @@ void DrawCommands::updateTasksUniforms() { i.desc.set(T_Clusters, clusters.ssbo()); i.desc.set(T_Indirect, views[i.viewport].indirectCmd); i.desc.set(T_Payload, views[i.viewport].visClusters); + i.desc.set(T_Scene, scene.uboGlobal[i.viewport]); + i.desc.set(T_Instance, owner.instanceSsbo()); + i.desc.set(T_Bucket, buckets.ssbo()); if(i.viewport!=SceneGlobals::V_Vsm) { - i.desc.set(T_Scene, scene.uboGlobal[i.viewport]); - i.desc.set(T_Instance, owner.instanceSsbo()); - i.desc.set(T_Bucket, buckets.ssbo()); i.desc.set(T_HiZ, *scene.hiZ); } else { - i.desc.set(T_Scene, scene.uboGlobal[i.viewport]); - i.desc.set(T_Payload, views[i.viewport].vsmClusters); + i.desc.set(T_Payload, views[i.viewport].vsmClusters); //unsorted clusters i.desc.set(T_HiZ, *scene.vsmPageHiZ); i.desc.set(T_VsmPages, 
*scene.vsmPageList); i.desc.set(8, scene.vsmDbg); diff --git a/game/graphics/instancestorage.h b/game/graphics/instancestorage.h index f94de3fc2..532bf3470 100644 --- a/game/graphics/instancestorage.h +++ b/game/graphics/instancestorage.h @@ -6,6 +6,7 @@ #include #include +#include #include "resources.h" diff --git a/game/graphics/lightsource.h b/game/graphics/lightsource.h index 9a9be27c1..a7a3d8124 100644 --- a/game/graphics/lightsource.h +++ b/game/graphics/lightsource.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include diff --git a/game/graphics/renderer.cpp b/game/graphics/renderer.cpp index 5dcf96018..7a037455e 100644 --- a/game/graphics/renderer.cpp +++ b/game/graphics/renderer.cpp @@ -144,8 +144,10 @@ void Renderer::resetSwapchain() { if(smSize>0) { for(int i=0; isceneGlobals().uboGlobal[SceneGlobals::V_Main]); + vsm.uboEpipole.set(3, vsm.epipoles); + vsm.uboEpipole.set(4, zbuffer); + vsm.uboEpipole.set(5, vsm.pageTbl); + vsm.uboEpipole.set(6, vsm.pageData); + + vsm.uboFogShadow.set(0, vsm.epTrace); + vsm.uboFogShadow.set(1, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]); + vsm.uboFogShadow.set(2, vsm.epipoles); + vsm.uboFogShadow.set(3, vsm.pageTbl); + vsm.uboFogShadow.set(4, vsm.pageData); + + vsm.uboFogSample.set(0, vsm.ssTrace); + vsm.uboFogSample.set(1, vsm.epTrace); + vsm.uboFogSample.set(2, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]); + vsm.uboFogSample.set(3, vsm.epipoles); + vsm.uboFogSample.set(4, zbuffer); + vsm.uboClump.set(0, vsm.pageList); vsm.uboClump.set(1, vsm.pageTbl); @@ -482,13 +513,7 @@ void Renderer::prepareUniforms() { if(!vsm.pageDataCs.isEmpty()) vsm.uboLight.set(6, vsm.pageDataCs); else vsm.uboLight.set(6, vsm.pageData); - - vsm.uboReproj.set(0, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]); - vsm.uboReproj.set(1, vsm.pageTbl); - vsm.uboReproj.set(2, vsm.pageList); - if(!vsm.pageDataCs.isEmpty()) - vsm.uboReproj.set(3, vsm.pageDataCs); else - vsm.uboReproj.set(3, vsm.pageData); + 
vsm.uboLight.set(8, wview->sceneGlobals().vsmDbg); } if(settings.swrEnabled) { @@ -506,6 +531,7 @@ void Renderer::prepareUniforms() { } wview->setShadowMaps(sh); wview->setVirtualShadowMap(vsm.pageData, vsm.pageDataCs, vsm.pageTbl, vsm.pageHiZ, vsm.pageList); + wview->setVsmSkyShadows(vsm.ssTrace); wview->setSwRenderingImage(swr.outputImage); wview->setHiZ(textureCast(hiz.hiZ)); @@ -584,7 +610,8 @@ void Renderer::dbgDraw(Tempest::Painter& p) { //tex.push_back(&textureCast(hiz.hiZSm1)); //tex.push_back(&textureCast(shadowMap[1])); //tex.push_back(&textureCast(shadowMap[0])); - tex.push_back(&textureCast(vsm.pageData)); + //tex.push_back(&textureCast(vsm.pageData)); + tex.push_back(&textureCast(vsm.ssTrace)); static int size = 400; int left = 10; @@ -873,7 +900,12 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI cmd.setUniforms(shaders.vsmMarkPages, vsm.uboPages, &settings.vsmMipBias, sizeof(settings.vsmMipBias)); cmd.dispatchThreads(zbuffer.size()); - wview.vsmMarkSkyPages(cmd, fId); + // sky&fog + if(true && wview.sky().isVolumetric()) { + cmd.setUniforms(shaders.vsmMarkFogPages, vsm.uboPages); + cmd.dispatchThreads(zbuffer.size()); + // wview.vsmMarkSkyPages(cmd, fId); + } if(vsm.pageDataCs.isEmpty()) { // trimming @@ -890,6 +922,10 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI cmd.dispatchThreads(size_t(vsm.pageDataCs.w()), size_t(vsm.pageDataCs.h())); } + // list? 
+ cmd.setUniforms(shaders.vsmListPages, vsm.uboAlloc); + cmd.dispatch(size_t(vsm.pageTbl.d())); + // alloc cmd.setUniforms(shaders.vsmAllocPages, vsm.uboAlloc); cmd.dispatch(1); @@ -904,13 +940,19 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI cmd.setFramebuffer({}, {vsm.pageData, 0.f, Tempest::Preserve}); wview.drawVsm(cmd,fId); - if(false) { - cmd.setDebugMarker("VSM-reproject"); - cmd.setFramebuffer({}, {shadowMap[1], 0.f, Tempest::Preserve}); - auto viewShadowLwcInv = shadowMatrix[1]; - viewShadowLwcInv.inverse(); - cmd.setUniforms(shaders.vsmReprojectSm, vsm.uboReproj, &viewShadowLwcInv, sizeof(viewShadowLwcInv)); - cmd.draw(Resources::fsqVbo()); + if(Gothic::inst().options().doVirtualFog) { + cmd.setFramebuffer({}); + cmd.setDebugMarker("VSM-epipolar"); + cmd.setUniforms(shaders.vsmFogEpipolar, vsm.uboEpipole); + cmd.dispatch(uint32_t(vsm.epTrace.h())); + + cmd.setDebugMarker("VSM-epipolar-fog"); + cmd.setUniforms(shaders.vsmFogShadow, vsm.uboFogShadow); + cmd.dispatchThreads(vsm.epTrace.size()); + + cmd.setDebugMarker("VSM-epipolar-fog"); + cmd.setUniforms(shaders.vsmFogSample, vsm.uboFogSample); + cmd.dispatchThreads(zbuffer.size()); } } diff --git a/game/graphics/renderer.h b/game/graphics/renderer.h index 27c7c5902..6ccf2c9a8 100644 --- a/game/graphics/renderer.h +++ b/game/graphics/renderer.h @@ -88,7 +88,7 @@ class Renderer final { float vidResIndex = 0; - float vsmMipBias = -0.25; + float vsmMipBias = 0.25; //TODO: set to lower, eventually } settings; Frustrum frustrum[SceneGlobals::V_Count]; @@ -217,6 +217,7 @@ class Renderer final { Tempest::DescriptorSet uboClearPages; Tempest::DescriptorSet uboPages; Tempest::DescriptorSet uboReproj; + Tempest::DescriptorSet uboEpipole, uboFogSample, uboFogShadow; Tempest::DescriptorSet uboClump, uboAlloc; @@ -229,6 +230,10 @@ class Renderer final { Tempest::StorageImage pageDataCs; Tempest::ZBuffer pageData; Tempest::StorageBuffer pageList; + + Tempest::StorageImage ssTrace; + Tempest::StorageImage 
epTrace; + Tempest::StorageBuffer epipoles; } vsm; struct { diff --git a/game/graphics/sceneglobals.cpp b/game/graphics/sceneglobals.cpp index f4b4c81b2..a80c25fa7 100644 --- a/game/graphics/sceneglobals.cpp +++ b/game/graphics/sceneglobals.cpp @@ -32,7 +32,9 @@ SceneGlobals::SceneGlobals() { vsmPageData = &Resources::fallbackBlack(); vsmPageTbl = &Resources::fallbackImage3d(); vsmPageHiZ = &Resources::fallbackImage3d(); - vsmDbg = device.image2d(Tempest::TextureFormat::R32U, 32, 32); + vsmDbg = device.image2d(Tempest::TextureFormat::R32U, 64, 64); + + skyShadows = &Resources::fallbackImage(); for(uint8_t lay=0; lay0) { diff --git a/game/graphics/shaders.h b/game/graphics/shaders.h index 11ea6f07a..f7c8c8b9b 100644 --- a/game/graphics/shaders.h +++ b/game/graphics/shaders.h @@ -76,11 +76,11 @@ class Shaders { // Virtual shadow Tempest::ComputePipeline vsmClusterTask; - Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages, vsmMarkSky; - Tempest::ComputePipeline vsmTrimPages, vsmClumpPages, vsmAllocPages, vsmMergePages; + Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages, vsmMarkFogPages; + Tempest::ComputePipeline vsmTrimPages, vsmListPages, vsmClumpPages, vsmAllocPages, vsmMergePages; Tempest::ComputePipeline vsmPackDraw0, vsmPackDraw1; + Tempest::ComputePipeline vsmFogEpipolar, vsmFogShadow, vsmFogSample; Tempest::RenderPipeline vsmDirectLight; - Tempest::RenderPipeline vsmReprojectSm; Tempest::RenderPipeline vsmDbg; Tempest::ComputePipeline vsmRendering; diff --git a/game/graphics/sky/sky.cpp b/game/graphics/sky/sky.cpp index 3343a3d4c..c62ed7c8c 100644 --- a/game/graphics/sky/sky.cpp +++ b/game/graphics/sky/sky.cpp @@ -109,7 +109,7 @@ Sky::~Sky() { void Sky::setupSettings() { auto& device = Resources::device(); const bool fog = Gothic::inst().settingsGetI("RENDERER_D3D","zFogRadial")!=0; - const bool vsm = false; //Gothic::inst().options().doVirtualShadow; + const bool vsm = Gothic::inst().options().doVirtualShadow; auto q = 
Quality::VolumetricLQ; if(!fog) { @@ -208,6 +208,10 @@ float Sky::isNight() const { return 1.f - linearstep(-0.18f, 0.f, sun.dir().y); } +bool Sky::isVolumetric() const { + return quality!=VolumetricLQ; + } + void Sky::setWorld(const World& world, const std::pair& bbox) { setupSettings(); } @@ -248,6 +252,7 @@ void Sky::updateLight(const int64_t now) { float ax = 360-360*std::fmod(k+0.25f,1.f); ax = ax*float(M_PI/180.0); sun.setDir(-std::sin(ax)*shadowLength, pulse, std::cos(ax)*shadowLength); + //sun.setDir(0, 1, 0); //debug } static float sunMul = 1; @@ -369,15 +374,6 @@ void Sky::prepareUniforms() { uboOcclusion.set(4, *scene.shadowMap[1], Resources::shadowSampler()); } - if(quality==VolumetricHQVsm) { - uboVsmPages = device.descriptors(Shaders::inst().vsmMarkSky); - uboVsmPages.set(1, *scene.zbuffer, Sampler::nearest()); - uboVsmPages.set(2, scene.uboGlobal[SceneGlobals::V_Main]); - uboVsmPages.set(3, occlusionLut); - uboVsmPages.set(4, *scene.vsmPageTbl); - uboVsmPages.set(5, *scene.vsmPageHiZ); - } - uboFogViewLut3d = device.descriptors(Shaders::inst().fogViewLut3d); uboFogViewLut3d.set(0, scene.uboGlobal[SceneGlobals::V_Main]); uboFogViewLut3d.set(1, transLut, smpB); @@ -389,7 +385,11 @@ void Sky::prepareUniforms() { uboFog3d.set(0, fogLut3D, smpB); uboFog3d.set(1, *scene.zbuffer, Sampler::nearest()); uboFog3d.set(2, scene.uboGlobal[SceneGlobals::V_Main]); - uboFog3d.set(3, occlusionLut); + if(quality==VolumetricHQVsm && Gothic::inst().options().doVirtualFog) { + uboFog3d.set(3, *scene.skyShadows); + } else { + uboFog3d.set(3, occlusionLut); + } } if(quality==PathTrace) { @@ -455,17 +455,6 @@ void Sky::prepareSky(Tempest::Encoder& cmd, uint32_t fra cmd.draw(Resources::fsqVbo()); } -void Sky::vsmMarkPage(Tempest::Encoder& cmd, uint32_t frameId) { - if(quality!=VolumetricHQVsm) - return; - - UboSky ubo = mkPush(); - auto& vsmMarkSky = Shaders::inst().vsmMarkSky; - cmd.setFramebuffer({}); - cmd.setUniforms(vsmMarkSky, uboVsmPages, &ubo, sizeof(ubo)); - 
cmd.dispatchThreads(occlusionLut.size()); - } - void Sky::prepareFog(Tempest::Encoder& cmd, uint32_t frameId) { UboSky ubo = mkPush(); @@ -477,15 +466,22 @@ void Sky::prepareFog(Tempest::Encoder& cmd, uint32_t fra cmd.dispatchThreads(uint32_t(fogLut3D.w()),uint32_t(fogLut3D.h())); break; } - case VolumetricHQ: - case VolumetricHQVsm: { - const bool vsm = (quality==VolumetricHQVsm); - auto& fogOcclusion = vsm ? Shaders::inst().fogOcclusionVsm : Shaders::inst().fogOcclusion; - + case VolumetricHQ:{ cmd.setFramebuffer({}); - cmd.setUniforms(fogOcclusion, uboOcclusion, &ubo, sizeof(ubo)); + cmd.setUniforms(Shaders::inst().fogOcclusion, uboOcclusion, &ubo, sizeof(ubo)); cmd.dispatchThreads(occlusionLut.size()); + cmd.setUniforms(Shaders::inst().fogViewLut3d, uboFogViewLut3d, &ubo, sizeof(ubo)); + cmd.dispatchThreads(uint32_t(fogLut3D.w()),uint32_t(fogLut3D.h())); + break; + } + case VolumetricHQVsm: { + if(!Gothic::inst().options().doVirtualFog) { + cmd.setFramebuffer({}); + cmd.setUniforms(Shaders::inst().fogOcclusionVsm, uboOcclusion, &ubo, sizeof(ubo)); + cmd.dispatchThreads(occlusionLut.size()); + } + // shadows filled extenally cmd.setUniforms(Shaders::inst().fogViewLut3d, uboFogViewLut3d, &ubo, sizeof(ubo)); cmd.dispatchThreads(uint32_t(fogLut3D.w()),uint32_t(fogLut3D.h())); break; diff --git a/game/graphics/sky/sky.h b/game/graphics/sky/sky.h index 1e64e5eb7..aa4d6837e 100644 --- a/game/graphics/sky/sky.h +++ b/game/graphics/sky/sky.h @@ -22,7 +22,6 @@ class Sky final { void updateLight(const int64_t now); void prepareSky (Tempest::Encoder& p, uint32_t frameId); - void vsmMarkPage(Tempest::Encoder& p, uint32_t frameId); void drawSky (Tempest::Encoder& p, uint32_t frameId); void drawSunMoon(Tempest::Encoder& p, uint32_t frameId); @@ -43,6 +42,7 @@ class Sky final { const State& cloudsNight() const { return clouds[1]; } Tempest::Vec2 cloudsOffset(int layer) const; float isNight() const; + bool isVolumetric() const; private: enum Quality : uint8_t { @@ -83,7 +83,6 
@@ class Sky final { Tempest::DescriptorSet uboFogViewLut3d; Tempest::DescriptorSet uboSky, uboFog, uboFog3d; Tempest::DescriptorSet uboOcclusion, uboShadowRq; - Tempest::DescriptorSet uboVsmPages; Tempest::DescriptorSet uboIrradiance, uboExp; Tempest::DescriptorSet uboSkyPathtrace; diff --git a/game/graphics/worldview.cpp b/game/graphics/worldview.cpp index c368c9655..2b5cbd81a 100644 --- a/game/graphics/worldview.cpp +++ b/game/graphics/worldview.cpp @@ -93,6 +93,10 @@ void WorldView::setVirtualShadowMap(const Tempest::ZBuffer& pageData, sGlobal.setVirtualShadowMap(pageData, pageDataCs, pageTbl, pageHiZ, pageList); } +void WorldView::setVsmSkyShadows(const Tempest::StorageImage& skyShadows) { + sGlobal.setVsmSkyShadows(skyShadows); + } + void WorldView::setSwRenderingImage(const Tempest::StorageImage& mainView) { sGlobal.setSwRenderingImage(mainView); } @@ -127,10 +131,6 @@ void WorldView::prepareExposure(Tempest::Encoder& cmd, u gSky.prepareExposure(cmd, frameId); } -void WorldView::vsmMarkSkyPages(Tempest::Encoder& cmd, uint8_t frameId) { - gSky.vsmMarkPage(cmd, frameId); - } - void WorldView::updateFrustrum(const Frustrum fr[]) { for(uint8_t i=0; i &cmd, uint8_t frameId); void prepareExposure (Tempest::Encoder &cmd, uint8_t frameId); - void vsmMarkSkyPages (Tempest::Encoder &cmd, uint8_t frameId); - bool updateLights(); bool updateRtScene(); diff --git a/lib/Tempest b/lib/Tempest index 0cf71374a..37d79dcc6 160000 --- a/lib/Tempest +++ b/lib/Tempest @@ -1 +1 @@ -Subproject commit 0cf71374a68119b87b42c5d379601ea62039c2b7 +Subproject commit 37d79dcc6ea6bf71a4867fc31820d3ae051f1060 diff --git a/shader/CMakeLists.txt b/shader/CMakeLists.txt index 1e881bbe3..0d6f3ab61 100644 --- a/shader/CMakeLists.txt +++ b/shader/CMakeLists.txt @@ -237,7 +237,6 @@ add_shader(clouds_lut.comp sky/clouds_lut.comp) add_shader(fog_view_lut.comp sky/fog_view_lut.comp) add_shader(fog3d.comp sky/fog.frag -S comp -DVOLUMETRIC) -add_shader(fog3d_vsm_mark_pages.comp sky/fog.frag -S comp 
-DVOLUMETRIC -DVIRTUAL_SHADOW_MARK) add_shader(fog3d_vsm.comp sky/fog.frag -S comp -DVOLUMETRIC -DVIRTUAL_SHADOW) add_shader(fog3d_hq.frag sky/fog.frag -DVOLUMETRIC) add_shader(fog.frag sky/fog.frag) @@ -298,22 +297,27 @@ add_shader(cmaa2_deferred_color_apply_2x2.vert antialiasing/cmaa2/deferred_c add_shader(cmaa2_deferred_color_apply_2x2.frag antialiasing/cmaa2/deferred_color_apply_2x2.frag) # virtual shadows -add_shader(direct_light_vsm.frag virtual_shadow/vsm_direct_light.frag) -add_shader(vsm_reproject_sm.frag virtual_shadow/vsm_reproject_sm.frag) -add_shader(vsm_dbg.frag virtual_shadow/vsm_direct_light.frag -DDEBUG) -add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp) -add_shader(vsm_clear.comp virtual_shadow/vsm_clear.comp) -add_shader(vsm_clear_pages.comp virtual_shadow/vsm_clear_pages.comp) -add_shader(vsm_trim_pages.comp virtual_shadow/vsm_trim_pages.comp) -add_shader(vsm_clump_pages.comp virtual_shadow/vsm_clump_pages.comp) -add_shader(vsm_alloc_pages.comp virtual_shadow/vsm_alloc_pages.comp) -add_shader(vsm_merge_pages.comp virtual_shadow/vsm_merge_pages.comp) -add_shader(vsm_pack_draws0.comp virtual_shadow/vsm_pack_draws.comp -DPASS0) -add_shader(vsm_pack_draws1.comp virtual_shadow/vsm_pack_draws.comp -DPASS1) -add_shader(vsm_cluster_task.comp virtual_shadow/vsm_cluster_task.comp -DVIRTUAL_SHADOW) -#add_shader(vsm_list_pages.comp virtual_shadow/vsm_list_pages.comp) - -add_shader(vsm_rendering.comp virtual_shadow/vsm_rendering.comp) +add_shader(direct_light_vsm.frag virtual_shadow/vsm_direct_light.frag) +add_shader(vsm_reproject_sm.frag virtual_shadow/vsm_reproject_sm.frag) +add_shader(vsm_dbg.frag virtual_shadow/vsm_direct_light.frag -DDEBUG) +add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp) +add_shader(vsm_mark_fog_pages.comp virtual_shadow/vsm_mark_fog_pages.comp) +add_shader(vsm_clear.comp virtual_shadow/vsm_clear.comp) +add_shader(vsm_clear_pages.comp virtual_shadow/vsm_clear_pages.comp) 
+add_shader(vsm_trim_pages.comp virtual_shadow/vsm_trim_pages.comp) +add_shader(vsm_clump_pages.comp virtual_shadow/vsm_clump_pages.comp) +add_shader(vsm_list_pages.comp virtual_shadow/vsm_list_pages.comp) +add_shader(vsm_alloc_pages.comp virtual_shadow/vsm_alloc_pages.comp) +add_shader(vsm_merge_pages.comp virtual_shadow/vsm_merge_pages.comp) +add_shader(vsm_pack_draws0.comp virtual_shadow/vsm_pack_draws.comp -DPASS0) +add_shader(vsm_pack_draws1.comp virtual_shadow/vsm_pack_draws.comp -DPASS1) +add_shader(vsm_cluster_task.comp virtual_shadow/vsm_cluster_task.comp -DVIRTUAL_SHADOW) +# virtual shadows: fog +add_shader(vsm_fog_epipolar.comp virtual_shadow/vsm_fog_epipolar.comp) +add_shader(vsm_fog_shadow.comp virtual_shadow/vsm_fog_shadow.comp) +add_shader(vsm_fog_sample.comp virtual_shadow/vsm_fog_sample.comp) + +add_shader(vsm_rendering.comp virtual_shadow/vsm_rendering.comp) # software rendering add_shader(sw_rendering_imm.comp software_rendering/sw_rendering_imm.comp) diff --git a/shader/sky/fog.frag b/shader/sky/fog.frag index a45392e85..f28a57a5a 100644 --- a/shader/sky/fog.frag +++ b/shader/sky/fog.frag @@ -3,7 +3,7 @@ #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_control_flow_attributes : enable -#if defined(VIRTUAL_SHADOW) || defined(VIRTUAL_SHADOW_MARK) +#if defined(VIRTUAL_SHADOW) #include "virtual_shadow/vsm_common.glsl" #endif @@ -37,15 +37,10 @@ layout(binding = 3, r32ui) uniform writeonly restrict uimage2D occlusionLut; layout(binding = 3, r32ui) uniform readonly restrict uimage2D occlusionLut; #endif -#if defined(VOLUMETRIC) && !defined(VIRTUAL_SHADOW) && !defined(VIRTUAL_SHADOW_MARK) && defined(GL_COMPUTE_SHADER) +#if defined(VOLUMETRIC) && !defined(VIRTUAL_SHADOW) && defined(GL_COMPUTE_SHADER) layout(binding = 4) uniform sampler2D textureSm1; #endif -#if defined(VOLUMETRIC) && defined(VIRTUAL_SHADOW_MARK) && defined(GL_COMPUTE_SHADER) -layout(binding = 4, r32ui) uniform uimage3D pageTbl; -layout(binding = 5, r32ui) uniform 
uimage3D pageTblDepth; -#endif - #if defined(VOLUMETRIC) && defined(VIRTUAL_SHADOW) && defined(GL_COMPUTE_SHADER) layout(binding = 4) uniform utexture3D pageTbl; layout(binding = 5) uniform texture2D pageData; @@ -59,60 +54,6 @@ const float dFogMax = 0.9999; uvec2 invocationID = gl_GlobalInvocationID.xy; #endif -#if defined (VIRTUAL_SHADOW_MARK) -shared uint pageHiZ[NumThreads]; -//uint pageHiZTh = 0xFFFFFFFF; - -void storeHiZValue(uint v) { - uvec4 dx = unpack565_16(v); - ivec3 at = ivec3(dx.xyz); - uint iz = floatBitsToUint(dx.w/float(0xFFFF)); - imageAtomicExchange(pageTbl, at, 1u); - imageAtomicMin(pageTblDepth, at, iz); - } - -void setupHiZ() { - const uint lane = gl_LocalInvocationIndex; - pageHiZ[lane] = 0xFFFFFFFF; - } - -void markPage(ivec3 at, float z) { - if(z<0 || z>=1) - return; - - uint iz = uint(z*0xFFFF); - uint cur = pack565_16(at,iz); - uint id = pageIdHash7(at) % pageHiZ.length(); - - /* - if((pageHiZTh==0xFFFFFFFF) || (pageHiZTh&0xFFFF0000)==(cur&0xFFFF0000)) { - // thread local cache - pageHiZTh = min(pageHiZTh, cur); - return; - } - */ - - uint v = atomicMin(pageHiZ[id], cur); - if(v==0xFFFFFFFF) - return; // clean insert - if((v&0xFFFF0000)==(cur&0xFFFF0000)) - return; // update same entry - - // imageAtomicAdd(pageTbl, ivec3(0), 1u); //counter - storeHiZValue(v); - } - -void flushHiZ() { - //if(pageHiZTh!=0xFFFFFFFF) - // storeHiZValue(pageHiZTh); - const uint lane = gl_LocalInvocationIndex; - const uint v = pageHiZ[lane]; - if(v==0xFFFFFFFF) - return; - storeHiZValue(v); - } -#endif - float interleavedGradientNoise() { #if defined(GL_COMPUTE_SHADER) return interleavedGradientNoise(invocationID.xy); @@ -121,25 +62,10 @@ float interleavedGradientNoise() { #endif } -#if defined(VOLUMETRIC) && defined(VIRTUAL_SHADOW_MARK) && defined(GL_COMPUTE_SHADER) -bool shadowFactor(vec4 shPos) { - vec3 shPos0 = shPos.xyz/shPos.w; - - int mip = vsmCalcMipIndex(shPos0.xy, VSM_FOG_MIP); - vec2 page = shPos0.xy / (1 << mip); - if(any(greaterThan(abs(page), 
vec2(1)))) - return true; - - ivec2 pageI = ivec2((page*0.5+0.5)*VSM_PAGE_TBL_SIZE); - ivec3 at = ivec3(pageI, mip); - markPage(at, shPos0.z); - return true; - } -#elif defined(VOLUMETRIC) && defined(VIRTUAL_SHADOW) && defined(GL_COMPUTE_SHADER) +#if defined(VOLUMETRIC) && defined(VIRTUAL_SHADOW) && defined(GL_COMPUTE_SHADER) bool shadowFactor(vec4 shPos) { vec3 shPos0 = shPos.xyz/shPos.w; - - int mip = vsmCalcMipIndex(shPos0.xy, VSM_FOG_MIP); + int mip = vsmCalcMipIndexFog(shPos0.xy); vec2 page = shPos0.xy / (1 << mip); if(any(greaterThan(abs(page), vec2(1)))) return true; @@ -183,7 +109,7 @@ vec4 fog(vec2 uv, float z) { const vec3 pos1 = project(scene.viewProjectLwcInv, vec3(inPos,dFogMax)); const vec3 posz = project(scene.viewProjectLwcInv, vec3(inPos,z)); -#if defined(VIRTUAL_SHADOW) || defined(VIRTUAL_SHADOW_MARK) +#if defined(VIRTUAL_SHADOW) const vec4 shPos0 = scene.viewVirtualShadowLwc*vec4(pos0, 1); const vec4 shPos1 = scene.viewVirtualShadowLwc*vec4(posz, 1); #else @@ -305,22 +231,14 @@ void main_comp() { vec2 uv = inPos*vec2(0.5)+vec2(0.5); vec3 view = normalize(inverse(vec3(inPos,1.0))); vec3 sunDir = scene.sunDir; - float z = textureLod(depth,uv,0).r; + float z = min(textureLod(depth,uv,0).r, dFogMax); fog(uv,z); } #endif void main() { -#if defined(VIRTUAL_SHADOW_MARK) && defined(GL_COMPUTE_SHADER) - setupHiZ(); - barrier(); - - main_comp(); - barrier(); - - flushHiZ(); -#elif defined(GL_COMPUTE_SHADER) +#if defined(GL_COMPUTE_SHADER) main_comp(); #else main_frag(); diff --git a/shader/virtual_shadow/vsm_alloc_pages.comp b/shader/virtual_shadow/vsm_alloc_pages.comp index 72262a22f..a8480ff65 100644 --- a/shader/virtual_shadow/vsm_alloc_pages.comp +++ b/shader/virtual_shadow/vsm_alloc_pages.comp @@ -21,14 +21,8 @@ shared uint pageList[VSM_MAX_PAGES]; shared uint bits[(NumThreads+31)/32]; -shared uint ladder [VSM_PAGE_TBL_SIZE]; -shared uint ladderWr[VSM_PAGE_TBL_SIZE]; - -ivec2 loadPageSize(ivec3 at) { - uint a = imageLoad(pageTbl, at).r; - a = a >> 1; 
- return ivec2(a, a >> 4) & 0xF; - } +shared uint ladder [VSM_PAGE_PER_ROW]; +shared uint ladderWr[VSM_PAGE_PER_ROW]; void storePage(ivec2 pId, ivec3 at, ivec2 tile, ivec2 size) { const uint pageId = pId.x + pId.y*VSM_PAGE_PER_ROW; @@ -55,41 +49,22 @@ uint bitCountShared(uint till) { } void listPages() { - const ivec3 size = imageSize(pageTbl); - const uint lane = gl_LocalInvocationIndex; - const ivec3 at = ivec3(gl_GlobalInvocationID); - pageListSize = 0; + const uint lane = gl_LocalInvocationIndex; - for(int i=0; i<1111 && i0) - atomicOr(bits[lane/32], 1u<<(lane%32)); - barrier(); - - if(sz.x>0) { - uint id = pageListSize + bitCountShared(lane); - pageList[id] = packVsmPageInfo(ivec3(at.xy, i), sz); - } - barrier(); - - if(sz.x>0) - atomicAdd(pageListSize, 1); - barrier(); - } + pageListSize = vsm.header.pageCount; + for(uint i=lane; i0) + atomicAdd(vsm.header.pagePerMip[at.z], 1); } } diff --git a/shader/virtual_shadow/vsm_cluster_task.comp b/shader/virtual_shadow/vsm_cluster_task.comp index 2d1fe80ed..d5062f712 100644 --- a/shader/virtual_shadow/vsm_cluster_task.comp +++ b/shader/virtual_shadow/vsm_cluster_task.comp @@ -4,26 +4,13 @@ #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_control_flow_attributes : enable +#define CLUSTER 1 + #include "scene.glsl" #include "common.glsl" +#include "materials/materials_common.glsl" #include "virtual_shadow/vsm_common.glsl" -struct IndirectCmd { - uint vertexCount; - uint instanceCount; - uint firstVertex; - uint firstInstance; - uint writeOffset; - }; - -struct Cluster { - vec4 sphere; - uint bucketId_commandId; - uint firstMeshlet; - int meshletCount; - uint instanceId; - }; - layout(local_size_x = 1024) in; const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; @@ -31,11 +18,9 @@ const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z layout(push_constant, std430) uniform UboPush { int meshletCount; } push; -layout(binding = 0, std140) uniform 
UboScene { - SceneDesc scene; - }; layout(binding = 1, std430) buffer SsboGlob { uvec4 payload[]; }; layout(binding = 2, std430) readonly buffer Mem { uint instanceMem[]; }; +layout(binding = 3, std140) readonly buffer Bbo { Bucket bucket[]; }; layout(binding = 4, std430) buffer IndirectBuf { IndirectCmd cmd[]; }; layout(binding = 5, std430) readonly buffer Cbo { Cluster clusters[]; }; @@ -52,6 +37,39 @@ shared uint vsmMipMask; shared uint anyPageGlob; #endif +Instance pullInstance(uint i) { + i *= 16; + Instance ret; + ret.mat[0][0] = uintBitsToFloat(instanceMem[i+0]); + ret.mat[0][1] = uintBitsToFloat(instanceMem[i+1]); + ret.mat[0][2] = uintBitsToFloat(instanceMem[i+2]); + ret.mat[1][0] = uintBitsToFloat(instanceMem[i+3]); + ret.mat[1][1] = uintBitsToFloat(instanceMem[i+4]); + ret.mat[1][2] = uintBitsToFloat(instanceMem[i+5]); + ret.mat[2][0] = uintBitsToFloat(instanceMem[i+6]); + ret.mat[2][1] = uintBitsToFloat(instanceMem[i+7]); + ret.mat[2][2] = uintBitsToFloat(instanceMem[i+8]); + ret.mat[3][0] = uintBitsToFloat(instanceMem[i+9]); + ret.mat[3][1] = uintBitsToFloat(instanceMem[i+10]); + ret.mat[3][2] = uintBitsToFloat(instanceMem[i+11]); + ret.fatness = uintBitsToFloat(instanceMem[i+12]); + ret.animPtr = instanceMem[i+13]; + return ret; + } + +bool frustrumTest(const vec4 sphere) { + if(sphere.w<0.f) + return false; + + for(int i=0; i<6; ++i) { + if(dot(vec4(sphere.xyz,1.0), scene.frustrum[i]) <= -sphere.w) { + return false; + } + } + + return true; + } + bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMax) { const vec3 c = (scene.viewProject * vec4(sphere.xyz, 1)).xyz; const float R = sphere.w; @@ -71,16 +89,28 @@ bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMax) { return true; } -bool frustrumTest(const vec4 sphere) { - if(sphere.w<0.f) - return false; - - for(int i=0; i<6; ++i) { - if(dot(vec4(sphere.xyz,1.0), scene.frustrum[i]) <= -sphere.w) { - return false; - } +bool projectInstance(const Cluster cluster, out 
vec4 aabb, out float depthMax) { + const uint bId = cluster.bucketId_commandId >> 16; + const uint instanceId = cluster.instanceId; + + const vec3 b[2] = {bucket[bId].bbox[0].xyz, bucket[bId].bbox[1].xyz}; + Instance obj = pullInstance(instanceId); + + aabb = vec4(1, 1, -1, -1); + depthMax = 0; + for(uint i=0; i<8; ++i) { + const vec3 pos = vec3(b[bitfieldExtract(i,0,1)].x, + b[bitfieldExtract(i,1,1)].y, + b[bitfieldExtract(i,2,1)].z); + vec4 trPos = vec4(pos,1.0); + trPos = vec4(obj.mat*trPos, 1.0); + trPos = scene.viewProject*trPos; + + vec3 bp = trPos.xyz / trPos.w; + aabb.xy = min(aabb.xy, bp.xy); + aabb.zw = max(aabb.zw, bp.xy); + depthMax = max(depthMax, bp.z); } - return true; } @@ -139,78 +169,17 @@ bool pageBoundsTest(in vec4 aabb, const ivec3 page, const ivec2 sz) { return false; if(aabb.w < 0 || sz.y <= aabb.y) return false; - const float dx = (aabb.z-aabb.x)*VSM_PAGE_SIZE; - const float dy = (aabb.w-aabb.y)*VSM_PAGE_SIZE; + const float dx = (aabb.z-aabb.x)*VSM_PAGE_SIZE; + const float dy = (aabb.w-aabb.y)*VSM_PAGE_SIZE; if(dx<1 || dy<1) return false; // subpixel sized meshlet - return true; - } - -void _runCluster(const uint clusterId) { - const Cluster cluster = clusters[clusterId]; - if(cluster.sphere.w<=0.f) - return; // disabled or deleted - - if(frustrumTest(cluster.sphere)) { - if(gl_LocalInvocationIndex==0) - atomicAdd(vsm.header.counterV, cluster.meshletCount); - } else { - // return; - } - vec4 aabb = vec4(0); - float depthMin = 1; - if(!projectCluster(cluster, aabb, depthMin)) - return; - - makeMipMask(aabb); - - const uint commandId = cluster.bucketId_commandId & 0xFFFF; - const uint pageCount = vsm.header.pageCount; - bool anyPage = false; - - for(uint i=gl_LocalInvocationIndex; i 1000000) - ;//return; - for(int i=0; i4) + if(!(3<=page.z && page.z<=6)) ;//continue; + if(!pageBoundsTest(aabb, page, sz)) continue; if(!pageHiZTest(aabb, depthMax, page, sz)) continue; + + // imageAtomicAdd(dbg, unpackVsmPageId(i), cluster.meshletCount); + 
imageAtomicAdd(dbg, ivec2(page.z,0), cluster.meshletCount); +#if 0 + if(unpackVsmPageId(i)==ivec2(56,4)) + continue; + if(unpackVsmPageId(i)==ivec2(32,4)) + continue; + if(unpackVsmPageId(i)==ivec2(24,4)) + ;//continue; + if(unpackVsmPageId(i)==ivec2(44,4)) + continue; +#endif + if(!emitCluster(cluster, i)) break; - imageAtomicAdd(dbg, unpackVsmPageId(i), 1); } } diff --git a/shader/virtual_shadow/vsm_common.glsl b/shader/virtual_shadow/vsm_common.glsl index f5bfb8245..3c03b26fd 100644 --- a/shader/virtual_shadow/vsm_common.glsl +++ b/shader/virtual_shadow/vsm_common.glsl @@ -8,8 +8,10 @@ const int VSM_PAGE_SIZE = 128; const int VSM_PAGE_TBL_SIZE = 32; // small for testing, 64 can be better const int VSM_PAGE_MIPS = 16; -const int VSM_FOG_MIP = 6; -const int VSM_PAGE_PER_ROW = 4096/VSM_PAGE_SIZE; +//const int VSM_FOG_MIP = 6; +//const int VSM_PAGE_PER_ROW = 4096/VSM_PAGE_SIZE; +const int VSM_FOG_MIP = 5; +const int VSM_PAGE_PER_ROW = 8192/VSM_PAGE_SIZE; const int VSM_MAX_PAGES = VSM_PAGE_PER_ROW * VSM_PAGE_PER_ROW; // 1024; const int VSM_CLIPMAP_SIZE = VSM_PAGE_SIZE * VSM_PAGE_TBL_SIZE; @@ -18,6 +20,7 @@ struct VsmHeader { uint meshletCount; uint counterM; uint counterV; + uint pagePerMip[VSM_PAGE_MIPS]; ivec4 pageBbox[VSM_PAGE_MIPS]; }; @@ -79,6 +82,12 @@ int vsmCalcMipIndex(in vec2 shPos, int minMip) { return max(vsmCalcMipIndex(shPos), minMip); } +int vsmCalcMipIndexFog(in vec2 shPos) { + float d = max(abs(shPos.x), abs(shPos.y)); + uint id = uint(d * 16.0); + return clamp(findMSB(id)+1, VSM_FOG_MIP, VSM_FOG_MIP+4); + } + uint pageIdHash7(ivec3 src) { uint x = (src.x & 0x3) << 0; uint y = (src.y & 0x3) << 2; @@ -94,6 +103,20 @@ float vsmTexelFetch(in texture2D pageData, const ivec2 pixel) { return texelFetch(pageData, pixel, 0).x; } +uint shadowPageIdFetch(in vec2 page, in int mip, in utexture3D pageTbl) { + //page-local + const ivec2 pageI = ivec2((page*0.5+0.5)*VSM_PAGE_TBL_SIZE); + const vec2 pageF = fract((page*0.5+0.5)*VSM_PAGE_TBL_SIZE); + const ivec2 at = 
ivec2(pageF*VSM_PAGE_SIZE); + + //page-global + const uint pageD = texelFetch(pageTbl, ivec3(pageI, mip), 0).x; + if(pageD==0) + return -1; + + return pageD >> 16u; + } + float shadowTexelFetch(in vec2 page, in int mip, in utexture3D pageTbl, #if defined(VSM_ATOMIC) in utexture2D pageData @@ -112,6 +135,8 @@ float shadowTexelFetch(in vec2 page, in int mip, in utexture3D pageTbl, return -1; const uint pageId = pageD >> 16u; + + // const uint pageId = shadowPageIdFetch(page, mip, pageTbl); const ivec2 pageImageAt = unpackVsmPageId(pageId)*VSM_PAGE_SIZE + at; return vsmTexelFetch(pageData, pageImageAt); } diff --git a/shader/virtual_shadow/vsm_direct_light.frag b/shader/virtual_shadow/vsm_direct_light.frag index f1f181967..2b70e918c 100644 --- a/shader/virtual_shadow/vsm_direct_light.frag +++ b/shader/virtual_shadow/vsm_direct_light.frag @@ -42,6 +42,7 @@ layout(binding = 6) uniform utexture2D pageData; #else layout(binding = 6) uniform texture2D pageData; #endif +layout(binding = 8, r32ui) uniform readonly uimage2D dbg; layout(location = 0) out vec4 outColor; @@ -109,7 +110,7 @@ int shadowLod(vec2 dx, vec2 dy) { const float bias = vsmMipBias; //return max(0, int((minLod + maxLod)*0.5 + bias + 0.5)); - return max(0, int(minLod + bias + 0.5)); + return max(0, int(minLod + bias)); } float shadowTexelFetch(vec2 page, int mip) { @@ -183,6 +184,12 @@ void main() { outColor = vec4(luminance * scene.exposure, 1); +#if 0 + const uint pageId = shadowPageIdFetch(page.xy,mip,pageTbl); + const uint v = imageLoad(dbg, unpackVsmPageId(pageId)).x; + outColor.rgb = vec3(v/512.0); +#endif + #if defined(DEBUG) const ivec2 pageI = ivec2((page.xy*0.5+0.5)*VSM_PAGE_TBL_SIZE); // int mip = 0; diff --git a/shader/virtual_shadow/vsm_fog_epipolar.comp b/shader/virtual_shadow/vsm_fog_epipolar.comp new file mode 100644 index 000000000..684a6e989 --- /dev/null +++ b/shader/virtual_shadow/vsm_fog_epipolar.comp @@ -0,0 +1,193 @@ +#version 450 + +#extension GL_GOOGLE_include_directive : enable 
+#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_samplerless_texture_functions : enable +#extension GL_EXT_control_flow_attributes : enable + +#include "virtual_shadow/vsm_common.glsl" +#include "scene.glsl" +#include "common.glsl" + +struct Epipole { + vec2 rayOrig; + vec2 rayDir; + float tMin; + float tMax; + }; + +layout(local_size_x = 256) in; + +const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; + +layout(binding = 0, r32ui) uniform uimage2D result; +//layout(binding = 0, rgba8) uniform image2D result; +layout(binding = 1, rgba8) uniform image2D rayData; +layout(binding = 2, std140) uniform UboScene { + SceneDesc scene; + }; +layout(binding = 3, std430) buffer Ep0 { Epipole epipole[]; }; +layout(binding = 4) uniform texture2D depth; +layout(binding = 5) uniform utexture3D pageTbl; +layout(binding = 6) uniform texture2D pageData; + +const float dFogMin = 0; +const float dFogMax = 0.9999; + uint NumSamples = imageSize(rayData).x; + uint NumSlices = imageSize(rayData).y; + ivec2 fragCoord = ivec2(gl_GlobalInvocationID.xy); + ivec2 viewportSize = textureSize(depth, 0); + +shared vec2 rayOrig, rayDir; +shared uint tMinUint, tMaxUint; +shared uint tDbg; + +vec4 dbgColor = vec4(0,1,0,0); + +float interleavedGradientNoise() { + return interleavedGradientNoise(fragCoord.xy); + } + +vec3 screenToShadow(vec3 scr) { + //TODO: optimize + const vec4 pos = scene.viewProjectLwcInv *vec4(scr,1); + const vec4 shPos = scene.viewVirtualShadowLwc*pos; + return shPos.xyz/shPos.w; + } + +vec3 wrldToShadow(vec3 pos) { + const vec4 shPos = scene.viewVirtualShadow*vec4(pos,1); + return shPos.xyz/shPos.w; + } + +vec2 sunPosition2d() { + vec3 sun = scene.sunDir; + sun = (scene.viewProject*vec4(sun,0)).xyz; + sun.xy /= sun.z; + return (sun.xy*0.5+0.5);// * viewportSize; + } + +vec2 rayPosition2d() { + const float ratio = float(viewportSize.x)/float(viewportSize.x+viewportSize.y); + const uint hor = int(0.5*ratio*NumSlices); + const uint 
vert = int(NumSlices)/2 - hor; + + uint rayId = gl_WorkGroupID.x; + + if(rayId1) { + vec2 dvec = sun - rpos; + float k = (1 - rpos.x)/dvec.x; + sun = rpos + dvec*k; + } + if(sun.y<0) { + vec2 dvec = sun - rpos; + float k = (0 - rpos.y)/dvec.y; + sun = rpos + dvec*k; + } + if(sun.y>1) { + vec2 dvec = sun - rpos; + float k = (1 - rpos.y)/dvec.y; + sun = rpos + dvec*k; + } + + trace(rpos, sun); + } diff --git a/shader/virtual_shadow/vsm_fog_sample.comp b/shader/virtual_shadow/vsm_fog_sample.comp new file mode 100644 index 000000000..9258839d3 --- /dev/null +++ b/shader/virtual_shadow/vsm_fog_sample.comp @@ -0,0 +1,200 @@ +#version 450 + +#extension GL_GOOGLE_include_directive : enable +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_samplerless_texture_functions : enable +#extension GL_EXT_control_flow_attributes : enable + +#include "virtual_shadow/vsm_common.glsl" +#include "scene.glsl" +#include "common.glsl" + +struct Epipole { + vec2 rayOrig; + vec2 rayDir; + float tMin; + float tMax; + }; + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0, r32ui) uniform uimage2D result; +//layout(binding = 0, rgba8) uniform image2D result; +layout(binding = 1, rgba8) uniform image2D rayData; +layout(binding = 2, std140) uniform UboScene { + SceneDesc scene; + }; +layout(binding = 3, std430) readonly buffer Ep0 { Epipole epipole[]; }; +layout(binding = 4) uniform texture2D depth; + +const float dFogMin = 0; +const float dFogMax = 0.9999; + uint NumSamples = imageSize(rayData).x; + uint NumSlices = imageSize(rayData).y; + ivec2 fragCoord = ivec2(gl_GlobalInvocationID.xy); + ivec2 viewportSize = textureSize(depth, 0); + +vec4 dbgColor = vec4(0); + +float interleavedGradientNoise() { + return interleavedGradientNoise(fragCoord.xy); + } + +vec3 project(mat4 m, vec3 pos) { + vec4 p = m*vec4(pos,1); + return p.xyz/p.w; + } + +vec3 screenToShadow(vec3 scr) { + //TODO: optimize + const vec4 pos = scene.viewProjectLwcInv *vec4(scr,1); + const 
vec4 shPos = scene.viewVirtualShadowLwc*pos; + return shPos.xyz/shPos.w; + } + +vec3 wrldToShadow(vec3 pos) { + const vec4 shPos = scene.viewVirtualShadow*vec4(pos,1); + return shPos.xyz/shPos.w; + } + +vec2 sunPosition2d() { + vec3 sun = scene.sunDir; + sun = (scene.viewProject*vec4(sun,0)).xyz; + sun.xy /= sun.z; + return sun.xy; + } + +vec2 rayOrigin() { + vec2 sun = sunPosition2d(); + vec2 rpos = vec2(fragCoord)/vec2(viewportSize); + rpos = rpos*2.0 - 1.0; + + vec2 dv = rpos - sun; + vec2 ret = vec2(0); + { + float d = (dv.x<0 ? -1 : +1) - sun.x; + ret = sun + dv*abs(d/dv.x); + if(-1<=ret.y && ret.y<=1) + return ret; + } + { + float d = (dv.y<0 ? -1 : +1) - sun.y; + ret = sun + dv*abs(d/dv.y); + if(-1<=ret.x && ret.x<=1) + return ret; + } + return vec2(-1); + } + +uint sliceId() { + const vec2 src = rayOrigin(); + const vec2 uv = src*0.5+0.5; + const float ratio = float(viewportSize.x)/float(viewportSize.x+viewportSize.y); + const uint hor = int(0.5*ratio*NumSlices); + const uint vert = int(NumSlices)/2 - hor; + + uint rayId = 0; + if(src.x < src.y && src.x > -src.y) { + // bottom + dbgColor = vec4(0,1,0,0); + return rayId + uint(uv.x * hor); + } + rayId += hor; + + if(src.x > src.y && src.x < -src.y) { + // top + dbgColor = vec4(1,0,0,0); + return rayId + uint(uv.x * hor); + } + rayId += hor; + + if(src.x < src.y && src.x < -src.y) { + // left + dbgColor = vec4(0,0,1,0); + return rayId + uint(uv.y * vert); + } + rayId += vert; + + // right + dbgColor = vec4(1,0,1,0); + return rayId + uint(uv.y * vert); + } + +void trace(const vec2 inPos, const float z, ivec2 pix, const uint sliceId) { + const int steps = 32; + const float noise = interleavedGradientNoise()/steps; + + const vec3 shPos0 = screenToShadow(vec3(inPos.xy,0)); + const vec3 shPos1 = screenToShadow(vec3(inPos.xy,z)); + + const Epipole e = epipole[sliceId]; + const float tMin = e.tMin; + const float tMax = e.tMax; + const vec2 rayA = e.rayOrig; + const vec2 rayDir = e.rayDir; + + uint occlusion = 0; + 
[[dont_unroll]] + for(uint i=0; i=NumSamples) + return; + + vec2 rayOrig = epipole[sliceId].rayOrig; + vec2 rayDir = epipole[sliceId].rayDir; + float tMin = epipole[sliceId].tMin; + float tMax = epipole[sliceId].tMax; + + const float a = (i+0.5)/float(NumSamples); + const float t = tMin + a*(tMax-tMin); + const vec2 shPos = rayOrig + t*rayDir; + const float sh = shadowTexelFetch(shPos); + + imageStore(rayData, ivec2(i, sliceId), vec4(sh)); + } diff --git a/shader/virtual_shadow/vsm_list_pages.comp b/shader/virtual_shadow/vsm_list_pages.comp index 5e56ea661..27c8d631a 100644 --- a/shader/virtual_shadow/vsm_list_pages.comp +++ b/shader/virtual_shadow/vsm_list_pages.comp @@ -8,27 +8,71 @@ #include "scene.glsl" #include "common.glsl" -//layout(local_size_x = 64) in; -layout(local_size_x = 8, local_size_y = 8) in; +layout(local_size_x = 32, local_size_y = 32) in; layout(binding = 0, std430) buffer Pages { VsmHeader header; uint pageList[]; } vsm; -layout(binding = 1, r32ui) uniform uimage3D pageTbl; -layout(binding = 2, r32ui) uniform readonly uimage3D pageTblDepth; -//layout(binding = 1, std430) writeonly buffer Dst { VsmHeader header; uint pageList[]; } dst; +layout(binding = 1, r32ui) uniform uimage3D pageTbl; +layout(binding = 2, r32ui) uniform uimage2D dbg; const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; -void main() { - const ivec3 at = ivec3(gl_GlobalInvocationID.xyz); +shared uint pageListPrefix; +shared uint bits[(NumThreads+31)/32]; + +ivec2 loadPageSize(ivec3 at) { + uint a = imageLoad(pageTbl, at).r; + a = a >> 1; + return ivec2(a, a >> 4) & 0xF; + } + +uint bitCountShared(uint till) { + uint n = till/32; + uint f = till%32; + uint r = 0; + for(uint i=0; i0) + atomicOr(bits[lane/32], 1u<<(lane%32)); + barrier(); + + if(sz.x>0) { + const uint id = pageListPrefix + bitCountShared(lane); + vsm.pageList[id] = packVsmPageInfo(at, sz); + } + // barrier(); //NOTE: very slow on mac + } + +void main() { + const uint lane = 
gl_LocalInvocationIndex; + const uint mip = gl_WorkGroupID.x; + + pageListPrefix = 0; + barrier(); + + if(lane 0.0) + // return; + vec3 page = vec3(0); int mip = 0; if(!calcMipIndex(page, mip, z, normal)) return; + //if(mip>6) + // return; ivec2 pageI = ivec2((page.xy*0.5+0.5)*VSM_PAGE_TBL_SIZE); markPage(ivec3(pageI, mip), page.z); }