Skip to content

Commit

Permalink
vsm: update page clump heuristics
Browse files Browse the repository at this point in the history
  • Loading branch information
Try committed Sep 21, 2024
1 parent b825da6 commit bcefbc4
Show file tree
Hide file tree
Showing 9 changed files with 246 additions and 291 deletions.
39 changes: 19 additions & 20 deletions game/graphics/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,23 +188,21 @@ void Renderer::resetSwapchain() {
cmaa2.defferedColorApplyUbo = device.descriptors(*cmaa2.defferedColorApply);

if(settings.vsmEnabled) {
vsm.pagesClearPso = &Shaders::inst().vsmClear;
vsm.uboClear = device.descriptors(*vsm.pagesClearPso);
vsm.uboClear = device.descriptors(Shaders::inst().vsmClear);
if(!vsm.pageDataCs.isEmpty())
vsm.uboClearPages = device.descriptors(Shaders::inst().vsmClearPages);

vsm.pagesMarkPso = &Shaders::inst().vsmMarkPages;
vsm.uboPages = device.descriptors(*vsm.pagesMarkPso);

vsm.pagesListPso = &Shaders::inst().vsmListPages;
vsm.uboList = device.descriptors(*vsm.pagesListPso);
vsm.uboPages = device.descriptors(Shaders::inst().vsmMarkPages );
vsm.uboClump = device.descriptors(Shaders::inst().vsmClumpPages);
vsm.uboAlloc = device.descriptors(Shaders::inst().vsmAllocPages);

vsm.directLightPso = &Shaders::inst().vsmDirectLight;
vsm.uboLight = device.descriptors(*vsm.directLightPso);

vsm.pagesDbgPso = &Shaders::inst().vsmDbg;
vsm.uboDbg = device.descriptors(*vsm.pagesDbgPso);

vsm.pageDbg = device.image2d(TextureFormat::R32U, 32, 32);
vsm.pageTbl = device.image3d(TextureFormat::R32U, 32, 32, 16);
// vsm.pageDataCs = device.image2d(TextureFormat::R32U, 4096, 4096);
vsm.pageData = device.zbuffer(shadowFormat, 4096, 4096);
Expand Down Expand Up @@ -463,8 +461,12 @@ void Renderer::prepareUniforms() {
vsm.uboPages.set(3, zbuffer, Sampler::nearest());
vsm.uboPages.set(4, vsm.pageTbl);

vsm.uboList.set(0, vsm.pageList);
vsm.uboList.set(1, vsm.pageTbl);
vsm.uboClump.set(0, vsm.pageList);
vsm.uboClump.set(1, vsm.pageTbl);

vsm.uboAlloc.set(0, vsm.pageList);
vsm.uboAlloc.set(1, vsm.pageTbl);
vsm.uboAlloc.set(2, vsm.pageDbg);

vsm.uboLight.set(0, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]);
vsm.uboLight.set(1, gbufDiffuse, Sampler::nearest());
Expand Down Expand Up @@ -856,35 +858,32 @@ void Renderer::drawVsm(Tempest::Encoder<Tempest::CommandBuffer>& cmd, uint8_t fI
if(!settings.vsmEnabled)
return;

auto& shaders = Shaders::inst();
cmd.setFramebuffer({});
cmd.setDebugMarker("VSM-pages");
cmd.setUniforms(*vsm.pagesClearPso, vsm.uboClear);
cmd.setUniforms(shaders.vsmClear, vsm.uboClear);
cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d()));

cmd.setUniforms(*vsm.pagesMarkPso, vsm.uboPages);
cmd.setUniforms(shaders.vsmMarkPages, vsm.uboPages);
cmd.dispatchThreads(zbuffer.size());

if(vsm.pageDataCs.isEmpty()) {
//TODO: trimming
//cmd.setUniforms(Shaders::inst().vsmClumpPages0, vsm.uboList);
//cmd.setUniforms(shaders.vsmClumpPages0, vsm.uboClump);
//cmd.dispatch(1);

// clump
cmd.setUniforms(Shaders::inst().vsmClumpPages1, vsm.uboList);
cmd.setUniforms(shaders.vsmClumpPages, vsm.uboClump);
cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d()));
}

if(!vsm.pageDataCs.isEmpty()) {
cmd.setUniforms(Shaders::inst().vsmClearPages, vsm.uboClearPages);
cmd.setUniforms(shaders.vsmClearPages, vsm.uboClearPages);
cmd.dispatchThreads(size_t(vsm.pageDataCs.w()), size_t(vsm.pageDataCs.h()));
}

cmd.setUniforms(*vsm.pagesListPso, vsm.uboList);
cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d()));

// sort for debug purpose only
cmd.setDebugMarker("VSM-sort(debug)");
cmd.setUniforms(Shaders::inst().vsmSortPages, vsm.uboList);
// alloc
cmd.setUniforms(shaders.vsmAllocPages, vsm.uboAlloc);
cmd.dispatch(1);

cmd.setDebugMarker("VSM-visibility");
Expand Down
7 changes: 2 additions & 5 deletions game/graphics/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,22 +211,19 @@ class Renderer final {
} gi;

struct {
Tempest::ComputePipeline* pagesClearPso = nullptr;
Tempest::DescriptorSet uboClear;
Tempest::DescriptorSet uboClearPages;

Tempest::ComputePipeline* pagesMarkPso = nullptr;
Tempest::DescriptorSet uboPages;

Tempest::ComputePipeline* pagesListPso = nullptr;
Tempest::DescriptorSet uboList;
Tempest::DescriptorSet uboClump, uboAlloc;

Tempest::RenderPipeline* directLightPso = nullptr;
Tempest::DescriptorSet uboLight;

Tempest::RenderPipeline* pagesDbgPso = nullptr;
Tempest::DescriptorSet uboDbg;

Tempest::StorageImage pageDbg;
Tempest::StorageImage pageTbl;
Tempest::StorageImage pageDataCs;
Tempest::ZBuffer pageData;
Expand Down
6 changes: 2 additions & 4 deletions game/graphics/shaders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,8 @@ Shaders::Shaders() {
vsmClear = computeShader("vsm_clear.comp.sprv");
vsmClearPages = computeShader("vsm_clear_pages.comp.sprv");
vsmMarkPages = computeShader("vsm_mark_pages.comp.sprv");
vsmClumpPages0 = computeShader("vsm_clump_pages0.comp.sprv");
vsmClumpPages1 = computeShader("vsm_clump_pages1.comp.sprv");
vsmListPages = computeShader("vsm_list_pages.comp.sprv");
vsmSortPages = computeShader("vsm_sort_pages.comp.sprv");
vsmClumpPages = computeShader("vsm_clump_pages.comp.sprv");
vsmAllocPages = computeShader("vsm_alloc_pages.comp.sprv");
vsmPackDraw0 = computeShader("vsm_pack_draws0.comp.sprv");
vsmPackDraw1 = computeShader("vsm_pack_draws1.comp.sprv");
vsmDirectLight = postEffect("copy", "direct_light_vsm", RenderState::ZTestMode::NoEqual);
Expand Down
5 changes: 2 additions & 3 deletions game/graphics/shaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,10 @@ class Shaders {

// Virtual shadow
Tempest::ComputePipeline vsmClusterTask;
Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages, vsmListPages;
Tempest::ComputePipeline vsmClumpPages0, vsmClumpPages1;
Tempest::ComputePipeline vsmClear, vsmClearPages, vsmMarkPages;
Tempest::ComputePipeline vsmClumpPages, vsmAllocPages;
Tempest::ComputePipeline vsmPackDraw0, vsmPackDraw1;
Tempest::RenderPipeline vsmDirectLight;
Tempest::ComputePipeline vsmSortPages;
Tempest::RenderPipeline vsmDbg;

Tempest::ComputePipeline vsmRendering;
Expand Down
6 changes: 2 additions & 4 deletions shader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,8 @@ add_shader(vsm_dbg.frag virtual_shadow/vsm_mark_pages.comp -DDEBUG
add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp -DMARK_PAGES -DLWC)
add_shader(vsm_clear.comp virtual_shadow/vsm_clear.comp)
add_shader(vsm_clear_pages.comp virtual_shadow/vsm_clear_pages.comp)
add_shader(vsm_clump_pages0.comp virtual_shadow/vsm_clump_pages.comp -DPASS0)
add_shader(vsm_clump_pages1.comp virtual_shadow/vsm_clump_pages.comp -DPASS1)
add_shader(vsm_list_pages.comp virtual_shadow/vsm_list_pages.comp)
add_shader(vsm_sort_pages.comp virtual_shadow/vsm_sort_pages.comp)
add_shader(vsm_clump_pages.comp virtual_shadow/vsm_clump_pages.comp)
add_shader(vsm_alloc_pages.comp virtual_shadow/vsm_alloc_pages.comp)
add_shader(vsm_pack_draws0.comp virtual_shadow/vsm_pack_draws.comp -DPASS0)
add_shader(vsm_pack_draws1.comp virtual_shadow/vsm_pack_draws.comp -DPASS1)
add_shader(vsm_cluster_task.comp virtual_shadow/vsm_cluster_task.comp -DVIRTUAL_SHADOW)
Expand Down
161 changes: 161 additions & 0 deletions shader/virtual_shadow/vsm_alloc_pages.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#version 450

#extension GL_GOOGLE_include_directive : enable
#extension GL_ARB_separate_shader_objects : enable
#extension GL_EXT_samplerless_texture_functions : enable

#include "virtual_shadow/vsm_common.glsl"
#include "scene.glsl"
#include "common.glsl"

// One workgroup is 32x32 invocations.
layout(local_size_x = 32, local_size_y = 32) in;

// binding 0: header plus flat page list; storePage() writes list entries and raises header.pageCount.
layout(binding = 0, std430) buffer Pages { VsmHeader header; uint pageList[]; } vsm;
// binding 1: 3D page table; read by loadPageSize(), overwritten with page indices by storePage().
layout(binding = 1, r32ui) uniform uimage3D pageTbl;
// binding 2: debug image; cleared in main(), receives the `size` argument of storePage().
layout(binding = 2, r32ui) uniform uimage2D dbg;

// Total number of invocations in the workgroup.
const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;

// Workgroup-local scratch list of packed page infos and its element count.
shared uint pageListSize;
shared uint pageList[VSM_MAX_PAGES];

// One bit per invocation; bitCountShared() turns it into a compacted output index.
shared uint bits[(NumThreads+31)/32];

// Per-row fill marks: `ladder` is the snapshot read by findRowCol(),
// `ladderWr` is raised with atomicMax in layoutPages() and copied into `ladder` at the start of each call.
shared uint ladder [VSM_PAGE_TBL_SIZE];
shared uint ladderWr[VSM_PAGE_TBL_SIZE];

// Reads the page-table texel at `at` and returns its red channel shifted right by one bit.
// NOTE(review): bit 0 is presumably a flag written by an earlier pass - confirm against the producer shader.
uint loadPageSize(ivec3 at) {
  return imageLoad(pageTbl, at).r >> 1;
}

// Records one page slot.
//   pId  - 2D slot coordinate; flattened row-major using VSM_PAGE_PER_ROW slots per row
//   at   - page-table coordinate packed into the list entry
//   tile - xy offset added to `at` to select the page-table texel that receives the slot index
//   size - value packed next to `at` and mirrored into the debug image
void storePage(ivec2 pId, ivec3 at, ivec2 tile, uint size) {
  const uint  slot  = pId.x + pId.y*VSM_PAGE_PER_ROW;
  const ivec3 texel = at + ivec3(tile, 0);

  vsm.pageList[slot] = packVsmPageInfo(at, ivec2(size));
  // pageCount ends up as (highest written slot + 1)
  atomicMax(vsm.header.pageCount, slot+1);
  imageStore(pageTbl, texel, uvec4(slot, 0, 0, 0));

  imageStore(dbg, pId, uvec4(size));
}

// Returns how many bits are set in the shared `bits` mask at positions [0, till).
// Callers must have synchronised their writes to `bits` with a barrier() beforehand.
uint bitCountShared(uint till) {
  const uint fullWords = till/32;
  const uint tailBits  = till%32;
  // mask selecting the low `tailBits` bits of the last, partially covered word
  const uint tailMask  = (1u << tailBits) - 1u;

  uint total = 0;
  uint w     = 0;
  while(w < fullWords) {
    total += bitCount(bits[w]);
    ++w;
  }
  total += bitCount(bits[fullWords] & tailMask);
  return total;
}

// Collects every non-empty page-table entry into the shared `pageList`.
//
// Each invocation owns one xy column of the page table and walks it layer by layer.
// Per layer, invocations holding a non-zero entry set their bit in `bits`; the number
// of set bits below an invocation's own bit gives its offset behind the entries
// gathered from earlier layers, so the list stays densely packed.
//
// On return `pageListSize` holds the number of valid entries in `pageList`.
// Entries that would not fit into `pageList` are dropped rather than written
// past the end of the shared array, and the count is clamped to the array length.
void listPages() {
  const ivec3 size = imageSize(pageTbl);
  const uint  lane = gl_LocalInvocationIndex;
  const ivec3 at   = ivec3(gl_GlobalInvocationID);
  // Hard cap on the number of visited layers, kept from the original loop condition.
  const int   maxLayers = 1111;
  pageListSize = 0;

  for(int i=0; i<maxLayers && i<size.z; ++i) {
    const uint frag = loadPageSize(ivec3(at.xy, i));
    if(lane < bits.length())
      bits[lane] = 0;
    barrier();

    if(frag>0)
      atomicOr(bits[lane/32], 1u<<(lane%32));
    barrier();

    if(frag>0) {
      const uint id = pageListSize + bitCountShared(lane);
      // The running index grows across all layers; never write outside the shared list.
      if(id < pageList.length())
        pageList[id] = packVsmPageInfo(ivec3(at.xy, i), ivec2(frag));
    }
    // everyone must have read pageListSize before it is advanced
    barrier();

    if(frag>0)
      atomicAdd(pageListSize, 1);
    barrier();
  }

  // Dropped entries were still counted above; report only what was actually stored.
  if(lane==0)
    pageListSize = min(pageListSize, uint(pageList.length()));
  barrier();
}

// Finds the slot for the first tile of the `page`-th page with side length `size`.
//
// Rows are visited in steps of `size`. For each visited row, `ladder[row]` is taken as the
// first free column, and the remaining width is split into `size`-wide cells. `page` is
// counted down by the cells available in each row until it falls inside one.
//
// Returns ivec2(column, row) of the cell, or ivec2(-1) when all rows are exhausted.
ivec2 findRowCol(in uint page, const uint size) {
  const uint rowWidth = 32;

  uint row = 0;
  while(row < rowWidth) {
    const uint used = ladder[row];
    const uint room = (rowWidth - used)/size;
    if(page < room)
      return ivec2(used + page*size, row);
    page -= room;
    row  += size;
  }
  // no space left
  return ivec2(-1);
}

// Places all pages whose size equals `size` and writes their tiles out via storePage().
//   pageData - packed page info held by this invocation
//   pageSz   - size of this invocation's page
//   size     - page size handled by this call
//
// Steps: snapshot `ladderWr` into `ladder`, compact the matching pages into `pageList`,
// then distribute (page, tile) pairs over the workgroup and store each tile.
void layoutPages(uint pageData, uint pageSz, uint size) {
  const uint lane     = gl_LocalInvocationIndex;
  const bool selected = (pageSz==size);

  // reset per-call scratch state
  if(lane < ladderWr.length())
    ladder[lane] = ladderWr[lane];
  if(lane < bits.length())
    bits[lane] = 0;
  pageListSize = 0;
  barrier();

  if(selected)
    atomicOr(bits[lane/32], 1u<<(lane%32));
  barrier();

  if(selected) {
    // number of selected invocations with a lower index == position in the compacted list
    const uint slot = bitCountShared(lane);
    pageList[slot] = pageData;
    atomicMax(pageListSize, slot+1);
  }
  barrier();

  const uint tilesPerPage = size*size;
  const uint tileCount    = pageListSize*tilesPerPage;
  for(uint i=lane; i<tileCount; i+=NumThreads) {
    const uint  page = i/tilesPerPage;
    const uint  tile = i%tilesPerPage;
    const ivec2 rc   = findRowCol(page, size);
    // findRowCol reports failure with a negative coordinate; later pages cannot fit either
    if(rc.x<0)
      break;

    const ivec3 at = unpackVsmPageInfo(pageList[page]);
    const ivec2 tx = ivec2(tile%size, tile/size);
    // only the first tile of a page carries the size, the rest store 0
    storePage(rc+tx, at, tx, tile==0 ? size : 0);
    // raise the fill mark of the touched row; findRowCol keeps reading the `ladder` snapshot
    atomicMax(ladderWr[rc.y + tx.y], rc.x + tx.x+1);
  }
  barrier();
}

// Entry point: clears the debug image, the row fill marks and the head of the page list,
// gathers requested pages with listPages(), then lays them out from the largest
// handled size (4) down to the smallest (1).
void main() {
  const ivec3 at   = ivec3(gl_GlobalInvocationID);
  const uint  lane = gl_LocalInvocationIndex;

  imageStore(dbg, at.xy, uvec4(0));
  memoryBarrierImage();
  barrier();

  if(lane < ladderWr.length())
    ladderWr[lane] = 0;
  if(lane < vsm.pageList.length())
    vsm.pageList[lane] = 0;
  memoryBarrierBuffer();
  barrier();

  listPages();

  // Each invocation keeps at most one gathered page in registers; layoutPages() reuses
  // the shared list, so the value has to be captured before the first call.
  uint frag = 0;
  if(lane < pageListSize)
    frag = pageList[lane];
  const uint size = unpackVsmPageSize(frag).x;
  barrier();

  // the pass for size 8 is currently disabled
  //layoutPages(frag, size, 8);
  layoutPages(frag, size, 4);
  layoutPages(frag, size, 2);
  layoutPages(frag, size, 1);
}
Loading

0 comments on commit bcefbc4

Please sign in to comment.