Skip to content

Commit

Permalink
vsm: page hiz culling
Browse files Browse the repository at this point in the history
  • Loading branch information
Try committed Oct 1, 2024
1 parent f2ad5f2 commit c4e906d
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 27 deletions.
2 changes: 1 addition & 1 deletion game/gothic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ Gothic::Gothic() {
opts.doBindless = CommandLine::inst().isBindless();
}

if(hasBindless() && gpu.compute.maxInvocations>=1024 && gpu.memoryModel) {
if(hasBindless() && gpu.compute.maxInvocations>=1024) {
//TODO: productize
opts.doVirtualShadow = CommandLine::inst().isVirtualShadow();
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Tempest
Submodule Tempest updated 1 files
+1 −1 appveyor.yml
1 change: 1 addition & 0 deletions shader/virtual_shadow/vsm_clear.comp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ void main() {
return;

imageStore(pageTbl, at, uvec4(0));
// imageStore(pageTblDepth, at, uvec4(floatBitsToUint(1)));
imageStore(pageTblDepth, at, uvec4(0xFFFFFFFF));

if(at==ivec3(0)) {
Expand Down
4 changes: 4 additions & 0 deletions shader/virtual_shadow/vsm_clump_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ void main() {
const uint lane = gl_LocalInvocationIndex;

poolPageTable();
// if(true) {
// storePageTable();
// return;
// }

if(lane < minX.length()) {
minX [lane] = 0xFFFF;
Expand Down
46 changes: 34 additions & 12 deletions shader/virtual_shadow/vsm_cluster_task.comp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ shared uint vsmMipMask;
shared uint anyPageGlob;
#endif

bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMin) {
bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMax) {
const vec3 c = (scene.viewProject * vec4(sphere.xyz, 1)).xyz;
const float R = sphere.w;

Expand All @@ -65,7 +65,7 @@ bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMin) {
float Rx = R * smWidthInv;
float Ry = R * smWidthInv;
aabb = vec4(c.xy-vec2(Rx,Ry), c.xy+vec2(Rx,Ry));
depthMin = c.z + Rz;
depthMax = c.z + Rz;
// aabb = c.xyxy;
return true;
}
Expand All @@ -84,11 +84,11 @@ bool frustrumTest(const vec4 sphere) {
}


bool projectCluster(const Cluster cluster, out vec4 aabb, out float depthMin) {
bool projectCluster(const Cluster cluster, out vec4 aabb, out float depthMax) {
if(cluster.instanceId==0xFFFFFFFF)
return projectSphere(cluster.sphere, aabb, depthMin);
return projectSphere(cluster.sphere, aabb, depthMin);
// return projectInstance(cluster, aabb, depthMin);
return projectSphere(cluster.sphere, aabb, depthMax);
return projectSphere(cluster.sphere, aabb, depthMax);
// return projectInstance(cluster, aabb, depthMax);
}

bool emitCluster(const Cluster cluster, uint page) {
Expand Down Expand Up @@ -236,6 +236,30 @@ void pullPages(uint mip) {
barrier();
}

// Depth test of a projected bounding box against the values stored in pageTbl.
//
//   aabb     - bounding rectangle as (min.xy, max.xy); it is divided by 2^at.z and
//              remapped with *0.5+0.5 before use
//              (NOTE(review): presumably NDC [-1..1] of mip 0 - confirm with caller)
//   depthMax - depth value the stored entries are compared against
//   at       - xy: first page-table texel of the tested region, z: mip/layer index
//   sz       - extent of the tested region, in page-table texels
//
// Returns true as soon as one texel in the overlap of the rectangle and the
// region [at.xy, at.xy+sz) holds a value that, reinterpreted as float, is
// less than depthMax; returns false when no such texel exists (including
// the case of an empty overlap).
bool pageHiZTest(vec4 aabb, float depthMax, const ivec3 at, const ivec2 sz) {
  // Bring the rectangle into texel coordinates of mip 'at.z'.
  const float mipScale = float(1u << at.z);
  aabb = aabb / mipScale;
  aabb = aabb*0.5 + 0.5;
  aabb = aabb*VSM_PAGE_TBL_SIZE;

  // Integer texel range [lo, hi): rounded outwards, one extra texel on the
  // upper side, then clipped by the tested region and by the table size.
  const ivec2 lo = max(ivec2(floor(aabb.xy)), at.xy);
  const ivec2 hi = min(min(ivec2(ceil(aabb.zw)) + 1, at.xy + sz), ivec2(VSM_PAGE_TBL_SIZE));

  for(int px = lo.x; px < hi.x; ++px) {
    for(int py = lo.y; py < hi.y; ++py) {
      const uint raw = imageLoad(pageTbl, ivec3(px, py, at.z)).x;
      // Disabled alternative: accept any entry whose raw value != 0xFFFFFFFF.
      if(uintBitsToFloat(raw) < depthMax)
        return true;
    }
  }

  return false;
}

void runCluster(const uint clusterId) {
const Cluster cluster = clusters[clusterId];
if(cluster.sphere.w<=0.f)
Expand All @@ -249,8 +273,8 @@ void runCluster(const uint clusterId) {
}

vec4 aabb = vec4(0);
float depthMin = 1;
if(!projectCluster(cluster, aabb, depthMin))
float depthMax = 1;
if(!projectCluster(cluster, aabb, depthMax))
return;

for(uint ix=0; ix<pageListSize; ++ix) {
Expand All @@ -261,10 +285,8 @@ void runCluster(const uint clusterId) {
const ivec2 sz = unpackVsmPageSize(data);
if(!pageBoundsTest(aabb, page, sz))
continue;
const uint p = imageLoad(pageTbl, ivec3(page)).x;
const float f = uintBitsToFloat(p);
if(depthMin < f)
;//continue;
if(!pageHiZTest(aabb, depthMax, page, sz))
continue;
if(!emitCluster(cluster, i))
break;
}
Expand Down
34 changes: 34 additions & 0 deletions shader/virtual_shadow/vsm_list_pages.comp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#version 450

#extension GL_GOOGLE_include_directive : enable
#extension GL_ARB_separate_shader_objects : enable
#extension GL_EXT_samplerless_texture_functions : enable

#include "virtual_shadow/vsm_common.glsl"
#include "scene.glsl"
#include "common.glsl"

// Work-group layout: 8x8 invocations (local_size_z is left at its default).
// The 1D variant below is kept for reference only.
//layout(local_size_x = 64) in;
layout(local_size_x = 8, local_size_y = 8) in;

// binding 0: storage buffer with a VsmHeader followed by a runtime-sized list of uints.
//            main() increments header.pageCount and writes into pageList.
layout(binding = 0, std430) buffer Pages { VsmHeader header; uint pageList[]; } vsm;
// binding 1: 3D r32ui image; main() both reads and writes it.
layout(binding = 1, r32ui) uniform uimage3D pageTbl;
// binding 2: read-only 3D r32ui image; not referenced by main() in this file.
layout(binding = 2, r32ui) uniform readonly uimage3D pageTblDepth;
// Disabled: separate write-only destination buffer (would reuse binding 1).
//layout(binding = 1, std430) writeonly buffer Dst { VsmHeader header; uint pageList[]; } dst;

// Total invocations per work-group; not referenced by main() in this file.
const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;

// One invocation per page-table texel: every texel holding a non-zero value
// gets a freshly reserved slot in vsm.pageList, and the texel is overwritten
// with the index of that slot.
void main() {
  const ivec3 texel = ivec3(gl_GlobalInvocationID.xyz);

  // Invocations that fall outside of the page table do nothing.
  const bool inside = all(lessThan(texel, imageSize(pageTbl)));
  if(!inside)
    return;

  // Texels holding zero are left untouched.
  if(imageLoad(pageTbl, texel).x == 0)
    return;

  // Reserve a slot by bumping the shared counter, fill the slot with
  // 0xFFFFFFFF and remember the slot index in the page table.
  const uint slot = atomicAdd(vsm.header.pageCount, 1);
  vsm.pageList[slot] = 0xFFFFFFFF;
  imageStore(pageTbl, texel, uvec4(slot));
}
22 changes: 9 additions & 13 deletions shader/virtual_shadow/vsm_mark_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ uvec4 unpack565_16(uint p) {
return ret;
}

void storeVal(uint v) {
void storeHiZValue(uint v) {
uvec4 dx = unpack565_16(v);
ivec3 at = ivec3(dx.xyz);
uint iz = dx.w;
uint iz = floatBitsToUint(dx.w/float(0xFFFF));

//NOTE: require vulkan memory-model extension or vulkan 1.2
//imageAtomicStore(pageTbl, at, 1u, gl_ScopeDevice, gl_StorageSemanticsNone, gl_SemanticsRelaxed);
Expand All @@ -86,7 +86,7 @@ void setupHiZ() {
pageHiZ[lane] = 0xFFFFFFFF;
}

void insertHiZ(ivec3 at, float z) {
void markPage(ivec3 at, float z) {
if(z<0 || z>=1)
return;
//imageAtomicStore(pageTbl, at, 1u, gl_ScopeDevice, gl_StorageSemanticsNone, gl_SemanticsRelaxed);
Expand All @@ -101,7 +101,7 @@ void insertHiZ(ivec3 at, float z) {
cur = (v > cur) ? v : cur;
}
// imageAtomicAdd(pageTbl, ivec3(0), 1u); //counter
storeVal(cur);
storeHiZValue(cur);
}

void flushHiZ() {
Expand All @@ -111,7 +111,7 @@ void flushHiZ() {
const uint v = pageHiZ[lane];
if(v==0xFFFFFFFF)
return;
storeVal(v);
storeHiZValue(v);
}
#else
shared uint pageHiZ[NumThreads];
Expand All @@ -120,7 +120,7 @@ void setupHiZ() {
pageHiZ[lane] = 0xFFFFFFFF;
}

void insertHiZ(ivec3 at, float z) {
void markPage(ivec3 at, float z) {
if(z<0 || z>=1)
return;

Expand All @@ -136,15 +136,15 @@ void insertHiZ(ivec3 at, float z) {
return; // update same entry

// imageAtomicAdd(pageTbl, ivec3(0), 1u); //counter
storeVal(v);
storeHiZValue(v);
}

void flushHiZ() {
const uint lane = gl_LocalInvocationIndex;
const uint v = pageHiZ[lane];
if(v==0xFFFFFFFF)
return;
storeVal(v);
storeHiZValue(v);
}
#endif

Expand Down Expand Up @@ -180,10 +180,6 @@ int shadowLod(vec2 dx, vec2 dy) {
return max(0, int(minLod + bias + 0.5));
}

void markPage(ivec2 pageI, int mip, float depth) {
insertHiZ(ivec3(pageI, mip), depth);
}

void realMain() {
const ivec2 size = textureSize(depth, 0);
if(any(greaterThanEqual(pixelPos, size)))
Expand Down Expand Up @@ -213,7 +209,7 @@ void realMain() {
return;

ivec2 pageI = ivec2((page*0.5+0.5)*VSM_PAGE_TBL_SIZE);
markPage(pageI, mip, shPos0.z);
markPage(ivec3(pageI, mip), shPos0.z);
}

void main() {
Expand Down

0 comments on commit c4e906d

Please sign in to comment.