Skip to content

Commit b146204

Browse files
committed
vsm: hiz in progress
1 parent 093137a commit b146204

File tree

3 files changed

+97
-19
lines changed

3 files changed

+97
-19
lines changed

game/gothic.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Gothic::Gothic() {
8888
opts.doBindless = CommandLine::inst().isBindless();
8989
}
9090

91-
if(hasBindless() && gpu.compute.maxInvocations>=1024) {
91+
if(hasBindless() && gpu.compute.maxInvocations>=1024 && gpu.memoryModel) {
9292
//TODO: productize
9393
opts.doVirtualShadow = CommandLine::inst().isVirtualShadow();
9494
}

shader/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,9 @@ add_shader(cmaa2_deferred_color_apply_2x2.vert antialiasing/cmaa2/deferred_c
298298
add_shader(cmaa2_deferred_color_apply_2x2.frag antialiasing/cmaa2/deferred_color_apply_2x2.frag)
299299

300300
# virtual shadows
301-
add_shader(direct_light_vsm.frag virtual_shadow/vsm_mark_pages.comp -DCOMPOSE -DLWC -S frag)
302-
add_shader(vsm_dbg.frag virtual_shadow/vsm_mark_pages.comp -DDEBUG -DLWC -S frag)
303-
add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp -DMARK_PAGES -DLWC)
301+
add_shader(direct_light_vsm.frag virtual_shadow/vsm_mark_pages.comp -DCOMPOSE -S frag)
302+
add_shader(vsm_dbg.frag virtual_shadow/vsm_mark_pages.comp -DDEBUG -S frag)
303+
add_shader(vsm_mark_pages.comp virtual_shadow/vsm_mark_pages.comp -DMARK_PAGES)
304304
add_shader(vsm_clear.comp virtual_shadow/vsm_clear.comp)
305305
add_shader(vsm_clear_pages.comp virtual_shadow/vsm_clear_pages.comp)
306306
add_shader(vsm_clump_pages.comp virtual_shadow/vsm_clump_pages.comp)

shader/virtual_shadow/vsm_mark_pages.comp

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,22 @@
33
#extension GL_GOOGLE_include_directive : enable
44
#extension GL_ARB_separate_shader_objects : enable
55
#extension GL_EXT_samplerless_texture_functions : enable
6+
#extension GL_KHR_memory_scope_semantics : enable
7+
8+
// #define MARK_PAGES 1
9+
// #define HIZ_PAGES 1
10+
// #define COMPOSE 1
11+
// #define DEBUG 1
12+
#define LWC 1
613

714
#include "virtual_shadow/vsm_common.glsl"
815
#include "lighting/tonemapping.glsl"
916
#include "scene.glsl"
1017
#include "common.glsl"
1118

1219
#if defined(GL_COMPUTE_SHADER)
13-
layout(local_size_x = 8, local_size_y = 8) in;
20+
//layout(local_size_x = 8, local_size_y = 8) in;
21+
layout(local_size_x = 16, local_size_y = 8) in;
1422
#endif
1523

1624
const vec3 debugColors[] = {
@@ -35,17 +43,17 @@ layout(binding = 0, std140) uniform UboScene {
3543
layout(binding = 1) uniform texture2D gbufDiffuse;
3644
layout(binding = 2) uniform utexture2D gbufNormal;
3745
layout(binding = 3) uniform texture2D depth;
38-
#if defined(MARK_PAGES)
39-
layout(binding = 4, r32ui) uniform uimage3D pageTbl;
46+
#if defined(MARK_PAGES) || defined(HIZ_PAGES)
47+
layout(binding = 4, r32ui) uniform uimage3D pageTbl;
4048
#elif defined(COMPOSE) || defined(DEBUG)
41-
layout(binding = 4) uniform utexture3D pageTbl;
49+
layout(binding = 4) uniform utexture3D pageTbl;
4250
# if defined(VSM_ATOMIC)
43-
layout(binding = 5) uniform utexture2D pageData;
51+
layout(binding = 5) uniform utexture2D pageData;
4452
# else
45-
layout(binding = 5) uniform texture2D pageData;
53+
layout(binding = 5) uniform texture2D pageData;
4654
# endif
4755
#endif
48-
layout(binding = 6, r32ui) uniform uimage3D pageTblDepth;
56+
layout(binding = 6, r32ui) uniform uimage3D pageTblDepth;
4957

5058
#if defined(COMPOSE) || defined(DEBUG)
5159
layout(binding = 7, std430) readonly buffer Pages { VsmHeader header; uint pageList[]; } vsm;
@@ -62,6 +70,58 @@ ivec2 pixelPos = ivec2(gl_GlobalInvocationID.xy);
6270
ivec2 pixelPos = ivec2(gl_FragCoord.xy);
6371
#endif
6472

73+
#if defined(GL_COMPUTE_SHADER)
74+
shared uint pageHiZ[2];
75+
76+
// Packs three integer components and a 16-bit value into one 32-bit word.
// Bit layout, least significant first:
//   bits  0..4   a.x (5 bits)
//   bits  5..10  a.y (6 bits)
//   bits 11..15  a.z (5 bits)
//   bits 16..31  low 16 bits of b
// Every input is masked to its field width, so higher bits are discarded.
uint pack565_16(ivec3 a, uint b) {
  const uint coord = uint(a.x & 0x1F) | (uint(a.y & 0x3F) << 5) | (uint(a.z & 0x1F) << 11);
  const uint upper = (b & 0xFFFFu) << 16;
  return coord | upper;
}
84+
85+
// Splits a 32-bit word into its four fields:
//   .x = bits 0..4, .y = bits 5..10, .z = bits 11..15, .w = bits 16..31.
// This is the field layout produced by pack565_16.
uvec4 unpack565_16(uint p) {
  return uvec4( p        & 0x1Fu,
               (p >>  5) & 0x3Fu,
               (p >> 11) & 0x1Fu,
                p >> 16);
}
93+
94+
// Requests shadow page `at`, passing through the workgroup-shared `pageHiZ`
// cache first so that repeated requests for the same page are dropped before
// the page table is touched.
//
//   at - page coordinate; the cache key is packUint4x8(uvec4(at,0))
//   z  - depth of the requesting sample; requests with z>=1 are ignored
//
// Each atomicMin on a slot has one of three outcomes:
//   * the previous value was 0xFFFFFFFF (empty marker): the key is now cached;
//   * the previous value equals the key: the page is already cached;
//   * otherwise the slot keeps the smaller key, and the larger one is carried
//     on to the next slot.
// A key still unplaced after the last slot is written to the page table here.
void insertHiZ(ivec3 at, float z) {
  if(z>=1)
    return;

  uint cur = packUint4x8(uvec4(at,0));
  for(uint i=0; i<pageHiZ.length(); ++i) {
    const uint v = atomicMin(pageHiZ[i], cur);
    if(v==0xFFFFFFFF || v==cur)
      return;
    // The slot now holds min(v,cur); the larger key moves on.
    cur = max(v, cur);
  }

  // `cur` is the key that did not fit in the cache. It may belong to a page
  // cached by another invocation and displaced by this one, so it has to be
  // decoded; writing `at` here would lose the displaced page.
  // Assumes unpackUint4x8 inverts packUint4x8, as flushHiZ already relies on.
  const ivec3 spill = ivec3(unpackUint4x8(cur).xyz);
  imageAtomicStore(pageTbl, spill, 1u, gl_ScopeDevice, gl_StorageSemanticsNone, gl_SemanticsRelaxed);
  // NOTE(review): 1234 looks like a placeholder depth, kept unchanged; `z` is not stored yet.
  imageAtomicMin(pageTblDepth, spill, 1234);
}
111+
112+
// Writes the contents of the workgroup-shared `pageHiZ` cache to the page table.
// Invocation N of the workgroup handles slot N; invocations beyond the cache
// length and slots still holding the 0xFFFFFFFF empty marker do nothing.
void flushHiZ() {
  const uint slot = gl_LocalInvocationIndex;
  if(slot < pageHiZ.length()) {
    const uint key = pageHiZ[slot];
    if(key != 0xFFFFFFFF) {
      // Cached keys are decoded with unpackUint4x8; only xyz is used as the page coordinate.
      const ivec3 page = ivec3(unpackUint4x8(key).xyz);
      imageAtomicStore(pageTbl, page, 1u, gl_ScopeDevice, gl_StorageSemanticsNone, gl_SemanticsRelaxed);
      // NOTE(review): 1234 looks like a placeholder depth value - confirm.
      imageAtomicMin(pageTblDepth, page, 1234);
    }
  }
}
123+
#endif
124+
65125
float drawInt(in vec2 where, in int n) {
66126
const float RESOLUTION = 0.5;
67127
int i=int((where*=RESOLUTION).y);
@@ -90,7 +150,7 @@ uint hash(uvec3 gridPos) {
90150
return (gridPos.x * 18397) + (gridPos.y * 20483) + (gridPos.z * 29303);
91151
}
92152

93-
vec4 worldPosLwc(ivec2 frag, float depth) {
153+
vec4 worldPos(ivec2 frag, float depth) {
94154
const vec2 fragCoord = ((frag.xy+0.5)*scene.screenResInv)*2.0 - vec2(1.0);
95155
const vec4 scr = vec4(fragCoord.x, fragCoord.y, depth, 1.0);
96156
#if defined(LWC)
@@ -101,7 +161,7 @@ vec4 worldPosLwc(ivec2 frag, float depth) {
101161
}
102162

103163
vec3 shadowPos(float z, ivec2 offset) {
104-
const vec4 wpos = worldPosLwc(pixelPos+offset, z);
164+
const vec4 wpos = worldPos(pixelPos+offset, z);
105165
#if defined(LWC)
106166
vec4 shPos = scene.viewVirtualShadowLwc * wpos;
107167
#else
@@ -140,9 +200,12 @@ float shadowTest(vec2 page, int mip, in float refZ, bool isATest) {
140200

141201
void markPage(ivec2 pageI, int mip, float depth) {
142202
#if defined(MARK_PAGES)
143-
//imageAtomicAdd(pageTbl, ivec3(pageI, mip), 1u);
144-
imageStore(pageTbl, ivec3(pageI, mip), uvec4(1u));
145-
//imageAtomicMin(pageTblDepth, ivec3(pageI, mip), floatBitsToUint(depth));
203+
//imageAtomicExchange(pageTbl, ivec3(pageI, mip), 1u);
204+
//imageStore(pageTbl, ivec3(pageI, mip), uvec4(1u));
205+
206+
//NOTE: require vulkan memory-model extension or vulkan 1.2
207+
//imageAtomicStore(pageTbl, ivec3(pageI, mip), 1u, gl_ScopeDevice, gl_StorageSemanticsNone, gl_SemanticsRelaxed);
208+
insertHiZ(ivec3(pageI, mip), depth);
146209
#endif
147210
}
148211

@@ -180,7 +243,7 @@ vec3 directLight(vec2 page, int mip, float refZ) {
180243
}
181244
#endif
182245

183-
void main() {
246+
void realMain() {
184247
#if defined(DEBUG)
185248
outColor = vec4(0,0,0, 1);
186249
#endif
@@ -202,7 +265,7 @@ void main() {
202265
if(z==1)
203266
return;
204267

205-
#if defined(MARK_PAGES)
268+
#if defined(MARK_PAGES) || defined(HIZ_PAGES)
206269
{
207270
const vec3 normal = normalFetch(gbufNormal, pixelPos);
208271
if(dot(scene.sunDir,normal)<=0)
@@ -226,7 +289,7 @@ void main() {
226289
return;
227290

228291
ivec2 pageI = ivec2((page*0.5+0.5)*VSM_PAGE_TBL_SIZE);
229-
#if defined(MARK_PAGES)
292+
#if defined(MARK_PAGES) || defined(HIZ_PAGES)
230293
markPage(pageI, mip, shPos0.z);
231294
#elif defined(COMPOSE)
232295
outColor = vec4(directLight(page, mip, shPos0.z), 1);
@@ -246,3 +309,18 @@ void main() {
246309
outColor = vec4(color.xyz,1);
247310
#endif
248311
}
312+
313+
// Shader entry point. In compute builds it wraps realMain() with the setup and
// flush of the workgroup-shared `pageHiZ` cache; in other stages it only calls
// realMain().
void main() {
#if defined(GL_COMPUTE_SHADER)
  // The first pageHiZ.length() invocations each reset one slot to the empty marker.
  if(gl_LocalInvocationIndex < pageHiZ.length())
    pageHiZ[gl_LocalInvocationIndex] = 0xFFFFFFFF;
  barrier();
#endif

  realMain();

#if defined(GL_COMPUTE_SHADER)
  // An early return inside realMain() only leaves that function, so every
  // invocation still reaches this barrier before the cache is flushed.
  barrier();
  flushHiZ();
#endif
}

0 commit comments

Comments
 (0)