Skip to content

Commit

Permalink
vsm in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
Try committed Oct 6, 2024
1 parent 53338f8 commit 94e4ed0
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 84 deletions.
73 changes: 38 additions & 35 deletions shader/virtual_shadow/vsm_alloc_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,24 @@ shared uint bits[(NumThreads+31)/32];
shared uint ladder [VSM_PAGE_TBL_SIZE];
shared uint ladderWr[VSM_PAGE_TBL_SIZE];

uint loadPageSize(ivec3 at) {
ivec2 loadPageSize(ivec3 at) {
uint a = imageLoad(pageTbl, at).r;
return a >> 1;
a = a >> 1;
return ivec2(a, a >> 4) & 0xF;
}

void storePage(ivec2 pId, ivec3 at, ivec2 tile, uint size) {
void storePage(ivec2 pId, ivec3 at, ivec2 tile, ivec2 size) {
const uint pageId = pId.x + pId.y*VSM_PAGE_PER_ROW;

vsm.pageList[pageId] = packVsmPageInfo(at.xyz, ivec2(size));
vsm.pageList[pageId] = packVsmPageInfo(at, size);
atomicMax(vsm.header.pageCount, pageId+1);

at += ivec3(tile,0);
const uint prime = imageLoad(pageTbl, at+ivec3(tile,0)).r & 0x1;
const uint prime = imageLoad(pageTbl, at).r & 0x1;
imageStore(pageTbl, at, uvec4((pageId << 16u) | prime));

imageStore(dbg, pId, uvec4(size));
imageStore(dbg, pId, uvec4(size.y));
//imageStore(dbg, pId, uvec4(1));
}

uint bitCountShared(uint till) {
Expand All @@ -59,44 +61,44 @@ void listPages() {
pageListSize = 0;

for(int i=0; i<1111 && i<size.z; ++i) {
const uint frag = loadPageSize(ivec3(at.xy, i)).r;
const ivec2 sz = loadPageSize(ivec3(at.xy, i));
if(lane < bits.length())
bits[lane] = 0;
barrier();

if(frag>0)
if(sz.x>0)
atomicOr(bits[lane/32], 1u<<(lane%32));
barrier();

if(frag>0) {
uint id = pageListSize + bitCountShared(lane);
pageList[id] = packVsmPageInfo(ivec3(at.xy, i), ivec2(frag));
if(sz.x>0) {
uint id = pageListSize + bitCountShared(lane);
pageList[id] = packVsmPageInfo(ivec3(at.xy, i), sz);
}
barrier();

if(frag>0)
if(sz.x>0)
atomicAdd(pageListSize, 1);
barrier();
}
}

ivec2 findRowCol(in uint page, const uint size) {
//const uint perRow = 32/size;
//const uint row = (page/perRow)*size;
//const uint col = (page%perRow)*size;
//return ivec2(col,row);
ivec2 findRowCol(in uint page, const uvec2 size) {
// const uint perRow = 32/size.x;
// const uint row = (page/perRow)*size.y;
// const uint col = (page%perRow)*size.x;
// return ivec2(col,row);

for(uint i=0; i<32; i+=size) {
uint space = (32-ladder[i])/size;
for(uint i=0; i<32; i+=size.y) {
uint space = (32-ladder[i])/size.x;
if(page<space)
return ivec2(ladder[i]+page*size, i);
return ivec2(ladder[i] + page*size.x, i);
page -= space;
}
// error: no row has enough free space left for this page
return ivec2(-1);
}

void layoutPages(uint pageData, uint pageSz, uint size) {
void layoutPages(uint pageData, uvec2 pageSz, uvec2 size) {
const uint lane = gl_LocalInvocationIndex;

pageListSize = 0;
Expand All @@ -117,20 +119,20 @@ void layoutPages(uint pageData, uint pageSz, uint size) {
}
barrier();

const uint perRow = VSM_PAGE_TBL_SIZE/size;
const uint maxPix = pageListSize*(size*size);
// const uint maxPix = 2*(size*size);
const uint perRow = VSM_PAGE_TBL_SIZE/size.x;
const uint maxPix = pageListSize*(size.x*size.y);
// const uint maxPix = 1*(size);
for(uint i=lane; i<maxPix; i+=NumThreads) {
const uint page = i/(size*size);
const uint tile = i%(size*size);
const uint page = i / (size.x*size.y);
const uint tile = i % (size.x*size.y);
const ivec2 rc = findRowCol(page, size);
if(rc.x<0)
break;

const ivec3 at = unpackVsmPageInfo(pageList[page]);
const ivec2 tx = ivec2(tile%size, tile/size);
storePage(rc+tx, at, tx, tile==0 ? size : 0);
atomicMax(ladderWr[rc.y + tx.y], rc.x + tx.x+1);
const ivec2 tx = ivec2(tile/size.y, tile%size.y);
storePage(rc+tx, at, tx, tile==0 ? ivec2(size) : ivec2(0));
atomicMax(ladderWr[rc.y+tx.y], rc.x + tx.x + 1);
}
barrier();
}
Expand All @@ -153,12 +155,13 @@ void main() {

listPages();

const uint frag = lane<pageListSize ? pageList[lane] : 0;
const uint size = unpackVsmPageSize(frag).y;
const uint frag = lane<pageListSize ? pageList[lane] : 0;
const uvec2 size = unpackVsmPageSize(frag);
barrier();

//layoutPages(frag, size, 8);
layoutPages(frag, size, 4);
layoutPages(frag, size, 2);
layoutPages(frag, size, 1);
layoutPages(frag, size, uvec2(4));
layoutPages(frag, size, uvec2(1, 4));
layoutPages(frag, size, uvec2(2));
layoutPages(frag, size, uvec2(1, 2));
layoutPages(frag, size, uvec2(1));
}
65 changes: 32 additions & 33 deletions shader/virtual_shadow/vsm_clump_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ shared uint minX [gl_WorkGroupSize.y/4];
shared uint sWidth[gl_WorkGroupSize.y/4];
shared uint data[VSM_PAGE_TBL_SIZE+3][VSM_PAGE_TBL_SIZE+3];

uint packPage(uint samples, uint size, uint prime) {
return (samples & 0xFF) | (size << 8) | (prime << 16);
uint packPage(uint samples, uvec2 size, uint prime) {
return (samples & 0xFF) | (size.x << 8) | (size.y << 12) | (prime << 16);
}

uint packPage(uint samples, uint size) {
return (samples & 0xFF) | (size << 8);
uint packPage(uint samples, uvec2 size) {
return (samples & 0xFF) | (size.x << 8) | (size.y << 12);
}

uint unpackSize(uint a){
return (a >> 8) & 0xFF;
uvec2 unpackSize(uint a){
return uvec2((a >> 8) & 0xF, (a >> 12) & 0xF);
}

uint unpackSamples(uint a){
Expand All @@ -49,7 +49,7 @@ void poolPageTable() {
if(all(lessThan(at, size))) {
const uint frag = imageLoad(pageTbl, at).r;
//data[at.x][at.y] = frag;
data[at.x][at.y] = packPage(frag, frag, frag);
data[at.x][at.y] = packPage(frag, uvec2(frag), frag);
} else {
data[at.x][at.y] = 0;
}
Expand All @@ -66,11 +66,10 @@ void storePageTable() {
const ivec3 id = ivec3(gl_LocalInvocationID);

if(all(lessThan(at, size))) {
uint d = data[at.x][at.y];
uint p = unpackPrimeBit(d);
//uint s = p>0 ? unpackSize(d) : 0;
uint s = unpackSize(d);
imageStore(pageTbl, at, uvec4((s<<1) | p) );
uint d = data[at.x][at.y];
uint p = unpackPrimeBit(d);
uvec2 s = uvec2(unpackSize(d));
imageStore(pageTbl, at, uvec4(p | (s.x << 1) | (s.y << 5)));
}
}

Expand All @@ -81,10 +80,10 @@ void main() {
const uint lane = gl_LocalInvocationIndex;

poolPageTable();
// if(true) {
// storePageTable();
// return;
// }
#if 0
storePageTable();
return;
#endif

if(lane < minX.length()) {
minX [lane] = 0xFFFF;
Expand Down Expand Up @@ -116,23 +115,23 @@ void main() {
barrier();

const bool valid = (0<=tileCoord.x && tileCoord.x<sWidth[stripe] && 0<=tileCoord.y);
const bool quad = (valid && tileCoord.x%2==0 && tileCoord.y%2==0); // && at.x<size.x-1 && at.y<size.y-1);
const bool bQuad = (valid && tileCoord.x%4==0 && tileCoord.y%4==0); // && at.x<size.x-2 && at.y<size.y-2);
const bool quad = (valid && tileCoord.x%2==0 && tileCoord.y%2==0);
const bool bQuad = (valid && tileCoord.x%4==0 && tileCoord.y%4==0);
if(true && quad) {
uint a = unpackPrimeBit(data[at.x+0][at.y+0]);
uint b = unpackPrimeBit(data[at.x+1][at.y+0]);
uint c = unpackPrimeBit(data[at.x+1][at.y+1]);
uint d = unpackPrimeBit(data[at.x+0][at.y+1]);
data[at.x][at.y] = packPage(a+b+c+d, 2);
data[at.x][at.y] = packPage(a+b+c+d, uvec2(2));
}
barrier();

if(!quad)
data[at.x][at.y] = packPage(0,0);
data[at.x][at.y] = packPage(0, uvec2(0));
barrier();

if(true && bQuad) {
const int mergeQ = 5; //(at.z>4 ? 2 : 3)*2;
const int mergeQ = 5;
uint a = unpackSamples(data[at.x+0][at.y+0]);
uint b = unpackSamples(data[at.x+2][at.y+0]);
uint c = unpackSamples(data[at.x+2][at.y+2]);
Expand All @@ -141,25 +140,25 @@ void main() {

bool denie = (a==0 && b==0) || (c==0 && d==0); // horizontal-only patterns
if(!denie && s >= mergeQ) {
data[at.x+0][at.y+0] = packPage(s, 4);
data[at.x+2][at.y+0] = packPage(0, 0);
data[at.x+0][at.y+2] = packPage(0, 0);
data[at.x+2][at.y+2] = packPage(0, 0);
data[at.x+0][at.y+0] = packPage(s, uvec2(4));
data[at.x+2][at.y+0] = packPage(0, uvec2(0));
data[at.x+0][at.y+2] = packPage(0, uvec2(0));
data[at.x+2][at.y+2] = packPage(0, uvec2(0));
}
}
barrier();

// re-pack every entry with its prime flag (samples and size unchanged)
{
uint d = unpackSamples(data[at.x][at.y]);
uint s = unpackSize (data[at.x][at.y]);
data[at.x][at.y] = packPage(d ,s, prime ? 1 : 0);
uint d = unpackSamples(data[at.x][at.y]);
uvec2 s = unpackSize (data[at.x][at.y]);
data[at.x][at.y] = packPage(d, s, prime ? 1 : 0);
}
barrier();

// demote unfilled quads to 1x1 pages
if(true && quad) {
uint q = unpackSize (data[at.x][at.y]);
uint q = unpackSize (data[at.x][at.y]).x;
uint a = unpackPrimeBit(data[at.x+0][at.y+0]);
uint b = unpackPrimeBit(data[at.x+1][at.y+0]);
uint c = unpackPrimeBit(data[at.x+1][at.y+1]);
Expand All @@ -168,10 +167,10 @@ void main() {

bool denie = (a==0 && b==0) || (c==0 && d==0); // horizontal-only patterns
if(q==2 && (denie || s<2)) {
data[at.x+0][at.y+0] = packPage(1, a, a);
data[at.x+1][at.y+0] = packPage(1, b, b);
data[at.x+1][at.y+1] = packPage(1, c, c);
data[at.x+0][at.y+1] = packPage(1, d, d);
data[at.x+0][at.y+0] = packPage(1, uvec2(a), a);
data[at.x+1][at.y+0] = packPage(1, uvec2(b), b);
data[at.x+1][at.y+1] = packPage(1, uvec2(c), c);
data[at.x+0][at.y+1] = packPage(1, uvec2(d), d);
}
}

Expand Down
6 changes: 4 additions & 2 deletions shader/virtual_shadow/vsm_cluster_task.comp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ layout(binding = 2, std430) readonly buffer Mem { uint instanceMem[]; };
layout(binding = 4, std430) buffer IndirectBuf { IndirectCmd cmd[]; };
layout(binding = 5, std430) readonly buffer Cbo { Cluster clusters[]; };

layout(binding = 6, r32ui) uniform uimage3D pageTbl;
layout(binding = 6, r32ui) uniform uimage3D pageTblDepth;
layout(binding = 7, std430) buffer Pages { VsmHeader header; uint pageList[]; } vsm;

layout(push_constant, std430) uniform UboPush {
Expand Down Expand Up @@ -248,7 +248,7 @@ bool pageHiZTest(vec4 aabb, float depthMax, const ivec3 at, const ivec2 sz) {

for(int x=iaabb.x; x<iaabb.z; ++x)
for(int y=iaabb.y; y<iaabb.w; ++y) {
const uint p = imageLoad(pageTbl, ivec3(x,y,at.z)).x;
const uint p = imageLoad(pageTblDepth, ivec3(x,y,at.z)).x;
const float f = uintBitsToFloat(p);

// if(p!=0xFFFFFFFF)
Expand Down Expand Up @@ -283,6 +283,8 @@ void runCluster(const uint clusterId) {

const ivec3 page = unpackVsmPageInfo(data);
const ivec2 sz = unpackVsmPageSize(data);
if(page.z>4)
;//continue;
if(!pageBoundsTest(aabb, page, sz))
continue;
if(!pageHiZTest(aabb, depthMax, page, sz))
Expand Down
2 changes: 1 addition & 1 deletion shader/virtual_shadow/vsm_mark_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#extension GL_GOOGLE_include_directive : enable
#extension GL_ARB_separate_shader_objects : enable
#extension GL_EXT_samplerless_texture_functions : enable
#extension GL_KHR_memory_scope_semantics : enable
// #extension GL_KHR_memory_scope_semantics : enable

#define LWC 1

Expand Down
27 changes: 14 additions & 13 deletions shader/virtual_shadow/vsm_merge_pages.comp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void provokingPage(uint lane) {
const uint row = lane/VSM_PAGE_TBL_SIZE;
const uint col = lane%VSM_PAGE_TBL_SIZE;
const uint p = vsm.pageList[lane];
const uint sz = unpackVsmPageSize(p).x;
const uint sz = unpackVsmPageSize(p).y;
const ivec3 at = unpackVsmPageInfo(p);
if(sz<=0)
return;
Expand All @@ -39,7 +39,7 @@ void provokingPage(uint lane) {
//return;

const uint p1 = vsm.pageList[lane-sz.x];
const uint sz1 = unpackVsmPageSize(p1).x;
const uint sz1 = unpackVsmPageSize(p1).y;
const ivec3 at1 = unpackVsmPageInfo(p1);

if(sz==sz1 && at1+ivec3(sz,0,0)==at)
Expand All @@ -51,6 +51,7 @@ void provokingPage(uint lane) {

void main() {
const uint lane = gl_LocalInvocationIndex;
// return;

pageListSize = 0;
barrier();
Expand All @@ -60,7 +61,7 @@ void main() {
provokingPage(lane);
barrier();

//vsm.header.counterM = pageListSize;
atomicExchange(vsm.header.counterM, pageListSize);
if(lane < pageListSize) {
//uint id = pageList[lane];
//vsm.pageList[id] = 0;
Expand All @@ -75,27 +76,27 @@ void main() {
const uint p0 = vsm.pageList[pId];
const uint off = row*VSM_PAGE_TBL_SIZE;

uint sz = unpackVsmPageSize(p0).x;
uvec2 sz = unpackVsmPageSize(p0);
ivec3 at = unpackVsmPageInfo(p0);
uint size = sz;
for(uint i=col+sz; i<VSM_PAGE_TBL_SIZE; i+=sz) {
uint size = sz.x;
for(uint i=col+sz.x; i<VSM_PAGE_TBL_SIZE; i+=sz.x) {
const uint pId1 = off+i;
const uint p1 = vsm.pageList[pId1];
const uint sz1 = unpackVsmPageSize(p1).x;
const uvec2 sz1 = unpackVsmPageSize(p1);
const ivec3 at1 = unpackVsmPageInfo(p1);
if(sz!=sz1 || at+uvec3(size,0,0)!=at1)
if(sz.y!=sz1.y || at+uvec3(size,0,0)!=at1)
break;
if(size+sz>=16) {
if(size+sz.x>=16) {
// merged width would reach 16: flush the current stripe and restart from this page
vsm.pageList[pId] = packVsmPageInfo(at, ivec2(size, sz));
vsm.pageList[pId] = packVsmPageInfo(at, ivec2(size, sz.y));
pId = pId1;
sz = sz1;
at = at1;
size = sz1;
size = sz1.x;
continue;
}
vsm.pageList[pId1] = 0;
size += sz;
size += sz.x;
}
vsm.pageList[pId] = packVsmPageInfo(at, ivec2(size, sz));
vsm.pageList[pId] = packVsmPageInfo(at, ivec2(size, sz.y));
}

0 comments on commit 94e4ed0

Please sign in to comment.