Skip to content

Commit

Permalink
Merge branch 'dev/adunn/cleanup_vertex_capture' into 'main'
Browse files Browse the repository at this point in the history
Vertex capture system improvements

See merge request lightspeedrtx/dxvk-remix-nv!657
  • Loading branch information
AlexDunn committed Jan 24, 2024
2 parents 90bb5a0 + e4ef2aa commit 1be52c2
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 60 deletions.
60 changes: 19 additions & 41 deletions src/d3d9/d3d9_rtx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ namespace dxvk {

D3D9Rtx::D3D9Rtx(D3D9DeviceEx* d3d9Device, bool enableDrawCallConversion)
: m_rtStagingData(d3d9Device->GetDXVKDevice(), (VkMemoryPropertyFlagBits) (VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
, m_vertexCaptureData(d3d9Device->GetDXVKDevice(),
(VkMemoryPropertyFlagBits) (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT),
VK_ACCESS_TRANSFER_READ_BIT)
, m_parent(d3d9Device)
, m_enableDrawCallConversion(enableDrawCallConversion)
, m_pGeometryWorkers(enableDrawCallConversion ? std::make_unique<GeometryProcessor>(popcnt_uint8(D3D9Rtx::kAllThreads), "geometry-processing") : nullptr) {
Expand Down Expand Up @@ -98,16 +103,6 @@ namespace dxvk {
return stagingSlice;
}

// Creates a new device-local buffer of `size` bytes for vertex capture output
// and returns a DxvkBufferSlice wrapping it. The buffer is created with
// storage-texel, transfer-source, vertex and index usage, and is declared as
// read by transfers from the vertex, compute and transfer pipeline stages.
DxvkBufferSlice D3D9Rtx::allocVertexCaptureBuffer(const VkDeviceSize size) {
  DxvkBufferCreateInfo createInfo;
  createInfo.size = size;
  createInfo.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT
                   | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
                   | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT
                   | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
  createInfo.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT
                    | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT
                    | VK_PIPELINE_STAGE_TRANSFER_BIT;
  createInfo.access = VK_ACCESS_TRANSFER_READ_BIT;

  // Captured vertex data lives in device-local memory, accounted as an app buffer.
  auto device = m_parent->GetDXVKDevice();
  return DxvkBufferSlice(device->createBuffer(createInfo,
                                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                              DxvkMemoryStats::Category::AppBuffer));
}

void D3D9Rtx::prepareVertexCapture(const int vertexIndexOffset) {
ScopedCpuProfileZone();

Expand Down Expand Up @@ -139,16 +134,8 @@ namespace dxvk {
const uint32_t stride = sizeof(CapturedVertex);
const size_t vertexCaptureDataSize = align(geoData.vertexCount * stride, CACHE_LINE_SIZE);

DxvkBufferSlice slice = allocVertexCaptureBuffer(vertexCaptureDataSize);

// Fill in buffer view info
DxvkBufferViewCreateInfo viewInfo;
viewInfo.format = VK_FORMAT_R32G32B32A32_SFLOAT;
viewInfo.rangeOffset = 0;
viewInfo.rangeLength = vertexCaptureDataSize;

// Create underlying buffer view object
Rc<DxvkBufferView> bufferView = m_parent->GetDXVKDevice()->createBufferView(slice.buffer(), viewInfo);
DxvkBufferSlice slice = m_vertexCaptureData.alloc(CACHE_LINE_SIZE, vertexCaptureDataSize);
slice.buffer()->acquire(DxvkAccess::Write);

geoData.positionBuffer = RasterBuffer(slice, 0, stride, VK_FORMAT_R32G32B32A32_SFLOAT);
assert(geoData.positionBuffer.offset() % 4 == 0);
Expand All @@ -172,34 +159,25 @@ namespace dxvk {

auto constants = m_vsVertexCaptureData->allocSlice();

m_parent->EmitCs([cProjection = m_activeDrawCallState.transformData.viewToProjection,
cView = m_activeDrawCallState.transformData.worldToView,
cWorld = m_activeDrawCallState.transformData.objectToWorld,
cBuffer = bufferView,
cConstantBuffer = m_vsVertexCaptureData,
cConstants = constants,
vertexIndexOffset](DxvkContext* ctx) {
RtxContext* rtxCtx = static_cast<RtxContext*>(ctx);

// Set constants required for vertex shader injection
// NOTE: May be better to move reverse transformation to end of frame, because this won't work if there hasn't been a FF draw this frame to scrape the matrix from...
const Matrix4& ObjectToProjection = m_activeDrawCallState.transformData.viewToProjection * m_activeDrawCallState.transformData.worldToView * m_activeDrawCallState.transformData.objectToWorld;

// Bind the latest projection to world matrix...
// NOTE: May be better to move reverse transformation to end of frame, because this won't work if there hasn't been a FF draw this frame to scrape the matrix from...
const Matrix4& ObjectToProjection = cProjection * cView * cWorld;
// Set constants required for vertex shader injection
D3D9RtxVertexCaptureData& data = *(D3D9RtxVertexCaptureData*) constants.mapPtr;
// Apply an inverse transform to get positions in object space (what renderer expects)
data.projectionToWorld = inverse(ObjectToProjection);
data.normalTransform = m_activeDrawCallState.transformData.objectToWorld;
data.baseVertex = (uint32_t)std::max(0, vertexIndexOffset);

m_parent->EmitCs([cVertexDataSlice = slice,
cConstantBuffer = m_vsVertexCaptureData,
cConstants = constants](DxvkContext* ctx) {
// Bind the new constants to buffer
ctx->invalidateBuffer(cConstantBuffer, cConstants);

D3D9RtxVertexCaptureData& data = *(D3D9RtxVertexCaptureData*) cConstants.mapPtr;
// Apply an inverse transform to get positions in object space (what renderer expects)
data.projectionToWorld = inverse(ObjectToProjection);
data.normalTransform = cWorld;
data.baseVertex = vertexIndexOffset;

// Invalidate rest of the members
// customWorldToProjection is not invalidated as its use is controlled by D3D9SpecConstantId::CustomVertexTransformEnabled being enabled

rtxCtx->bindResourceView(getVertexCaptureBufferSlot(), nullptr, cBuffer);
ctx->bindResourceBuffer(getVertexCaptureBufferSlot(), cVertexDataSlice);
});
}

Expand Down
2 changes: 1 addition & 1 deletion src/d3d9/d3d9_rtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ namespace dxvk {
DrawCallState m_activeDrawCallState;

DxvkStagingDataAlloc m_rtStagingData;
DxvkStagingDataAlloc m_vertexCaptureData;
D3D9DeviceEx* m_parent;

D3DPRESENT_PARAMETERS m_activePresentParams;
Expand Down Expand Up @@ -235,7 +236,6 @@ namespace dxvk {
template<typename T>
DxvkBufferSlice processIndexBuffer(const uint32_t indexCount, const uint32_t startIndex, const DxvkBufferSliceHandle& indexSlice, uint32_t& minIndex, uint32_t& maxIndex);

DxvkBufferSlice allocVertexCaptureBuffer(const VkDeviceSize size);
void prepareVertexCapture(const int vertexIndexOffset);

void processVertices(const VertexContext vertexContext[caps::MaxStreams], int vertexIndexOffset, RasterGeometry& geoData);
Expand Down
4 changes: 2 additions & 2 deletions src/d3d9/d3d9_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ namespace dxvk {
Matrix4 normalTransform;
Matrix4 projectionToWorld;
Matrix4 customWorldToProjection;
int baseVertex = 0;
int padding[3];
uint32_t baseVertex = 0;
std::array<uint32_t, 3> padding = { };
};

enum class D3D9RtxVertexCaptureMembers {
Expand Down
37 changes: 21 additions & 16 deletions src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,14 +540,17 @@ namespace dxvk {
}

// Bind vertex out buffer
m_module.enableCapability(spv::CapabilityStorageImageWriteWithoutFormat);
m_module.enableCapability(spv::CapabilityImageBuffer);

const uint32_t imageTypeId = m_module.defImageType(m_module.defFloatType(32), spv::DimBuffer, 0, 0, 0, 2, spv::ImageFormatRgba32f);

uint32_t arrayType = m_module.defRuntimeArrayTypeUnique(vec4Type);
uint32_t structType = m_module.defStructTypeUnique(1, &arrayType);
uint32_t ptrType = m_module.defPointerType(structType, spv::StorageClassUniform);

const uint32_t resourcePtrType = m_module.defPointerType(imageTypeId, spv::StorageClassUniformConstant);
const uint32_t resourcePtrType = m_module.defPointerType(vec4Type, spv::StorageClassUniform);
const uint32_t varId = m_module.newVar(ptrType, spv::StorageClassUniform);

const uint32_t varId = m_module.newVar(resourcePtrType, spv::StorageClassUniformConstant);
m_module.decorateArrayStride(arrayType, sizeof(float) * 4);
m_module.decorate(structType, spv::DecorationBufferBlock);
m_module.memberDecorateOffset(structType, 0, 0);

m_module.setDebugName(varId, "vertex_capture_out");

Expand All @@ -567,7 +570,7 @@ namespace dxvk {
uav.specId = specConstId;
uav.sampledType = DxsoScalarType::Float32;
uav.sampledTypeId = m_module.defFloatType(32);
uav.imageTypeId = imageTypeId;
uav.imageTypeId = resourcePtrType;
uav.structStride = 0;
uav.structAlign = 0;

Expand All @@ -577,9 +580,8 @@ namespace dxvk {
DxvkResourceSlot resource;
resource.slot = bindingId;
resource.view = VK_IMAGE_VIEW_TYPE_MAX_ENUM;
resource.access = VK_ACCESS_SHADER_READ_BIT;
resource.type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
resource.access |= VK_ACCESS_SHADER_WRITE_BIT;
resource.access = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
resource.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

m_resourceSlots.push_back(resource);
}
Expand Down Expand Up @@ -3629,8 +3631,12 @@ void DxsoCompiler::emitControlFlowGenericLoop(
}
}


// NV-DXVK start: vertex shader data capture implementation
// Emits a store of `dataId` into the vertex capture output buffer.
//   writeAddress: SPIR-V id of the element index to write to.
//   dataId:       SPIR-V id of the value to store.
void DxsoCompiler::emitVertexCaptureWrite(uint32_t writeAddress, const uint32_t dataId) {
  // Access chain: constant 0 selects the first struct member, then
  // writeAddress selects the element within it.
  uint32_t accessIndices[2] = { m_module.constu32(0), writeAddress };

  const uint32_t elementPtrId = m_module.opAccessChain(
    m_vs.vertexOutBuf.imageTypeId,
    m_vs.vertexOutBuf.varId,
    2, accessIndices);

  m_module.opStore(elementPtrId, dataId);
}

void DxsoCompiler::emitVertexCaptureOp() {
// Write oPos to a UAV at vertex ID, vertex capture
DxsoRegisterInfo vertexIdReg;
Expand All @@ -3654,8 +3660,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(

uint32_t writeAddress = m_module.opIMul(getScalarTypeId(DxsoScalarType::Uint32), offsetVertexIdVal, m_module.constu32(3));

const uint32_t vertexOutBufferId = m_module.opLoad(m_vs.vertexOutBuf.imageTypeId, m_vs.vertexOutBuf.varId);

// Get the post transform oPos and reverse to world space
const uint32_t vec4typeId = getVectorTypeId({ DxsoScalarType::Float32, 4 });
const uint32_t mat4type = m_module.defMatrixType(vec4typeId, 4);
Expand All @@ -3669,7 +3673,8 @@ void DxsoCompiler::emitControlFlowGenericLoop(

// Write the data to buffer
SpirvImageOperands defaultOperands;
m_module.opImageWrite(vertexOutBufferId, writeAddress, worldPosId, defaultOperands);

emitVertexCaptureWrite(writeAddress, worldPosId);

// Get the next slot
writeAddress = m_module.opIAdd(getScalarTypeId(DxsoScalarType::Uint32), writeAddress, m_module.constu32(1));
Expand All @@ -3679,7 +3684,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
const uint32_t texcoord0 = m_module.opLoad(vec4typeId, m_vs.oTex0.id);

// Write texcoord
m_module.opImageWrite(vertexOutBufferId, writeAddress, texcoord0, defaultOperands);
emitVertexCaptureWrite(writeAddress, texcoord0);
}

// Get the next slot
Expand Down Expand Up @@ -3727,7 +3732,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
}

// Write normal
m_module.opImageWrite(vertexOutBufferId, writeAddress, normal0, defaultOperands);
emitVertexCaptureWrite(writeAddress, normal0);
}

// Apply custom vertex transform if it's enabled
Expand Down
3 changes: 3 additions & 0 deletions src/dxso/dxso_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,9 @@ namespace dxvk {

// NV-DXVK start: vertex shader data capture implementation
void emitVertexCaptureOp();

// Emits a store of `dataId` into the vertex capture output buffer at element
// index `writeAddress` (both are SPIR-V ids). The value is not limited to the
// world-space position: the same helper is used for texcoord and normal
// writes, so the parameter is named `dataId` to match the definition.
void emitVertexCaptureWrite(uint32_t writeAddress, const uint32_t dataId);

// NV-DXVK end

void emitLinkerOutputSetup();
Expand Down

0 comments on commit 1be52c2

Please sign in to comment.