// Patch summary (leading commentary; the patch proper starts at the first "diff --git" header).
//
// Files touched: src/d3d9/d3d9_rtx.cpp, src/d3d9/d3d9_rtx.h,
// src/dxvk/rtx_render/rtx_geometry_utils.cpp.
//
// src/d3d9/d3d9_rtx.cpp
//   * Removes the m_vertexCaptureData initializer from the D3D9Rtx constructor.
//   * Adds allocVertexCaptureBuffer(DxvkDevice* pDevice, const VkDeviceSize size), which fills a
//     DxvkBufferCreateInfo (usage: STORAGE_BUFFER | TRANSFER_SRC | VERTEX_BUFFER | INDEX_BUFFER;
//     access: TRANSFER_READ; stages: VERTEX_SHADER | COMPUTE_SHADER | TRANSFER; size: `size`) and
//     returns a DxvkBufferSlice wrapping pDevice->createBuffer(info, DEVICE_LOCAL, AppBuffer).
//   * prepareVertexCapture() now obtains its slice from allocVertexCaptureBuffer() instead of
//     m_vertexCaptureData.alloc(CACHE_LINE_SIZE, vertexCaptureDataSize); the following
//     slice.buffer()->acquire(DxvkAccess::Write) call is removed as well.
//
// src/d3d9/d3d9_rtx.h
//   * Removes the RtxStagingDataAlloc m_vertexCaptureData member.
//
// src/dxvk/rtx_render/rtx_geometry_utils.cpp
//   * Renames the local flag pendingGpuWrites to mustUseGPU and additionally sets it when
//     mapPtr() == nullptr for the position, normal, texcoord and color0 input buffers.
//   * useGPU is now (input.vertexCount > kNumVerticesToProcessOnCPU || mustUseGPU).
//
// NOTE(review): the new usage mask adds VERTEX_BUFFER_BIT and INDEX_BUFFER_BIT, which the removed
//   m_vertexCaptureData initializer did not request - confirm this is intentional.
// NOTE(review): no replacement for the removed acquire(DxvkAccess::Write) is visible in this
//   patch - confirm the buffer's lifetime/tracking is handled elsewhere.
// NOTE(review): presumably the mapPtr() == nullptr checks exist because the CPU path needs
//   host-mapped inputs - verify against the CPU branch, which is outside these hunks.
// NOTE(review): allocVertexCaptureBuffer is added as a non-static free function in a .cpp file;
//   consider internal linkage if nothing else references it - TODO confirm.
// NOTE(review): this copy of the patch has its line breaks collapsed, and some template
//   arguments appear to be missing (e.g. "std::make_unique(" and "std::optional
//   m_activePresentParams"); it is unlikely to apply as-is - regenerate from the repository
//   before use.
diff --git a/src/d3d9/d3d9_rtx.cpp b/src/d3d9/d3d9_rtx.cpp index fe39f8ed..3d1be04f 100644 --- a/src/d3d9/d3d9_rtx.cpp +++ b/src/d3d9/d3d9_rtx.cpp @@ -25,11 +25,6 @@ namespace dxvk { D3D9Rtx::D3D9Rtx(D3D9DeviceEx* d3d9Device, bool enableDrawCallConversion) : m_rtStagingData(d3d9Device->GetDXVKDevice(), (VkMemoryPropertyFlagBits) (VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) - , m_vertexCaptureData(d3d9Device->GetDXVKDevice(), - (VkMemoryPropertyFlagBits) (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), - (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT), - (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT), - VK_ACCESS_TRANSFER_READ_BIT) , m_parent(d3d9Device) , m_enableDrawCallConversion(enableDrawCallConversion) , m_pGeometryWorkers(enableDrawCallConversion ? std::make_unique(numGeometryProcessingThreads(), "geometry-processing") : nullptr) { @@ -123,6 +118,15 @@ namespace dxvk { return result.slice; } + DxvkBufferSlice allocVertexCaptureBuffer(DxvkDevice* pDevice, const VkDeviceSize size) { + DxvkBufferCreateInfo info; + info.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + info.access = VK_ACCESS_TRANSFER_READ_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; + info.size = size; + return DxvkBufferSlice(pDevice->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, DxvkMemoryStats::Category::AppBuffer)); + } + void D3D9Rtx::prepareVertexCapture(const int vertexIndexOffset) { ScopedCpuProfileZone(); @@ -154,8 +158,7 @@ namespace dxvk { const uint32_t stride = sizeof(CapturedVertex); const size_t vertexCaptureDataSize = align(geoData.vertexCount * stride, CACHE_LINE_SIZE); - DxvkBufferSlice slice = m_vertexCaptureData.alloc(CACHE_LINE_SIZE, vertexCaptureDataSize); - 
slice.buffer()->acquire(DxvkAccess::Write); + DxvkBufferSlice slice = allocVertexCaptureBuffer(m_parent->GetDXVKDevice().ptr(), vertexCaptureDataSize); geoData.positionBuffer = RasterBuffer(slice, 0, stride, VK_FORMAT_R32G32B32A32_SFLOAT); assert(geoData.positionBuffer.offset() % 4 == 0); diff --git a/src/d3d9/d3d9_rtx.h b/src/d3d9/d3d9_rtx.h index c175878b..cba1250b 100644 --- a/src/d3d9/d3d9_rtx.h +++ b/src/d3d9/d3d9_rtx.h @@ -180,7 +180,6 @@ namespace dxvk { DrawCallState m_activeDrawCallState; RtxStagingDataAlloc m_rtStagingData; - RtxStagingDataAlloc m_vertexCaptureData; D3D9DeviceEx* m_parent; std::optional m_activePresentParams; diff --git a/src/dxvk/rtx_render/rtx_geometry_utils.cpp b/src/dxvk/rtx_render/rtx_geometry_utils.cpp index aa5b5181..99ad01bf 100644 --- a/src/dxvk/rtx_render/rtx_geometry_utils.cpp +++ b/src/dxvk/rtx_render/rtx_geometry_utils.cpp @@ -807,7 +807,7 @@ namespace dxvk { assert(output.buffer->info().size == align(output.stride * input.vertexCount, CACHE_LINE_SIZE)); - bool pendingGpuWrites = input.positionBuffer.isPendingGpuWrite(); + bool mustUseGPU = input.positionBuffer.isPendingGpuWrite() || input.positionBuffer.mapPtr() == nullptr; // Interleave vertex data InterleaveGeometryArgs args; @@ -821,7 +821,7 @@ namespace dxvk { } args.hasNormals = input.normalBuffer.defined(); if (args.hasNormals) { - pendingGpuWrites |= input.normalBuffer.isPendingGpuWrite(); + mustUseGPU |= input.normalBuffer.isPendingGpuWrite() || input.normalBuffer.mapPtr() == nullptr; assert(input.normalBuffer.offsetFromSlice() % 4 == 0); args.normalOffset = input.normalBuffer.offsetFromSlice() / 4; args.normalStride = input.normalBuffer.stride() / 4; @@ -832,7 +832,7 @@ namespace dxvk { } args.hasTexcoord = input.texcoordBuffer.defined(); if (args.hasTexcoord) { - pendingGpuWrites |= input.texcoordBuffer.isPendingGpuWrite(); + mustUseGPU |= input.texcoordBuffer.isPendingGpuWrite() || input.texcoordBuffer.mapPtr() == nullptr; 
assert(input.texcoordBuffer.offsetFromSlice() % 4 == 0); args.texcoordOffset = input.texcoordBuffer.offsetFromSlice() / 4; args.texcoordStride = input.texcoordBuffer.stride() / 4; @@ -843,7 +843,7 @@ namespace dxvk { } args.hasColor0 = input.color0Buffer.defined(); if (args.hasColor0) { - pendingGpuWrites |= input.color0Buffer.isPendingGpuWrite(); + mustUseGPU |= input.color0Buffer.isPendingGpuWrite() || input.color0Buffer.mapPtr() == nullptr; assert(input.color0Buffer.offsetFromSlice() % 4 == 0); args.color0Offset = input.color0Buffer.offsetFromSlice() / 4; args.color0Stride = input.color0Buffer.stride() / 4; @@ -859,7 +859,7 @@ namespace dxvk { args.vertexCount = input.vertexCount; const uint32_t kNumVerticesToProcessOnCPU = 1024; - const bool useGPU = input.vertexCount > kNumVerticesToProcessOnCPU || pendingGpuWrites; + const bool useGPU = input.vertexCount > kNumVerticesToProcessOnCPU || mustUseGPU; if (useGPU) { ctx->bindResourceBuffer(INTERLEAVE_GEOMETRY_BINDING_OUTPUT, DxvkBufferSlice(output.buffer));