From fb64f8b4f8f94fc2dbb130d4e14afd4dc73f91d2 Mon Sep 17 00:00:00 2001 From: DH Date: Sun, 6 Oct 2024 02:18:57 +0300 Subject: [PATCH] rpcsx-gpu: fix linear tiler offset implement depth clear implemented shader resource merge fix smrd offset fix discard export --- rpcsx-gpu/Cache.cpp | 32 ++-- rpcsx-gpu/Cache.hpp | 11 +- rpcsx-gpu/Renderer.cpp | 175 ++++++++++++------ rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp | 2 +- rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl | 2 +- rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp | 16 +- .../lib/gcn-shader/src/GcnInstruction.cpp | 3 +- rpcsx-gpu/lib/gcn-shader/src/gcn.cpp | 2 +- 8 files changed, 155 insertions(+), 88 deletions(-) diff --git a/rpcsx-gpu/Cache.cpp b/rpcsx-gpu/Cache.cpp index 87e9b34b0e..b570f85561 100644 --- a/rpcsx-gpu/Cache.cpp +++ b/rpcsx-gpu/Cache.cpp @@ -99,8 +99,8 @@ void Cache::ShaderResources::loadResources( bufferMemoryTable.map(*pointerBase, *pointerBase + *pointerOffset + pointer.size, Access::Read); - resourceSlotToAddress.push_back( - {slotOffset + pointer.resourceSlot, *pointerBase}); + resourceSlotToAddress.emplace_back(slotOffset + pointer.resourceSlot, + *pointerBase); } for (auto &bufferRes : res.buffers) { @@ -124,10 +124,16 @@ void Cache::ShaderResources::loadResources( std::memcpy(reinterpret_cast(&buffer) + 3, &*word3, sizeof(std::uint32_t)); - bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(), - bufferRes.access); - resourceSlotToAddress.push_back( - {slotOffset + bufferRes.resourceSlot, buffer.address()}); + if (auto it = bufferMemoryTable.queryArea(buffer.address()); + it != bufferMemoryTable.end() && + it.beginAddress() == buffer.address() && it.size() == buffer.size()) { + it.get() |= bufferRes.access; + } else { + bufferMemoryTable.map(buffer.address(), buffer.address() + buffer.size(), + bufferRes.access); + } + resourceSlotToAddress.emplace_back(slotOffset + bufferRes.resourceSlot, + buffer.address()); } for (auto &texture : res.textures) { @@ -569,7 +575,6 @@ struct 
CachedImage : Cache::Entry { auto &regionInfo = info.getSubresourceInfo(mipLevel); regions.push_back({ .bufferOffset = regionInfo.linearOffset, - .bufferRowLength = regionInfo.linearPitch, .imageSubresource = { .aspectMask = toAspect(kind), @@ -579,9 +584,9 @@ struct CachedImage : Cache::Entry { }, .imageExtent = { - .width = regionInfo.linearWidth, - .height = regionInfo.linearHeight, - .depth = regionInfo.linearDepth, + .width = std::max(image.getWidth() >> mipLevel, 1u), + .height = std::max(image.getHeight() >> mipLevel, 1u), + .depth = std::max(image.getDepth() >> mipLevel, 1u), }, }); } @@ -1183,7 +1188,6 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { regions.push_back({ .bufferOffset = info.linearOffset, - .bufferRowLength = info.linearPitch, .imageSubresource = { .aspectMask = toAspect(key.kind), @@ -1193,9 +1197,9 @@ Cache::Image Cache::Tag::getImage(const ImageKey &key, Access access) { }, .imageExtent = { - .width = info.linearWidth, - .height = info.linearHeight, - .depth = info.linearDepth, + .width = std::max(key.extent.width >> mipLevel, 1u), + .height = std::max(key.extent.height >> mipLevel, 1u), + .depth = std::max(key.extent.depth >> mipLevel, 1u), }, }); } diff --git a/rpcsx-gpu/Cache.hpp b/rpcsx-gpu/Cache.hpp index cbd02fd9d6..6f5c463f7d 100644 --- a/rpcsx-gpu/Cache.hpp +++ b/rpcsx-gpu/Cache.hpp @@ -3,14 +3,11 @@ #include "Pipe.hpp" #include "amdgpu/tiler.hpp" #include "gnm/constants.hpp" -#include "rx/die.hpp" #include "shader/Access.hpp" #include "shader/Evaluator.hpp" #include "shader/GcnConverter.hpp" #include #include -#include -#include #include #include #include @@ -27,7 +24,7 @@ struct ShaderKey { shader::gcn::Environment env; }; -enum class ImageKind { Color, Depth, Stencil }; +enum class ImageKind : std::uint8_t { Color, Depth, Stencil }; struct ImageKey { std::uint64_t readAddress; @@ -265,7 +262,7 @@ struct Cache { } ~Tag() { release(); } - void swap(Tag &other) { + void swap(Tag &other) noexcept {
std::swap(static_cast(*this), static_cast(other)); } @@ -364,7 +361,7 @@ struct Cache { std::span viewPorts); void release(); - void swap(GraphicsTag &other) { + void swap(GraphicsTag &other) noexcept { Tag::swap(other); std::swap(mAcquiredGraphicsDescriptorSet, other.mAcquiredGraphicsDescriptorSet); @@ -396,7 +393,7 @@ struct Cache { void release(); - void swap(ComputeTag &other) { + void swap(ComputeTag &other) noexcept { Tag::swap(other); std::swap(mAcquiredComputeDescriptorSet, other.mAcquiredComputeDescriptorSet); diff --git a/rpcsx-gpu/Renderer.cpp b/rpcsx-gpu/Renderer.cpp index 0e88fb8afd..44058c086f 100644 --- a/rpcsx-gpu/Renderer.cpp +++ b/rpcsx-gpu/Renderer.cpp @@ -139,10 +139,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, std::uint32_t vertexCount, std::uint32_t firstInstance, std::uint32_t instanceCount, std::uint64_t indiciesAddress, std::uint32_t indexCount) { - if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) { - return; - } - if (pipe.context.cbColorControl.mode == gnm::CbMode::Disable) { return; } @@ -153,10 +149,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, return; } - if (pipe.context.cbTargetMask.raw == 0) { - return; - } - auto cacheTag = pipe.device->getGraphicsTag(vmId, pipe.scheduler); auto targetMask = pipe.context.cbTargetMask.raw; @@ -200,50 +192,6 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, // FIXME stencilAccess = Access::None; - if (depthAccess != Access::None) { - auto viewPortScissor = pipe.context.paScScreenScissor; - auto viewPortRect = gnm::toVkRect2D(viewPortScissor); - - auto imageView = cacheTag.getImageView( - { - .readAddress = static_cast(pipe.context.dbZReadBase) - << 8, - .writeAddress = - static_cast(pipe.context.dbZWriteBase) << 8, - .type = gnm::TextureType::Dim2D, - .dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format), - .nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format), - .extent = - { - 
.width = viewPortRect.extent.width, - .height = viewPortRect.extent.height, - .depth = 1, - }, - .pitch = viewPortRect.extent.width, - .mipCount = 1, - .arrayLayerCount = 1, - .kind = ImageKind::Depth, - }, - depthAccess); - - depthAttachment = { - .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, - .imageView = imageView.handle, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - }; - - if ((depthAccess & Access::Read) == Access::None) { - depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear; - depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - } - - if ((depthAccess & Access::Write) == Access::None) { - depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_NONE; - } - } - for (auto &cbColor : pipe.context.cbColor) { if (targetMask == 0) { break; @@ -304,6 +252,26 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, access |= Access::Write; } + if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) { + if (cbColor.info.fastClear) { + auto image = cacheTag.getImage(renderTargetInfo, access); + VkClearColorValue clearValue = { + .uint32 = + { + cbColor.clearWord0, + cbColor.clearWord1, + cbColor.clearWord2, + }, + }; + + vkCmdClearColorImage(cacheTag.getScheduler().getCommandBuffer(), + image.handle, VK_IMAGE_LAYOUT_GENERAL, &clearValue, + 1, &image.subresource); + } + + continue; + } + auto imageView = cacheTag.getImageView(renderTargetInfo, access); colorAttachments[renderTargets] = { @@ -359,8 +327,94 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, } if (renderTargets == 0) { + if ((depthAccess & Access::Write) != Access::None) { + auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor); + + auto image = cacheTag.getImage( + { + .readAddress = + static_cast(pipe.context.dbZReadBase) << 8, + .writeAddress = + static_cast(pipe.context.dbZWriteBase) << 8, + .type = gnm::TextureType::Dim2D, + 
.dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format), + .nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format), + .extent = + { + .width = screenRect.extent.width, + .height = screenRect.extent.height, + .depth = 1, + }, + .pitch = screenRect.extent.width, + .mipCount = 1, + .arrayLayerCount = 1, + .kind = ImageKind::Depth, + }, + Access::Write); + + VkClearDepthStencilValue depthStencil = { + .depth = pipe.context.dbDepthClear, + }; + + vkCmdClearDepthStencilImage(cacheTag.getScheduler().getCommandBuffer(), + image.handle, VK_IMAGE_LAYOUT_GENERAL, + &depthStencil, 1, &image.subresource); + pipe.scheduler.submit(); + pipe.scheduler.wait(); + } + return; } + + if (pipe.uConfig.vgtPrimitiveType == gnm::PrimitiveType::None) { + pipe.scheduler.submit(); + pipe.scheduler.wait(); + return; + } + + if (depthAccess != Access::None) { + auto screenRect = gnm::toVkRect2D(pipe.context.paScScreenScissor); + + auto imageView = cacheTag.getImageView( + { + .readAddress = static_cast(pipe.context.dbZReadBase) + << 8, + .writeAddress = + static_cast(pipe.context.dbZWriteBase) << 8, + .type = gnm::TextureType::Dim2D, + .dfmt = gnm::getDataFormat(pipe.context.dbZInfo.format), + .nfmt = gnm::getNumericFormat(pipe.context.dbZInfo.format), + .extent = + { + .width = screenRect.extent.width, + .height = screenRect.extent.height, + .depth = 1, + }, + .pitch = screenRect.extent.width, + .mipCount = 1, + .arrayLayerCount = 1, + .kind = ImageKind::Depth, + }, + depthAccess); + + depthAttachment = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = imageView.handle, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + }; + + if ((depthAccess & Access::Read) == Access::None) { + depthAttachment.clearValue.depthStencil.depth = pipe.context.dbDepthClear; + depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + } + + if ((depthAccess & Access::Write) == Access::None) { + depthAttachment.storeOp 
= VK_ATTACHMENT_STORE_OP_NONE; + } + } + if (indiciesAddress == 0) { indexCount = vertexCount; } @@ -483,11 +537,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, VkCullModeFlags cullMode = VK_CULL_MODE_NONE; if (pipe.uConfig.vgtPrimitiveType != gnm::PrimitiveType::RectList) { - if (pipe.context.paSuScModeCntl.cullBack) { - cullMode |= VK_CULL_MODE_BACK_BIT; - } - if (pipe.context.paSuScModeCntl.cullFront) { - cullMode |= VK_CULL_MODE_FRONT_BIT; + if (pipe.context.paSuScModeCntl.cullBack) { + cullMode |= VK_CULL_MODE_BACK_BIT; + } + if (pipe.context.paSuScModeCntl.cullFront) { + cullMode |= VK_CULL_MODE_FRONT_BIT; } } @@ -521,12 +575,11 @@ void amdgpu::draw(GraphicsPipe &pipe, int vmId, std::uint32_t firstVertex, } void amdgpu::dispatch(Cache &cache, Scheduler &sched, - Registers::ComputeConfig &computeConfig, - std::uint32_t groupCountX, std::uint32_t groupCountY, - std::uint32_t groupCountZ) { + Registers::ComputeConfig &pgm, std::uint32_t groupCountX, + std::uint32_t groupCountY, std::uint32_t groupCountZ) { auto tag = cache.createComputeTag(sched); auto descriptorSet = tag.getDescriptorSet(); - auto shader = tag.getShader(computeConfig); + auto shader = tag.getShader(pgm); auto pipelineLayout = tag.getComputePipelineLayout(); tag.buildDescriptors(descriptorSet); diff --git a/rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp b/rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp index 5091a3707d..9108af8300 100644 --- a/rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp +++ b/rpcsx-gpu/lib/amdgpu-tiler/src/tiler.cpp @@ -555,7 +555,7 @@ static SurfaceInfo computeTextureLinearInfo( } surfaceOffset += arraySliceCount * surfaceSize; - surfaceOffset += arraySliceCount * linearSize; + linearOffset += arraySliceCount * linearSize; } result.totalTiledSize = surfaceOffset; diff --git a/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl b/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl index 86802154fc..ff3d00479b 100644 --- a/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl +++ 
b/rpcsx-gpu/lib/gcn-shader/shaders/rdna.glsl @@ -2101,7 +2101,7 @@ void tbuffer_store_format_xyzw(u32vec4 vdata, uint32_t vOFFSET, uint32_t vINDEX, #define S_LOAD_DWORD(dest, memoryLocationHint, sbase, offset, N) \ int32_t _offset = 0; \ uint64_t deviceAreaSize = 0; \ - uint64_t deviceAddress = findMemoryAddress(sbase + offset, SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \ + uint64_t deviceAddress = findMemoryAddress((sbase & ~uint64_t(3)) + (offset & ~3), SIZEOF(uint32_t) * N, memoryLocationHint, deviceAreaSize); \ if (deviceAddress == kInvalidAddress || deviceAreaSize < SIZEOF(uint32_t) * N) { \ for (int i = 0; i < (N); ++i) { \ dest[i] = 0; \ diff --git a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp index 40014dd6f4..5fd9ccc664 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnConverter.cpp @@ -133,7 +133,7 @@ struct ResourcesBuilder { std::printf("failed to resolve function call to %s\n", ns->getNameOf(call.getOperand(1).getAsValue()).c_str()); - for (auto op : call.getOperands().subspan(2)) { + for (auto &op : call.getOperands().subspan(2)) { std::cerr << "arg: "; op.print(std::cerr, *ns); auto argValue = op.getAsValue(); @@ -728,6 +728,20 @@ static void expToSpv(GcnConverter &converter, gcn::Stage stage, auto channelType = context.getTypeFloat32(); + if (swizzle == 0 && done) { + auto termBuilder = gcn::Builder::createAppend( + context, context.layout.getOrCreateFunctions(context)); + auto terminateFn = termBuilder.createSpvFunction( + loc, context.getTypeVoid(), ir::spv::FunctionControl::None, + context.getTypeFunction(context.getTypeVoid(), {})); + termBuilder.createSpvLabel(loc); + termBuilder.createSpvKill(loc); + termBuilder.createSpvFunctionEnd(loc); + + builder.createSpvFunctionCall(loc, context.getTypeVoid(), terminateFn, + {}); + } + for (int channel = 0; channel < 4; ++channel) { if (~swizzle & (1 << channel)) { continue; diff --git 
a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp index a0cb01021f..184ed48e00 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/GcnInstruction.cpp @@ -207,8 +207,7 @@ readSmrdInst(GcnInstruction &inst, std::uint64_t &address, if (op != ir::smrd::MEMTIME) { auto baseOperand = createSgprGcnOperand(address, sbase); - auto offsetOperand = imm ? GcnOperand::createConstant( - std::uint32_t(std::int8_t(offset << 2))) + auto offsetOperand = imm ? GcnOperand::createConstant(offset << 2) : createSgprGcnOperand(address, offset).withR(); if (isBuffer) { diff --git a/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp b/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp index 77a42f6bae..1be27322bd 100644 --- a/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp +++ b/rpcsx-gpu/lib/gcn-shader/src/gcn.cpp @@ -1283,7 +1283,7 @@ static ir::Value deserializeGcnRegion( inst.addOperand(createOperandRead(loc, paramBuilder, uint32TV, op)); } - if (isaInst == ir::exp::EXP) { + if (isaInst == ir::exp::EXP && isaInst.getOperand(1).value != 0) { createExecTest(); } continue;