Skip to content

Commit

Permalink
Merge branch 'bottleneckOpts' into 'main'
Browse files Browse the repository at this point in the history
[REMIX-3744] optimize bottlenecks when instances are repeated many times

See merge request lightspeedrtx/dxvk-remix-nv!1193
  • Loading branch information
MarkEHenderson committed Dec 17, 2024
2 parents 368525a + 918e3d1 commit 6801315
Show file tree
Hide file tree
Showing 10 changed files with 193 additions and 133 deletions.
50 changes: 35 additions & 15 deletions src/dxvk/rtx_render/rtx_camera_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
*/
#include "rtx_camera_manager.h"

#include "rtx_matrix_helpers.h"

#include "dxvk_device.h"

namespace {
Expand All @@ -44,6 +42,7 @@ namespace dxvk {

void CameraManager::onFrameEnd() {
m_lastSetCameraType = CameraType::Unknown;
m_decompositionCache.clear();
}

CameraType::Enum CameraManager::processCameraData(const DrawCallState& input) {
Expand Down Expand Up @@ -75,10 +74,7 @@ namespace dxvk {
}

// Get camera params
float fov, aspectRatio, nearPlane, farPlane, shearX, shearY;
bool isLHS;
bool isReverseZ;
decomposeProjection(input.getTransformData().viewToProjection, aspectRatio, fov, nearPlane, farPlane, shearX, shearY, isLHS, isReverseZ);
DecomposeProjectionParams decomposeProjectionParams = getOrDecomposeProjection(input.getTransformData().viewToProjection);

// Filter invalid cameras, extreme shearing
static auto isFovValid = [](float fovA) {
Expand All @@ -88,7 +84,7 @@ namespace dxvk {
return std::abs(fovA - cameraB.getFov()) < kFovToleranceRadians;
};

if (std::abs(shearX) > 0.01f || !isFovValid(fov)) {
if (std::abs(decomposeProjectionParams.shearX) > 0.01f || !isFovValid(decomposeProjectionParams.fov)) {
ONCE(Logger::warn("[RTX] CameraManager: rejected an invalid camera"));
return input.getCategoryFlags().test(InstanceCategories::Sky) ? CameraType::Sky : CameraType::Unknown;
}
Expand Down Expand Up @@ -117,13 +113,13 @@ namespace dxvk {
cameraType = CameraType::RenderToTexture;
} else if (input.testCategoryFlags(InstanceCategories::Sky)) {
cameraType = CameraType::Sky;
} else if (isViewModel(fov, input.maxZ, frameId)) {
} else if (isViewModel(decomposeProjectionParams.fov, input.maxZ, frameId)) {
cameraType = CameraType::ViewModel;
}

// Check fov consistency across frames
if (frameId > 0) {
if (getCamera(cameraType).isValid(frameId - 1) && !areFovsClose(fov, getCamera(cameraType))) {
if (getCamera(cameraType).isValid(frameId - 1) && !areFovsClose(decomposeProjectionParams.fov, getCamera(cameraType))) {
ONCE(Logger::warn("[RTX] CameraManager: FOV of a camera changed between frames"));
}
}
Expand All @@ -148,7 +144,15 @@ namespace dxvk {
}
} else {
isCameraCut = camera.update(
frameId, worldToView, viewToProjection, fov, aspectRatio, nearPlane,farPlane, isLHS);
frameId,
worldToView,
viewToProjection,
decomposeProjectionParams.fov,
decomposeProjectionParams.aspectRatio,
decomposeProjectionParams.nearPlane,
decomposeProjectionParams.farPlane,
decomposeProjectionParams.isLHS
);
}


Expand All @@ -173,13 +177,29 @@ namespace dxvk {
void CameraManager::processExternalCamera(CameraType::Enum type,
const Matrix4& worldToView,
const Matrix4& viewToProjection) {
float fov, aspectRatio, nearPlane, farPlane, shearX, shearY;
bool isLHS;
bool isReverseZ;
decomposeProjection(viewToProjection, aspectRatio, fov, nearPlane, farPlane, shearX, shearY, isLHS, isReverseZ);
DecomposeProjectionParams decomposeProjectionParams = getOrDecomposeProjection(viewToProjection);

getCamera(type).update(
m_device->getCurrentFrameId(),
worldToView, viewToProjection, fov, aspectRatio, nearPlane, farPlane, isLHS);
worldToView,
viewToProjection,
decomposeProjectionParams.fov,
decomposeProjectionParams.aspectRatio,
decomposeProjectionParams.nearPlane,
decomposeProjectionParams.farPlane,
decomposeProjectionParams.isLHS);
}

DecomposeProjectionParams CameraManager::getOrDecomposeProjection(const Matrix4& viewToProjection) {
XXH64_hash_t projectionHash = XXH64(&viewToProjection, sizeof(viewToProjection), 0);
auto iter = m_decompositionCache.find(projectionHash);
if (iter != m_decompositionCache.end()) {
return iter->second;
}

DecomposeProjectionParams decomposeProjectionParams;
decomposeProjection(viewToProjection, decomposeProjectionParams);
m_decompositionCache.emplace(projectionHash, decomposeProjectionParams);
return decomposeProjectionParams;
}
} // namespace dxvk
4 changes: 4 additions & 0 deletions src/dxvk/rtx_render/rtx_camera_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "rtx_option.h"
#include "rtx_types.h"
#include "rtx_common_object.h"
#include "rtx_matrix_helpers.h"

namespace dxvk {
class DxvkDevice;
Expand Down Expand Up @@ -81,6 +82,9 @@ namespace dxvk {
std::array<RtCamera, CameraType::Count> m_cameras;
CameraType::Enum m_lastSetCameraType = CameraType::Unknown;
uint32_t m_lastCameraCutFrameId = -1;
fast_unordered_cache<DecomposeProjectionParams> m_decompositionCache;

DecomposeProjectionParams getOrDecomposeProjection(const Matrix4& viewToProjection);

RTX_OPTION("rtx", bool, rayPortalEnabled, false, "Enables ray portal support. Note this requires portal texture hashes to be set for the ray portal geometries in rtx.rayPortalModelTextureHashes.");
};
Expand Down
46 changes: 14 additions & 32 deletions src/dxvk/rtx_render/rtx_instance_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ namespace dxvk {
m_frameLastUpdated
m_frameCreated
m_isCreatedByRenderer
m_spatialCachePos
m_spatialCacheHash
buildGeometries
buildRanges
billboardIndices
Expand Down Expand Up @@ -170,8 +170,7 @@ namespace dxvk {

// Cache based on current position.
const Vector3 newPos = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
m_linkedBlas->getSpatialMap().move(m_spatialCachePos, newPos, this);
m_spatialCachePos = newPos;
m_spatialCacheHash = m_linkedBlas->getSpatialMap().move(m_spatialCacheHash, newPos, surface.objectToWorld, this);
}
}

Expand All @@ -180,8 +179,8 @@ namespace dxvk {
surface.normalObjectToWorld = transpose(inverse(Matrix3(surface.objectToWorld)));
surface.prevObjectToWorld = objectToWorld;
if (!m_isCreatedByRenderer) {
m_spatialCachePos = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
m_linkedBlas->getSpatialMap().insert(m_spatialCachePos, this);
const Vector3 centroid = getBlas()->input.getGeometryData().boundingBox.getTransformedCentroid(surface.objectToWorld);
m_spatialCacheHash = m_linkedBlas->getSpatialMap().insert(centroid, surface.objectToWorld, this);
}

// The D3D matrix on input, needs to be transposed before feeding to the VK API (left/right handed conversion)
Expand Down Expand Up @@ -653,38 +652,21 @@ namespace dxvk {
// Search the BLAS for an instance matching ours
{
// Search for an exact match
const std::vector<const RtInstance*>* matchingCell = blas.getSpatialMap().getDataAtPos(worldPosition);
if (matchingCell != nullptr) {
for (const RtInstance* instance : *matchingCell) {
const Matrix4 instanceTransform = instance->getTransform();
// Note: this may be the second call this frame targetting the same instance.
if (memcmp(&transform, &instanceTransform, sizeof(instanceTransform)) == 0) {
return const_cast<RtInstance*>(instance);
}
}
result = const_cast<RtInstance*>(blas.getSpatialMap().getDataAtTransform(transform));
if (result != nullptr) {
return result;
}

// No exact match, so find the closest match in the region
// (need to check a 2x2x2 patch of cells to account for positions close to a border)
const auto adjacentCells = blas.getSpatialMap().getDataNearPos(worldPosition);
for (const std::vector<const RtInstance*>* cellPtr : adjacentCells){
for (const RtInstance* instance : *cellPtr) {
if (instance->m_frameLastUpdated != currentFrameIdx && instance->m_materialHash == material.getHash()) {
// Instance hasn't been touched yet this frame.

const Vector3& prevInstanceWorldPosition = instance->getSpatialCachePosition();

const float distSqr = lengthSqr(prevInstanceWorldPosition - worldPosition);
if (distSqr <= uniqueObjectDistanceSqr && distSqr < nearestDistSqr) {
if (distSqr == 0.0f) {
// Not going to find anything closer.
return const_cast<RtInstance*>(instance);
}
nearestDistSqr = distSqr;
result = const_cast<RtInstance*>(instance);
}
}
result = const_cast<RtInstance*>(blas.getSpatialMap().getNearestData(worldPosition, uniqueObjectDistanceSqr, nearestDistSqr,
[&] (const RtInstance* instance) {
return instance->m_frameLastUpdated != currentFrameIdx && instance->m_materialHash == material.getHash();
}
));
if (nearestDistSqr == 0.0f && result != nullptr) {
// Not going to find anything closer
return result;
}
}

Expand Down
5 changes: 2 additions & 3 deletions src/dxvk/rtx_render/rtx_instance_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,11 @@ class RtInstance {
Vector3 getWorldPosition() const { return Vector3{ m_vkInstance.transform.matrix[0][3], m_vkInstance.transform.matrix[1][3], m_vkInstance.transform.matrix[2][3] }; }
const Vector3& getPrevWorldPosition() const { return surface.prevObjectToWorld.data[3].xyz(); }

const Vector3& getSpatialCachePosition() const { return m_spatialCachePos; }
void removeFromSpatialCache() const {
if (m_isCreatedByRenderer) {
return;
}
m_linkedBlas->getSpatialMap().erase(m_spatialCachePos, this);
m_linkedBlas->getSpatialMap().erase(m_spatialCacheHash);
}

bool isCreatedThisFrame(uint32_t frameIndex) const { return frameIndex == m_frameCreated; }
Expand Down Expand Up @@ -203,7 +202,7 @@ class RtInstance {

CategoryFlags m_categoryFlags;

Vector3 m_spatialCachePos = Vector3(0.f);
XXH64_hash_t m_spatialCacheHash = 0;

public:
bool isFrontFaceFlipped = false;
Expand Down
17 changes: 16 additions & 1 deletion src/dxvk/rtx_render/rtx_matrix_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,19 @@ static inline void decomposeProjection(const dxvk::Matrix4& matrix, float& aspec
"\n\tPROJ_ANGLEMAXY: ", cameraParams[PROJ_ANGLEMAXY]));
}
#endif
}
}

struct DecomposeProjectionParams {
float fov;
float aspectRatio;
float nearPlane;
float farPlane;
float shearX;
float shearY;
bool isLHS;
bool isReverseZ;
};

static inline void decomposeProjection(const dxvk::Matrix4& matrix, DecomposeProjectionParams& result, bool log = false) {
decomposeProjection(matrix, result.aspectRatio, result.fov, result.nearPlane, result.farPlane, result.shearX, result.shearY, result.isLHS, result.isReverseZ, log);
}
3 changes: 1 addition & 2 deletions src/dxvk/rtx_render/rtx_texture.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,8 @@ namespace dxvk {
}

XXH64_hash_t getImageHash() const {
const DxvkImageView* resolvedImageView = getImageView();
XXH64_hash_t result = 0;
if (resolvedImageView) {
if (const DxvkImageView* resolvedImageView = getImageView()) {
result = resolvedImageView->image()->getHash();
}

Expand Down
9 changes: 1 addition & 8 deletions src/dxvk/rtx_render/rtx_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,7 @@ namespace dxvk {
}

void BlasEntry::rebuildSpatialMap() {
InstanceMap newMap(RtxOptions::uniqueObjectDistance() * 2.f);

for (const auto& iter : m_spatialMap.getAll()){
for (const RtInstance* instance : iter.second) {
newMap.insert(instance->getSpatialCachePosition(), instance);
}
}
m_spatialMap = std::move(newMap);
m_spatialMap.rebuild(RtxOptions::uniqueObjectDistance() * 2.f);
}

} // namespace dxvk
2 changes: 1 addition & 1 deletion src/dxvk/rtx_render/rtx_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ struct BlasEntry {
// Frame when the vertex data of this geometry was last updated, used to detect static geometries
uint32_t frameLastUpdated = kInvalidFrameIndex;

using InstanceMap = SpatialMap<const RtInstance*>;
using InstanceMap = SpatialMap<RtInstance>;

Rc<PooledBlas> staticBlas;

Expand Down
Loading

0 comments on commit 6801315

Please sign in to comment.