From 23f94f65afef168e8fe525587896bb48c0391995 Mon Sep 17 00:00:00 2001
From: Riley Alston <ralston@nvidia.com>
Date: Fri, 4 Oct 2024 09:58:16 -0700
Subject: [PATCH] [REMIX-844] Material Bounded VNDF Sampling

* Add bounded VNDF sampling for sampling the specular GGX lobe of the opaque material.
  * This is an improvement on the existing VNDF sampling logic, as it further reduces the number of invalid direction samples generated while sampling.
  * As such, this should slightly boost the quality of indirect specular by allowing more rays to reach a light than before, especially in cases of high roughness where this method performs best. This has no effect on perfectly smooth surfaces.
  * Additionally, this new method is based on spherical cap sampling rather than the older projective method, which should also slightly improve the performance of the sampling algorithm (though probably not significantly).
  * Direct performance comparisons may be deceptive, however: since more indirect rays are valid, they will take up more GPU time to calculate. Generally this is a benefit though, as it means less GPU work is going to waste (work was put into firing a primary ray, so making good use of it is always better).
* Remove existing rejection sampling logic as it has been unused for a while and is now superseded by this change.
  * Closes REMIX-844 as a result.
---
 .../rtx/algorithm/integrator_direct.slangh    |   2 +-
 .../rtx/algorithm/integrator_indirect.slangh  |  12 +-
 ...opaque_surface_material_interaction.slangh | 306 +++++++++---------
 src/dxvk/shaders/rtx/utility/brdf.slangh      |  67 +++-
 src/dxvk/shaders/rtx/utility/sampling.slangh  | 105 +++++-
 5 files changed, 326 insertions(+), 166 deletions(-)

diff --git a/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh b/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh
index a38f2309a..0c5dd0f67 100644
--- a/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh
+++ b/src/dxvk/shaders/rtx/algorithm/integrator_direct.slangh
@@ -206,7 +206,7 @@ void sampleIndirectRayForFirstBounce(
                                primaryPolymorphicSurfaceMaterialInteraction,
                                firstRayPathSampledLobe, firstSampledLobePdf, firstSampledSolidAnglePdf,
                                true);
-  
+
   // Set the material medium index if a medium was entered or exited
   mediumChanged = evalMediumChange(oldInsideMedium, insideMedium, primaryPolymorphicSurfaceMaterialInteraction, mediumMaterialIndex);
   
diff --git a/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh b/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh
index af68ce46c..e8f297e27 100644
--- a/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh
+++ b/src/dxvk/shaders/rtx/algorithm/integrator_indirect.slangh
@@ -694,19 +694,19 @@ void integratePathVertex(
       // Note: This logic will conditionally terminate paths based on the effective throughput, allowing for
       // paths which contribute little to the final image to be shorter while paths which are more important
       // are allowed to be longer.
-    
-      float continueProbability = calculateRussianRouletteContinueProbability(
+
+      const float continueProbability = calculateRussianRouletteContinueProbability(
         calcBt709Luminance(pathState.throughput),
-        isOpaqueMaterial, 
+        isOpaqueMaterial,
         continuationLobe != opaqueLobeTypeDiffuseReflection,
         opaqueSurfaceMaterialInteraction.isotropicRoughness,
         rayInteraction.hitDistance / pathState.accumulatedHitDistance);
-      
+
       const float continueSample = getNextSampleBlueNoise(randomState);
       // Note: <= used to ensure path never continues if the probability is 0 (and to avoid NaNs in the
       // throughput when the probability is divided into it).
       const bool terminate = continueProbability <= continueSample;
-    
+
       if (terminate)
       {
         pathState.continuePath = false;
@@ -717,7 +717,7 @@ void integratePathVertex(
         // in denoising when the continue probability is very low, but this math has been validated to be
         // correct as far as I can tell.
         continuationThroughput /= continueProbability;
-      } 
+      }
     }
     
     // Steal samples from restir gi
diff --git a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh
index ca130c759..7aac34d51 100644
--- a/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh
+++ b/src/dxvk/shaders/rtx/concept/surface_material/opaque_surface_material_interaction.slangh
@@ -33,6 +33,11 @@ static const f16vec3 baseReflectivityThreshold = f16vec3(1.0h / 256.0h);
 // Must be greater than materialMinimumRoughness in the BRDF helpers as all roughnesses will be clamped to at minimum this value.
 static const float16_t roughnessThreshold = float16_t(0.001h);
 
+// Note: When set to 1 this enables bounded VNDF sampling, when set to 0 this uses original unbounded VNDF sampling.
+// Generally bounded should be preferred as it has better performance (due to using spherical cap sampling) as well
+// as generating fewer invalid samples.
+#define OPAQUE_MATERIAL_USE_BOUNDED_VNDF_SAMPLING 1
+
 // Opaque Surface Material Interaction Helper Functions
 
 #ifdef OPAQUE_MATERIAL_USE_POM
@@ -950,11 +955,12 @@ SurfaceMaterialInteractionLobeSample opaqueSurfaceMaterialInteractionCalcLobeSam
   float16_t specularReflectionProbability = 0.0f;
   float16_t opacityTransmissionProbability = 0.0f;
   float16_t diffuseTransmissionProbability = 0.0f;
-  bool isValid = opaqueSurfaceMaterialInteractionCalcLobeProbability(
+  const bool isValid = opaqueSurfaceMaterialInteractionCalcLobeProbability(
     opaqueSurfaceMaterialInteraction, minimalRayInteraction.viewDirection,
     diffuseReflectionProbability, specularReflectionProbability, opacityTransmissionProbability, diffuseTransmissionProbability);
 
   SurfaceMaterialInteractionLobeSample surfaceMaterialInteractionLobeSample;
+
   if (!isValid)
   {
     // Note: Default to sampling specular lobe when no lobe desires to be sampled from. Ideally this could just be skipped entirely as
@@ -962,6 +968,7 @@ SurfaceMaterialInteractionLobeSample opaqueSurfaceMaterialInteractionCalcLobeSam
     // rays ideally to minimize the performance impact.
     surfaceMaterialInteractionLobeSample.lobe = opaqueLobeTypeSpecularReflection;
     surfaceMaterialInteractionLobeSample.pdf = float16_t(1.0f);
+
     return surfaceMaterialInteractionLobeSample;
   }
 
@@ -994,10 +1001,6 @@ SurfaceMaterialInteractionLobeSample opaqueSurfaceMaterialInteractionCalcLobeSam
   return surfaceMaterialInteractionLobeSample;
 }
 
-
-// Note: Rejection sampling introduces bias which needs to be corrected before reenabling it
-#define USE_REJECTION_SAMPLING 0
-
 SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcDiffuseReflectionSample(
   OpaqueSurfaceMaterialInteraction opaqueSurfaceMaterialInteraction,
   inout RNG randomState,
@@ -1010,54 +1013,37 @@ SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcDiffuseRefl
 
   const f16vec3 outputDirection = quaternionTransformVector(worldToTangentSpaceQuaternion, minimalRayInteraction.viewDirection);
 
-  f16vec3 inputDirection;
+  // Sample a new input direction based on a cosine weighted hemisphere
+
+  const f16vec2 u = f16vec2(getNextSampleBlueNoise(randomState), getNextSampleBlueNoise(randomState));
   float cosineHemisphereSolidAnglePdf;
-  float16_t /* l.v */ inputDirectionDotOutputDirection;
-  float16_t /* n.l */ normalDotInputDirection;
-  float16_t /* n.v */ normalDotOutputDirection;
-  float16_t /* n.h */ normalDotMicrofacetNormal;
-
-  // Random sampling may end up with an invalid sample, which will contribute 0 radiance to the output and bias the result to be darker
-  // Apply rejection sampling on top by taking a few random sample attempts to find a valid sample instead
-  // Max attempts set empirically, 2 attempts cleaned up 99%+ invalid samples, setting this to 5 was a safe upper bound to handle most dead samples
-  int32_t sampleAttemptsRemaining = 5; 
-
-#if USE_REJECTION_SAMPLING 
-  while (sampleAttemptsRemaining-- > 0)
-#else
-  sampleAttemptsRemaining = -1;   // Mark sample as invalid - it's updated below
-#endif
-  {
-    // Sample a new input direction based on a cosine weighted hemisphere
+  const f16vec3 inputDirection = calcCosineHemisphereDirectionSample(u, cosineHemisphereSolidAnglePdf);
 
-    const f16vec2 u = f16vec2(getNextSampleBlueNoise(randomState), getNextSampleBlueNoise(randomState));
-    inputDirection = calcCosineHemisphereDirectionSample(u, cosineHemisphereSolidAnglePdf);
+  // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
+  // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
 
-    // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
-    // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
+  // Calculate the microfacet normal from the input and output directions
 
-    // Calculate the microfacet normal from the input and output directions
+  const f16vec3 microfacetNormal = normalize(outputDirection + inputDirection);
 
-    const f16vec3 microfacetNormal = normalize(outputDirection + inputDirection);
+  // Calculate dot products used for evaluation
+  // Note: Dot products against tangent space basis vectors are single components of the other vector
 
-    // Calculate dot products used for evaluation
-    // Note: Dot products against tangent space basis vectors are single components of the other vector
+  const float16_t /* l.v */ inputDirectionDotOutputDirection = dot(inputDirection, outputDirection);
+  const float16_t /* n.l */ normalDotInputDirection = inputDirection.z;
+  const float16_t /* n.v */ normalDotOutputDirection = outputDirection.z;
+  const float16_t /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
 
-    /* l.v */ inputDirectionDotOutputDirection = dot(inputDirection, outputDirection);
-    /* n.l */ normalDotInputDirection = inputDirection.z;
-    /* n.v */ normalDotOutputDirection = outputDirection.z;
-    /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
+  // Check if the sampled direction is valid
+  // Note: Also checking the output direction here, in theory this should be valid with pure triangle normals, but normal maps
+  // allow for surfaces to be viewed from outside the actual normal's hemisphere, resulting in an invalid sample.
+  // Note: Input direction check here is currently not needed, diffuse sampling should always produce sampled directions valid within
+  // the normal's hemisphere, but just kept here in case the sampling changes some day.
 
-    if (normalDotOutputDirection > 0.h && normalDotInputDirection > 0.h)
-#if USE_REJECTION_SAMPLING 
-      break;
-#else
-      sampleAttemptsRemaining = 1;
-#endif      
-  }
-  
-  // Failed to find a valid sample
-  if (sampleAttemptsRemaining == -1)
+  // Note: < used rather than <= as vectors perpendicular to the normal (i.e. parallel to the surface) can be considered in the
+  // hemisphere without colliding with the triangle. Convention can vary but in this case cosine sampling can produce such grazing
+  // directions at times so it's better to not reject those samples.
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
   {
     SurfaceMaterialInteractionSample materialSample;
 
@@ -1081,8 +1067,7 @@ SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcDiffuseRefl
     evalHammonDiffuse(opaqueSurfaceMaterialInteraction.albedo, opaqueSurfaceMaterialInteraction.isotropicRoughness,
       inputDirectionDotOutputDirection, normalDotInputDirection,
       normalDotOutputDirection, normalDotMicrofacetNormal) * lambertTransmissionWeight;
-  f16vec3 throughput = safePositiveDivide(weight, f16vec3(cosineHemisphereSolidAnglePdf), materialEpsilon) *
-    normalDotInputDirection;
+  const f16vec3 throughput = safePositiveDivide(weight, f16vec3(cosineHemisphereSolidAnglePdf), materialEpsilon) * normalDotInputDirection;
 
   // Return the material sample
 
@@ -1120,69 +1105,61 @@ SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcSpecularRef
   const float lobeTrimming = calculateLobeTrimmingFactor(
     opaqueSurfaceMaterialInteraction.isotropicRoughness, cb.primaryIndirectNrd.specularLobeTrimmingParams);
 
-  f16vec3 inputDirection;
-
-  float16_t /* v.h */ outputDirectionDotMicrofacetNormal;
-  float16_t /* t.v */ tangentDotOutputDirection;
-  float16_t /* b.v */ bitangentDotOutputDirection;
-  float16_t /* n.v */ normalDotOutputDirection;
-  float16_t /* t.l */ tangentDotInputDirection;
-  float16_t /* b.l */ bitangentDotInputDirection;
-  float16_t /* n.l */ normalDotInputDirection;
-  float16_t /* t.h */ tangentDotMicrofacetNormal;
-  float16_t /* b.h */ bitangentDotMicrofacetNormal;
-  float16_t /* n.h */ normalDotMicrofacetNormal;
-  
-  // Random sampling may end up with an invalid sample, which will contribute 0 radiance to the output and bias the result to be darker
-  // Apply rejection sampling on top by taking a few random sample attempts to find a valid sample instead
-  // Max attempts set empirically, 3-4 attempts cleaned up 95%+ invalid samples
-  // After 10 attempts there were only a handful pixels left with invalid samples, setting this to 15 was a safe upper bound to handle most pixels
-  int32_t sampleAttemptsRemaining = 15; 
-
-#if USE_REJECTION_SAMPLING
-  while (sampleAttemptsRemaining-- > 0)
+  // Sample a new microfacet normal based on the distribution of visible normals
+
+  const f16vec2 u = f16vec2(
+    lobeTrimming * getNextSampleBlueNoise(randomState),
+    getNextSampleBlueNoise(randomState));
+
+#if OPAQUE_MATERIAL_USE_BOUNDED_VNDF_SAMPLING
+  // Note: Manually calculate the lower bound scaling constant rather than using the usual calcGGXBoundedVisibleNormalDistributionSample
+  // helper function. This is done so that this k value only needs to be computed once, as it is also needed for how the PDF is calculated
+  // later (which is worked into the throughput math here).
+  const float16_t k = calcBoundedVNDFLowerBoundScalar(opaqueSurfaceMaterialInteraction.anisotropicRoughness, outputDirection);
+
+  const f16vec3 microfacetNormal = calcGGXVisibleNormalDistributionSphericalCapSample(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    outputDirection,
+    u,
+    k);
 #else
-  sampleAttemptsRemaining = -1;   // Mark sample as invalid - it's updated below
+  const f16vec3 microfacetNormal = calcGGXVisibleNormalDistributionSample(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    outputDirection,
+    u);
 #endif
-  {
-    // Sample a new microfacet normal based on the distribution of visible normals
 
-    const f16vec2 u = f16vec2(
-      lobeTrimming * getNextSampleBlueNoise(randomState), 
-      getNextSampleBlueNoise(randomState));
-    const f16vec3 microfacetNormal = calcGGXVisibleNormalDistributionSample(opaqueSurfaceMaterialInteraction.anisotropicRoughness, outputDirection, u);
+  // Reflect the output direction across the microfacet normal to get the input direction
 
-    // Reflect the output direction across the microfacet normal to get the input direction
+  const f16vec3 inputDirection = reflect(-outputDirection, microfacetNormal);
 
-    inputDirection = reflect(-outputDirection, microfacetNormal);
+  // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
+  // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
 
-    // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
-    // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
+  // Calculate dot products used for evaluation
+  // Note: Dot products against tangent space basis vectors are single components of the other vector
 
-    // Calculate dot products used for evaluation
-    // Note: Dot products against tangent space basis vectors are single components of the other vector
+  const float16_t /* v.h */ outputDirectionDotMicrofacetNormal = dot(outputDirection, microfacetNormal);
+  const float16_t /* t.v */ tangentDotOutputDirection = outputDirection.x;
+  const float16_t /* b.v */ bitangentDotOutputDirection = outputDirection.y;
+  const float16_t /* n.v */ normalDotOutputDirection = outputDirection.z;
+  const float16_t /* t.l */ tangentDotInputDirection = inputDirection.x;
+  const float16_t /* b.l */ bitangentDotInputDirection = inputDirection.y;
+  const float16_t /* n.l */ normalDotInputDirection = inputDirection.z;
+  const float16_t /* t.h */ tangentDotMicrofacetNormal = microfacetNormal.x;
+  const float16_t /* b.h */ bitangentDotMicrofacetNormal = microfacetNormal.y;
+  const float16_t /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
 
-    /* v.h */ outputDirectionDotMicrofacetNormal = dot(outputDirection, microfacetNormal);
-    /* t.v */ tangentDotOutputDirection = outputDirection.x;
-    /* b.v */ bitangentDotOutputDirection = outputDirection.y;
-    /* n.v */ normalDotOutputDirection = outputDirection.z;
-    /* t.l */ tangentDotInputDirection = inputDirection.x;
-    /* b.l */ bitangentDotInputDirection = inputDirection.y;
-    /* n.l */ normalDotInputDirection = inputDirection.z;
-    /* t.h */ tangentDotMicrofacetNormal = microfacetNormal.x;
-    /* b.h */ bitangentDotMicrofacetNormal = microfacetNormal.y;
-    /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
+  // Check if the sampled direction is valid
+  // Note: Also checking the output direction here, in theory this should be valid with pure triangle normals, but normal maps
+  // allow for surfaces to be viewed from outside the actual normal's hemisphere, resulting in an invalid sample. Additionally,
+  // while bounded VNDF sampling does not produce any invalid directions outside the normal's hemisphere in normal cases, it
+  // is not tightly bounded in cases with anisotropy, so this check is still needed. If a tighter bound is found in the future
+  // which handles anisotropy as well though, the sampled reflected direction check may be able to be removed.
 
-    if (normalDotOutputDirection > 0.h && normalDotInputDirection > 0.h)
-#if USE_REJECTION_SAMPLING 
-      break;
-#else
-      sampleAttemptsRemaining = 1;
-#endif      
-  }
-   
-  // Failed to find a valid sample
-  if (sampleAttemptsRemaining == -1)
+  // Note: < used rather than <= as vectors perpendicular to the normal (i.e. parallel to the surface) can be considered in the
+  // hemisphere without colliding with the triangle. Convention can vary but using this here to be consistent with diffuse sampling.
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
   {
     SurfaceMaterialInteractionSample materialSample;
 
@@ -1216,30 +1193,66 @@ SurfaceMaterialInteractionSample opaqueSurfaceMaterialInteractionCalcSpecularRef
   const f16vec3 finalFresnel = fresnel;
 #endif
 
+#if OPAQUE_MATERIAL_USE_BOUNDED_VNDF_SAMPLING
+  // Note: More typical G2/G1 calculation with special bounded G1 using the bounded scalar k calculated earlier for sampling
+  // the bounded VNDF lobe. This must be done instead of the usual G2/G1 optimization as the bounding of G1 does not allow for
+  // the same mathematical optimization to be performed.
+  const float16_t G2 = evalHeightCorrelatedGGXShadowingMasking(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    tangentDotInputDirection, tangentDotOutputDirection,
+    bitangentDotInputDirection, bitangentDotOutputDirection,
+    normalDotInputDirection, normalDotOutputDirection);
+  const float16_t G1 = evalGGXShadowing(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
+    k);
+  const float16_t G2OverG1 = safePositiveDivide(G2, G1, materialEpsilon);
+#else
+  // Note: Algebraic simplification of height correlated G2/G1 to reduce evaluation cost. Could be simplified a bit further
+  // I think however by calculating lambda for the input/output directions and evaluating (1 + L(O)) / (1 + L(O) + L(I)) as
+  // this would avoid a multiplication, but may not benefit from the somewhat simplified G1 form itself. Also note that the paper
+  // this is from uses w_i for the view direction (output direction in our case) and w_o for the light direction which can be a bit
+  // confusing since it is backwards from our notation.
+  // [Heitz 2015, "Implementing a Simple Anisotropic Rough Diffuse Material with Stochastic Evaluation"]
   const float16_t outputGGXShadowing = evalGGXShadowing(
     opaqueSurfaceMaterialInteraction.anisotropicRoughness,
     tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection);
   const float16_t inputGGXShadowing = evalGGXShadowing(
     opaqueSurfaceMaterialInteraction.anisotropicRoughness,
     tangentDotInputDirection, bitangentDotInputDirection, normalDotInputDirection);
-  // Note: Algebraic simplification of height correlated G2/G1 to reduce evaluation cost.
-  // [Heitz 2015, "Implementing a Simple Anisotropic Rough Diffuse Material with Stochastic Evaluation"]
   const float16_t G2OverG1 = safePositiveDivide(
     inputGGXShadowing,
     inputGGXShadowing + outputGGXShadowing - (inputGGXShadowing * outputGGXShadowing),
     materialEpsilon);
+#endif
   // Note: Simplified version of (weight / solidAnglePdf) * (n.l) when sampling from the distribution of
   // visible normals.
   // [Heitz 2014, "Importance sampling microfacet-based BSDFs using the distribution of visible normals"]
-  const f16vec3 throughput = finalFresnel * G2OverG1; 
+  const f16vec3 throughput = finalFresnel * G2OverG1;
 
   // Calculate the solid angle PDF of the sample
-
+  // Note: Ideally the PDF wouldn't be needed here as it's already been baked into the throughput for a simpler calculation
+  // but some logic needs the PDF so it is included here for those cases (whereas others can optimize this part out).
+  // Todo: Reuse the G1 calculation from earlier in this PDF rather than redundantly recalculating it (as G1
+  // is part of the VNDF PDF).
+
+#if OPAQUE_MATERIAL_USE_BOUNDED_VNDF_SAMPLING
+  // Note: Normal version instead of evalGGXBoundedVisibleNormalDistributionSamplePdf used here so that the shared k
+  // value computed from earlier can be used here to save some computation. Note that really the entire G1 function
+  // evaluation can be shared between the throughput calculation and the PDF here.
+  const float solidAnglePdf = evalGGXVisibleNormalDistributionSamplePdf(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
+    tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal,
+    outputDirectionDotMicrofacetNormal,
+    k);
+#else
   const float solidAnglePdf = evalGGXVisibleNormalDistributionSamplePdf(
     opaqueSurfaceMaterialInteraction.anisotropicRoughness,
     tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
     tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal,
     outputDirectionDotMicrofacetNormal);
+#endif
 
   // Return the material sample
 
@@ -1466,7 +1479,7 @@ SurfaceMaterialInteractionPSRSample opaqueSurfaceMaterialInteractionCalcPSRRefle
   const float16_t /* n.v */ normalDotOutputDirection = outputDirectionDotMicrofacetNormal;
   const float16_t /* n.l */ normalDotInputDirection = dot(inputDirection, normal);
 
-  if (normalDotOutputDirection <= float16_t(0.0) || normalDotInputDirection <= float16_t(0.0))
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
   {
     SurfaceMaterialInteractionPSRSample materialPSRSample;
 
@@ -1571,8 +1584,8 @@ float opaqueSurfaceMaterialInteractionCalcDiffuseReflectionSolidAnglePdf(
   MinimalRayInteraction minimalRayInteraction,
   f16vec3 inputDirection)
 {
-  float cosTheta = dot(inputDirection, opaqueSurfaceMaterialInteraction.shadingNormal);
-  float solidAnglePdf = getCosineHemisphereSolidAnglePdf(saturate(cosTheta));
+  const float cosTheta = dot(inputDirection, opaqueSurfaceMaterialInteraction.shadingNormal);
+  const float solidAnglePdf = getCosineHemisphereSolidAnglePdf(saturate(cosTheta));
 
   return solidAnglePdf;
 }
@@ -1582,58 +1595,56 @@ float opaqueSurfaceMaterialInteractionCalcSpecularReflectionSolidAnglePdf(
   MinimalRayInteraction minimalRayInteraction,
   f16vec3 worldInputDirection)
 {
-
   const f16vec4 tangentToWorldSpaceQuaternion =
     quaternionCreateOrientation(materialTangentSpaceNormal, opaqueSurfaceMaterialInteraction.shadingNormal);
   const f16vec4 worldToTangentSpaceQuaternion = quaternionInverse(tangentToWorldSpaceQuaternion);
 
   // Set up relevant input vectors in tangent space
+
   const f16vec3 outputDirection = quaternionTransformVector(worldToTangentSpaceQuaternion, minimalRayInteraction.viewDirection);
   const f16vec3 inputDirection = quaternionTransformVector(worldToTangentSpaceQuaternion, worldInputDirection);
 
-  // Calculate the specular lobe trimming factor based on roughness and NRD parameters.
-  // Note: use the 'primaryIndirectNrd' parameter set because it is always controlled by the active indirect denoiser.
-  // TODO: ideally this lobe trimming should only happen at the primary vertex, but there's likely no big difference.
+  // Calculate the microfacet normal
 
-  float16_t /* v.h */ outputDirectionDotMicrofacetNormal;
-  float16_t /* t.v */ tangentDotOutputDirection;
-  float16_t /* b.v */ bitangentDotOutputDirection;
-  float16_t /* n.v */ normalDotOutputDirection;
-  float16_t /* t.l */ tangentDotInputDirection;
-  float16_t /* b.l */ bitangentDotInputDirection;
-  float16_t /* n.l */ normalDotInputDirection;
-  float16_t /* t.h */ tangentDotMicrofacetNormal;
-  float16_t /* b.h */ bitangentDotMicrofacetNormal;
-  float16_t /* n.h */ normalDotMicrofacetNormal;
-  
   const f16vec3 microfacetNormal = normalize(inputDirection + outputDirection);
 
-  // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
-  // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
-
   // Calculate dot products used for evaluation
   // Note: Dot products against tangent space basis vectors are single components of the other vector
 
-  /* v.h */ outputDirectionDotMicrofacetNormal = dot(outputDirection, microfacetNormal);
-  /* t.v */ tangentDotOutputDirection = outputDirection.x;
-  /* b.v */ bitangentDotOutputDirection = outputDirection.y;
-  /* n.v */ normalDotOutputDirection = outputDirection.z;
-  /* t.l */ tangentDotInputDirection = inputDirection.x;
-  /* b.l */ bitangentDotInputDirection = inputDirection.y;
-  /* n.l */ normalDotInputDirection = inputDirection.z;
-  /* t.h */ tangentDotMicrofacetNormal = microfacetNormal.x;
-  /* b.h */ bitangentDotMicrofacetNormal = microfacetNormal.y;
-  /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
+  const float16_t /* v.h */ outputDirectionDotMicrofacetNormal = dot(outputDirection, microfacetNormal);
+  const float16_t /* t.v */ tangentDotOutputDirection = outputDirection.x;
+  const float16_t /* b.v */ bitangentDotOutputDirection = outputDirection.y;
+  const float16_t /* n.v */ normalDotOutputDirection = outputDirection.z;
+  const float16_t /* t.l */ tangentDotInputDirection = inputDirection.x;
+  const float16_t /* b.l */ bitangentDotInputDirection = inputDirection.y;
+  const float16_t /* n.l */ normalDotInputDirection = inputDirection.z;
+  const float16_t /* t.h */ tangentDotMicrofacetNormal = microfacetNormal.x;
+  const float16_t /* b.h */ bitangentDotMicrofacetNormal = microfacetNormal.y;
+  const float16_t /* n.h */ normalDotMicrofacetNormal = microfacetNormal.z;
+
+  // Todo: Sanity check if inputDirection is facing the wrong direction (Into the surface), currently this does not have much
+  // effect and getting the triangle normal here requires a fairly high memory bandwidth cost, so for now it is not done
 
-  if (normalDotOutputDirection <= 0.h || normalDotInputDirection <= 0.h)
-    return 0;
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
+  {
+    return 0.0f;
+  }
 
   // Calculate the solid angle PDF of the sample
+
+#if OPAQUE_MATERIAL_USE_BOUNDED_VNDF_SAMPLING
+  const float solidAnglePdf = evalGGXBoundedVisibleNormalDistributionSamplePdf(
+    opaqueSurfaceMaterialInteraction.anisotropicRoughness,
+    tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
+    tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal,
+    outputDirectionDotMicrofacetNormal);
+#else
   const float solidAnglePdf = evalGGXVisibleNormalDistributionSamplePdf(
     opaqueSurfaceMaterialInteraction.anisotropicRoughness,
     tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
     tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal,
     outputDirectionDotMicrofacetNormal);
+#endif
 
   return solidAnglePdf;
 }
@@ -1643,8 +1654,8 @@ float opaqueSurfaceMaterialInteractionCalcDiffuseTransmissionSolidAnglePdf(
   MinimalRayInteraction minimalRayInteraction,
   f16vec3 inputDirection)
 {
-  float cosTheta = dot(-inputDirection, opaqueSurfaceMaterialInteraction.shadingNormal);
-  float solidAnglePdf = getCosineHemisphereSolidAnglePdf(saturate(cosTheta));
+  const float cosTheta = dot(-inputDirection, opaqueSurfaceMaterialInteraction.shadingNormal);
+  const float solidAnglePdf = getCosineHemisphereSolidAnglePdf(saturate(cosTheta));
 
   return solidAnglePdf;
 }
@@ -1661,13 +1672,13 @@ float opaqueSurfaceMaterialInteractionCalcSolidAnglePdf(
   float16_t specularReflectionProbability = 0.0f;
   float16_t opacityTransmissionProbability = 0.0f;
   float16_t diffuseTransmissionProbability = 0.0f;
-  bool isValid = opaqueSurfaceMaterialInteractionCalcLobeProbability(
+  const bool isValid = opaqueSurfaceMaterialInteractionCalcLobeProbability(
     opaqueSurfaceMaterialInteraction, minimalRayInteraction.viewDirection,
     diffuseReflectionProbability, specularReflectionProbability, opacityTransmissionProbability, diffuseTransmissionProbability);
 
-  float diffuseSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcDiffuseReflectionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
-  float specularSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcSpecularReflectionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
-  float diffuseTransmissionSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcDiffuseTransmissionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
+  const float diffuseSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcDiffuseReflectionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
+  const float specularSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcSpecularReflectionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
+  const float diffuseTransmissionSolidAnglePdf = opaqueSurfaceMaterialInteractionCalcDiffuseTransmissionSolidAnglePdf(opaqueSurfaceMaterialInteraction, minimalRayInteraction, inputDirection);
 
   return diffuseSolidAnglePdf * diffuseReflectionProbability +
          specularSolidAnglePdf * specularReflectionProbability +
@@ -1696,14 +1707,14 @@ SurfaceMaterialInteractionSplitWeight opaqueSurfaceMaterialInteractionCalcApprox
     diffuseTransmissionWeight = subsurfaceMaterial.singleScatteringAlbedo;
   }
 
-  if (normalDotOutputDirection <= float16_t(0.0) || normalDotInputDirection <= float16_t(0.0))
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
   {
     SurfaceMaterialInteractionSplitWeight splitWeight;
 
     splitWeight.diffuseReflectionWeight = f16vec3(0.0, 0.0, 0.0);
     splitWeight.specularReflectionWeight = f16vec3(0.0, 0.0, 0.0);
 
-    if (normalDotOutputDirection <= float16_t(0.0))
+    if (normalDotOutputDirection < 0.0h)
     {
       splitWeight.diffuseTransmissionWeight = f16vec3(0.0, 0.0, 0.0);
     }
@@ -1780,9 +1791,10 @@ SurfaceMaterialInteractionSplitWeight opaqueSurfaceMaterialInteractionCalcProjec
       inputDirectionDotOutputDirection);
   }
 
-  if (normalDotOutputDirection <= 0.0h || normalDotInputDirection <= 0.0h)
+  if (normalDotOutputDirection < 0.0h || normalDotInputDirection < 0.0h)
   {
     SurfaceMaterialInteractionSplitWeight splitWeight;
+
     splitWeight.diffuseReflectionWeight = f16vec3(0.0, 0.0, 0.0);
     splitWeight.specularReflectionWeight = f16vec3(0.0, 0.0, 0.0);
     splitWeight.diffuseTransmissionWeight = diffuseTransmissionWeight * transmissionNormalDotInputDirection;
diff --git a/src/dxvk/shaders/rtx/utility/brdf.slangh b/src/dxvk/shaders/rtx/utility/brdf.slangh
index 2659791fb..a768ea858 100644
--- a/src/dxvk/shaders/rtx/utility/brdf.slangh
+++ b/src/dxvk/shaders/rtx/utility/brdf.slangh
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
 
 #include "rtx/utility/math.slangh"
 #include "rtx/utility/color.slangh"
+#include "rtx/utility/sampling.slangh"
 #include "rtx/concept/surface_material/surface_material.h"
 
 // Surface/Volume Material Constants
@@ -311,10 +312,13 @@ float evalGGXNormalDistributionIsotropic(float16_t isotropicRoughness,
 
 // Shadowing (G1): Smith Anisotropic GGX
 // [Heitz 2014, "Understanding the Masking-Shadowing Function in Microfacet Based BRDFs"]
+// Note: Augmented with k parameter for lower bound scaling for usage with bounded VNDF sampling
+// PDF calculations and is unused in more typical cases.
 float16_t evalGGXShadowing(f16vec2 anisotropicRoughness,
                            float16_t /* t.d */ tangentDotDirection,
                            float16_t /* b.d */ bitangentDotDirection,
-                           float16_t /* n.d */ normalDotDirection)
+                           float16_t /* n.d */ normalDotDirection,
+                           float16_t k = 1.0h)
 {
   const f16vec3 directionDenominatorVector =
     f16vec3(normalDotDirection,
@@ -322,8 +326,16 @@ float16_t evalGGXShadowing(f16vec2 anisotropicRoughness,
             anisotropicRoughness.y * bitangentDotDirection);
   const float16_t sqrtDirectionDenominator = length(directionDenominatorVector);
 
+  // Note: This is part of the conditional logic for Listing 2 in "Bounded VNDF Sampling for Smith-GGX Reflections"
+  // for use with bounded VNDF sampling, but integrated into the G1 function (as this is where it seems to fit given the
+  // paper, but it is not 100% clear as the paper does not explicitly have the G function written out in the PDF).
+  // When k = 1 (default) this line is equal to normalDotDirection.
+  // Note also that > is used here instead of >= like in the paper as Listing 2 is inconsistent with
+  // Listing 1 and how a backfacing shading normal is defined (defined as z <= 0 in the paper, not z < 0).
+  const float16_t b = normalDotDirection > 0.0h ? k * normalDotDirection : normalDotDirection;
+
   return safePositiveDivide(
-    float16_t(2.0) * normalDotDirection, normalDotDirection + sqrtDirectionDenominator, materialEpsilon);
+    float16_t(2.0) * normalDotDirection, b + sqrtDirectionDenominator, materialEpsilon);
 }
 
 // Shadowing/Masking (G2): Anisotropic Height-correlated GGX
@@ -393,16 +405,19 @@ float16_t evalGGXVisibleNormalDistribution(f16vec2 anisotropicRoughness,
                                            float16_t /* t.h */ tangentDotMicrofacetNormal,
                                            float16_t /* b.h */ bitangentDotMicrofacetNormal,
                                            float16_t /* n.h */ normalDotMicrofacetNormal,
-                                           float16_t /* v.h */ outputDirectionDotMicrofacetNormal)
+                                           float16_t /* v.h */ outputDirectionDotMicrofacetNormal,
+                                           float16_t k = 1.0h)
 {
   // Note: Multiply all components of the numerator together in advance to ideally result in as small a value as is possible before it
   // may need to be clamped by saferPositiveDivide to avoid overflows (this assumes the terms other than the normal distribution function
   // will be <1, which I think is true for the GGX shadowing function and of course the dot product).
   const float16_t numerator =
-    evalGGXShadowing(anisotropicRoughness, tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection) *
+    evalGGXShadowing(anisotropicRoughness, tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection, k) *
     max(float16_t(0.0), outputDirectionDotMicrofacetNormal) *
     evalGGXNormalDistribution(anisotropicRoughness, tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal);
 
+  // Note: There is some optimization potential here, normalDotOutputDirection cancels out with a term in the G1 function used in the numerator
+  // so if this was written differently it may save a few operations. Done like this though for now just for less redundant implementations.
   return saferPositiveDivide(numerator, normalDotOutputDirection, materialEpsilon, materialMaximum);
 }
 
@@ -418,25 +433,59 @@ float evalGGXVisibleNormalDistributionSamplePdf(f16vec2 anisotropicRoughness,
                                                 float16_t /* t.h */ tangentDotMicrofacetNormal,
                                                 float16_t /* b.h */ bitangentDotMicrofacetNormal,
                                                 float16_t /* n.h */ normalDotMicrofacetNormal,
-                                                float16_t /* v.h */ outputDirectionDotMicrofacetNormal)
+                                                float16_t /* v.h */ outputDirectionDotMicrofacetNormal,
+                                                float16_t k = 1.0h)
 {
   const float16_t ggxVisibleNormalDistribution = evalGGXVisibleNormalDistribution(
     anisotropicRoughness,
     tangentDotOutputDirection, bitangentDotOutputDirection, normalDotOutputDirection,
     tangentDotMicrofacetNormal, bitangentDotMicrofacetNormal, normalDotMicrofacetNormal,
-    outputDirectionDotMicrofacetNormal);
+    outputDirectionDotMicrofacetNormal, k);
 
   return safePositiveDivide(
     float(ggxVisibleNormalDistribution), 4.0f * float(outputDirectionDotMicrofacetNormal), float(materialEpsilon));
 }
 
-/// Calculates Beer-Lambert attenuation at a specified distance through a medium with a specified attenuation coefficient.
+// [Eto 2023, "Bounded VNDF Sampling for Smith-GGX Reflections"]
+// Note: Evaluates evalGGXVisibleNormalDistributionSamplePdf with the lower bound scalar k derived from the output direction. To be used with bounded VNDF sampling only.
+float evalGGXBoundedVisibleNormalDistributionSamplePdf(f16vec2 anisotropicRoughness,
+                                                       float16_t /* t.v */ tangentDotOutputDirection,
+                                                       float16_t /* b.v */ bitangentDotOutputDirection,
+                                                       float16_t /* n.v */ normalDotOutputDirection,
+                                                       float16_t /* t.h */ tangentDotMicrofacetNormal,
+                                                       float16_t /* b.h */ bitangentDotMicrofacetNormal,
+                                                       float16_t /* n.h */ normalDotMicrofacetNormal,
+                                                       float16_t /* v.h */ outputDirectionDotMicrofacetNormal)
+{
+  // Calculate the lower bound scaling constant
+  // Note: The output direction dotted with each axis of the tangent frame is the output direction in tangent space,
+  // which is what calcBoundedVNDFLowerBoundScalar accepts.
+
+  const float16_t k = calcBoundedVNDFLowerBoundScalar(anisotropicRoughness, f16vec3(
+    tangentDotOutputDirection,
+    bitangentDotOutputDirection,
+    normalDotOutputDirection
+  ));
+
+  return evalGGXVisibleNormalDistributionSamplePdf(
+    anisotropicRoughness,
+    tangentDotOutputDirection,
+    bitangentDotOutputDirection,
+    normalDotOutputDirection,
+    tangentDotMicrofacetNormal,
+    bitangentDotMicrofacetNormal,
+    normalDotMicrofacetNormal,
+    outputDirectionDotMicrofacetNormal,
+    k);
+}
+
+// Calculates Beer-Lambert attenuation at a specified distance through a medium with a specified attenuation coefficient.
 f16vec3 evalBeerLambertAttenuation(in const vec3 attenuationCoefficient, in const float16_t distance)
 {
   return exp(-attenuationCoefficient * distance);
 }
 
-/// Calculates Beer-Lambert attenuation at an infinite distance through a medium with a specified attenuation coefficient.
+// Calculates Beer-Lambert attenuation at an infinite distance through a medium with a specified attenuation coefficient.
 f16vec3 evalBeerLambertInfiniteAttenuation(in const vec3 attenuationCoefficient)
 {
   // Note: Return a per-channel attenuation of 0 if any attenuation is desired due to infinite attenuation
diff --git a/src/dxvk/shaders/rtx/utility/sampling.slangh b/src/dxvk/shaders/rtx/utility/sampling.slangh
index 662c78549..e294f561e 100644
--- a/src/dxvk/shaders/rtx/utility/sampling.slangh
+++ b/src/dxvk/shaders/rtx/utility/sampling.slangh
@@ -166,12 +166,15 @@ f16vec3 calcUniformConeDirectionSample(f16vec2 rndSample, float cosThetaMax, ino
 }
 
 // [Heitz 2018, "Sampling the GGX Distribution of Visible Normals"]
+// Calculates a new direction based on the visible GGX normal distribution function for a given view direction using
+// the projective method (projecting a 2D sampled area on to the hemisphere). This method is slightly worse performance
+// wise than later methods (e.g. spherical cap based sampling).
 // Note: This function operates in tangent space so any vectors given to it or retreived from it should also be in
 // tangent space.
-f16vec3 calcGGXVisibleNormalDistributionSample(f16vec2 anisotropicRoughness, f16vec3 Ve, f16vec2 u)
+f16vec3 calcGGXVisibleNormalDistributionProjectedSample(f16vec2 anisotropicRoughness, f16vec3 Ve, f16vec2 u)
 {
   // Section 3.2: transforming the view direction to the hemisphere configuration
-  const f16vec3 Vh = normalize(f16vec3(anisotropicRoughness.x * Ve.x, anisotropicRoughness.y * Ve.y, Ve.z));
+  const f16vec3 Vh = normalize(f16vec3(anisotropicRoughness * Ve.xy, Ve.z));
 
   // Section 4.1: orthonormal basis
   // Note: This implementation deviates from the paper's by using Pixar's orthonormal basis function as this eliminates
@@ -193,11 +196,107 @@ f16vec3 calcGGXVisibleNormalDistributionSample(f16vec2 anisotropicRoughness, f16
   const f16vec3 Nh = t1 * T1 + t2 * T2 + sqrt(max(float16_t(0.0), float16_t(1.0 - (t1sq + t2sq)))) * Vh;
 
   // Section 3.4: transforming the normal back to the ellipsoid configuration
-  const f16vec3 Ne = normalize(f16vec3(anisotropicRoughness.x * Nh.x, anisotropicRoughness.y * Nh.y, max(float16_t(0.0), Nh.z)));
+  const f16vec3 Ne = normalize(f16vec3(anisotropicRoughness * Nh.xy, max(float16_t(0.0), Nh.z)));
 
   return Ne;
 }
 
+// [Dupuy 2023, "Sampling Visible GGX Normals with Spherical Caps"]
+// Calculates a new direction based on the visible GGX normal distribution function for a given view direction using
+// the spherical cap method. The lower bound scalar k should be left at 1 in typical use cases, it is intended to be
+// set by other sampling methods (e.g. bounded VNDF sampling) which use the general spherical cap method as a basis.
+// Note: This function operates in tangent space so any vectors given to it or retrieved from it should also be in
+// tangent space.
+f16vec3 calcGGXVisibleNormalDistributionSphericalCapSample(f16vec2 anisotropicRoughness, f16vec3 outputDirection, f16vec2 u, float16_t k = 1.0h)
+{
+  // Unstretch the view direction back to a hemisphere
+  const f16vec3 unstretchedOutputDirection = normalize(f16vec3(
+    anisotropicRoughness * outputDirection.xy,
+    outputDirection.z
+  ));
+
+  // Note: This b is from Listing 1 in "Bounded VNDF Sampling for Smith-GGX Reflections" for use with bounded
+  // VNDF sampling. When k = 1 (default) this line is equal to unstretchedOutputDirection.z.
+  // Note: Probably could be >= 0 rather than > 0 to consider parallel view directions to the triangle as I do not think
+  // that would affect the math, but keeping it like this for now. If ever changed however, a similar condition exists
+  // in evalGGXShadowing which would also need to be updated.
+  const float16_t b = outputDirection.z > 0.0h ? k * unstretchedOutputDirection.z : unstretchedOutputDirection.z;
+
+  // Listing 3: Spherical cap sampling (1 - z^2 is clamped to [0, 1] in 16 bit precision before taking the square root)
+  const float16_t phi = float16_t(twoPi) * u.x;
+  const float16_t z = fma(1.0h - u.y, 1.0h + b, -b);
+  const float16_t sinTheta = sqrt(clamp(1.0h - z * z, 0.0h, 1.0h));
+  const float16_t x = sinTheta * cos(phi);
+  const float16_t y = sinTheta * sin(phi);
+  const f16vec3 c = f16vec3(x, y, z);
+  const f16vec3 unstretchedMicrofacetNormal = c + unstretchedOutputDirection;
+
+  // Stretch the microfacet normal back to the anisotropic ellipsoid
+  const f16vec3 microfacetNormal = normalize(f16vec3(
+    anisotropicRoughness * unstretchedMicrofacetNormal.xy,
+    unstretchedMicrofacetNormal.z
+  ));
+
+  return microfacetNormal;
+}
+
+// Note: This macro selects between different implementations of typical (non-bounded) VNDF sampling to validate
+// behavior and analyze performance. The current options are as follows:
+// 0 - Projected: Original method from 2018, used in Remix until late 2024.
+// 1 - Spherical Cap: Newer method from 2023, simpler implementation and better performance than projected.
+#define GGX_VNDF_SAMPLING_METHOD 1
+
+// Calculates a new direction based on the visible GGX normal distribution function for a given view direction using
+// the currently selected method (via the GGX_VNDF_SAMPLING_METHOD macro). All sampling methods this function may use
+// have the same PDF (which can be calculated via evalGGXVisibleNormalDistributionSamplePdf).
+f16vec3 calcGGXVisibleNormalDistributionSample(f16vec2 anisotropicRoughness, f16vec3 outputDirection, f16vec2 u)
+{
+#if GGX_VNDF_SAMPLING_METHOD == 0
+  return calcGGXVisibleNormalDistributionProjectedSample(anisotropicRoughness, outputDirection, u);
+#elif GGX_VNDF_SAMPLING_METHOD == 1
+  return calcGGXVisibleNormalDistributionSphericalCapSample(anisotropicRoughness, outputDirection, u);
+#else
+  #error Invalid GGX VNDF sampling method.
+#endif
+}
+
+// [Eto 2023, "Bounded VNDF Sampling for Smith-GGX Reflections"]
+// Calculates the lower bound scaling constant, k, to apply to bounded VNDF sampling from Equation 5. The output direction is expected in tangent space.
+float16_t calcBoundedVNDFLowerBoundScalar(f16vec2 anisotropicRoughness, f16vec3 outputDirection)
+{
+  const float16_t a = saturate(min(anisotropicRoughness.x, anisotropicRoughness.y));
+  // Note: The sign term from the paper is omitted as it never applies when a <= 1 (which is always the case with our
+  // requirements on how roughness is parameterized, and is additionally enforced by the saturate above).
+  const float16_t s = 1.0h + length(outputDirection.xy);
+  const float16_t a2 = square(a);
+  const float16_t s2 = square(s);
+  const float16_t k = (1.0h - a2) * s2 / (s2 + a2 * square(outputDirection.z));
+
+  return k;
+}
+
+// [Eto 2023, "Bounded VNDF Sampling for Smith-GGX Reflections"]
+// Calculates a new direction based on the visible GGX normal distribution function for a given view direction using
+// bounded VNDF sampling. Unlike normal VNDF sampling, this bounded approach eliminates many (not all) "invalid" microfacet
+// normals which when reflected across would result in a direction outside the visible hemisphere. This method should eliminate
+// most invalid samples for isotropic roughness, but depending on the output direction some samples will still be invalid, more so
+// for anisotropic roughness where the lower bound is not the infimum with this approach (but still better than older approaches).
+// Do note that roughness values provided must be in the range [0, 1] due to assumptions made in some of the math. This can
+// be changed to allow unusual roughness values, but the lower bound is significantly worse if either roughness parameter is
+// greater than 1 anyway so it is not desirable.
+// Additionally, since this bounded VNDF sampling changes the sampled distribution, care must be taken to use the proper PDF
+// (evalGGXBoundedVisibleNormalDistributionSamplePdf) as the usual GGX VNDF PDF will not work for this.
+f16vec3 calcGGXBoundedVisibleNormalDistributionSample(f16vec2 anisotropicRoughness, f16vec3 outputDirection, f16vec2 u)
+{
+  // Calculate the lower bound scaling constant
+
+  const float16_t k = calcBoundedVNDFLowerBoundScalar(anisotropicRoughness, outputDirection);
+
+  // Sample the VNDF with a bounded spherical cap based on the scaling constant
+
+  return calcGGXVisibleNormalDistributionSphericalCapSample(anisotropicRoughness, outputDirection, u, k);
+}
+
 // Calculates the cosine of the angle CAB between spherical triangle points on the surface of a unit sphere A, B and C.
 float sphericalTriangleCosineAngle(vec3 A, vec3 B, vec3 C)
 {