From 28b71e7c859f8fb6e9f76798ac8feb0c39712315 Mon Sep 17 00:00:00 2001 From: sfriedmapixar Date: Thu, 15 Aug 2024 10:58:32 -0700 Subject: [PATCH] fix(batched): Assume BatchedRendererServices texture derivatives are in st space. (#1828) The convention in the single-point RendererServices is that the texture call returns derivatives in st space, and they are transformed to xy space before returning from the wrapper to RendererServices. This change makes BatchedRendererServices follow the same convention. --------- Signed-off-by: Stephen Friedman --- src/liboslexec/wide/wide_optexture.cpp | 223 +++++++++++++++++++++---- 1 file changed, 189 insertions(+), 34 deletions(-) diff --git a/src/liboslexec/wide/wide_optexture.cpp b/src/liboslexec/wide/wide_optexture.cpp index 63353be22..2f99d6af8 100644 --- a/src/liboslexec/wide/wide_optexture.cpp +++ b/src/liboslexec/wide/wide_optexture.cpp @@ -117,14 +117,6 @@ default_texture(BatchedRendererServices* bsr, ustring filename, has_derivs ? (float*)&dresultds_simd : NULL, has_derivs ? (float*)&dresultdt_simd : NULL); - OIIO::simd::vfloat4 dresultdx_simd; - OIIO::simd::vfloat4 dresultdy_simd; - if (has_derivs) { - // Correct our st texture space gradients into xy-space gradients - dresultdx_simd = dresultds_simd * dsdx + dresultdt_simd * dtdx; - dresultdy_simd = dresultds_simd * dsdy + dresultdt_simd * dtdy; - } - // NOTE: regardless of the value of "retVal" we will always copy over the texture system's results. 
// We are relying on the texture system properly filling in missing or fill colors @@ -142,10 +134,10 @@ default_texture(BatchedRendererServices* bsr, ustring filename, MaskedDx resultDx(resultRef); MaskedDy resultDy(resultRef); - resultDx[lane] = Color3(dresultdx_simd[0], dresultdx_simd[1], - dresultdx_simd[2]); - resultDy[lane] = Color3(dresultdy_simd[0], dresultdy_simd[1], - dresultdy_simd[2]); + resultDx[lane] = Color3(dresultds_simd[0], dresultds_simd[1], + dresultds_simd[2]); + resultDy[lane] = Color3(dresultdt_simd[0], dresultdt_simd[1], + dresultdt_simd[2]); } } else if (Masked::is(resultRef)) { alphaChannelIndex = 1; @@ -154,8 +146,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename, MaskedDy resultDy(resultRef); result[lane] = result_simd[0]; if (resultRef.has_derivs()) { - resultDx[lane] = dresultdx_simd[0]; - resultDy[lane] = dresultdy_simd[0]; + resultDx[lane] = dresultds_simd[0]; + resultDy[lane] = dresultdt_simd[0]; } } @@ -165,8 +157,8 @@ default_texture(BatchedRendererServices* bsr, ustring filename, if (alphaRef.has_derivs()) { MaskedDx alphaDx(alphaRef); MaskedDy alphaDy(alphaRef); - alphaDx[lane] = dresultdx_simd[alphaChannelIndex]; - alphaDy[lane] = dresultdy_simd[alphaChannelIndex]; + alphaDx[lane] = dresultds_simd[alphaChannelIndex]; + alphaDy[lane] = dresultdt_simd[alphaChannelIndex]; } } //std::cout << "s: " << s.get(i) << " t: " << t.get(i) << " color: " << resultColor << " " << wideResult.get(i) << std::endl; @@ -311,16 +303,6 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename, has_derivs ? (float*)&dresultdt_simd : nullptr, has_derivs ? 
(float*)&dresultdr_simd : nullptr); - OIIO::simd::vfloat4 dresultdx_simd; - OIIO::simd::vfloat4 dresultdy_simd; - if (has_derivs) { - // Correct our str texture space gradients into xyz-space gradients - dresultdx_simd = dresultds_simd * dPdx.x + dresultdt_simd * dPdx.y - + dresultdr_simd * dPdx.z; - dresultdy_simd = dresultds_simd * dPdy.x + dresultdt_simd * dPdy.y - + dresultdr_simd * dPdy.z; - } - // NOTE: regardless of the value of "retVal" we will always copy over the texture system's results. // We are relying on the texture system properly filling in missing or fill colors @@ -337,10 +319,10 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename, if (resultRef.has_derivs()) { MaskedDx resultDx(resultRef); MaskedDy resultDy(resultRef); - resultDx[lane] = Color3(dresultdx_simd[0], dresultdx_simd[1], - dresultdx_simd[2]); - resultDy[lane] = Color3(dresultdy_simd[0], dresultdy_simd[1], - dresultdy_simd[2]); + resultDx[lane] = Color3(dresultds_simd[0], dresultds_simd[1], + dresultds_simd[2]); + resultDy[lane] = Color3(dresultdt_simd[0], dresultdt_simd[1], + dresultdt_simd[2]); } } else if (Masked::is(resultRef)) { alphaChannelIndex = 1; @@ -349,8 +331,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename, if (resultRef.has_derivs()) { MaskedDx resultDx(resultRef); MaskedDy resultDy(resultRef); - resultDx[lane] = dresultdx_simd[0]; - resultDy[lane] = dresultdy_simd[0]; + resultDx[lane] = dresultds_simd[0]; + resultDy[lane] = dresultdt_simd[0]; } } @@ -361,8 +343,8 @@ default_texture3d(BatchedRendererServices* bsr, ustring filename, if (alphaRef.has_derivs()) { MaskedDx alphaDx(alphaRef); MaskedDy alphaDy(alphaRef); - alphaDx[lane] = dresultdx_simd[alphaChannelIndex]; - alphaDy[lane] = dresultdy_simd[alphaChannelIndex]; + alphaDx[lane] = dresultds_simd[alphaChannelIndex]; + alphaDy[lane] = dresultdt_simd[alphaChannelIndex]; } } @@ -561,6 +543,164 @@ dispatch_environment(BatchedRendererServices* bsr, ustring filename, } // namespace 
+static OSL_NOINLINE void +transformWideTextureGradients(BatchedTextureOutputs& outputs, + Wide dsdx, Wide dtdx, + Wide dsdy, Wide dtdy) +{ + MaskedData resultRef = outputs.result(); + if (resultRef.valid() && resultRef.has_derivs()) { + if (Masked::is(resultRef)) { + OSL_FORCEINLINE_BLOCK + { + MaskedDx drds(resultRef); + MaskedDy drdt(resultRef); + + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + float drdsVal = drds[i]; + float drdtVal = drdt[i]; + float drdx = drdsVal * dsdx[i] + drdtVal * dtdx[i]; + float drdy = drdsVal * dsdy[i] + drdtVal * dtdy[i]; + drds[i] = drdx; + drdt[i] = drdy; + } + } + } else { + // keep assert out of inlined code + OSL_DASSERT(Masked::is(resultRef)); + OSL_FORCEINLINE_BLOCK + { + //printf("doint color\n"); + MaskedDx widedrds(resultRef); + MaskedDy widedrdt(resultRef); + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + Color3 drdsColor = widedrds[i]; + Color3 drdtColor = widedrdt[i]; + + widedrds[i] = drdsColor * dsdx[i] + drdtColor * dtdx[i]; + widedrdt[i] = drdsColor * dsdy[i] + drdtColor * dtdy[i]; + } + } + } + } + + MaskedData alphaRef = outputs.alpha(); + OSL_FORCEINLINE_BLOCK + if (alphaRef.valid() && alphaRef.has_derivs()) { + MaskedDx dads(alphaRef); + MaskedDy dadt(alphaRef); + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + float dadsVal = dads[i]; + float dadtVal = dadt[i]; + float dadx = dadsVal * dsdx[i] + dadtVal * dtdx[i]; + float dady = dadsVal * dsdy[i] + dadtVal * dtdy[i]; + dads[i] = dadx; + dadt[i] = dady; + } + } +} + +static OSL_NOINLINE void +transformWideTextureGradientsTexture3d(BatchedTextureOutputs& outputs, + Wide Pdx, + Wide Pdy, + Wide Pdz) +{ + MaskedData resultRef = outputs.result(); + if (resultRef.valid() && resultRef.has_derivs()) { + if (Masked::is(resultRef)) { + OSL_FORCEINLINE_BLOCK + { + MaskedDx drds(resultRef); + MaskedDy drdt(resultRef); + //MaskedDz 
drdr(resultRef); // our duals don't actually have space for this + + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + float dres_xVal = drds[i]; + float dres_yVal = drdt[i]; + //float dres_zVal = drdr[i]; + + Vec3 v3pdx = Pdx[i]; + Vec3 v3pdy = Pdy[i]; + //Vec3 v3pdz = Pdz[i]; + + float dres_x = dres_xVal * v3pdx.x + + dres_yVal + * v3pdx.y; // + dres_zVal * v3pdx.z; + float dres_y = dres_xVal * v3pdy.x + + dres_yVal + * v3pdy.y; // + dres_zVal * v3pdy.z; + //float dres_z = dres_xVal * v3pdz.x + dres_yVal * v3pdz.y + dres_zVal * v3pdz.z; + + drds[i] = dres_x; + drdt[i] = dres_y; + //drdr[i] = dres_z; + } + } + } else { + // keep assert out of inlined code + OSL_DASSERT(Masked::is(resultRef)); + OSL_FORCEINLINE_BLOCK + { + MaskedDx widedrp1(resultRef); + MaskedDy widedrp2(resultRef); + //MaskedDz widedrp3(resultRef); + + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + Color3 drdp1Color = widedrp1[i]; + Color3 drdp2Color = widedrp2[i]; + //Color3 drdp3Color = widedrp3[i]; + + Vec3 v3pdx = Pdx[i]; + Vec3 v3pdy = Pdy[i]; + //Vec3 v3pdz = Pdz[i]; + + widedrp1[i] = drdp1Color * v3pdx.x + + drdp2Color + * v3pdx.y; // + drdp3Color * v3pdx.z; + widedrp2[i] = drdp1Color * v3pdy.x + + drdp2Color + * v3pdy.y; // + drdp3Color * v3pdy.z; + //widedrp3[i] = drdp1Color * v3pdz.x + drdp2Color * v3pdz.y + drdp3Color * v3pdz.z; + } + } + } + } + + MaskedData alphaRef = outputs.alpha(); + OSL_FORCEINLINE_BLOCK + if (alphaRef.valid() && alphaRef.has_derivs()) { + MaskedDx dap1(alphaRef); + MaskedDy dap2(alphaRef); + // MaskedDz dap3(alphaRef); + + OSL_OMP_PRAGMA(omp simd simdlen(__OSL_WIDTH)) + for (int i = 0; i < __OSL_WIDTH; ++i) { + float dadp1Val = dap1[i]; + float dadp2Val = dap2[i]; + //float dadp3Val = dap3[i]; + + Vec3 v3pdx = Pdx[i]; + Vec3 v3pdy = Pdy[i]; + //Vec3 v3pdz = Pdz[i]; + + float dadpx = dadp1Val * v3pdx.x + + dadp2Val * v3pdx.y; // + dadp3Val * v3pdx.z; + float dadpy = dadp1Val * 
v3pdy.x + + dadp2Val * v3pdy.y; // + dadp3Val * v3pdy.z; + //float dadpz = dadp1Val * v3pdz.x + dadp2Val * v3pdz.y + dadp3Val * v3pdz.z; + + dap1[i] = dadpx; + dap2[i] = dadpy; + //dap3[i] = dadpz; + } + } +} OSL_BATCHOP int __OSL_MASKED_OP(texture)(void* bsg_, ustring_pod name_, void* handle, @@ -590,6 +730,14 @@ __OSL_MASKED_OP(texture)(void* bsg_, ustring_pod name_, void* handle, Wide(dsdy), Wide(dtdy), outputs); + // Correct our st texture space gradients into xy-space gradients + if (resultHasDerivs || alphaHasDerivs) { + transformWideTextureGradients(outputs, Wide(dsdx), + Wide(dtdx), + Wide(dsdy), + Wide(dtdy)); + } + OSL_FORCEINLINE_BLOCK if (outputs.errormessage().valid()) { Masked err(outputs.errormessage()); @@ -637,6 +785,13 @@ __OSL_MASKED_OP(texture3d)(void* bsg_, ustring_pod name_, void* handle, Wide(wPdy), Wide(wPdz), outputs); + // Correct our P (Vec3) space gradients into xyz-space gradients + if (resultHasDerivs || alphaHasDerivs) { + transformWideTextureGradientsTexture3d(outputs, Wide(wPdx), + Wide(wPdy), + Wide(wPdz)); + } + OSL_FORCEINLINE_BLOCK if (outputs.errormessage().valid()) { Masked err(outputs.errormessage());