diff --git a/doc/snippets/MagnumMath.cpp b/doc/snippets/MagnumMath.cpp index d2bb4bbf0c..167b490f32 100644 --- a/doc/snippets/MagnumMath.cpp +++ b/doc/snippets/MagnumMath.cpp @@ -39,6 +39,7 @@ #include "Magnum/Math/Range.h" #include "Magnum/Math/Algorithms/GramSchmidt.h" #include "Magnum/Math/StrictWeakOrdering.h" +#include "Magnum/Math/Swizzle.h" using namespace Magnum; using namespace Magnum::Math::Literals; diff --git a/doc/snippets/MagnumTrade.cpp b/doc/snippets/MagnumTrade.cpp index 77da743944..a0460181c3 100644 --- a/doc/snippets/MagnumTrade.cpp +++ b/doc/snippets/MagnumTrade.cpp @@ -32,8 +32,9 @@ #include "Magnum/ImageView.h" #include "Magnum/Mesh.h" #include "Magnum/PixelFormat.h" -#include "Magnum/MeshTools/Interleave.h" #include "Magnum/Animation/Player.h" +#include "Magnum/Math/Swizzle.h" +#include "Magnum/MeshTools/Interleave.h" #include "Magnum/MeshTools/Transform.h" #include "Magnum/Trade/AbstractImporter.h" #include "Magnum/Trade/AnimationData.h" diff --git a/src/Magnum/Math/Test/CMakeLists.txt b/src/Magnum/Math/Test/CMakeLists.txt index de48dbe704..930bf15e65 100644 --- a/src/Magnum/Math/Test/CMakeLists.txt +++ b/src/Magnum/Math/Test/CMakeLists.txt @@ -68,6 +68,7 @@ corrade_add_test(MathInterpolationBenchmark InterpolationBenchmark.cpp LIBRARIES corrade_add_test(MathConfigurationValueTest ConfigurationValueTest.cpp LIBRARIES MagnumMathTestLib) corrade_add_test(MathStrictWeakOrderingTest StrictWeakOrderingTest.cpp LIBRARIES MagnumMathTestLib) +corrade_add_test(MathVectorBenchmark VectorBenchmark.cpp LIBRARIES MagnumMathTestLib) corrade_add_test(MathMatrixBenchmark MatrixBenchmark.cpp LIBRARIES MagnumMathTestLib) set_property(TARGET diff --git a/src/Magnum/Math/Test/ColorTest.cpp b/src/Magnum/Math/Test/ColorTest.cpp index 5cef591e62..4c4b6c60b6 100644 --- a/src/Magnum/Math/Test/ColorTest.cpp +++ b/src/Magnum/Math/Test/ColorTest.cpp @@ -35,6 +35,7 @@ #include "Magnum/Math/Color.h" #include "Magnum/Math/Half.h" #include "Magnum/Math/StrictWeakOrdering.h" +#include "Magnum/Math/Swizzle.h" struct Vec3 { float x, y, z; diff --git a/src/Magnum/Math/Test/Vector2Test.cpp b/src/Magnum/Math/Test/Vector2Test.cpp index 9919567684..4d6685f60a 100644 --- a/src/Magnum/Math/Test/Vector2Test.cpp +++ b/src/Magnum/Math/Test/Vector2Test.cpp @@ -29,6 +29,7 @@ #include "Magnum/Math/Vector3.h" /* Vector3 used in Vector2Test::cross() */ #include "Magnum/Math/StrictWeakOrdering.h" +#include "Magnum/Math/Swizzle.h" struct Vec2 { float x, y; @@ -65,6 +66,7 @@ struct Vector2Test: Corrade::TestSuite::Tester { void access(); void cross(); + void crossCatastrophicCancellation(); void axes(); void scales(); void perpendicular(); @@ -78,6 +80,7 @@ struct Vector2Test: Corrade::TestSuite::Tester { typedef Math::Vector3 Vector3i; typedef Math::Vector2 Vector2; +typedef Math::Vector2 Vector2d; typedef Math::Vector2 Vector2i; Vector2Test::Vector2Test() { @@ -91,6 +94,7 @@ Vector2Test::Vector2Test() { &Vector2Test::access, &Vector2Test::cross, + &Vector2Test::crossCatastrophicCancellation, &Vector2Test::axes, &Vector2Test::scales, &Vector2Test::perpendicular, @@ -207,6 +211,18 @@ void Vector2Test::cross() { CORRADE_COMPARE(Math::cross({a, 0}, {b, 0}), Vector3i(0, 0, Math::cross(a, b))); } +void Vector2Test::crossCatastrophicCancellation() { + /* Repro case and algorithm from + https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html */ + + Vector2 a{33962.035f, 41563.4f}; + Vector2 b{-24871.969f, -30438.8f}; + + Float expected = -75.1656f; + CORRADE_COMPARE(Float(Math::cross(Vector2d{a}, Vector2d{b})), expected); + CORRADE_COMPARE(Math::cross(a, b), expected); +} + void Vector2Test::axes() { constexpr Vector2 x = Vector2::xAxis(5.0f); constexpr Vector2 y = Vector2::yAxis(6.0f); diff --git a/src/Magnum/Math/Test/Vector3Test.cpp b/src/Magnum/Math/Test/Vector3Test.cpp index bfeb9030ee..46b8f0ebe8 100644 --- a/src/Magnum/Math/Test/Vector3Test.cpp +++ b/src/Magnum/Math/Test/Vector3Test.cpp @@ -29,6 +29,7 @@ #include "Magnum/Math/Vector3.h" #include "Magnum/Math/StrictWeakOrdering.h" +#include "Magnum/Math/Swizzle.h" struct Vec3 { float x, y, z; @@ -66,6 +67,7 @@ struct Vector3Test: Corrade::TestSuite::Tester { void access(); void cross(); + void crossCatastrophicCancellation(); void axes(); void scales(); void twoComponent(); @@ -77,6 +79,7 @@ struct Vector3Test: Corrade::TestSuite::Tester { }; typedef Math::Vector3 Vector3; +typedef Math::Vector3 Vector3d; typedef Math::Vector3 Vector3i; typedef Math::Vector2 Vector2; @@ -92,6 +95,7 @@ Vector3Test::Vector3Test() { &Vector3Test::access, &Vector3Test::cross, + &Vector3Test::crossCatastrophicCancellation, &Vector3Test::axes, &Vector3Test::scales, &Vector3Test::twoComponent, @@ -227,6 +231,17 @@ void Vector3Test::cross() { CORRADE_COMPARE(Math::cross(a, b), Vector3i(-10, -3, 7)); } +void Vector3Test::crossCatastrophicCancellation() { + /* Repro case and algorithm from + https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html */ + Vector3 a{33962.035f, 41563.4f, 7706.415f}; + Vector3 b{-24871.969f, -30438.8f, -5643.727f}; + + Vector3 expected{1556.0276f, -1257.5151f, -75.1656f}; + CORRADE_COMPARE(Vector3{Math::cross(Vector3d{a}, Vector3d{b})}, expected); + CORRADE_COMPARE(Math::cross(a, b), expected); +} + void Vector3Test::axes() { constexpr Vector3 x = Vector3::xAxis(5.0f); constexpr Vector3 y = Vector3::yAxis(6.0f); diff --git a/src/Magnum/Math/Test/Vector4Test.cpp b/src/Magnum/Math/Test/Vector4Test.cpp index db89add8a4..b8f6ce7397 100644 --- a/src/Magnum/Math/Test/Vector4Test.cpp +++ b/src/Magnum/Math/Test/Vector4Test.cpp @@ -29,6 +29,7 @@ #include "Magnum/Math/Vector4.h" #include "Magnum/Math/StrictWeakOrdering.h" +#include "Magnum/Math/Swizzle.h" struct Vec4 { float x, y, z, w; diff --git a/src/Magnum/Math/Test/VectorBenchmark.cpp b/src/Magnum/Math/Test/VectorBenchmark.cpp new file mode 100644 index 0000000000..e7c0625fc8 --- /dev/null +++ b/src/Magnum/Math/Test/VectorBenchmark.cpp @@ -0,0 +1,233 @@ +/* + This file is part of Magnum. + + Copyright © 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 + Vladimír Vondruš + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. +*/ + +#include + +#include "Magnum/Math/Vector3.h" + +#ifdef CORRADE_TARGET_SSE2 +#include +#endif + +namespace Magnum { namespace Math { namespace Test { namespace { + +struct VectorBenchmark: Corrade::TestSuite::Tester { + explicit VectorBenchmark(); + + void dot(); + + template void cross2Baseline(); + void cross2(); + + template void cross3Baseline(); + void cross3(); + #ifdef CORRADE_TARGET_SSE2 + void cross3SseNaive(); + void cross3SseOneShuffleLess(); + #endif +}; + +VectorBenchmark::VectorBenchmark() { + addBenchmarks({ + &VectorBenchmark::dot, + + &VectorBenchmark::cross2Baseline, + &VectorBenchmark::cross2Baseline, + &VectorBenchmark::cross2, + + &VectorBenchmark::cross3Baseline, + &VectorBenchmark::cross3Baseline, + &VectorBenchmark::cross3, + #ifdef CORRADE_TARGET_SSE2 + &VectorBenchmark::cross3SseNaive, + &VectorBenchmark::cross3SseOneShuffleLess, + #endif + }, 500); +} + +typedef Math::Constants Constants; +typedef Math::Vector2 Vector2; +typedef Math::Vector3 Vector3; + +enum: std::size_t { Repeats = 100000 }; + +using namespace Literals; + +void VectorBenchmark::dot() { + Vector3 a{1.3f, -1.1f, 1.0f}; + Vector3 b{4.5f, 3.2f, 7.3f}; + CORRADE_COMPARE(Math::dot(a, b), 9.63f); + + CORRADE_BENCHMARK(Repeats) { + a.x() = Math::dot(a, b); + } + + CORRADE_COMPARE(a, (Vector3{Constants::inf(), -1.1f, 1.0f})); +} + +template inline T cross2Baseline(const Vector2& v1, const Vector2& v2) { + T v1x = T(v1.x()), + v1y = T(v1.y()); + T v2x = T(v2.x()), + v2y = T(v2.y()); + return T(v1x*v2y - v1y*v2x); +} + +template void VectorBenchmark::cross2Baseline() { + setTestCaseTemplateName(TypeTraits::name()); + + Vector2 a{1.3f, -1.1f}; + Vector2 b{4.5f, 3.2f}; + CORRADE_COMPARE(Test::cross2Baseline(a, b), 9.11f); + + CORRADE_BENCHMARK(Repeats) { + a.x() = Test::cross2Baseline(a, b); + } + + CORRADE_COMPARE(a, (Vector2{Constants::inf(), -1.1f})); +} + +void VectorBenchmark::cross2() { + Vector2 a{1.3f, -1.1f}; + Vector2 b{4.5f, 3.2f}; + CORRADE_COMPARE(Math::cross(a, b), 9.11f); + + CORRADE_BENCHMARK(Repeats) { + a.x() = Math::cross(a, b); + } + + CORRADE_COMPARE(a, (Vector2{Constants::inf(), -1.1f})); +} + +template inline Vector3 cross3Baseline(const Vector3& v1, const Vector3& v2) { + T v1x = T(v1.x()), + v1y = T(v1.y()), + v1z = T(v1.z()); + T v2x = T(v2.x()), + v2y = T(v2.y()), + v2z = T(v2.z()); + return Vector3(v1y*v2z - v1z*v2y, + v1z*v2x - v1x*v2z, + v1x*v2y - v1y*v2x); +} + +template void VectorBenchmark::cross3Baseline() { + setTestCaseTemplateName(TypeTraits::name()); + + Vector3 a{1.3f, -1.1f, 1.0f}; + Vector3 b{4.5f, 3.2f, 7.3f}; + CORRADE_COMPARE(Test::cross3Baseline(a, b), + (Vector3{-11.23f, -4.99f, 9.11f})); + + CORRADE_BENCHMARK(Repeats) { + a = Test::cross3Baseline(a, b); + } + + CORRADE_VERIFY(a != a); +} + +void VectorBenchmark::cross3() { + Vector3 a{1.3f, -1.1f, 1.0f}; + Vector3 b{4.5f, 3.2f, 7.3f}; + CORRADE_COMPARE(Math::cross(a, b), + (Vector3{-11.23f, -4.99f, 9.11f})); + + CORRADE_BENCHMARK(Repeats) { + a = Math::cross(a, b); + } + + CORRADE_VERIFY(a != a); +} + +#ifdef CORRADE_TARGET_SSE2 +inline Vector3 crossSseNaive(const Vector3& a, const Vector3& b) { + union { + __m128 v; + Float s[4]; + }; + + const __m128 aa = _mm_set_ps(0.0f, a[2], a[1], a[0]); + const __m128 bb = _mm_set_ps(0.0f, b[2], b[1], b[0]); + + v = _mm_sub_ps( + _mm_mul_ps(_mm_shuffle_ps(aa, aa, _MM_SHUFFLE(3, 0, 2, 1)), + _mm_shuffle_ps(bb, bb, _MM_SHUFFLE(3, 1, 0, 2))), + _mm_mul_ps(_mm_shuffle_ps(aa, aa, _MM_SHUFFLE(3, 1, 0, 2)), + _mm_shuffle_ps(bb, bb, _MM_SHUFFLE(3, 0, 2, 1)))); + return {s[0], s[1], s[2]}; +} + +/* https://twitter.com/sjb3d/status/563640846671953920. Originally the + Math::cross() was doing this, implemented as + gather<'y', 'z', 'x'>(a*gather<'y', 'z', 'x'>(b) - + b*gather<'y', 'z', 'x'>(a)) + but while slightly faster in Release (on GCC at least) than the + straightforward version, it was insanely slow in Debug. */ +inline Vector3 crossSseOneShuffleLess(const Vector3& a, const Vector3& b) { + union { + __m128 v; + Float s[4]; + }; + + const __m128 aa = _mm_set_ps(0.0f, a[2], a[1], a[0]); + const __m128 bb = _mm_set_ps(0.0f, b[2], b[1], b[0]); + const __m128 cc = _mm_sub_ps( + _mm_mul_ps(aa, _mm_shuffle_ps(bb, bb, _MM_SHUFFLE(3, 0, 2, 1))), + _mm_mul_ps(bb, _mm_shuffle_ps(aa, aa, _MM_SHUFFLE(3, 0, 2, 1)))); + + v = _mm_shuffle_ps(cc, cc, _MM_SHUFFLE(3, 0, 2, 1)); + return {s[0], s[1], s[2]}; +} + +void VectorBenchmark::cross3SseNaive() { + Vector3 a{1.3f, -1.1f, 1.0f}; + Vector3 b{4.5f, 3.2f, 7.3f}; + CORRADE_COMPARE(Test::crossSseNaive(a, b), + (Vector3{-11.23f, -4.99f, 9.11f})); + + CORRADE_BENCHMARK(Repeats) { + a = Test::crossSseNaive(a, b); + } + + CORRADE_VERIFY(a != a); +} + +void VectorBenchmark::cross3SseOneShuffleLess() { + Vector3 a{1.3f, -1.1f, 1.0f}; + Vector3 b{4.5f, 3.2f, 7.3f}; + CORRADE_COMPARE(Test::crossSseOneShuffleLess(a, b), + (Vector3{-11.23f, -4.99f, 9.11f})); + + CORRADE_BENCHMARK(Repeats) { + a = Test::crossSseOneShuffleLess(a, b); + } + + CORRADE_VERIFY(a != a); +} +#endif + +}}}} + +CORRADE_TEST_MAIN(Magnum::Math::Test::VectorBenchmark) diff --git a/src/Magnum/Math/Vector.h b/src/Magnum/Math/Vector.h index 83060ca9b8..bb13dc6ba6 100644 --- a/src/Magnum/Math/Vector.h +++ b/src/Magnum/Math/Vector.h @@ -102,7 +102,10 @@ the same general direction, `1` when two *normalized* vectors are parallel, @see @ref Vector::dot() const, @ref Vector::operator-(), @ref Vector2::perpendicular() */ template inline T dot(const Vector& a, const Vector& b) { - return (a*b).sum(); + T out{}; + for(std::size_t i = 0; i != size; ++i) + out += a._data[i]*b._data[i]; + return out; } /** @relatesalso Vector @@ -686,6 +689,8 @@ template class Vector { template friend BoolVector equal(const Vector&, const Vector&); template friend BoolVector notEqual(const Vector&, const Vector&); + template friend U dot(const Vector&, const Vector&); + /* Implementation for Vector::Vector(const Vector&) */ template constexpr explicit Vector(Implementation::Sequence, const Vector& vector) noexcept: _data{T(vector._data[sequence])...} {} diff --git a/src/Magnum/Math/Vector2.h b/src/Magnum/Math/Vector2.h index d5d7b47ef6..53a0a9bd47 100644 --- a/src/Magnum/Math/Vector2.h +++ b/src/Magnum/Math/Vector2.h @@ -33,6 +33,16 @@ namespace Magnum { namespace Math { +namespace Implementation { + /* https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html */ + template inline T differenceOfProducts(T a, T b, T c, T d) { + T cd = c*d; + T err = std::fma(-c, d, cd); + T dop = std::fma(a, b, -cd); + return dop + err; + } +} + /** @brief 2D cross product @@ -44,11 +54,15 @@ are parallel or antiparallel and `1` when two *normalized* vectors are perpendicular. @f[ \boldsymbol a \times \boldsymbol b = \boldsymbol a_\bot \cdot \boldsymbol b = a_xb_y - a_yb_x @f] + +The precision of the calculation is further improved using Kahan's algorithm +as described in [an article by Matt Pharr](https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html). @see @ref Vector2::perpendicular(), @ref dot(const Vector&, const Vector&) */ template inline T cross(const Vector2& a, const Vector2& b) { - return dot(a.perpendicular(), b); + return Implementation::differenceOfProducts(a._data[0], b._data[1], + a._data[1], b._data[0]); } /** @@ -182,6 +196,9 @@ template class Vector2: public Vector<2, T> { aspectRatio() const { return x()/y(); } MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(2, Vector2) + + private: + template friend U cross(const Vector2&, const Vector2&); }; #ifndef DOXYGEN_GENERATING_OUTPUT diff --git a/src/Magnum/Math/Vector3.h b/src/Magnum/Math/Vector3.h index ca5cfe119b..153e80819e 100644 --- a/src/Magnum/Math/Vector3.h +++ b/src/Magnum/Math/Vector3.h @@ -30,29 +30,31 @@ */ #include "Magnum/Math/Vector2.h" -#include "Magnum/Math/Swizzle.h" namespace Magnum { namespace Math { /** @brief Cross product -Result has length of `0` either when one of them is zero or they are parallel -or antiparallel and length of `1` when two *normalized* vectors are -perpendicular. Done using the following equation: @f[ - \boldsymbol a \times \boldsymbol b = \begin{pmatrix} c_y \\ c_z \\ c_x \end{pmatrix} ~~~~~ - \boldsymbol c = \boldsymbol a \begin{pmatrix} b_y \\ b_z \\ b_x \end{pmatrix} - - \boldsymbol b \begin{pmatrix} a_y \\ a_z \\ a_x \end{pmatrix} -@f] -Which is equivalent to the common one (source: -https://twitter.com/sjb3d/status/563640846671953920): @f[ - \boldsymbol a \times \boldsymbol b = \begin{pmatrix}a_yb_z - a_zb_y \\ a_zb_x - a_xb_z \\ a_xb_y - a_yb_x \end{pmatrix} +Result has length of @cpp 0 @ce either when one of them is zero or they are +parallel or antiparallel and length of @cpp 1 @ce when two *normalized* vectors +are perpendicular. @f[ + \boldsymbol a \times \boldsymbol b = \begin{pmatrix}a_yb_z - a_zb_y \\ a_zb_x - a_xb_z \\ a_xb_y - a_yb_x \end{pmatrix} @f] + +The precision of the calculation is further improved using Kahan's algorithm +as described in [an article by Matt Pharr](https://pharr.org/matt/blog/2019/11/03/difference-of-floats.html). @see @ref cross(const Vector2&, const Vector2&), @ref planeEquation() */ template inline Vector3 cross(const Vector3& a, const Vector3& b) { - return gather<'y', 'z', 'x'>(a*gather<'y', 'z', 'x'>(b) - - b*gather<'y', 'z', 'x'>(a)); + return { + Implementation::differenceOfProducts(a._data[1], b._data[2], + a._data[2], b._data[1]), + Implementation::differenceOfProducts(a._data[2], b._data[0], + a._data[0], b._data[2]), + Implementation::differenceOfProducts(a._data[0], b._data[1], + a._data[1], b._data[0]) + }; } /** @@ -232,6 +234,9 @@ template class Vector3: public Vector<3, T> { } /**< @overload */ MAGNUM_VECTOR_SUBCLASS_IMPLEMENTATION(3, Vector3) + + private: + template friend Vector3 cross(const Vector3&, const Vector3&); }; #ifndef DOXYGEN_GENERATING_OUTPUT