diff --git a/upstream_utils/eigen.py b/upstream_utils/eigen.py index cc4b102144d..875c6dfbbae 100755 --- a/upstream_utils/eigen.py +++ b/upstream_utils/eigen.py @@ -126,7 +126,8 @@ def copy_upstream_src(wpilib_root): def main(): name = "eigen" url = "https://gitlab.com/libeigen/eigen.git" - tag = "d14b0a4e531760b6aeccf20b666eaec8bd0b8461" + # master on 2024-11-14 + tag = "0fb2ed140d4fc0108553ecfb25f2d7fc1a9319a1" eigen = Lib(name, url, tag, copy_upstream_src) eigen.main() diff --git a/upstream_utils/eigen_patches/0001-Disable-warnings.patch b/upstream_utils/eigen_patches/0001-Disable-warnings.patch index 8d3c7be7989..0ff38396e5c 100644 --- a/upstream_utils/eigen_patches/0001-Disable-warnings.patch +++ b/upstream_utils/eigen_patches/0001-Disable-warnings.patch @@ -8,7 +8,7 @@ Subject: [PATCH 1/2] Disable warnings 1 file changed, 6 insertions(+) diff --git a/Eigen/src/Core/util/DisableStupidWarnings.h b/Eigen/src/Core/util/DisableStupidWarnings.h -index 32a427d852355a51dc4263d81498554ff4c3cbba..1182198231ab784346f8d80838363e4a0abba2ba 100644 +index ab0c542d0e24c6ecb77abfc535c8232774cba6d5..7ecd7bf8cc927d07a28c9da4ebbe1ea4d4d2b97b 100644 --- a/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/Eigen/src/Core/util/DisableStupidWarnings.h @@ -81,6 +81,12 @@ @@ -23,4 +23,4 @@ index 32a427d852355a51dc4263d81498554ff4c3cbba..1182198231ab784346f8d80838363e4a +#endif #endif - #if defined __NVCC__ + #if defined __NVCC__ && defined __CUDACC__ diff --git a/wpimath/src/main/native/include/frc/StateSpaceUtil.h b/wpimath/src/main/native/include/frc/StateSpaceUtil.h index 7a3b67b89ae..785255d6748 100644 --- a/wpimath/src/main/native/include/frc/StateSpaceUtil.h +++ b/wpimath/src/main/native/include/frc/StateSpaceUtil.h @@ -41,7 +41,7 @@ constexpr Matrixd MakeCostMatrix( for (int row = 0; row < result.rows(); ++row) { for (int col = 0; col < result.cols(); ++col) { if (row != col) { - result.coeffRef(row, col) = 0.0; + result(row, col) = 0.0; } } } @@ -49,9 +49,9 @@ constexpr Matrixd MakeCostMatrix( wpi::for_each( [&](int i, double tolerance) { if (tolerance == std::numeric_limits::infinity()) { - result.coeffRef(i, i) = 0.0; + result(i, i) = 0.0; } else { - result.coeffRef(i, i) = 1.0 / (tolerance * tolerance); + result(i, i) = 1.0 / (tolerance * tolerance); } }, tolerances...); @@ -78,14 +78,13 @@ constexpr Matrixd MakeCovMatrix(Ts... stdDevs) { for (int row = 0; row < result.rows(); ++row) { for (int col = 0; col < result.cols(); ++col) { if (row != col) { - result.coeffRef(row, col) = 0.0; + result(row, col) = 0.0; } } } - wpi::for_each( - [&](int i, double stdDev) { result.coeffRef(i, i) = stdDev * stdDev; }, - stdDevs...); + wpi::for_each([&](int i, double stdDev) { result(i, i) = stdDev * stdDev; }, + stdDevs...); return result; } @@ -111,12 +110,12 @@ constexpr Matrixd MakeCostMatrix(const std::array& costs) { for (int col = 0; col < result.cols(); ++col) { if (row == col) { if (costs[row] == std::numeric_limits::infinity()) { - result.coeffRef(row, col) = 0.0; + result(row, col) = 0.0; } else { - result.coeffRef(row, col) = 1.0 / (costs[row] * costs[row]); + result(row, col) = 1.0 / (costs[row] * costs[row]); } } else { - result.coeffRef(row, col) = 0.0; + result(row, col) = 0.0; } } } @@ -143,9 +142,9 @@ constexpr Matrixd MakeCovMatrix(const std::array& stdDevs) { for (int row = 0; row < result.rows(); ++row) { for (int col = 0; col < result.cols(); ++col) { if (row == col) { - result.coeffRef(row, col) = stdDevs[row] * stdDevs[row]; + result(row, col) = stdDevs[row] * stdDevs[row]; } else { - result.coeffRef(row, col) = 0.0; + result(row, col) = 0.0; } } } @@ -334,7 +333,7 @@ constexpr Vectord ClampInputMaxMagnitude(const Vectord& u, const Vectord& umax) { Vectord result; for (int i = 0; i < Inputs; ++i) { - result.coeffRef(i) = std::clamp(u.coeff(i), umin.coeff(i), umax.coeff(i)); + result(i) = std::clamp(u(i), umin(i), umax(i)); } return result; } diff --git a/wpimath/src/main/native/include/frc/ct_matrix.h b/wpimath/src/main/native/include/frc/ct_matrix.h index ecc9f28a608..7b6f9056e7c 100644 --- a/wpimath/src/main/native/include/frc/ct_matrix.h +++ b/wpimath/src/main/native/include/frc/ct_matrix.h @@ -62,7 +62,7 @@ class ct_matrix { * @param col Column index. */ constexpr const Scalar& operator()(int row, int col) const { - return m_storage.coeff(row, col); + return m_storage(row, col); } /** @@ -71,9 +71,7 @@ class ct_matrix { * @param row Row index. * @param col Column index. */ - constexpr Scalar& operator()(int row, int col) { - return m_storage.coeffRef(row, col); - } + constexpr Scalar& operator()(int row, int col) { return m_storage(row, col); } /** * Returns reference to matrix element. @@ -83,7 +81,7 @@ class ct_matrix { constexpr const Scalar& operator()(int index) const requires(Rows == 1 || Cols == 1) { - return m_storage.coeff(index); + return m_storage(index); } /** @@ -94,7 +92,7 @@ class ct_matrix { constexpr Scalar& operator()(int index) requires(Rows == 1 || Cols == 1) { - return m_storage.coeffRef(index); + return m_storage(index); } /** diff --git a/wpimath/src/main/native/include/frc/geometry/CoordinateSystem.h b/wpimath/src/main/native/include/frc/geometry/CoordinateSystem.h index 90a6a363d0d..0d4ba8f6c8c 100644 --- a/wpimath/src/main/native/include/frc/geometry/CoordinateSystem.h +++ b/wpimath/src/main/native/include/frc/geometry/CoordinateSystem.h @@ -35,12 +35,10 @@ class WPILIB_DLLEXPORT CoordinateSystem { // the NWU coordinate system. Each column vector in the change of basis // matrix is one of the old basis vectors mapped to its representation in // the new basis. - Eigen::Matrix3d R{{positiveX.m_axis.coeff(0), positiveY.m_axis.coeff(0), - positiveZ.m_axis.coeff(0)}, - {positiveX.m_axis.coeff(1), positiveY.m_axis.coeff(1), - positiveZ.m_axis.coeff(1)}, - {positiveX.m_axis.coeff(2), positiveY.m_axis.coeff(2), - positiveZ.m_axis.coeff(2)}}; + Eigen::Matrix3d R{ + {positiveX.m_axis(0), positiveY.m_axis(0), positiveZ.m_axis(0)}, + {positiveX.m_axis(1), positiveY.m_axis(1), positiveZ.m_axis(1)}, + {positiveX.m_axis(2), positiveY.m_axis(2), positiveZ.m_axis(2)}}; // The change of basis matrix should be a pure rotation. The Rotation3d // constructor will verify this by checking for special orthogonality. diff --git a/wpimath/src/main/native/include/frc/geometry/Pose3d.h b/wpimath/src/main/native/include/frc/geometry/Pose3d.h index 779d3522f60..dc6a7416fd0 100644 --- a/wpimath/src/main/native/include/frc/geometry/Pose3d.h +++ b/wpimath/src/main/native/include/frc/geometry/Pose3d.h @@ -281,9 +281,9 @@ constexpr Eigen::Matrix3d RotationVectorToMatrix( // [ 0 -c b] // Omega = [ c 0 -a] // [-b a 0] - return Eigen::Matrix3d{{0.0, -rotation.coeff(2), rotation.coeff(1)}, - {rotation.coeff(2), 0.0, -rotation.coeff(0)}, - {-rotation.coeff(1), rotation.coeff(0), 0.0}}; + return Eigen::Matrix3d{{0.0, -rotation(2), rotation(1)}, + {rotation(2), 0.0, -rotation(0)}, + {-rotation(1), rotation(0), 0.0}}; } } // namespace detail diff --git a/wpimath/src/main/native/include/frc/geometry/Quaternion.h b/wpimath/src/main/native/include/frc/geometry/Quaternion.h index e4d52b8f022..65c6a8265e3 100644 --- a/wpimath/src/main/native/include/frc/geometry/Quaternion.h +++ b/wpimath/src/main/native/include/frc/geometry/Quaternion.h @@ -299,7 +299,7 @@ class WPILIB_DLLEXPORT Quaternion { // 𝑞 = std::cos(θ/2) + std::sin(θ/2) * v̂ // 𝑞 = std::cos(θ/2) + std::sin(θ/2) / θ * 𝑣⃗ - double theta = gcem::hypot(rvec.coeff(0), rvec.coeff(1), rvec.coeff(2)); + double theta = gcem::hypot(rvec(0), rvec(1), rvec(2)); double cos = gcem::cos(theta / 2); double axial_scalar; @@ -312,9 +312,8 @@ class WPILIB_DLLEXPORT Quaternion { axial_scalar = gcem::sin(theta / 2) / theta; } - return Quaternion{cos, axial_scalar * rvec.coeff(0), - axial_scalar * rvec.coeff(1), - axial_scalar * rvec.coeff(2)}; + return Quaternion{cos, axial_scalar * rvec(0), axial_scalar * rvec(1), + axial_scalar * rvec(2)}; } private: diff --git a/wpimath/src/main/native/include/frc/geometry/Rotation3d.h b/wpimath/src/main/native/include/frc/geometry/Rotation3d.h index 8393f675589..f27d1d94131 100644 --- a/wpimath/src/main/native/include/frc/geometry/Rotation3d.h +++ b/wpimath/src/main/native/include/frc/geometry/Rotation3d.h @@ -85,11 +85,10 @@ class WPILIB_DLLEXPORT Rotation3d { } // https://en.wikipedia.org/wiki/Conversion_between_quaternions_and_Euler_angles#Definition - Eigen::Vector3d v{{axis.coeff(0) / norm * units::math::sin(angle / 2.0), - axis.coeff(1) / norm * units::math::sin(angle / 2.0), - axis.coeff(2) / norm * units::math::sin(angle / 2.0)}}; - m_q = Quaternion{units::math::cos(angle / 2.0), v.coeff(0), v.coeff(1), - v.coeff(2)}; + Eigen::Vector3d v{{axis(0) / norm * units::math::sin(angle / 2.0), + axis(1) / norm * units::math::sin(angle / 2.0), + axis(2) / norm * units::math::sin(angle / 2.0)}}; + m_q = Quaternion{units::math::cos(angle / 2.0), v(0), v(1), v(2)}; } /** @@ -191,9 +190,9 @@ class WPILIB_DLLEXPORT Rotation3d { // rotation is required. Any other vector can be used to generate an // orthogonal one. - double x = gcem::abs(initial.coeff(0)); - double y = gcem::abs(initial.coeff(1)); - double z = gcem::abs(initial.coeff(2)); + double x = gcem::abs(initial(0)); + double y = gcem::abs(initial(1)); + double z = gcem::abs(initial(2)); // Find vector that is most orthogonal to initial vector Eigen::Vector3d other; diff --git a/wpimath/src/main/native/include/frc/geometry/Translation2d.h b/wpimath/src/main/native/include/frc/geometry/Translation2d.h index 0b8133b2319..a8a26b8bff5 100644 --- a/wpimath/src/main/native/include/frc/geometry/Translation2d.h +++ b/wpimath/src/main/native/include/frc/geometry/Translation2d.h @@ -60,8 +60,7 @@ class WPILIB_DLLEXPORT Translation2d { * @param vector The translation vector to represent. */ constexpr explicit Translation2d(const Eigen::Vector2d& vector) - : m_x{units::meter_t{vector.coeff(0)}}, - m_y{units::meter_t{vector.coeff(1)}} {} + : m_x{units::meter_t{vector(0)}}, m_y{units::meter_t{vector(1)}} {} /** * Calculates the distance between two translations in 2D space. diff --git a/wpimath/src/main/native/include/frc/system/LinearSystem.h b/wpimath/src/main/native/include/frc/system/LinearSystem.h index 0089d72a4fd..d2b021375b4 100644 --- a/wpimath/src/main/native/include/frc/system/LinearSystem.h +++ b/wpimath/src/main/native/include/frc/system/LinearSystem.h @@ -55,7 +55,7 @@ class LinearSystem { if (std::is_constant_evaluated()) { for (int row = 0; row < mat.rows(); ++row) { for (int col = 0; col < mat.cols(); ++col) { - if (!gcem::internal::is_finite(mat.coeff(row, col))) { + if (!gcem::internal::is_finite(mat(row, col))) { return false; } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/.clang-format b/wpimath/src/main/native/thirdparty/eigen/include/.clang-format index b48874efafb..1f33cbed2e4 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/.clang-format +++ b/wpimath/src/main/native/thirdparty/eigen/include/.clang-format @@ -7,6 +7,8 @@ BasedOnStyle: Google ColumnLimit: 120 StatementMacros: - EIGEN_STATIC_ASSERT + - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN SortIncludes: false AttributeMacros: - EIGEN_STRONG_INLINE diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core index 7475c5a04bb..a7f8dcadd31 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/Core @@ -97,6 +97,11 @@ // for std::is_nothrow_move_assignable #include +// for std::this_thread::yield(). +#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)) +#include +#endif + // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN #include @@ -117,8 +122,8 @@ #include #include #include -#include #include +#include #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0 #define EIGEN_SYCL_LOCAL_THREAD_DIM0 16 #endif @@ -319,6 +324,7 @@ using std::ptrdiff_t; #include "src/Core/CwiseNullaryOp.h" #include "src/Core/CwiseUnaryView.h" #include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/InnerProduct.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" #include "src/Core/Stride.h" diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Array.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Array.h index 29c968210f3..2098749c0fd 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Array.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Array.h @@ -117,18 +117,12 @@ class Array : public PlainObjectBase::value) - : Base(std::move(other)) { - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array&&) = default; EIGEN_DEVICE_FUNC Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); return *this; @@ -232,7 +226,7 @@ class Array : public PlainObjectBase > { return m_expression.innerStride(); } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.coeffRef(rowId, colId); @@ -144,8 +144,8 @@ class MatrixWrapper : public MatrixBase > { return m_expression.innerStride(); } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); } EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.derived().coeffRef(rowId, colId); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Block.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Block.h index 9b16ed289e7..709264c6999 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Block.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Block.h @@ -278,7 +278,7 @@ class BlockImpl_dense : public internal::dense_xpr_base class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) { #ifndef EIGEN_INTERNAL_DEBUGGING EIGEN_UNUSED_VARIABLE(outerStride); @@ -157,9 +156,9 @@ class plainobjectbase_evaluator_data { template class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outerStride() const { return m_outerStride; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const { return m_outerStride; } const Scalar* data; protected: @@ -189,35 +188,37 @@ struct evaluator> : evaluator_base { : RowsAtCompileTime }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() : m_d(0, OuterStrideAtCompileTime) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const PlainObjectType& m) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const PlainObjectType& m) : m_d(m.data(), IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) return m_d.data[row * m_d.outerStride() + col]; else return m_d.data[row + col * m_d.outerStride()]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.data[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) return const_cast(m_d.data)[row * m_d.outerStride() + col]; else return const_cast(m_d.data)[row + col * m_d.outerStride()]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return const_cast(m_d.data)[index]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { + return const_cast(m_d.data)[index]; + } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { if (IsRowMajor) return ploadt(m_d.data + row * m_d.outerStride() + col); else @@ -225,12 +226,12 @@ struct evaluator> : evaluator_base { } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return ploadt(m_d.data + index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { if (IsRowMajor) return pstoret(const_cast(m_d.data) + row * m_d.outerStride() + col, x); else @@ -238,7 +239,7 @@ struct evaluator> : evaluator_base { } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { return pstoret(const_cast(m_d.data) + index, x); } @@ -251,9 +252,10 @@ struct evaluator> : evaluator>> { typedef Matrix XprType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator>(m) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} }; template @@ -261,9 +263,10 @@ struct evaluator> : evaluator>> { typedef Array XprType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator() {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& m) : evaluator>(m) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} }; // -------------------- Transpose -------------------- @@ -296,22 +299,22 @@ struct unary_evaluator, IndexBased> : evaluator_base - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(col, row); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(col, row, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); } @@ -490,12 +493,12 @@ struct evaluator> } template - EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const { return m_wrapper.template packetOp(m_functor, row, col); } template - EIGEN_STRONG_INLINE PacketType packet(IndexType index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType index) const { return m_wrapper.template packetOp(m_functor, index); } @@ -534,12 +537,12 @@ struct unary_evaluator, IndexBased> : evaluator_b } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.argImpl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.argImpl.template packet(index)); } @@ -627,7 +630,7 @@ struct unary_evaluator, ArgType>, In } template - EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const { constexpr int PacketSize = unpacket_traits::size; Index actualRow = IsRowMajor ? row : row + (offset * PacketSize); Index actualCol = IsRowMajor ? col + (offset * PacketSize) : col; @@ -635,7 +638,7 @@ struct unary_evaluator, ArgType>, In return m_argImpl.template packet(actualRow, actualCol); } template - EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const { constexpr int PacketSize = unpacket_traits::size; Index actualIndex = index + (offset * PacketSize); eigen_assert(check_array_bounds(actualIndex, PacketSize) && "Array index out of bounds"); @@ -652,7 +655,7 @@ struct unary_evaluator, ArgType>, In // If not, perform full load. Otherwise, revert to a scalar loop to perform a partial load. // In either case, perform a vectorized cast of the source packet. template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int DstPacketSize = unpacket_traits::size; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); @@ -669,7 +672,7 @@ struct unary_evaluator, ArgType>, In } // Use the source packet type with the same size as DstPacketType, if it exists template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int DstPacketSize = unpacket_traits::size; using SizedSrcPacketType = typename find_packet_by_size::type; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); @@ -678,14 +681,14 @@ struct unary_evaluator, ArgType>, In } // unpacket_traits::size == 2 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(row, col, 0), srcPacket(row, col, 1)); } // unpacket_traits::size == 4 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(row, col, 0), srcPacket(row, col, 1), srcPacket(row, col, 2), @@ -693,7 +696,7 @@ struct unary_evaluator, ArgType>, In } // unpacket_traits::size == 8 * SrcPacketSize template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast( srcPacket(row, col, 0), srcPacket(row, col, 1), srcPacket(row, col, 2), @@ -703,7 +706,7 @@ struct unary_evaluator, ArgType>, In // Analogous routines for linear access. template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int DstPacketSize = unpacket_traits::size; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); @@ -719,7 +722,7 @@ struct unary_evaluator, ArgType>, In return pcast(src); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int DstPacketSize = unpacket_traits::size; using SizedSrcPacketType = typename find_packet_by_size::type; constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); @@ -727,18 +730,18 @@ struct unary_evaluator, ArgType>, In return pcast(srcPacket(index, 0)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1), srcPacket(index, 2), srcPacket(index, 3)); } template = true> - EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); return pcast(srcPacket(index, 0), srcPacket(index, 1), srcPacket(index, 2), srcPacket(index, 3), @@ -810,14 +813,14 @@ struct ternary_evaluator, IndexBased } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.arg1Impl.template packet(row, col), m_d.arg2Impl.template packet(row, col), m_d.arg3Impl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.arg1Impl.template packet(index), m_d.arg2Impl.template packet(index), m_d.arg3Impl.template packet(index)); @@ -908,13 +911,13 @@ struct binary_evaluator, IndexBased, IndexBase } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_d.func().packetOp(m_d.lhsImpl.template packet(row, col), m_d.rhsImpl.template packet(row, col)); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_d.func().packetOp(m_d.lhsImpl.template packet(index), m_d.rhsImpl.template packet(index)); } @@ -1030,24 +1033,24 @@ struct mapbase_evaluator : evaluator_base { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_data[index * m_innerStride.value()]; } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::ploadt(ptr); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return internal::ploadt(m_data + index * m_innerStride.value()); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { PointerType ptr = m_data + row * rowStride() + col * colStride(); return internal::pstoret(ptr, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { internal::pstoret(m_data + index * m_innerStride.value(), x); } @@ -1217,12 +1220,12 @@ struct unary_evaluator, IndexBa } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { if (ForwardLinearAccess) return m_argImpl.template packet(m_linear_offset.value() + index); else @@ -1230,12 +1233,12 @@ struct unary_evaluator, IndexBa } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { if (ForwardLinearAccess) return m_argImpl.template writePacket(m_linear_offset.value() + index, x); else @@ -1378,7 +1381,7 @@ struct unary_evaluator> } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { const Index actual_row = internal::traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); @@ -1390,7 +1393,7 @@ struct unary_evaluator> } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { const Index actual_index = internal::traits::RowsAtCompileTime == 1 ? (ColFactor == 1 ? index : index % m_cols.value()) : (RowFactor == 1 ? index : index % m_rows.value()); @@ -1435,22 +1438,22 @@ struct evaluator_wrapper_base : evaluator_base { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } template - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { return m_argImpl.template packet(row, col); } template - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { return m_argImpl.template packet(index); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { m_argImpl.template writePacket(row, col, x); } template - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { m_argImpl.template writePacket(index, x); } @@ -1532,7 +1535,7 @@ struct unary_evaluator> : evaluator_base - EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { enum { PacketSize = unpacket_traits::size, OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, @@ -1544,14 +1547,14 @@ struct unary_evaluator> : evaluator_base - EIGEN_STRONG_INLINE PacketType packet(Index index) const { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { enum { PacketSize = unpacket_traits::size }; return preverse( m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); } template - EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { // FIXME we could factorize some code with packet(i,j) enum { PacketSize = unpacket_traits::size, @@ -1565,7 +1568,7 @@ struct unary_evaluator> : evaluator_base - EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { enum { PacketSize = unpacket_traits::size }; m_argImpl.template writePacket(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseBase.h index 8c8ca734794..e1fbb0b82ad 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseBase.h @@ -642,6 +642,18 @@ class DenseBase EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); }; +/** Free-function swap. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + // Use forwarding references to capture all combinations of cv-qualified l+r-value cases. + std::enable_if_t>, std::decay_t>::value && + std::is_base_of>, std::decay_t>::value, + void> + swap(DerivedA&& a, DerivedB&& b) { + a.swap(b); +} + } // end namespace Eigen #endif // EIGEN_DENSEBASE_H diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h index 30e0aa38a79..97f9b50f345 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseCoeffsBase.h @@ -298,7 +298,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeffRef(row, col); } @@ -312,7 +312,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } @@ -332,7 +332,7 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -346,7 +346,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseStorage.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseStorage.h index f61693982ed..d62586c99b5 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseStorage.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/DenseStorage.h @@ -27,622 +27,550 @@ namespace Eigen { namespace internal { -struct constructor_without_unaligned_array_assert {}; +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) +#else +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) \ + eigen_assert((is_constant_evaluated() || (std::uintptr_t(array) % Alignment == 0)) && \ + "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif -template -EIGEN_DEVICE_FUNC constexpr void check_static_allocation_size() { -// if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit #if EIGEN_STACK_ALLOCATION_LIMIT - EIGEN_STATIC_ASSERT(Size * sizeof(T) <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG); +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) \ + EIGEN_STATIC_ASSERT(X <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG) +#else +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) #endif -} /** \internal * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: * to 16 bytes boundary if the total size is a multiple of 16 bytes. */ + template ::value> struct plain_array { - T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { check_static_allocation_size(); } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) -#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) + EIGEN_ALIGN_TO_BOUNDARY(Alignment) T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; #else -#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((internal::is_constant_evaluated() || (std::uintptr_t(array) & (sizemask)) == 0) && \ - "this assertion is explained here: " \ - "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ - " **** READ THIS WEB PAGE !!! ****"); -#endif - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } -}; - -template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) + EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } +#endif }; template -struct plain_array { - EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; - - EIGEN_DEVICE_FUNC constexpr plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); - check_static_allocation_size(); - } - - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) { - check_static_allocation_size(); - } +struct plain_array { + T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } +#endif }; template struct plain_array { T array[1]; - EIGEN_DEVICE_FUNC constexpr plain_array() {} - EIGEN_DEVICE_FUNC constexpr plain_array(constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; }; -struct plain_array_helper { - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void copy( - const plain_array& src, const Eigen::Index size, - plain_array& dst) { - smart_copy(src.array, src.array + size, dst.array); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void swap(plain_array& a, - const Eigen::Index a_size, - plain_array& b, - const Eigen::Index b_size) { - if (a_size < b_size) { - std::swap_ranges(b.array, b.array + a_size, a.array); - smart_move(b.array + a_size, b.array + b_size, a.array + a_size); - } else if (a_size > b_size) { - std::swap_ranges(a.array, a.array + b_size, b.array); - smart_move(a.array + b_size, a.array + a_size, b.array + b_size); - } else { - std::swap_ranges(a.array, a.array + a_size, b.array); - } - } -}; - -} // end namespace internal - -/** \internal - * - * \class DenseStorage - * \ingroup Core_Module - * - * \brief Stores the data of a matrix - * - * This class stores the data of fixed-size, dynamic-size or mixed matrices - * in a way as compact as possible. - * - * \sa Matrix - */ -template -class DenseStorage; +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap_plain_array(plain_array& a, + plain_array& b, + Index a_size, Index b_size) { + Index common_size = numext::mini(a_size, b_size); + std::swap_ranges(a.array, a.array + common_size, b.array); + if (a_size > b_size) + smart_copy(a.array + common_size, a.array + a_size, b.array + common_size); + else if (b_size > a_size) + smart_copy(b.array + common_size, b.array + b_size, a.array + common_size); +} -// purely fixed-size matrix -template -class DenseStorage { - internal::plain_array m_data; +template +class DenseStorage_impl { + plain_array m_data; public: - constexpr EIGEN_DEVICE_FUNC DenseStorage(){EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN( - Index size = - Size)} EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()) {} -#if defined(EIGEN_DENSE_STORAGE_CTOR_PLUGIN) - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(other.m_data){EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)} +#ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; #else - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + smart_copy(other.m_data.array, other.m_data.array + Size, m_data.array); + } #endif - EIGEN_DEVICE_FUNC constexpr DenseStorage - & - operator=(const DenseStorage&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows == Rows_ && cols == Cols_); - EIGEN_UNUSED_VARIABLE(size); - EIGEN_UNUSED_VARIABLE(rows); - EIGEN_UNUSED_VARIABLE(cols); - } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_data, other.m_data); } - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } -}; - -// null matrix -template -class DenseStorage { - public: - static_assert(Rows_ * Cols_ == 0, "The fixed number of rows times columns must equal the storage size."); - EIGEN_DEVICE_FUNC constexpr DenseStorage() {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) { return *this; } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage&) {} - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index) {} - EIGEN_DEVICE_FUNC constexpr const T* data() const { return 0; } - EIGEN_DEVICE_FUNC constexpr T* data() { return 0; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + numext::swap(m_data, other.m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -// more specializations for null matrices; these are necessary to resolve ambiguities -template -class DenseStorage { - Index m_rows; - Index m_cols; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows), m_cols(other.m_cols) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_rows(rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_rows = other.m_rows; - m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) { - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); - } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; - eigen_assert(m_rows * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -template -class DenseStorage { - Index m_cols; +template +class DenseStorage_impl { + plain_array m_data; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_cols(other.m_cols) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) { - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC void resize(Index, Index, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; - eigen_assert(Rows_ * m_cols == 0 && "The number of rows times columns must equal the storage size."); } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -template -class DenseStorage { - Index m_rows; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} - EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : DenseStorage() {} - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_rows(other.m_rows) {} - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_rows(rows), m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); m_rows = other.m_rows; + m_cols = other.m_cols; return *this; } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) { - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { numext::swap(m_rows, other.m_rows); } - EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC static EIGEN_CONSTEXPR Index cols(void) EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { m_rows = rows; - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + m_cols = cols; } - EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { m_rows = rows; - eigen_assert(m_rows * Cols_ == 0 && "The number of rows times columns must equal the storage size."); + m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return nullptr; } - EIGEN_DEVICE_FUNC T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } }; - -// dynamic-size matrix with fixed-size storage -template -class DenseStorage { - internal::plain_array m_data; - Index m_rows; - Index m_cols; +// null matrix variants +template +class DenseStorage_impl { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl&) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } +}; +template +class DenseStorage_impl { + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows), m_cols(other.m_cols) { - internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_rows = other.m_rows; - m_cols = other.m_cols; - internal::plain_array_helper::copy(other.m_data, m_rows * m_cols, m_data); - } - return *this; - } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, m_rows * m_cols, other.m_data, other.m_rows * other.m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) + : m_rows(rows) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index cols) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// dynamic-size matrix with fixed-size storage and fixed width -template -class DenseStorage { - internal::plain_array m_data; - Index m_rows; +template +class DenseStorage_impl { + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_rows(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(other.m_rows) { - internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) + : m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_cols, other.m_cols); } - - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_rows = other.m_rows; - internal::plain_array_helper::copy(other.m_data, m_rows * Cols_, m_data); - } - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index, Index rows, Index) : m_rows(rows) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, m_rows * Cols_, other.m_data, other.m_rows * Cols_); - numext::swap(m_rows, other.m_rows); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return Cols_; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index rows, Index) { m_rows = rows; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// dynamic-size matrix with fixed-size storage and fixed height -template -class DenseStorage { - internal::plain_array m_data; - Index m_cols; +template +class DenseStorage_impl { + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage& other) - : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(other.m_cols) { - internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - m_cols = other.m_cols; - internal::plain_array_helper::copy(other.m_data, Rows_ * m_cols, m_data); - } - return *this; - } - EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {} - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { - internal::plain_array_helper::swap(m_data, Rows_ * m_cols, other.m_data, Rows_ * other.m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) + : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_rows, other.m_rows); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC constexpr Index rows(void) const EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC constexpr Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr void resize(Index, Index, Index cols) { m_cols = cols; } - EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } - EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } }; - -// purely dynamic matrix. -template -class DenseStorage { - T* m_data; - Index m_rows; - Index m_cols; +// fixed-size matrix with dynamic memory allocation not currently supported +template +class DenseStorage_impl {}; +// dynamic-sized variants +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) - : m_data(0), m_rows(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), - m_rows(rows), - m_cols(cols) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows >= 0 && cols >= 0); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows * other.m_cols)), - m_rows(other.m_rows), - m_cols(other.m_cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows * m_cols) - internal::smart_copy(other.m_data, other.m_data + other.m_rows * other.m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return *this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_rows(std::move(other.m_rows)), - m_cols(std::move(other.m_cols)) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows) { other.m_data = nullptr; other.m_rows = 0; - other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, m_rows * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); - numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - void conservativeResize(Index size, Index rows, Index cols) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index /*cols*/) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) { - if (size != m_rows * m_cols) { - internal::conditional_aligned_delete_auto(m_data, m_rows * m_cols); - if (size > 0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index /*cols*/) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; - m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } }; - -// matrix with dynamic width and fixed height (so that matrix has dynamic size). -template -class DenseStorage { - T* m_data; - Index m_cols; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_cols(0) {} - explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows == Rows_ && cols >= 0); - EIGEN_UNUSED_VARIABLE(rows); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(Rows_ * other.m_cols)), - m_cols(other.m_cols) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols * Rows_) - internal::smart_copy(other.m_data, other.m_data + Rows_ * m_cols, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return *this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_cols(std::move(other.m_cols)) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_cols(other.m_cols) { other.m_data = nullptr; other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_cols, other.m_cols); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, Rows_ * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC static constexpr Index rows(void) EIGEN_NOEXCEPT { return Rows_; } - EIGEN_DEVICE_FUNC Index cols(void) const EIGEN_NOEXCEPT { return m_cols; } - EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, Rows_ * m_cols); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index /*rows*/, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { - if (size != Rows_ * m_cols) { - internal::conditional_aligned_delete_auto(m_data, Rows_ * m_cols); - if (size > 0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index /*rows*/, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_cols = cols; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } }; - -// matrix with dynamic height and fixed width (so that matrix has dynamic size). -template -class DenseStorage { - T* m_data; - Index m_rows; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; + Index m_cols = 0; public: - EIGEN_DEVICE_FUNC constexpr DenseStorage() : m_data(0), m_rows(0) {} - explicit constexpr DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) { + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - eigen_internal_assert(size == rows * cols && rows >= 0 && cols == Cols_); - EIGEN_UNUSED_VARIABLE(cols); - } - EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) - : m_data(internal::conditional_aligned_new_auto(other.m_rows * Cols_)), - m_rows(other.m_rows) { - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows * Cols_) - internal::smart_copy(other.m_data, other.m_data + other.m_rows * Cols_, m_data); - } - EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { - if (this != &other) { - DenseStorage tmp(other); - this->swap(tmp); - } - return *this; } - EIGEN_DEVICE_FUNC DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)), - m_rows(std::move(other.m_rows)) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) { other.m_data = nullptr; other.m_rows = 0; + other.m_cols = 0; } - EIGEN_DEVICE_FUNC DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { - numext::swap(m_data, other.m_data); - numext::swap(m_rows, other.m_rows); + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC ~DenseStorage() { - internal::conditional_aligned_delete_auto(m_data, Cols_ * m_rows); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; } - EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC Index rows(void) const EIGEN_NOEXCEPT { return m_rows; } - EIGEN_DEVICE_FUNC static constexpr Index cols(void) { return Cols_; } - void conservativeResize(Index size, Index rows, Index) { - m_data = - internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows * Cols_); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); m_rows = rows; + m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { - if (size != m_rows * Cols_) { - internal::conditional_aligned_delete_auto(m_data, Cols_ * m_rows); - if (size > 0) // >0 and not simply !=0 to let the compiler knows that size cannot be negative - m_data = internal::conditional_aligned_new_auto(size); - else - m_data = 0; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } +}; +template +struct use_default_move { + static constexpr bool DynamicObject = Size == Dynamic; + static constexpr bool TrivialObject = + (!NumTraits::RequireInitialization) && (Rows >= 0) && (Cols >= 0) && (Size == Rows * Cols); + static constexpr bool value = DynamicObject || TrivialObject; +}; +} // end namespace internal + +/** \internal + * + * \class DenseStorage_impl + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. + * + * \sa Matrix + */ +template ::value> +class DenseStorage : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage meets the requirements of use_default_move, then use the move construction and move assignment + // operation defined in DenseStorage_impl, or the compiler-generated version if none is defined + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&&) = default; +}; +template +class DenseStorage + : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage does not meet the requirements of use_default_move, then defer to the copy construction and copy + // assignment behavior + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&& other) + : DenseStorage(static_cast(other)) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&& other) { + *this = other; + return *this; } - EIGEN_DEVICE_FUNC const T* data() const { return m_data; } - EIGEN_DEVICE_FUNC T* data() { return m_data; } }; } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Dot.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Dot.h index 82eb9c70969..059527c85f8 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Dot.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Dot.h @@ -17,28 +17,18 @@ namespace Eigen { namespace internal { -// helper function for dot(). The problem is that if we put that in the body of dot(), then upon calling dot -// with mismatched types, the compiler emits errors about failing to instantiate cwiseProduct BEFORE -// looking at the static assertions. Thus this is a trick to get better compile errors. -template -struct dot_nocheck { - typedef scalar_conj_product_op::Scalar, typename traits::Scalar> conj_prod; - typedef typename conj_prod::result_type ResScalar; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr(b).sum(); +template ::Scalar> +struct squared_norm_impl { + using Real = typename NumTraits::Real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) { + Scalar result = a.unaryExpr(squared_norm_functor()).sum(); + return numext::real(result) + numext::imag(result); } }; -template -struct dot_nocheck { - typedef scalar_conj_product_op::Scalar, typename traits::Scalar> conj_prod; - typedef typename conj_prod::result_type ResScalar; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr(b).sum(); - } +template +struct squared_norm_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); } }; } // end namespace internal @@ -60,18 +50,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar, typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived) -#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) - EIGEN_CHECK_BINARY_COMPATIBILIY( - Eigen::internal::scalar_conj_product_op, Scalar, - typename OtherDerived::Scalar); -#endif - - eigen_assert(size() == other.size()); - - return internal::dot_nocheck::run(*this, other); + return internal::dot_impl::run(derived(), other.derived()); } //---------- implementation of L2 norm and related functions ---------- @@ -85,7 +64,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { - return numext::real((*this).cwiseAbs2().sum()); + return internal::squared_norm_impl::run(derived()); } /** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/EigenBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/EigenBase.h index a2d6ee2ec32..6d167006a09 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/EigenBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/EigenBase.h @@ -46,9 +46,9 @@ struct EigenBase { typedef typename internal::traits::StorageKind StorageKind; /** \returns a reference to the derived object */ - EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr Derived& derived() { return *static_cast(this); } /** \returns a const reference to the derived object */ - EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast(this); } EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h index b7ae44fda94..e4c51d2a6f6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GeneralProduct.h @@ -229,7 +229,7 @@ struct gemv_static_vector_if; template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } @@ -237,19 +237,19 @@ struct gemv_static_vector_if { template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Scalar* data() { return 0; } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } }; template struct gemv_static_vector_if { #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 internal::plain_array m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. internal::plain_array m_data; - EIGEN_STRONG_INLINE Scalar* data() { + EIGEN_STRONG_INLINE constexpr Scalar* data() { return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES); } @@ -327,6 +327,7 @@ struct gemv_dense_selector { if (!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = Dest::SizeAtCompileTime; Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif @@ -391,6 +392,7 @@ struct gemv_dense_selector { if (!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h index 1d79b4ab8ff..c7a9846d038 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GenericPacketMath.h @@ -1083,8 +1083,13 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { - EIGEN_USING_STD(exp); - return exp(a); + return numext::exp(a); +} + +/** \internal \returns the exp2 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) { + return numext::exp2(a); } /** \internal \returns the expm1 of \a a (coeff-wise) */ @@ -1113,7 +1118,7 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& return log10(a); } -/** \internal \returns the log10 of \a a (coeff-wise) */ +/** \internal \returns the log2 of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { using Scalar = typename internal::unpacket_traits::type; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h index 3f147b8f6fb..df1098e27e6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/GlobalFunctions.h @@ -76,6 +76,7 @@ EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf, scalar_erf_op, error function,\sa ArrayBas EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc, scalar_erfc_op, complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri, scalar_ndtri_op, inverse normal distribution function,\sa ArrayBase::ndtri) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp, scalar_exp_op, exponential,\sa ArrayBase::exp) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp2, scalar_exp2_op, exponential,\sa ArrayBase::exp2) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1, scalar_expm1_op, exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log, scalar_log_op, natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p, scalar_log1p_op, natural logarithm of 1 plus the value,\sa ArrayBase::log1p) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/InnerProduct.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/InnerProduct.h new file mode 100644 index 00000000000..1e16942c238 --- /dev/null +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/InnerProduct.h @@ -0,0 +1,250 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INNER_PRODUCT_EVAL_H +#define EIGEN_INNER_PRODUCT_EVAL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// recursively searches for the largest simd type that does not exceed Size, or the smallest if no such type exists +template ::type, + bool Stop = + (unpacket_traits::size <= Size) || is_same::half>::value> +struct find_inner_product_packet_helper; + +template +struct find_inner_product_packet_helper { + using type = typename find_inner_product_packet_helper::half>::type; +}; + +template +struct find_inner_product_packet_helper { + using type = Packet; +}; + +template +struct find_inner_product_packet : find_inner_product_packet_helper {}; + +template +struct find_inner_product_packet { + using type = typename packet_traits::type; +}; + +template +struct inner_product_assert { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs) +#ifndef EIGEN_NO_DEBUG + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) { + eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size"); + } +#else + static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {} +#endif +}; + +template +struct inner_product_evaluator { + static constexpr int LhsFlags = evaluator::Flags, RhsFlags = evaluator::Flags, + SizeAtCompileTime = min_size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime), + LhsAlignment = evaluator::Alignment, RhsAlignment = evaluator::Alignment; + + using Scalar = typename Func::result_type; + using Packet = typename find_inner_product_packet::type; + + static constexpr bool Vectorize = + bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess && + ((SizeAtCompileTime == Dynamic) || (unpacket_traits::size <= SizeAtCompileTime)); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs, + Func func = Func()) + : m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) { + inner_product_assert::run(lhs, rhs); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const { + return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_func.packet(m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const { + return m_func.packet(value, m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + const Func m_func; + const evaluator m_lhs; + const evaluator m_rhs; + const variable_if_dynamic m_size; +}; + +template +struct inner_product_impl; + +// scalar loop +template +struct inner_product_impl { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) { + const Index size = eval.size(); + if (size == 0) return Scalar(0); + + Scalar result = eval.coeff(0); + for (Index k = 1; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +// vector loop +template +struct inner_product_impl { + using UnsignedIndex = std::make_unsigned_t; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) { + const UnsignedIndex size = static_cast(eval.size()); + if (size < PacketSize) return inner_product_impl::run(eval); + + const UnsignedIndex packetEnd = numext::round_down(size, PacketSize); + const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize); + const UnsignedIndex numPackets = size / PacketSize; + const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize; + + Packet presult0, presult1, presult2, presult3; + + presult0 = eval.template packet(0 * PacketSize); + if (numPackets >= 2) presult1 = eval.template packet(1 * PacketSize); + if (numPackets >= 3) presult2 = eval.template packet(2 * PacketSize); + if (numPackets >= 4) { + presult3 = eval.template packet(3 * PacketSize); + + for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) { + presult0 = eval.packet(presult0, k + 0 * PacketSize); + presult1 = eval.packet(presult1, k + 1 * PacketSize); + presult2 = eval.packet(presult2, k + 2 * PacketSize); + presult3 = eval.packet(presult3, k + 3 * PacketSize); + } + + if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize); + if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize); + if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize); + + presult2 = padd(presult2, presult3); + } + + if (numPackets >= 3) presult1 = padd(presult1, presult2); + if (numPackets >= 2) presult0 = padd(presult0, presult1); + + Scalar result = predux(presult0); + for (UnsignedIndex k = packetEnd; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +template +struct conditional_conj; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return pconj(a); + } +}; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return a; + } +}; + +template +struct scalar_inner_product_op { + using result_type = typename ScalarBinaryOpTraits::ReturnType; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const { + return (conj_helper::coeff(a) * b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a, + const RhsScalar& b) const { + return (conj_helper::coeff(a) * b) + accum; + } + static constexpr bool PacketAccess = false; +}; + +template +struct scalar_inner_product_op { + using result_type = Scalar; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const { + return pmul(conj_helper::coeff(a), b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const { + return pmadd(conj_helper::coeff(a), b, accum); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const { + return pmul(conj_helper::packet(a), b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const { + return pmadd(conj_helper::packet(a), b, accum); + } + static constexpr bool PacketAccess = packet_traits::HasMul && packet_traits::HasAdd; +}; + +template +struct default_inner_product_impl { + using LhsScalar = typename traits::Scalar; + using RhsScalar = typename traits::Scalar; + using Op = scalar_inner_product_op; + using Evaluator = inner_product_evaluator; + using result_type = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase& a, const MatrixBase& b) { + Evaluator eval(a.derived(), b.derived(), Op()); + return inner_product_impl::run(eval); + } +}; + +template +struct dot_impl : default_inner_product_impl {}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_INNER_PRODUCT_EVAL_H diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Inverse.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Inverse.h index cfb3b20e5a2..013ad0a9426 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Inverse.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Inverse.h @@ -92,7 +92,7 @@ struct unary_evaluator > : public evaluator(this, m_result); internal::call_assignment_no_alias(m_result, inv_xpr); } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MapBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MapBase.h index da95b5c4007..1e83fdf70fe 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MapBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MapBase.h @@ -94,7 +94,7 @@ class MapBase : public internal::dense_xpr_base : public MapBase::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h index feb9099b4cd..57fb3bde304 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/MathFunctions.h @@ -1477,6 +1477,63 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::comple } #endif +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp2(const T& x) { + EIGEN_USING_STD(exp2); + return exp2(x); +} + +// MSVC screws up some edge-cases for std::exp2(complex). +#ifdef EIGEN_COMP_MSVC +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + EIGEN_USING_STD(exp); + // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised. + // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised. + if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) { + return std::complex(NumTraits::quiet_NaN(), NumTraits::quiet_NaN()); + } + // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified) + // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified) + if ((real_ref(x) == NumTraits::infinity() && !(isfinite)(imag_ref(x)))) { + return std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()); + } + return exp2(x); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp2, exp2) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp2(const float& x) { + return ::exp2f(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp2(const double& x) { + return ::exp2(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + float com = ::exp2f(x.real()); + float res_real = com * ::cosf(static_cast(EIGEN_LN2) * x.imag()); + float res_imag = com * ::sinf(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + double com = ::exp2(x.real()); + double res_real = com * ::cos(static_cast(EIGEN_LN2) * x.imag()); + double res_imag = com * ::sin(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} +#endif + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h index 0d8691e7ac4..8b7f70c162b 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Matrix.h @@ -250,17 +250,12 @@ class Matrix : public PlainObjectBase::value) - : Base(std::move(other)) {} +#if defined(EIGEN_INITIALIZE_COEFFS) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default; +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { Base::operator=(std::move(other)); @@ -379,7 +374,7 @@ class Matrix : public PlainObjectBase&) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h index a6e2de47746..67a6c08ae44 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/NumTraits.h @@ -250,6 +250,7 @@ struct NumTraits > : GenericNumTraits > typedef typename NumTraits::Literal Literal; enum { IsComplex = 1, + IsSigned = NumTraits::IsSigned, RequireInitialization = NumTraits::RequireInitialization, ReadCost = 2 * NumTraits::ReadCost, AddCost = 2 * NumTraits::AddCost, diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h index 5f846a0f104..8720c4473e3 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/PlainObjectBase.h @@ -270,10 +270,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar* data() const { return m_storage.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. * @@ -472,34 +472,17 @@ class PlainObjectBase : public internal::dense_xpr_base::type // Prevent user from trying to instantiate PlainObjectBase objects // by making all its constructor protected. See bug 1074. protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() : m_storage() { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - // FIXME is it still needed ? - /** \internal */ - EIGEN_DEVICE_FUNC constexpr explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) - : m_storage(internal::constructor_without_unaligned_array_assert()) { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } -#endif - - EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT - : m_storage(std::move(other.m_storage)) {} - + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&&) = default; EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { m_storage = std::move(other.m_storage); return *this; } /** Copy constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase& other) - : Base(), m_storage(other.m_storage) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase&) = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) - : m_storage(size, rows, cols) { - // EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED - } + : m_storage(size, rows, cols) {} /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. * diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ProductEvaluators.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ProductEvaluators.h index fa4d0384b6e..77a658a8ef9 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ProductEvaluators.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/ProductEvaluators.h @@ -235,19 +235,20 @@ EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assig template struct generic_product_impl { + using impl = default_inner_product_impl; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) = impl::run(lhs, rhs); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) += impl::run(lhs, rhs); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - dst.coeffRef(0, 0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); + dst.coeffRef(0, 0) -= impl::run(lhs, rhs); } }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h index e82da96609d..76e43f5dc5f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/RandomImpl.h @@ -115,6 +115,7 @@ struct random_float_impl { } }; +#if !EIGEN_COMP_NVCC // random implementation for long double // this specialization is not compatible with double-double scalars template { }; template <> struct random_float_impl : random_longdouble_impl<> {}; +#endif template struct random_default_impl { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reshaped.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reshaped.h index b881dd6cd4f..4b34e16f6a4 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reshaped.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reshaped.h @@ -173,7 +173,7 @@ class ReshapedImpl_dense #ifdef EIGEN_PARSED_BY_DOXYGEN /** \sa MapBase::data() */ - EIGEN_DEVICE_FUNC inline const Scalar* data() const; + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const; EIGEN_DEVICE_FUNC inline Index innerStride() const; EIGEN_DEVICE_FUNC inline Index outerStride() const; #endif diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reverse.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reverse.h index 66116aa4ee3..eb06fff57f7 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reverse.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Reverse.h @@ -127,19 +127,25 @@ EIGEN_DEVICE_FUNC inline typename DenseBase::ReverseReturnType DenseBas * \sa VectorwiseOp::reverseInPlace(), reverse() */ template EIGEN_DEVICE_FUNC inline void DenseBase::reverseInPlace() { + constexpr int HalfRowsAtCompileTime = RowsAtCompileTime == Dynamic ? Dynamic : RowsAtCompileTime / 2; + constexpr int HalfColsAtCompileTime = ColsAtCompileTime == Dynamic ? Dynamic : ColsAtCompileTime / 2; if (cols() > rows()) { Index half = cols() / 2; - leftCols(half).swap(rightCols(half).reverse()); + this->template leftCols(half).swap( + this->template rightCols(half).reverse()); if ((cols() % 2) == 1) { Index half2 = rows() / 2; - col(half).head(half2).swap(col(half).tail(half2).reverse()); + col(half).template head(half2).swap( + col(half).template tail(half2).reverse()); } } else { Index half = rows() / 2; - topRows(half).swap(bottomRows(half).reverse()); + this->template topRows(half).swap( + this->template bottomRows(half).reverse()); if ((rows() % 2) == 1) { Index half2 = cols() / 2; - row(half).head(half2).swap(row(half).tail(half2).reverse()); + row(half).template head(half2).swap( + row(half).template tail(half2).reverse()); } } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StableNorm.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StableNorm.h index de84d81f711..711ee3fb474 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StableNorm.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StableNorm.h @@ -48,34 +48,16 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc template void stable_norm_impl_inner_step(const VectorType& vec, RealScalar& ssq, RealScalar& scale, RealScalar& invScale) { - typedef typename VectorType::Scalar Scalar; const Index blockSize = 4096; - typedef typename internal::nested_eval::type VectorTypeCopy; - typedef internal::remove_all_t VectorTypeCopyClean; - const VectorTypeCopy copy(vec); - - enum { - CanAlign = - ((int(VectorTypeCopyClean::Flags) & DirectAccessBit) || - (int(internal::evaluator::Alignment) > 0) // FIXME Alignment)>0 might not be enough - ) && - (blockSize * sizeof(Scalar) * 2 < EIGEN_STACK_ALLOCATION_LIMIT) && - (EIGEN_MAX_STATIC_ALIGN_BYTES > - 0) // if we cannot allocate on the stack, then let's not bother about this optimization - }; - typedef std::conditional_t< - CanAlign, - Ref, internal::evaluator::Alignment>, - typename VectorTypeCopyClean::ConstSegmentReturnType> - SegmentWrapper; Index n = vec.size(); - - Index bi = internal::first_default_aligned(copy); - if (bi > 0) internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale); - for (; bi < n; bi += blockSize) - internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi, numext::mini(blockSize, n - bi))), ssq, scale, - invScale); + Index blockEnd = numext::round_down(n, blockSize); + for (Index i = 0; i < blockEnd; i += blockSize) { + internal::stable_norm_kernel(vec.template segment(i), ssq, scale, invScale); + } + if (n > blockEnd) { + internal::stable_norm_kernel(vec.tail(n - blockEnd), ssq, scale, invScale); + } } template @@ -85,8 +67,7 @@ typename VectorType::RealScalar stable_norm_impl(const VectorType& vec, using std::sqrt; Index n = vec.size(); - - if (n == 1) return abs(vec.coeff(0)); + if (EIGEN_PREDICT_FALSE(n == 1)) return abs(vec.coeff(0)); typedef typename VectorType::RealScalar RealScalar; RealScalar scale(0); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StlIterators.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StlIterators.h index bb897f8c686..25d45753062 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StlIterators.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/StlIterators.h @@ -325,7 +325,7 @@ class pointer_based_stl_iterator { public: typedef Index difference_type; typedef typename XprType::Scalar value_type; -#if __cplusplus >= 202002L +#if EIGEN_CPLUSPLUS >= 202002L typedef std::conditional_t iterator_category; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Stride.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Stride.h index a8fdeaf3753..14b025c4673 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Stride.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Stride.h @@ -70,6 +70,13 @@ class Stride { /** Copy constructor */ EIGEN_DEVICE_FUNC Stride(const Stride& other) : m_outer(other.outer()), m_inner(other.inner()) {} + /** Copy assignment operator */ + EIGEN_DEVICE_FUNC Stride& operator=(const Stride& other) { + m_outer.setValue(other.outer()); + m_inner.setValue(other.inner()); + return *this; + } + /** \returns the outer stride */ EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR inline Index outer() const { return m_outer.value(); } /** \returns the inner stride */ diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Transpose.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Transpose.h index 1cc7a286766..89e3d95fd72 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Transpose.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/Transpose.h @@ -119,10 +119,12 @@ class TransposeImpl : public internal::TransposeImpl_base::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarWithConstIfNotLvalue* data() { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr ScalarWithConstIfNotLvalue* data() { + return derived().nestedExpression().data(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar* data() const { return derived().nestedExpression().data(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar* data() const { return derived().nestedExpression().data(); } // FIXME: shall we keep the const version of coeffRef? EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h index bae57146b12..d5506dae4d3 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/Complex.h @@ -85,10 +85,14 @@ EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a) { } template <> -EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) { - __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(a.v), b.v); - __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1))); - __m256 result = _mm256_addsub_ps(tmp1, tmp2); +EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) { + __m256 tmp1 = _mm256_mul_ps(_mm256_movehdup_ps(a.v), _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1))); + __m256 tmp2 = _mm256_moveldup_ps(a.v); +#ifdef EIGEN_VECTORIZE_FMA + __m256 result = _mm256_fmaddsub_ps(tmp2, b.v, tmp1); +#else + __m256 result = _mm256_addsub_ps(_mm256_mul_ps(tmp2, b.v), tmp1); +#endif return Packet4cf(result); } @@ -121,11 +125,11 @@ EIGEN_STRONG_INLINE Packet4cf pandnot(const Packet4cf& a, const Packe template <> EIGEN_STRONG_INLINE Packet4cf pload(const std::complex* from) { - EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(pload(&numext::real_ref(*from))); + EIGEN_DEBUG_ALIGNED_LOAD return Packet4cf(_mm256_load_ps(&numext::real_ref(*from))); } template <> EIGEN_STRONG_INLINE Packet4cf ploadu(const std::complex* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(ploadu(&numext::real_ref(*from))); + EIGEN_DEBUG_UNALIGNED_LOAD return Packet4cf(_mm256_loadu_ps(&numext::real_ref(*from))); } template <> @@ -145,11 +149,11 @@ EIGEN_STRONG_INLINE Packet4cf ploaddup(const std::complex* fro template <> EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet4cf& from) { - EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), from.v); + EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(&numext::real_ref(*to), from.v); } template <> EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet4cf& from) { - EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), from.v); + EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_ps(&numext::real_ref(*to), from.v); } template <> @@ -283,13 +287,15 @@ EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a) { } template <> -EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) { - __m256d tmp1 = _mm256_shuffle_pd(a.v, a.v, 0x0); - __m256d even = _mm256_mul_pd(tmp1, b.v); - __m256d tmp2 = _mm256_shuffle_pd(a.v, a.v, 0xF); - __m256d tmp3 = _mm256_shuffle_pd(b.v, b.v, 0x5); - __m256d odd = _mm256_mul_pd(tmp2, tmp3); - return Packet2cd(_mm256_addsub_pd(even, odd)); +EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) { + __m256d tmp1 = _mm256_mul_pd(_mm256_permute_pd(a.v, 0xF), _mm256_permute_pd(b.v, 0x5)); + __m256d tmp2 = _mm256_movedup_pd(a.v); +#ifdef EIGEN_VECTORIZE_FMA + __m256d result = _mm256_fmaddsub_pd(tmp2, b.v, tmp1); +#else + __m256d result = _mm256_addsub_pd(_mm256_mul_pd(tmp2, b.v), tmp1); +#endif + return Packet2cd(result); } template <> @@ -321,11 +327,11 @@ EIGEN_STRONG_INLINE Packet2cd pandnot(const Packet2cd& a, const Packe template <> EIGEN_STRONG_INLINE Packet2cd pload(const std::complex* from) { - EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(pload((const double*)from)); + EIGEN_DEBUG_ALIGNED_LOAD return Packet2cd(_mm256_load_pd((const double*)from)); } template <> EIGEN_STRONG_INLINE Packet2cd ploadu(const std::complex* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(ploadu((const double*)from)); + EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cd(_mm256_loadu_pd((const double*)from)); } template <> @@ -342,11 +348,11 @@ EIGEN_STRONG_INLINE Packet2cd ploaddup(const std::complex* fr template <> EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet2cd& from) { - EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); + EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd((double*)to, from.v); } template <> EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet2cd& from) { - EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); + EIGEN_DEBUG_UNALIGNED_STORE _mm256_storeu_pd((double*)to, from.v); } template <> @@ -449,6 +455,74 @@ EIGEN_STRONG_INLINE Packet4cf pexp(const Packet4cf& a) { return pexp_complex(a); } +#ifdef EIGEN_VECTORIZE_FMA +// std::complex +template <> +EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) { + __m256 a_odd = _mm256_movehdup_ps(a.v); + __m256 a_even = _mm256_moveldup_ps(a.v); + __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmaddsub_ps(a_odd, b_swap, c.v)); + return Packet4cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet4cf pmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) { + __m256 a_odd = _mm256_movehdup_ps(a.v); + __m256 a_even = _mm256_moveldup_ps(a.v); + __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m256 result = _mm256_fmaddsub_ps(a_even, b.v, _mm256_fmsubadd_ps(a_odd, b_swap, c.v)); + return Packet4cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet4cf pnmadd(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) { + __m256 a_odd = _mm256_movehdup_ps(a.v); + __m256 a_even = _mm256_moveldup_ps(a.v); + __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmaddsub_ps(a_even, b.v, c.v)); + return Packet4cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet4cf pnmsub(const Packet4cf& a, const Packet4cf& b, const Packet4cf& c) { + __m256 a_odd = _mm256_movehdup_ps(a.v); + __m256 a_even = _mm256_moveldup_ps(a.v); + __m256 b_swap = _mm256_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m256 result = _mm256_fmaddsub_ps(a_odd, b_swap, _mm256_fmsubadd_ps(a_even, b.v, c.v)); + return Packet4cf(result); +} +// std::complex +template <> +EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) { + __m256d a_odd = _mm256_permute_pd(a.v, 0xF); + __m256d a_even = _mm256_movedup_pd(a.v); + __m256d b_swap = _mm256_permute_pd(b.v, 0x5); + __m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmaddsub_pd(a_odd, b_swap, c.v)); + return Packet2cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cd pmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) { + __m256d a_odd = _mm256_permute_pd(a.v, 0xF); + __m256d a_even = _mm256_movedup_pd(a.v); + __m256d b_swap = _mm256_permute_pd(b.v, 0x5); + __m256d result = _mm256_fmaddsub_pd(a_even, b.v, _mm256_fmsubadd_pd(a_odd, b_swap, c.v)); + return Packet2cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cd pnmadd(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) { + __m256d a_odd = _mm256_permute_pd(a.v, 0xF); + __m256d a_even = _mm256_movedup_pd(a.v); + __m256d b_swap = _mm256_permute_pd(b.v, 0x5); + __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmaddsub_pd(a_even, b.v, c.v)); + return Packet2cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, const Packet2cd& c) { + __m256d a_odd = _mm256_permute_pd(a.v, 0xF); + __m256d a_even = _mm256_movedup_pd(a.v); + __m256d b_swap = _mm256_permute_pd(b.v, 0x5); + __m256d result = _mm256_fmaddsub_pd(a_odd, b_swap, _mm256_fmsubadd_pd(a_even, b.v, c.v)); + return Packet2cd(result); +} +#endif } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h index 321188c4b2e..a5c38e7878c 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -23,14 +23,17 @@ namespace internal { EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f) -EIGEN_DOUBLE_PACKET_FUNCTION(atan, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet4d) EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet4d) EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet4d) EIGEN_DOUBLE_PACKET_FUNCTION(exp, Packet4d) +EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet4d) #ifdef EIGEN_VECTORIZE_AVX2 EIGEN_DOUBLE_PACKET_FUNCTION(sin, Packet4d) EIGEN_DOUBLE_PACKET_FUNCTION(cos, Packet4d) #endif +EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4d) +EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4d) // Notice that for newer processors, it is counterproductive to use Newton // iteration for square root. In particular, Skylake and Zen2 processors @@ -93,6 +96,7 @@ EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& expone BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp2) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p) @@ -104,6 +108,7 @@ BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt) BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh) F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos) F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp) +F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp2) F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1) F16_PACKET_FUNCTION(Packet8f, Packet8h, plog) F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p) diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h index ea58f0ed2f6..1980e928be2 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/AVX/PacketMath.h @@ -124,6 +124,7 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, + HasErfc = EIGEN_FAST_MATH, HasBlend = 1 }; }; @@ -142,11 +143,14 @@ struct packet_traits : default_packet_traits { HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, #endif + HasTanh = EIGEN_FAST_MATH, HasLog = 1, + HasErfc = 1, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasATan = 1, + HasATanh = 1, HasBlend = 1 }; }; @@ -657,6 +661,16 @@ EIGEN_STRONG_INLINE uint64_t predux(const Packet4ul& a) { __m128i r = _mm_add_epi64(_mm256_castsi256_si128(a), _mm256_extractf128_si256(a, 1)); return numext::bit_cast(_mm_extract_epi64_0(r) + _mm_extract_epi64_1(r)); } + +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet4l& a) { + return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0; +} +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet4ul& a) { + return _mm256_movemask_pd(_mm256_castsi256_pd(a)) != 0; +} + #define MM256_SHUFFLE_EPI64(A, B, M) _mm256_shuffle_pd(_mm256_castsi256_pd(A), _mm256_castsi256_pd(B), M) EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { __m256d T0 = MM256_SHUFFLE_EPI64(kernel.packet[0], kernel.packet[1], 15); @@ -1999,6 +2013,11 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet8f& x) { return _mm256_movemask_ps(x) != 0; } +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet4d& x) { + return _mm256_movemask_pd(x) != 0; +} + template <> EIGEN_STRONG_INLINE bool predux_any(const Packet8i& x) { return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0; @@ -2008,6 +2027,15 @@ EIGEN_STRONG_INLINE bool predux_any(const Packet8ui& x) { return _mm256_movemask_ps(_mm256_castsi256_ps(x)) != 0; } +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8h& x) { + return _mm_movemask_epi8(x) != 0; +} +template <> +EIGEN_STRONG_INLINE bool predux_any(const Packet8bf& x) { + return _mm_movemask_epi8(x) != 0; +} + EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { __m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]); __m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h index 14f0524a3b0..b8d3b4f69c6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/BFloat16.h @@ -613,6 +613,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) { return numext::bit_cast(x); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) { return bfloat16(::expf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp2(const bfloat16& a) { return bfloat16(::exp2f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) { return bfloat16(numext::expm1(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) { return bfloat16(::logf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) { return bfloat16(numext::log1p(float(a))); } @@ -762,6 +763,31 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast(from); + bool from_sign = from_bits >> 15; + // Whether we are adjusting toward the infinity with the same sign as from. + bool toward_inf = (to > from) == !from_sign; + if (toward_inf) { + ++from_bits; + } else if ((from_bits & 0x7fff) == 0) { + // Adjusting away from inf, but from is zero, so just toggle the sign. + from_bits ^= 0x8000; + } else { + --from_bits; + } + return numext::bit_cast(from_bits); +} + } // namespace numext } // namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index d9e6d037d6e..4e441b498d1 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -42,6 +42,134 @@ struct make_integer { typedef numext::int16_t type; }; +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template +struct ppolevl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, + const typename unpacket_traits::type coeff[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); + } +}; + +template +struct ppolevl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, + const typename unpacket_traits::type coeff[]) { + EIGEN_UNUSED_VARIABLE(x); + return pset1(coeff[0]); + } +}; + +/* chbevl (modified for Eigen) + * + * Evaluate Chebyshev series + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N], chebevl(); + * + * y = chbevl( x, coef, N ); + * + * + * + * DESCRIPTION: + * + * Evaluates the series + * + * N-1 + * - ' + * y = > coef[i] T (x/2) + * - i + * i=0 + * + * of Chebyshev polynomials Ti at argument x/2. + * + * Coefficients are stored in reverse order, i.e. the zero + * order term is last in the array. Note N is the number of + * coefficients, not the order. + * + * If coefficients are for the interval a to b, x must + * have been transformed to x -> 2(2x - b - a)/(b-a) before + * entering the routine. This maps x from (a, b) to (-1, 1), + * over which the Chebyshev polynomials are defined. + * + * If the coefficients are for the inverted interval, in + * which (a, b) is mapped to (1/b, 1/a), the transformation + * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, + * this becomes x -> 4a/x - 1. + * + * + * + * SPEED: + * + * Taking advantage of the recurrence properties of the + * Chebyshev polynomials, the routine requires one more + * addition per loop than evaluating a nested polynomial of + * the same degree. + * + */ + +template +struct pchebevl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x, + const typename unpacket_traits::type coef[]) { + typedef typename unpacket_traits::type Scalar; + Packet b0 = pset1(coef[0]); + Packet b1 = pset1(static_cast(0.f)); + Packet b2; + + for (int i = 1; i < N; i++) { + b2 = b1; + b1 = b0; + b0 = psub(pmadd(x, b1, pset1(coef[i])), b2); + } + + return pmul(pset1(static_cast(0.5f)), psub(b0, b2)); + } +}; + template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic_get_biased_exponent(const Packet& a) { typedef typename unpacket_traits::type Scalar; @@ -193,21 +321,14 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_impl_float(const e = psub(e, pand(cst_1, mask)); x = padd(x, tmp); - // Polynomial coefficients for rational (3,3) r(x) = p(x)/q(x) + // Polynomial coefficients for rational r(x) = p(x)/q(x) // approximating log(1+x) on [sqrt(0.5)-1;sqrt(2)-1]. - const Packet cst_p1 = pset1(1.0000000190281136f); - const Packet cst_p2 = pset1(1.0000000190281063f); - const Packet cst_p3 = pset1(0.18256296349849254f); - const Packet cst_q1 = pset1(1.4999999999999927f); - const Packet cst_q2 = pset1(0.59923249590823520f); - const Packet cst_q3 = pset1(0.049616247954120038f); - - Packet p = pmadd(x, cst_p3, cst_p2); - p = pmadd(x, p, cst_p1); + constexpr float alpha[] = {0.18256296349849254f, 1.0000000190281063f, 1.0000000190281136f}; + constexpr float beta[] = {0.049616247954120038f, 0.59923249590823520f, 1.4999999999999927f, 1.0f}; + + Packet p = ppolevl::run(x, alpha); p = pmul(x, p); - Packet q = pmadd(x, cst_q3, cst_q2); - q = pmadd(x, q, cst_q1); - q = pmadd(x, q, cst_1); + Packet q = ppolevl::run(x, beta); x = pdiv(p, q); // Add the logarithm of the exponent back to the result of the interpolation. @@ -348,7 +469,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_double(const Pa See: http://www.plunk.org/~hatch/rightway.php */ template -Packet generic_plog1p(const Packet& x) { +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_log1p(const Packet& x) { typedef typename unpacket_traits::type ScalarType; const Packet one = pset1(ScalarType(1)); Packet xp1 = padd(x, one); @@ -363,7 +484,7 @@ Packet generic_plog1p(const Packet& x) { See: http://www.plunk.org/~hatch/rightway.php */ template -Packet generic_expm1(const Packet& x) { +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_expm1(const Packet& x) { typedef typename unpacket_traits::type ScalarType; const Packet one = pset1(ScalarType(1)); const Packet neg_one = pset1(ScalarType(-1)); @@ -919,13 +1040,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac const Packet cst_one = pset1(1.0f); const Packet cst_two = pset1(2.0f); const Packet cst_pi_over_two = pset1(kPiOverTwo); - // For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with - // even terms only. - const Packet p9 = pset1(5.08838854730129241943359375e-2f); - const Packet p7 = pset1(3.95139865577220916748046875e-2f); - const Packet p5 = pset1(7.550220191478729248046875e-2f); - const Packet p3 = pset1(0.16664917767047882080078125f); - const Packet p1 = pset1(1.00000011920928955078125f); const Packet abs_x = pabs(x_in); const Packet sign_mask = pandnot(x_in, abs_x); @@ -941,13 +1055,11 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac const Packet x = pselect(large_mask, x_large, abs_x); const Packet x2 = pmul(x, x); - // Compute polynomial. - // x * (p1 + x^2*(p3 + x^2*(p5 + x^2*(p7 + x^2*p9)))) - - Packet p = pmadd(p9, x2, p7); - p = pmadd(p, x2, p5); - p = pmadd(p, x2, p3); - p = pmadd(p, x2, p1); + // For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with + // even terms only. + constexpr float alpha[] = {5.08838854730129241943359375e-2f, 3.95139865577220916748046875e-2f, + 7.550220191478729248046875e-2f, 0.16664917767047882080078125f, 1.00000011920928955078125f}; + Packet p = ppolevl::run(x2, alpha); p = pmul(p, x); const Packet p_large = pnmadd(cst_two, p, cst_pi_over_two); @@ -958,228 +1070,239 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac return por(invalid_mask, p); } +template +struct patan_reduced { + template + static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet run(const Packet& x); +}; + +template <> +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced::run(const Packet& x) { + constexpr double alpha[] = {2.6667153866462208e-05, 3.0917513112462781e-03, 5.2574296781008604e-02, + 3.0409318473444424e-01, 7.5365702534987022e-01, 8.2704055405494614e-01, + 3.3004361289279920e-01}; + + constexpr double beta[] = { + 2.7311202462436667e-04, 1.0899150928962708e-02, 1.1548932646420353e-01, 4.9716458728465573e-01, 1.0, + 9.3705509168587852e-01, 3.3004361289279920e-01}; + + Packet x2 = pmul(x, x); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); + return pmul(x, pdiv(p, q)); +} + // Computes elementwise atan(x) for x in [-1:1] with 2 ulp accuracy. +template <> template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_float(const Packet& x) { - const Packet q0 = pset1(-0.3333314359188079833984375f); - const Packet q2 = pset1(0.19993579387664794921875f); - const Packet q4 = pset1(-0.14209578931331634521484375f); - const Packet q6 = pset1(0.1066047251224517822265625f); - const Packet q8 = pset1(-7.5408883392810821533203125e-2f); - const Packet q10 = pset1(4.3082617223262786865234375e-2f); - const Packet q12 = pset1(-1.62907354533672332763671875e-2f); - const Packet q14 = pset1(2.90188402868807315826416015625e-3f); - - // Approximate atan(x) by a polynomial of the form - // P(x) = x + x^3 * Q(x^2), - // where Q(x^2) is a 7th order polynomial in x^2. - // We evaluate even and odd terms in x^2 in parallel - // to take advantage of instruction level parallelism - // and hardware with multiple FMA units. - - // note: if x == -0, this returns +0 - const Packet x2 = pmul(x, x); - const Packet x4 = pmul(x2, x2); - Packet q_odd = pmadd(q14, x4, q10); - Packet q_even = pmadd(q12, x4, q8); - q_odd = pmadd(q_odd, x4, q6); - q_even = pmadd(q_even, x4, q4); - q_odd = pmadd(q_odd, x4, q2); - q_even = pmadd(q_even, x4, q0); - const Packet q = pmadd(q_odd, x2, q_even); - return pmadd(q, pmul(x, x2), x); +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced::run(const Packet& x) { + constexpr float alpha[] = {1.12026982009410858154296875e-01f, 7.296695709228515625e-01f, 8.109951019287109375e-01f}; + + constexpr float beta[] = {1.00917108356952667236328125e-02f, 2.8318560123443603515625e-01f, 1.0f, + 8.109951019287109375e-01f}; + + Packet x2 = pmul(x, x); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); + return pmul(x, pdiv(p, q)); } template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Packet& x_in) { +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_atan(const Packet& x_in) { typedef typename unpacket_traits::type Scalar; - static_assert(std::is_same::value, "Scalar type must be float"); - constexpr float kPiOverTwo = static_cast(EIGEN_PI / 2); + constexpr Scalar kPiOverTwo = static_cast(EIGEN_PI / 2); - const Packet cst_signmask = pset1(-0.0f); - const Packet cst_one = pset1(1.0f); + const Packet cst_signmask = pset1(-Scalar(0)); + const Packet cst_one = pset1(Scalar(1)); const Packet cst_pi_over_two = pset1(kPiOverTwo); // "Large": For |x| > 1, use atan(1/x) = sign(x)*pi/2 - atan(x). // "Small": For |x| <= 1, approximate atan(x) directly by a polynomial - // calculated using Sollya. + // calculated using Rminimax. const Packet abs_x = pabs(x_in); const Packet x_signmask = pand(x_in, cst_signmask); const Packet large_mask = pcmp_lt(cst_one, abs_x); const Packet x = pselect(large_mask, preciprocal(abs_x), abs_x); - const Packet p = patan_reduced_float(x); + const Packet p = patan_reduced::run(x); // Apply transformations according to the range reduction masks. Packet result = pselect(large_mask, psub(cst_pi_over_two, p), p); // Return correct sign return pxor(result, x_signmask); } -// Computes elementwise atan(x) for x in [-tan(pi/8):tan(pi/8)] -// with 2 ulp accuracy. -template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_double(const Packet& x) { - const Packet q0 = pset1(-0.33333333333330028569463365784031338989734649658203); - const Packet q2 = pset1(0.199999999990664090177006073645316064357757568359375); - const Packet q4 = pset1(-0.142857141937123677255527809393242932856082916259766); - const Packet q6 = pset1(0.111111065991039953404495577160560060292482376098633); - const Packet q8 = pset1(-9.0907812986129224452902519715280504897236824035645e-2); - const Packet q10 = pset1(7.6900542950704739442180368769186316058039665222168e-2); - const Packet q12 = pset1(-6.6410112986494976294871150912513257935643196105957e-2); - const Packet q14 = pset1(5.6920144995467943094258345126945641823112964630127e-2); - const Packet q16 = pset1(-4.3577020814990513608577771265117917209863662719727e-2); - const Packet q18 = pset1(2.1244050233624342527427586446719942614436149597168e-2); - - // Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form - // P(x) = x + x^3 * Q(x^2), - // where Q(x^2) is a 9th order polynomial in x^2. - // We evaluate even and odd terms in x^2 in parallel - // to take advantage of instruction level parallelism - // and hardware with multiple FMA units. - const Packet x2 = pmul(x, x); - const Packet x4 = pmul(x2, x2); - Packet q_odd = pmadd(q18, x4, q14); - Packet q_even = pmadd(q16, x4, q12); - q_odd = pmadd(q_odd, x4, q10); - q_even = pmadd(q_even, x4, q8); - q_odd = pmadd(q_odd, x4, q6); - q_even = pmadd(q_even, x4, q4); - q_odd = pmadd(q_odd, x4, q2); - q_even = pmadd(q_even, x4, q0); - const Packet p = pmadd(q_odd, x2, q_even); - return pmadd(p, pmul(x, x2), x); -} +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 9/8-degree rational interpolant which + is accurate up to a couple of ulps in the (approximate) range [-8, 8], + outside of which tanh(x) = +/-1 in single precision. The input is clamped + to the range [-c, c]. The value c is chosen as the smallest value where + the approximation evaluates to exactly 1. -template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x_in) { - typedef typename unpacket_traits::type Scalar; - static_assert(std::is_same::value, "Scalar type must be double"); + This implementation works on both scalars and packets. +*/ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) { + // Clamp the inputs to the range [-c, c] and set everything + // outside that range to 1.0. The value c is chosen as the smallest + // floating point argument such that the approximation is exactly 1. + // This saves clamping the value at the end. +#ifdef EIGEN_VECTORIZE_FMA + const T plus_clamp = pset1(8.01773357391357422f); + const T minus_clamp = pset1(-8.01773357391357422f); +#else + const T plus_clamp = pset1(7.90738964080810547f); + const T minus_clamp = pset1(-7.90738964080810547f); +#endif + const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); - constexpr double kPiOverTwo = static_cast(EIGEN_PI / 2); - constexpr double kPiOverFour = static_cast(EIGEN_PI / 4); - constexpr double kTanPiOverEight = 0.4142135623730950488016887; - constexpr double kTan3PiOverEight = 2.4142135623730950488016887; + // The following rational approximation was generated by rminimax + // (https://gitlab.inria.fr/sfilip/rminimax) using the following + // command: + // $ ratapprox --function="tanh(x)" --dom='[-8.67,8.67]' --num="odd" + // --den="even" --type="[9,8]" --numF="[SG]" --denF="[SG]" --log + // --output=tanhf.sollya --dispCoeff="dec" - const Packet cst_signmask = pset1(-0.0); - const Packet cst_one = pset1(1.0); - const Packet cst_pi_over_two = pset1(kPiOverTwo); - const Packet cst_pi_over_four = pset1(kPiOverFour); - const Packet cst_large = pset1(kTan3PiOverEight); - const Packet cst_medium = pset1(kTanPiOverEight); - - // Use the same range reduction strategy (to [0:tan(pi/8)]) as the - // Cephes library: - // "Large": For x >= tan(3*pi/8), use atan(1/x) = pi/2 - atan(x). - // "Medium": For x in [tan(pi/8) : tan(3*pi/8)), - // use atan(x) = pi/4 + atan((x-1)/(x+1)). - // "Small": For x < tan(pi/8), approximate atan(x) directly by a polynomial - // calculated using Sollya. + // The monomial coefficients of the numerator polynomial (odd). + constexpr float alpha[] = {1.394553628e-8f, 2.102733560e-5f, 3.520756727e-3f, 1.340216100e-1f}; - const Packet abs_x = pabs(x_in); - const Packet x_signmask = pand(x_in, cst_signmask); - const Packet large_mask = pcmp_lt(cst_large, abs_x); - const Packet medium_mask = pandnot(pcmp_lt(cst_medium, abs_x), large_mask); + // The monomial coefficients of the denominator polynomial (even). + constexpr float beta[] = {8.015776984e-7f, 3.326951409e-4f, 2.597254514e-2f, 4.673548340e-1f, 1.0f}; - Packet x = abs_x; - x = pselect(large_mask, preciprocal(abs_x), x); - x = pselect(medium_mask, pdiv(psub(abs_x, cst_one), padd(abs_x, cst_one)), x); + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + const T x3 = pmul(x2, x); - // Compute approximation of p ~= atan(x') where x' is the argument reduced to - // [0:tan(pi/8)]. - Packet p = patan_reduced_double(x); + T p = ppolevl::run(x2, alpha); + T q = ppolevl::run(x2, beta); + // Take advantage of the fact that the constant term in p is 1 to compute + // x*(x^2*p + 1) = x^3 * p + x. + p = pmadd(x3, p, x); - // Apply transformations according to the range reduction masks. - p = pselect(large_mask, psub(cst_pi_over_two, p), p); - p = pselect(medium_mask, padd(cst_pi_over_four, p), p); - // Return the correct sign - return pxor(p, x_signmask); + // Divide the numerator by the denominator. + return pdiv(p, q); } /** \internal \returns the hyperbolic tan of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulps in the (approximate) range [-8, 8], + This uses a 19/18-degree rational interpolant which + is accurate up to a couple of ulps in the (approximate) range [-18.7, 18.7], outside of which tanh(x) = +/-1 in single precision. The input is clamped to the range [-c, c]. The value c is chosen as the smallest value where - the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] - the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. + the approximation evaluates to exactly 1. This implementation works on both scalars and packets. */ template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) { - // Clamp the inputs to the range [-c, c] +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_double(const T& a_x) { + // Clamp the inputs to the range [-c, c] and set everything + // outside that range to 1.0. The value c is chosen as the smallest + // floating point argument such that the approximation is exactly 1. + // This saves clamping the value at the end. #ifdef EIGEN_VECTORIZE_FMA - const T plus_clamp = pset1(7.99881172180175781f); - const T minus_clamp = pset1(-7.99881172180175781f); + const T plus_clamp = pset1(17.6610191624600077); + const T minus_clamp = pset1(-17.6610191624600077); #else - const T plus_clamp = pset1(7.90531110763549805f); - const T minus_clamp = pset1(-7.90531110763549805f); + const T plus_clamp = pset1(17.714196154005176); + const T minus_clamp = pset1(-17.714196154005176); #endif - const T tiny = pset1(0.0004f); const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); - const T tiny_mask = pcmp_lt(pabs(a_x), tiny); + + // The following rational approximation was generated by rminimax + // (https://gitlab.inria.fr/sfilip/rminimax) using the following + // command: + // $ ./ratapprox --function="tanh(x)" --dom='[-18.72,18.72]' + // --num="odd" --den="even" --type="[19,18]" --numF="[D]" + // --denF="[D]" --log --output=tanh.sollya --dispCoeff="dec" + // The monomial coefficients of the numerator polynomial (odd). - const T alpha_1 = pset1(4.89352455891786e-03f); - const T alpha_3 = pset1(6.37261928875436e-04f); - const T alpha_5 = pset1(1.48572235717979e-05f); - const T alpha_7 = pset1(5.12229709037114e-08f); - const T alpha_9 = pset1(-8.60467152213735e-11f); - const T alpha_11 = pset1(2.00018790482477e-13f); - const T alpha_13 = pset1(-2.76076847742355e-16f); + constexpr double alpha[] = {2.6158007860482230e-23, 7.6534862268749319e-19, 3.1309488231386680e-15, + 4.2303918148209176e-12, 2.4618379131293676e-09, 6.8644367682497074e-07, + 9.3839087674268880e-05, 5.9809711724441161e-03, 1.5184719640284322e-01}; // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(4.89352518554385e-03f); - const T beta_2 = pset1(2.26843463243900e-03f); - const T beta_4 = pset1(1.18534705686654e-04f); - const T beta_6 = pset1(1.19825839466702e-06f); + constexpr double beta[] = {6.463747022670968018e-21, 5.782506856739003571e-17, + 1.293019623712687916e-13, 1.123643448069621992e-10, + 4.492975677839633985e-08, 8.785185266237658698e-06, + 8.295161192716231542e-04, 3.437448108450402717e-02, + 4.851805297361760360e-01, 1.0}; // Since the polynomials are odd/even, we need x^2. const T x2 = pmul(x, x); + const T x3 = pmul(x2, x); - // Evaluate the numerator polynomial p. - T p = pmadd(x2, alpha_13, alpha_11); - p = pmadd(x2, p, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial q. - T q = pmadd(x2, beta_6, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); + // Interleave the evaluation of the numerator polynomial p and + // denominator polynomial q. + T p = ppolevl::run(x2, alpha); + T q = ppolevl::run(x2, beta); + // Take advantage of the fact that the constant term in p is 1 to compute + // x*(x^2*p + 1) = x^3 * p + x. + p = pmadd(x3, p, x); // Divide the numerator by the denominator. - return pselect(tiny_mask, x, pdiv(p, q)); + return pdiv(p, q); } template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) { typedef typename unpacket_traits::type Scalar; static_assert(std::is_same::value, "Scalar type must be float"); - const Packet half = pset1(0.5f); - const Packet x_gt_half = pcmp_le(half, pabs(x)); + // For |x| in [0:0.5] we use a polynomial approximation of the form - // P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )). - const Packet C3 = pset1(0.3333373963832855224609375f); - const Packet C5 = pset1(0.1997792422771453857421875f); - const Packet C7 = pset1(0.14672131836414337158203125f); - const Packet C9 = pset1(8.2311116158962249755859375e-2f); - const Packet C11 = pset1(0.1819281280040740966796875f); + // P(x) = x + x^3*(alpha[4] + x^2 * (alpha[3] + x^2 * (... x^2 * alpha[0]) ... )). + constexpr float alpha[] = {0.1819281280040740966796875f, 8.2311116158962249755859375e-2f, + 0.14672131836414337158203125f, 0.1997792422771453857421875f, 0.3333373963832855224609375f}; const Packet x2 = pmul(x, x); - Packet p = pmadd(C11, x2, C9); - p = pmadd(x2, p, C7); - p = pmadd(x2, p, C5); - p = pmadd(x2, p, C3); - p = pmadd(pmul(x, x2), p, x); + const Packet x3 = pmul(x, x2); + Packet p = ppolevl::run(x2, alpha); + p = pmadd(x3, p, x); // For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x)); + const Packet half = pset1(0.5f); const Packet one = pset1(1.0f); Packet r = pdiv(padd(one, x), psub(one, x)); r = pmul(half, plog(r)); - return pselect(x_gt_half, r, p); + + const Packet x_gt_half = pcmp_le(half, pabs(x)); + const Packet x_eq_one = pcmp_eq(one, pabs(x)); + const Packet x_gt_one = pcmp_lt(one, pabs(x)); + const Packet sign_mask = pset1(-0.0f); + const Packet x_sign = pand(sign_mask, x); + const Packet inf = pset1(std::numeric_limits::infinity()); + return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, r, p))); +} + +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_double(const Packet& x) { + typedef typename unpacket_traits::type Scalar; + static_assert(std::is_same::value, "Scalar type must be double"); + // For x in [-0.5:0.5] we use a rational approximation of the form + // R(x) = x + x^3*P(x^2)/Q(x^2), where P is or order 4 and Q is of order 5. + constexpr double alpha[] = {3.3071338469301391e-03, -4.7129526768798737e-02, 1.8185306179826699e-01, + -2.5949536095445679e-01, 1.2306328729812676e-01}; + + constexpr double beta[] = {-3.8679974580640881e-03, 7.6391885763341910e-02, -4.2828141436397615e-01, + 9.8733495886883648e-01, -1.0000000000000000e+00, 3.6918986189438030e-01}; + + const Packet x2 = pmul(x, x); + const Packet x3 = pmul(x, x2); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); + Packet y_small = pmadd(x3, pdiv(p, q), x); + + // For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x)); + const Packet half = pset1(0.5); + const Packet one = pset1(1.0); + Packet y_large = pdiv(padd(one, x), psub(one, x)); + y_large = pmul(half, plog(y_large)); + + const Packet x_gt_half = pcmp_le(half, pabs(x)); + const Packet x_eq_one = pcmp_eq(one, pabs(x)); + const Packet x_gt_one = pcmp_lt(one, pabs(x)); + const Packet sign_mask = pset1(-0.0); + const Packet x_sign = pand(sign_mask, x); + const Packet inf = pset1(std::numeric_limits::infinity()); + return por(x_gt_one, pselect(x_eq_one, por(x_sign, inf), pselect(x_gt_half, y_large, y_small))); } template @@ -1511,7 +1634,7 @@ struct psign_impl -EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { n = pround(x); r = psub(x, n); } @@ -1519,7 +1642,7 @@ EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { // This function computes the sum {s, r}, such that x + y = s_hi + s_lo // holds exactly, and s_hi = fl(x+y), if |x| >= |y|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) { s_hi = padd(x, y); const Packet t = psub(s_hi, x); s_lo = psub(y, t); @@ -1531,11 +1654,18 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and // p_hi = fl(x * y). template -EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { p_hi = pmul(x, y); p_lo = pmsub(x, y, p_hi); } +// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that +// x * y = xy + p_lo holds exactly. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { + return pmsub(x, y, xy); +} + #else // This function implements the Veltkamp splitting. Given a floating point @@ -1544,7 +1674,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, // This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) { typedef typename unpacket_traits::type Scalar; EIGEN_CONSTEXPR int shift = (NumTraits::digits() + 1) / 2; const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr. @@ -1559,7 +1689,7 @@ EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packe // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and // p_hi = fl(x * y). template -EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { Packet x_hi, x_lo, y_hi, y_lo; veltkamp_splitting(x, x_hi, x_lo); veltkamp_splitting(y, y_hi, y_lo); @@ -1571,6 +1701,21 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, p_lo = pmadd(x_lo, y_lo, p_lo); } +// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that +// x * y = xy + p_lo holds exactly. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { + Packet x_hi, x_lo, y_hi, y_lo; + veltkamp_splitting(x, x_hi, x_lo); + veltkamp_splitting(y, y_hi, y_lo); + + Packet p_lo = pmadd(x_hi, y_hi, pnegate(xy)); + p_lo = pmadd(x_hi, y_lo, p_lo); + p_lo = pmadd(x_lo, y_hi, p_lo); + p_lo = pmadd(x_lo, y_lo, p_lo); + return p_lo; +} + #endif // EIGEN_VECTORIZE_FMA // This function implements Dekker's algorithm for the addition @@ -1580,8 +1725,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, // This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& s_hi, Packet& s_lo) { const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi)); Packet r_hi_1, r_lo_1; fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1); @@ -1599,8 +1744,8 @@ EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Pa // This is a version of twosum for double word numbers, // which assumes that |x_hi| >= |y_hi|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& s_hi, Packet& s_lo) { Packet r_hi, r_lo; fast_twosum(x_hi, y_hi, r_hi, r_lo); const Packet s = padd(padd(y_lo, r_lo), x_lo); @@ -1611,8 +1756,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, con // double word number {y_hi, y_lo} number, with the assumption // that |x| >= |y_hi|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, Packet& s_hi, - Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, + Packet& s_hi, Packet& s_lo) { Packet r_hi, r_lo; fast_twosum(x, y_hi, r_hi, r_lo); const Packet s = padd(y_lo, r_lo); @@ -1628,7 +1773,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const // This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, + Packet& p_hi, Packet& p_lo) { Packet c_hi, c_lo1; twoprod(x_hi, y, c_hi, c_lo1); const Packet c_lo2 = pmul(x_lo, y); @@ -1645,8 +1791,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P // of less than 2*2^{-2p}, where p is the number of significand bit // in the floating point type. template -EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& p_hi, Packet& p_lo) { Packet p_hi_hi, p_hi_lo; twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo); Packet p_lo_hi, p_lo_lo; @@ -1659,7 +1805,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P // for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu, // 2017. https://hal.archives-ouvertes.fr/hal-01351529 template -void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& z_hi, Packet& z_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, + Packet& z_hi, Packet& z_lo) { const Packet t_hi = pdiv(x_hi, y); Packet pi_hi, pi_lo; twoprod(t_hi, y, pi_hi, pi_lo); @@ -1674,7 +1821,7 @@ void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, template struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { log2_x_hi = plog2(x); log2_x_lo = pzero(x); } @@ -1689,7 +1836,7 @@ struct accurate_log2 { template <> struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) { // The function log(1+x)/x is approximated in the interval // [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form // Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))), @@ -1769,7 +1916,7 @@ struct accurate_log2 { template <> struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { // We use a transformation of variables: // r = c * (x-1) / (x+1), // such that @@ -1850,13 +1997,13 @@ struct accurate_log2 { } }; -// This function computes exp2(x) (i.e. 2**x). +// This function accurately computes exp2(x) for x in [-0.5:0.5], which is +// needed in pow(x,y). template struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { - // TODO(rmlarsen): Add a pexp2 packetop. - return pexp(pmul(pset1(Scalar(EIGEN_LN2)), x)); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + return generic_exp2(x); } }; @@ -1867,7 +2014,7 @@ struct fast_accurate_exp2 { template <> struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { // This function approximates exp2(x) by a degree 6 polynomial of the form // Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in // single precision, and the remaining steps are evaluated with extra precision using @@ -1924,7 +2071,7 @@ struct fast_accurate_exp2 { template <> struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { // This function approximates exp2(x) by a degree 10 polynomial of the form // Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in // single precision, and the remaining steps are evaluated with extra precision using @@ -1990,7 +2137,7 @@ struct fast_accurate_exp2 { // TODO(rmlarsen): We should probably add this as a packet up 'ppow', to make it // easier to specialize or turn off for specific types and/or backends.x template -EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) { typedef typename unpacket_traits::type Scalar; // Split x into exponent e_x and mantissa m_x. Packet e_x; @@ -2107,134 +2254,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Pac pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))))); } -/* polevl (modified for Eigen) - * - * Evaluate polynomial - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N+1]; - * - * y = polevl( x, coef); - * - * - * - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - * - * The Eigen implementation is templatized. For best speed, store - * coef as a const array (constexpr), e.g. - * - * const double coef[] = {1.0, 2.0, 3.0, ...}; - * - */ -template -struct ppolevl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, - const typename unpacket_traits::type coeff[]) { - EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); - } -}; - -template -struct ppolevl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, - const typename unpacket_traits::type coeff[]) { - EIGEN_UNUSED_VARIABLE(x); - return pset1(coeff[0]); - } -}; - -/* chbevl (modified for Eigen) - * - * Evaluate Chebyshev series - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N], chebevl(); - * - * y = chbevl( x, coef, N ); - * - * - * - * DESCRIPTION: - * - * Evaluates the series - * - * N-1 - * - ' - * y = > coef[i] T (x/2) - * - i - * i=0 - * - * of Chebyshev polynomials Ti at argument x/2. - * - * Coefficients are stored in reverse order, i.e. the zero - * order term is last in the array. Note N is the number of - * coefficients, not the order. - * - * If coefficients are for the interval a to b, x must - * have been transformed to x -> 2(2x - b - a)/(b-a) before - * entering the routine. This maps x from (a, b) to (-1, 1), - * over which the Chebyshev polynomials are defined. - * - * If the coefficients are for the inverted interval, in - * which (a, b) is mapped to (1/b, 1/a), the transformation - * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, - * this becomes x -> 4a/x - 1. - * - * - * - * SPEED: - * - * Taking advantage of the recurrence properties of the - * Chebyshev polynomials, the routine requires one more - * addition per loop than evaluating a nested polynomial of - * the same degree. - * - */ - -template -struct pchebevl { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x, - const typename unpacket_traits::type coef[]) { - typedef typename unpacket_traits::type Scalar; - Packet b0 = pset1(coef[0]); - Packet b1 = pset1(static_cast(0.f)); - Packet b2; - - for (int i = 1; i < N; i++) { - b2 = b1; - b1 = b0; - b0 = psub(pmadd(x, b1, pset1(coef[i])), b2); - } - - return pmul(pset1(static_cast(0.5f)), psub(b0, b2)); - } -}; - namespace unary_pow { template ::IsInteger> @@ -2469,6 +2488,32 @@ struct unary_pow_impl { } }; +// This function computes exp2(x) = exp(ln(2) * x). +// To improve accuracy, the product ln(2)*x is computed using the twoprod +// algorithm, such that ln(2) * x = p_hi + p_lo holds exactly. Then exp2(x) is +// computed as exp2(x) = exp(p_hi) * exp(p_lo) ~= exp(p_hi) * (1 + p_lo). This +// correction step this reduces the maximum absolute error as follows: +// +// type | max error (simple product) | max error (twoprod) | +// ----------------------------------------------------------- +// float | 35 ulps | 4 ulps | +// double | 363 ulps | 110 ulps | +// +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_exp2(const Packet& _x) { + typedef typename unpacket_traits::type Scalar; + constexpr int max_exponent = std::numeric_limits::max_exponent; + constexpr int digits = std::numeric_limits::digits; + constexpr Scalar max_cap = Scalar(max_exponent + 1); + constexpr Scalar min_cap = -Scalar(max_exponent + digits - 1); + Packet x = pmax(pmin(_x, pset1(max_cap)), pset1(min_cap)); + Packet p_hi, p_lo; + twoprod(pset1(Scalar(EIGEN_LN2)), x, p_hi, p_lo); + Packet exp2_hi = pexp(p_hi); + Packet exp2_lo = padd(pset1(Scalar(1)), p_lo); + return pmul(exp2_hi, exp2_lo); +} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_rint(const Packet& a) { using Scalar = typename unpacket_traits::type; @@ -2505,11 +2550,14 @@ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_ceil(const Packet& a) { using Scalar = typename unpacket_traits::type; const Packet cst_1 = pset1(Scalar(1)); + const Packet sign_mask = pset1(static_cast(-0.0)); Packet rint_a = generic_rint(a); // if rint(a) < a, then rint(a) == floor(a) Packet mask = pcmp_lt(rint_a, a); Packet offset = pand(cst_1, mask); Packet result = padd(rint_a, offset); + // Signed zero must remain signed (e.g. ceil(-0.02) == -0). + result = por(result, pand(sign_mask, a)); return result; } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 4d113ca6a14..3b362f4f68f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -60,11 +60,19 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_double(const Pa /** \internal \returns log(1 + x) */ template -Packet generic_plog1p(const Packet& x); +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_log1p(const Packet& x); /** \internal \returns exp(x)-1 */ template -Packet generic_expm1(const Packet& x); +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_expm1(const Packet& x); + +/** \internal \returns atan(x) */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_atan(const Packet& x); + +/** \internal \returns exp2(x) */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_exp2(const Packet& x); /** \internal \returns exp(x) for single precision float */ template @@ -98,22 +106,22 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet& x); -/** \internal \returns atan(x) for single precision float */ -template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Packet& x); - -/** \internal \returns atan(x) for double precision float */ -template -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x); - /** \internal \returns tanh(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x); +/** \internal \returns tanh(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_double(const Packet& x); + /** \internal \returns atanh(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x); +/** \internal \returns atanh(x) for double precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_double(const Packet& x); + /** \internal \returns sqrt(x) for complex types */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet& a); @@ -155,36 +163,41 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_round(const Packet& a); return p##METHOD##_##SCALAR(_x); \ } +// Macros for instantiating these generic functions for different backends. +#define EIGEN_GENERIC_PACKET_FUNCTION(METHOD, PACKET) \ + template <> \ + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET p##METHOD(const PACKET& _x) { \ + return generic_##METHOD(_x); \ + } + #define EIGEN_FLOAT_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, float, PACKET) #define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, double, PACKET) -#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \ - EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \ - template <> \ - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET pexpm1(const PACKET& _x) { \ - return internal::generic_expm1(_x); \ - } \ - template <> \ - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p(const PACKET& _x) { \ - return internal::generic_plog1p(_x); \ - } +#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(expm1, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(exp2, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(log1p, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(atan, PACKET) #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \ - EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(atanh, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(sin, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(cos, PACKET) \ EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \ - EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) + EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(tanh, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_GENERIC_PACKET_FUNCTION(exp2, PACKET) } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h index 1f314fae257..a8cb228c186 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/Default/Half.h @@ -672,6 +672,14 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { return half(::expf(float(a))); #endif } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp2(const half& a) { +#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530) || \ + defined(EIGEN_HIP_DEVICE_COMPILE) + return half(hexp2(a)); +#else + return half(::exp2f(float(a))); +#endif +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { return half(numext::expm1(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if (defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDA_SDK_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && \ diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h index 5257c03c86d..4190d1bd1ac 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/Complex.h @@ -108,6 +108,16 @@ EIGEN_STRONG_INLINE Packet2cf pcast(const Packet2f& a) { return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); } +template <> +EIGEN_STRONG_INLINE Packet1cf pzero(const Packet1cf& /*a*/) { + return Packet1cf(vdup_n_f32(0.0f)); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf pzero(const Packet2cf& /*a*/) { + return Packet2cf(vdupq_n_f32(0.0f)); +} + template <> EIGEN_STRONG_INLINE Packet1cf pset1(const std::complex& from) { return Packet1cf(vld1_f32(reinterpret_cast(&from))); @@ -156,6 +166,20 @@ EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR()))); } +#ifdef __ARM_FEATURE_COMPLEX +template <> +EIGEN_STRONG_INLINE Packet1cf pmadd(const Packet1cf& a, const Packet1cf& b, const Packet1cf& c) { + Packet1cf result; + result.v = vcmla_f32(c.v, a.v, b.v); + result.v = vcmla_rot90_f32(result.v, a.v, b.v); + return result; +} + +template <> +EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) { + return pmadd(a, b, pzero(a)); +} +#else template <> EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1cf& b) { Packet2f v1, v2; @@ -175,6 +199,22 @@ EIGEN_STRONG_INLINE Packet1cf pmul(const Packet1cf& a, const Packet1c // Add and return the result return Packet1cf(vadd_f32(v1, v2)); } +#endif + +#ifdef __ARM_FEATURE_COMPLEX +template <> +EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) { + Packet2cf result; + result.v = vcmlaq_f32(c.v, a.v, b.v); + result.v = vcmlaq_rot90_f32(result.v, a.v, b.v); + return result; +} + +template <> +EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { + return pmadd(a, b, pzero(a)); +} +#else template <> EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { Packet4f v1, v2; @@ -194,6 +234,7 @@ EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2c // Add and return the result return Packet2cf(vaddq_f32(v1, v2)); } +#endif template <> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b) { @@ -461,13 +502,10 @@ EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a) { //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -// See bug 1325, clang fails to call vld1q_u64. -#if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE -static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000}; -#else -const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000}; -static uint64x2_t p2ul_CONJ_XOR = vld1q_u64(p2ul_conj_XOR_DATA); -#endif +inline uint64x2_t p2ul_CONJ_XOR() { + static const uint64_t p2ul_conj_XOR_DATA[] = {0x0, 0x8000000000000000}; + return vld1q_u64(p2ul_conj_XOR_DATA); +} struct Packet1cd { EIGEN_STRONG_INLINE Packet1cd() {} @@ -523,6 +561,11 @@ EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu(reinterpret_cast(from))); } +template <> +EIGEN_STRONG_INLINE Packet1cd pzero(const Packet1cd& /*a*/) { + return Packet1cd(vdupq_n_f64(0.0)); +} + template <> EIGEN_STRONG_INLINE Packet1cd pset1(const std::complex& from) { /* here we really have to use unaligned loads :( */ @@ -546,9 +589,23 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { template <> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { - return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); + return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR()))); +} + +#ifdef __ARM_FEATURE_COMPLEX +template <> +EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) { + Packet1cd result; + result.v = vcmlaq_f64(c.v, a.v, b.v); + result.v = vcmlaq_rot90_f64(result.v, a.v, b.v); + return result; } +template <> +EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { + return pmadd(a, b, pzero(a)); +} +#else template <> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { Packet2d v1, v2; @@ -562,12 +619,13 @@ EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1c // Multiply the imag a with b v2 = vmulq_f64(v2, b.v); // Conjugate v2 - v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR)); + v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR())); // Swap real/imag elements in v2. v2 = preverse(v2); // Add and return the result return Packet1cd(vaddq_f64(v1, v2)); } +#endif template <> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/MathFunctions.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/MathFunctions.h index bebe081a1ea..0046e01efb8 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -37,6 +37,7 @@ BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp) +BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pexp2) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, ptanh) template <> diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h index 794d063168e..2f401fdff15 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/NEON/PacketMath.h @@ -209,6 +209,7 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, + HasErfc = EIGEN_FAST_MATH, HasBessel = 0, // Issues with accuracy. HasNdtri = 0 }; @@ -654,6 +655,16 @@ struct unpacket_traits { }; }; +template <> +EIGEN_STRONG_INLINE Packet2f pzero(const Packet2f& /*a*/) { + return vdup_n_f32(0.0f); +} + +template <> +EIGEN_STRONG_INLINE Packet4f pzero(const Packet4f& /*a*/) { + return vdupq_n_f32(0.0f); +} + template <> EIGEN_STRONG_INLINE Packet2f pset1(const float& from) { return vdup_n_f32(from); @@ -5123,13 +5134,15 @@ struct packet_traits : default_packet_traits { HasExp = 1, HasLog = 1, HasATan = 1, + HasATanh = 1, #endif HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasSqrt = 1, HasRsqrt = 1, - HasTanh = 0, - HasErf = 0 + HasTanh = EIGEN_FAST_MATH, + HasErf = 0, + HasErfc = EIGEN_FAST_MATH }; }; @@ -5147,6 +5160,11 @@ struct unpacket_traits { }; }; +template <> +EIGEN_STRONG_INLINE Packet2d pzero(const Packet2d& /*a*/) { + return vdupq_n_f64(0.0); +} + template <> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return vdupq_n_f64(from); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h index 0e70f031828..c69e3d4791f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/Complex.h @@ -89,19 +89,25 @@ EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a) { } template <> -EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { +EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) { #ifdef EIGEN_VECTORIZE_SSE3 - return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v), - _mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2)))); - // return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), - // _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), - // vec4f_swizzle1(b.v, 1, 0, 3, 2)))); + __m128 tmp1 = _mm_mul_ps(_mm_movehdup_ps(a.v), vec4f_swizzle1(b.v, 1, 0, 3, 2)); + __m128 tmp2 = _mm_moveldup_ps(a.v); #else - const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000, 0x00000000, 0x80000000, 0x00000000)); - return Packet2cf( - _mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), - _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask))); + __m128 tmp1 = _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3), vec4f_swizzle1(b.v, 1, 0, 3, 2)); + __m128 tmp2 = vec4f_swizzle1(a.v, 0, 0, 2, 2); #endif +#ifdef EIGEN_VECTORIZE_FMA + __m128 result = _mm_fmaddsub_ps(tmp2, b.v, tmp1); +#else +#ifdef EIGEN_VECTORIZE_SSE3 + __m128 result = _mm_addsub_ps(_mm_mul_ps(tmp2, b.v), tmp1); +#else + const __m128 mask = _mm_setr_ps(-0.0f, 0.0f, -0.0f, 0.0f); + __m128 result = _mm_add_ps(_mm_mul_ps(tmp2, b.v), _mm_xor_ps(tmp1, mask)); +#endif +#endif + return Packet2cf(result); } template <> @@ -127,11 +133,11 @@ EIGEN_STRONG_INLINE Packet2cf pandnot(const Packet2cf& a, const Packe template <> EIGEN_STRONG_INLINE Packet2cf pload(const std::complex* from) { - EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload(&numext::real_ref(*from))); + EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(_mm_load_ps(&numext::real_ref(*from))); } template <> EIGEN_STRONG_INLINE Packet2cf ploadu(const std::complex* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu(&numext::real_ref(*from))); + EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(_mm_loadu_ps(&numext::real_ref(*from))); } template <> @@ -148,11 +154,11 @@ EIGEN_STRONG_INLINE Packet2cf ploaddup(const std::complex* fro template <> EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet2cf& from) { - EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); + EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps(&numext::real_ref(*to), from.v); } template <> EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet2cf& from) { - EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_ps(&numext::real_ref(*to), from.v); } template <> @@ -277,15 +283,24 @@ EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { } template <> -EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { +EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { + __m128d tmp1 = _mm_mul_pd(_mm_unpackhi_pd(a.v, a.v), vec2d_swizzle1(b.v, 1, 0)); +#ifdef EIGEN_VECTORIZE_SSE3 + __m128d tmp2 = _mm_movedup_pd(a.v); +#else + __m128d tmp2 = _mm_unpacklo_pd(a.v, a.v); +#endif +#ifdef EIGEN_VECTORIZE_FMA + __m128d result = _mm_fmaddsub_pd(tmp2, b.v, tmp1); +#else #ifdef EIGEN_VECTORIZE_SSE3 - return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v), - _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)))); + __m128d result = _mm_addsub_pd(_mm_mul_pd(tmp2, b.v), tmp1); #else - const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0, 0x0, 0x80000000, 0x0)); - return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v), - _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1), vec2d_swizzle1(b.v, 1, 0)), mask))); + const __m128d mask = _mm_setr_pd(-0.0, 0.0); + __m128d result = _mm_add_pd(_mm_mul_pd(tmp2, b.v), _mm_xor_pd(tmp1, mask)); #endif +#endif + return Packet1cd(result); } template <> @@ -312,11 +327,11 @@ EIGEN_STRONG_INLINE Packet1cd pandnot(const Packet1cd& a, const Packe // FIXME force unaligned load, this is a temporary fix template <> EIGEN_STRONG_INLINE Packet1cd pload(const std::complex* from) { - EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload((const double*)from)); + EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(_mm_load_pd((const double*)from)); } template <> EIGEN_STRONG_INLINE Packet1cd ploadu(const std::complex* from) { - EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu((const double*)from)); + EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(_mm_loadu_pd((const double*)from)); } template <> EIGEN_STRONG_INLINE Packet1cd @@ -332,11 +347,11 @@ EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex* fr // FIXME force unaligned store, this is a temporary fix template <> EIGEN_STRONG_INLINE void pstore >(std::complex* to, const Packet1cd& from) { - EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); + EIGEN_DEBUG_ALIGNED_STORE _mm_store_pd((double*)to, from.v); } template <> EIGEN_STRONG_INLINE void pstoreu >(std::complex* to, const Packet1cd& from) { - EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_pd((double*)to, from.v); } template <> @@ -430,6 +445,74 @@ EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a) { return pexp_complex(a); } +#ifdef EIGEN_VECTORIZE_FMA +// std::complex +template <> +EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) { + __m128 a_odd = _mm_movehdup_ps(a.v); + __m128 a_even = _mm_moveldup_ps(a.v); + __m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmaddsub_ps(a_odd, b_swap, c.v)); + return Packet2cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cf pmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) { + __m128 a_odd = _mm_movehdup_ps(a.v); + __m128 a_even = _mm_moveldup_ps(a.v); + __m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 result = _mm_fmaddsub_ps(a_even, b.v, _mm_fmsubadd_ps(a_odd, b_swap, c.v)); + return Packet2cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cf pnmadd(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) { + __m128 a_odd = _mm_movehdup_ps(a.v); + __m128 a_even = _mm_moveldup_ps(a.v); + __m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmaddsub_ps(a_even, b.v, c.v)); + return Packet2cf(result); +} +template <> +EIGEN_STRONG_INLINE Packet2cf pnmsub(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c) { + __m128 a_odd = _mm_movehdup_ps(a.v); + __m128 a_even = _mm_moveldup_ps(a.v); + __m128 b_swap = _mm_permute_ps(b.v, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 result = _mm_fmaddsub_ps(a_odd, b_swap, _mm_fmsubadd_ps(a_even, b.v, c.v)); + return Packet2cf(result); +} +// std::complex +template <> +EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) { + __m128d a_odd = _mm_permute_pd(a.v, 0x3); + __m128d a_even = _mm_movedup_pd(a.v); + __m128d b_swap = _mm_permute_pd(b.v, 0x1); + __m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmaddsub_pd(a_odd, b_swap, c.v)); + return Packet1cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet1cd pmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) { + __m128d a_odd = _mm_permute_pd(a.v, 0x3); + __m128d a_even = _mm_movedup_pd(a.v); + __m128d b_swap = _mm_permute_pd(b.v, 0x1); + __m128d result = _mm_fmaddsub_pd(a_even, b.v, _mm_fmsubadd_pd(a_odd, b_swap, c.v)); + return Packet1cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet1cd pnmadd(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) { + __m128d a_odd = _mm_permute_pd(a.v, 0x3); + __m128d a_even = _mm_movedup_pd(a.v); + __m128d b_swap = _mm_permute_pd(b.v, 0x1); + __m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmaddsub_pd(a_even, b.v, c.v)); + return Packet1cd(result); +} +template <> +EIGEN_STRONG_INLINE Packet1cd pnmsub(const Packet1cd& a, const Packet1cd& b, const Packet1cd& c) { + __m128d a_odd = _mm_permute_pd(a.v, 0x3); + __m128d a_even = _mm_movedup_pd(a.v); + __m128d b_swap = _mm_permute_pd(b.v, 0x1); + __m128d result = _mm_fmaddsub_pd(a_odd, b_swap, _mm_fmsubadd_pd(a_even, b.v, c.v)); + return Packet1cd(result); +} +#endif } // end namespace internal } // end namespace Eigen diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h index e5dce3b49ba..c749763df77 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/arch/SSE/PacketMath.h @@ -197,6 +197,7 @@ struct packet_traits : default_packet_traits { HasRsqrt = 1, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, + HasErfc = EIGEN_FAST_MATH, HasBlend = 1, HasSign = 0 // The manually vectorized version is slightly slower for SSE. }; @@ -214,11 +215,14 @@ struct packet_traits : default_packet_traits { HasDiv = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, HasLog = 1, + HasErfc = EIGEN_FAST_MATH, HasExp = 1, HasSqrt = 1, HasRsqrt = 1, HasATan = 1, + HasATanh = 1, HasBlend = 1 }; }; @@ -1228,13 +1232,13 @@ EIGEN_STRONG_INLINE Packet4ui plogical_shift_left(const Packet4ui& a) { template <> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { - const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF)); - return _mm_and_ps(a, mask); + const __m128i mask = _mm_setr_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF); + return _mm_castsi128_ps(_mm_and_si128(mask, _mm_castps_si128(a))); } template <> EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) { - const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF)); - return _mm_and_pd(a, mask); + const __m128i mask = _mm_setr_epi32(0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF); + return _mm_castsi128_pd(_mm_and_si128(mask, _mm_castpd_si128(a))); } template <> EIGEN_STRONG_INLINE Packet2l pabs(const Packet2l& a) { @@ -2277,8 +2281,6 @@ EIGEN_STRONG_INLINE __m128i half2floatsse(__m128i h) { } EIGEN_STRONG_INLINE __m128i float2half(__m128 f) { - __m128i o = _mm_setzero_si128(); - // unsigned int sign_mask = 0x80000000u; __m128i sign = _mm_set1_epi32(0x80000000u); // unsigned int sign = f.u & sign_mask; @@ -2305,7 +2307,7 @@ EIGEN_STRONG_INLINE __m128i float2half(__m128 f) { // f.f += denorm_magic.f; f = _mm_add_ps(f, _mm_castsi128_ps(denorm_magic)); // f.u - denorm_magic.u - o = _mm_sub_epi32(_mm_castps_si128(f), denorm_magic); + __m128i o = _mm_sub_epi32(_mm_castps_si128(f), denorm_magic); o = _mm_and_si128(o, subnorm_mask); // Correct result for inf/nan/zero/subnormal, 0 otherwise o = _mm_or_si128(o, naninf_value); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h index 5059a540881..defd3c2a188 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/functors/UnaryFunctors.h @@ -103,6 +103,26 @@ struct functor_traits> { enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; +template ::IsComplex> +struct squared_norm_functor { + typedef Scalar result_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a) const { + return Scalar(numext::real(a) * numext::real(a), numext::imag(a) * numext::imag(a)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { + return Packet(pmul(a.v, a.v)); + } +}; +template +struct squared_norm_functor : scalar_abs2_op {}; + +template +struct functor_traits> { + using Real = typename NumTraits::Real; + enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; +}; + /** \internal * \brief Template functor to compute the conjugate of a complex value * @@ -356,6 +376,22 @@ struct functor_traits> { }; }; +template +struct scalar_exp2_op { + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return internal::pexp2(a); } + template + EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { + return internal::pexp2(a); + } +}; +template +struct functor_traits> { + enum { + PacketAccess = packet_traits::HasExp, + Cost = functor_traits>::Cost // TODO measure cost of exp2 + }; +}; + /** \internal * * \brief Template functor to compute the exponential of a scalar - 1. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h index e9d0cae8d42..ebfac0146be 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -97,6 +97,7 @@ struct general_matrix_matrix_producttask_info[tid].users to the number of threads to mark that all other threads are going to // use it. while (info->task_info[tid].users != 0) { + std::this_thread::yield(); } info->task_info[tid].users = threads; @@ -115,6 +116,7 @@ struct general_matrix_matrix_product 0) { while (info->task_info[i].sync != k) { + std::this_thread::yield(); } } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ac94b3f6fc6..f47615f7275 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -68,6 +68,10 @@ struct general_matrix_matrix_triangular_product& blocking) { + if (size == 0) { + return; + } + typedef gebp_traits Traits; typedef const_blas_data_mapper LhsMapper; @@ -157,7 +161,7 @@ struct tribb_kernel { gebp_kernel gebp_kernel1; gebp_kernel gebp_kernel2; - Matrix buffer((internal::constructor_without_unaligned_array_assert())); + Matrix buffer; // let's process the block per panel of actual_mc x BlockSize, // again, each is split into three parts, etc. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/Parallelizer.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/Parallelizer.h index 667fea25b63..018efa64b2f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/Parallelizer.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/Parallelizer.h @@ -71,7 +71,7 @@ inline void setNbThreads(int v) { internal::manage_multi_threading(SetAction, &v // TODO(rmlarsen): Make the device API available instead of // storing a local static pointer variable to avoid this issue. inline ThreadPool* setGemmThreadPool(ThreadPool* new_pool) { - static ThreadPool* pool; + static ThreadPool* pool = nullptr; if (new_pool != nullptr) { // This will wait for work in all threads in *pool to finish, // then destroy the old ThreadPool, and then replace it with new_pool. @@ -232,7 +232,6 @@ EIGEN_STRONG_INLINE void parallelize_gemm(const Functor& func, Index rows, Index } #elif defined(EIGEN_GEMM_THREADPOOL) - ei_declare_aligned_stack_constructed_variable(GemmParallelTaskInfo, meta_info, threads, 0); Barrier barrier(threads); auto task = [=, &func, &barrier, &task_info](int i) { Index actual_threads = threads; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/SelfadjointMatrixVector.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/SelfadjointMatrixVector.h index 9333d1604c8..10f60266df1 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/SelfadjointMatrixVector.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/SelfadjointMatrixVector.h @@ -197,6 +197,7 @@ struct selfadjoint_product_impl { if (!EvalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = Dest::SizeAtCompileTime; Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif @@ -205,6 +206,7 @@ struct selfadjoint_product_impl { if (!UseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; Index size = rhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixMatrix.h index c541909a320..a0d05ef8fdb 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -113,13 +113,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix< ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - // To work around an "error: member reference base type 'Matrix<...> - // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is - // not a structure or union" compilation error in nvcc (tested V8.0.61), - // create a dummy internal::constructor_without_unaligned_array_assert - // object to pass to the Matrix constructor. - internal::constructor_without_unaligned_array_assert a; - Matrix triangularBuffer(a); + Matrix triangularBuffer; triangularBuffer.setZero(); if ((Mode & ZeroDiag) == ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -245,8 +239,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix< ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - internal::constructor_without_unaligned_array_assert a; - Matrix triangularBuffer(a); + Matrix triangularBuffer; triangularBuffer.setZero(); if ((Mode & ZeroDiag) == ZeroDiag) triangularBuffer.diagonal().setZero(); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h index 05a5827a96c..bef4cbaf88a 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/products/TriangularMatrixVector.h @@ -230,6 +230,7 @@ struct trmv_selector { if (!evalToDest) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = Dest::SizeAtCompileTime; Index size = dest.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif @@ -310,6 +311,7 @@ struct trmv_selector { #endif } #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/BlasUtil.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/BlasUtil.h index c2994b20490..19d9917d788 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/BlasUtil.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/BlasUtil.h @@ -241,7 +241,7 @@ class blas_data_mapper { EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } EIGEN_DEVICE_FUNC const Index incr() const { return 1; } - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; } EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { if (std::uintptr_t(m_data) % sizeof(Scalar)) { @@ -430,7 +430,7 @@ class blas_data_mapper { EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } EIGEN_DEVICE_FUNC const Index incr() const { return m_incr.value(); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC constexpr Scalar* data() const { return m_data; } protected: Scalar* EIGEN_RESTRICT m_data; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/DisableStupidWarnings.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/DisableStupidWarnings.h index 1182198231a..7ecd7bf8cc9 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/DisableStupidWarnings.h @@ -89,7 +89,7 @@ #endif #endif -#if defined __NVCC__ +#if defined __NVCC__ && defined __CUDACC__ // MSVC 14.16 (required by CUDA 9.*) does not support the _Pragma keyword, so // we instead use Microsoft's __pragma extension. #if defined _MSC_VER diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h index 59486ea50ac..abf4b19502c 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/IndexedViewHelper.h @@ -69,7 +69,7 @@ static constexpr auto lastp1 = last + fix<1>; #else // Using a FixedExpr<1> expression is important here to make sure the compiler // can fully optimize the computation starting indices with zero overhead. -static constexpr lastp1_t lastp1(last + fix<1>()); +static constexpr lastp1_t lastp1 = lastp1_t{}; #endif /** \var end diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h index 91c821bd5ce..fb560516419 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Macros.h @@ -196,6 +196,13 @@ #define EIGEN_COMP_PGI 0 #endif +/// \internal EIGEN_COMP_NVHPC set to NVHPC version if the compiler is nvc++ +#if defined(__NVCOMPILER) +#define EIGEN_COMP_NVHPC (__NVCOMPILER_MAJOR__ * 100 + __NVCOMPILER_MINOR__) +#else +#define EIGEN_COMP_NVHPC 0 +#endif + /// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler #if defined(__CC_ARM) || defined(__ARMCC_VERSION) #define EIGEN_COMP_ARM 1 @@ -1076,14 +1083,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void ignore_unused_variable(cons #define EIGEN_USING_STD(FUNC) using std::FUNC; #endif -#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_NVCC -// Wwhen compiling with NVCC, using the base operator is necessary, -// otherwise we get duplicate definition errors -// For later MSVC versions, we require explicit operator= definition, otherwise we get -// use of implicitly deleted operator errors. -// (cf Bugs 920, 1000, 1324, 2291) -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) using Base::operator=; -#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) +#if EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator=; \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { \ diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/MaxSizeVector.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/MaxSizeVector.h index 2f1e3d35ecc..54b556dd01a 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/MaxSizeVector.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/MaxSizeVector.h @@ -116,17 +116,17 @@ class MaxSizeVector { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool empty() const { return size_ == 0; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* data() { return data_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return data_; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* data() const { return data_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return data_; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* begin() { return data_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* begin() { return data_; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T* end() { return data_ + size_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* end() { return data_ + size_; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* begin() const { return data_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* begin() const { return data_; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T* end() const { return data_ + size_; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* end() const { return data_ + size_; } private: size_t reserve_; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h index 7edd0a190ae..a278c9129be 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Core/util/Memory.h @@ -147,7 +147,7 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, check_that_malloc_is_allowed(); EIGEN_USING_STD(malloc) void* original = malloc(size + alignment); - if (original == 0) return 0; + if (original == nullptr) return nullptr; uint8_t offset = static_cast(alignment - (reinterpret_cast(original) & (alignment - 1))); void* aligned = static_cast(static_cast(original) + offset); *(static_cast(aligned) - 1) = offset; @@ -391,7 +391,8 @@ EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T* ptr, T* src, std template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) { - if (size > std::size_t(-1) / sizeof(T)) throw_std_bad_alloc(); + constexpr std::size_t max_elements = (std::numeric_limits::max)() / sizeof(T); + if (size > max_elements) throw_std_bad_alloc(); } /** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to have 16 bytes alignment. @@ -473,7 +474,7 @@ EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) { - if (size == 0) return 0; // short-cut. Also fixes Bug 884 + if (size == 0) return nullptr; // short-cut. Also fixes Bug 884 check_size_for_overflow(size); T* result = static_cast(conditional_aligned_malloc(sizeof(T) * size)); if (NumTraits::RequireInitialization) { @@ -765,7 +766,7 @@ void swap(scoped_array& a, scoped_array& b) { // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. -#if (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) +#if ((EIGEN_COMP_GNUC || EIGEN_COMP_CLANG) && !EIGEN_COMP_NVHPC) #define EIGEN_ALIGNED_ALLOCA(SIZE) __builtin_alloca_with_align(SIZE, CHAR_BIT* EIGEN_DEFAULT_ALIGN_BYTES) #else EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* eigen_aligned_alloca_helper(void* ptr) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h index 40830fbdc5f..f73d58f8709 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/EigenSolver.h @@ -319,17 +319,24 @@ template MatrixType EigenSolver::pseudoEigenvalueMatrix() const { eigen_assert(m_isInitialized && "EigenSolver is not initialized."); const RealScalar precision = RealScalar(2) * NumTraits::epsilon(); - Index n = m_eivalues.rows(); + const Index n = m_eivalues.rows(); MatrixType matD = MatrixType::Zero(n, n); - for (Index i = 0; i < n; ++i) { - if (internal::isMuchSmallerThan(numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i)), precision)) - matD.coeffRef(i, i) = numext::real(m_eivalues.coeff(i)); - else { - matD.template block<2, 2>(i, i) << numext::real(m_eivalues.coeff(i)), numext::imag(m_eivalues.coeff(i)), - -numext::imag(m_eivalues.coeff(i)), numext::real(m_eivalues.coeff(i)); + Index i = 0; + for (; i < n - 1; ++i) { + RealScalar real = numext::real(m_eivalues.coeff(i)); + RealScalar imag = numext::imag(m_eivalues.coeff(i)); + matD.coeffRef(i, i) = real; + if (!internal::isMuchSmallerThan(imag, real, precision)) { + matD.coeffRef(i, i + 1) = imag; + matD.coeffRef(i + 1, i) = -imag; + matD.coeffRef(i + 1, i + 1) = real; ++i; } } + if (i == n - 1) { + matD.coeffRef(i, i) = numext::real(m_eivalues.coeff(i)); + } + return matD; } diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index ca15e6de591..b114cfab551 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -161,7 +161,7 @@ class GeneralizedEigenSolver { compute(A, B, computeEigenvectors); } - /* \brief Returns the computed generalized eigenvectors. + /** \brief Returns the computed generalized eigenvectors. * * \returns %Matrix whose columns are the (possibly complex) right eigenvectors. * i.e. the eigenvectors that solve (A - l*B)x = 0. The ordering matches the eigenvalues. diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/Transform.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/Transform.h index fd3fc58f46d..b1a9f21fac5 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/Transform.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/Transform.h @@ -588,9 +588,9 @@ class Transform { EIGEN_DEVICE_FUNC inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const; /** \returns a const pointer to the column major internal matrix */ - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_matrix.data(); } /** \returns a non-const pointer to the column major internal matrix */ - EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); } + EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_matrix.data(); } /** \returns \c *this with scalar type casted to \a NewScalarType * diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/arch/Geometry_SIMD.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/arch/Geometry_SIMD.h index e8b210e7017..5601a473e4f 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/arch/Geometry_SIMD.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/Geometry/arch/Geometry_SIMD.h @@ -74,7 +74,9 @@ struct cross3_impl { Packet4f mul1 = pmul(vec4f_swizzle1(a, 1, 2, 0, 3), vec4f_swizzle1(b, 2, 0, 1, 3)); Packet4f mul2 = pmul(vec4f_swizzle1(a, 2, 0, 1, 3), vec4f_swizzle1(b, 1, 2, 0, 3)); DstPlainType res; - pstoret(&res.x(), psub(mul1, mul2)); + pstoret(res.data(), psub(mul1, mul2)); + // Ensure last component is 0 in case original a or b contain inf/nan. + res[3] = 0.0f; return res; } }; diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h index a49299cbbbf..6b85d1d5a76 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/BDCSVD.h @@ -50,25 +50,6 @@ struct traits > : svd_traits typedef MatrixType_ MatrixType; }; -template -struct allocate_small_svd { - static void run(JacobiSVD& smallSvd, Index rows, Index cols, unsigned int) { - internal::construct_at(&smallSvd, rows, cols); - } -}; - -EIGEN_DIAGNOSTICS(push) -EIGEN_DISABLE_DEPRECATED_WARNING - -template -struct allocate_small_svd { - static void run(JacobiSVD& smallSvd, Index rows, Index cols, unsigned int computationOptions) { - internal::construct_at(&smallSvd, rows, cols, computationOptions); - } -}; - -EIGEN_DIAGNOSTICS(pop) - } // end namespace internal /** \ingroup SVD_Module @@ -288,7 +269,7 @@ void BDCSVD::allocate(Index rows, Index cols, unsigned int if (Base::allocate(rows, cols, computationOptions)) return; if (cols < m_algoswap) - internal::allocate_small_svd::run(smallSvd, rows, cols, computationOptions); + smallSvd.allocate(rows, cols, Options == 0 ? computationOptions : internal::get_computation_options(Options)); m_computed = MatrixXr::Zero(diagSize() + 1, diagSize()); m_compU = computeV(); diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h index 84934db8d18..e81db2dc2d6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SVD/JacobiSVD.h @@ -553,8 +553,7 @@ class JacobiSVD : public SVDBase > { * \deprecated Will be removed in the next major Eigen version. Options should * be specified in the \a Options template parameter. */ - // EIGEN_DEPRECATED // this constructor is used to allocate memory in BDCSVD - JacobiSVD(Index rows, Index cols, unsigned int computationOptions) { + EIGEN_DEPRECATED JacobiSVD(Index rows, Index cols, unsigned int computationOptions) { internal::check_svd_options_assertions(computationOptions, rows, cols); allocate(rows, cols, computationOptions); } @@ -612,7 +611,6 @@ class JacobiSVD : public SVDBase > { using Base::rank; using Base::rows; - private: void allocate(Index rows_, Index cols_, unsigned int computationOptions) { if (Base::allocate(rows_, cols_, computationOptions)) return; eigen_assert(!(ShouldComputeThinU && int(QRPreconditioner) == int(FullPivHouseholderQRPreconditioner)) && @@ -625,6 +623,7 @@ class JacobiSVD : public SVDBase > { if (rows() > cols()) m_qr_precond_morerows.allocate(*this); } + private: JacobiSVD& compute_impl(const MatrixType& matrix, unsigned int computationOptions); protected: diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h index 24ebb7cac19..caa8ffada31 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/SparseCore/SparseMatrix.h @@ -202,9 +202,9 @@ class SparseMatrix : public SparseCompressedBase= 0 && row < m_size) : (row == 0 && col >= 0 && col < m_size)); @@ -278,6 +278,7 @@ class SparseVector : public SparseCompressedBase inline void swap(SparseMatrix& other) { @@ -285,6 +286,14 @@ class SparseVector : public SparseCompressedBase + friend EIGEN_DEVICE_FUNC void swap(SparseVector& a, SparseMatrix& b) { + a.swap(b); + } + template + friend EIGEN_DEVICE_FUNC void swap(SparseMatrix& a, SparseVector& b) { + b.swap(a); + } inline SparseVector& operator=(const SparseVector& other) { if (other.isRValue()) { diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc index cc708faddef..93f7eabe5d6 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/ArrayCwiseUnaryOps.inc @@ -11,6 +11,7 @@ typedef CwiseUnaryOp, const Derived> Boo typedef CwiseUnaryOp, const Derived> BitwiseNotReturnType; typedef CwiseUnaryOp, const Derived> ExpReturnType; +typedef CwiseUnaryOp, const Derived> Exp2ReturnType; typedef CwiseUnaryOp, const Derived> Expm1ReturnType; typedef CwiseUnaryOp, const Derived> LogReturnType; typedef CwiseUnaryOp, const Derived> Log1pReturnType; @@ -78,10 +79,20 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Abs2ReturnType abs2() const { return * Example: \include Cwise_exp.cpp * Output: \verbinclude Cwise_exp.out * - * \sa Math functions, pow(), log(), sin(), cos() + * \sa Math functions, exp2(), pow(), log(), sin(), + * cos() */ EIGEN_DEVICE_FUNC inline const ExpReturnType exp() const { return ExpReturnType(derived()); } +/** \returns an expression of the coefficient-wise exponential of *this. + * + * This function computes the coefficient-wise base2 exponential, i.e. 2^x. + * + * \sa Math functions, exp(), pow(), log(), sin(), + * cos() + */ +EIGEN_DEVICE_FUNC inline const Exp2ReturnType exp2() const { return Exp2ReturnType(derived()); } + /** \returns an expression of the coefficient-wise exponential of *this minus 1. * * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, diff --git a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/BlockMethods.inc b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/BlockMethods.inc index 122a2f4ab72..46dc9ddd102 100644 --- a/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/BlockMethods.inc +++ b/wpimath/src/main/native/thirdparty/eigen/include/Eigen/src/plugins/BlockMethods.inc @@ -1304,14 +1304,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ConstFixedSegmentReturnType::T template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename FixedSegmentReturnType::Type tail(Index n = N) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType::Type(derived(), size() - n); + return typename FixedSegmentReturnType::Type(derived(), size() - n, n); } /// This is the const version of tail. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename ConstFixedSegmentReturnType::Type tail(Index n = N) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType::Type(derived(), size() - n); + return typename ConstFixedSegmentReturnType::Type(derived(), size() - n, n); } /// \returns the \a outer -th column (resp. row) of the matrix \c *this if \c *this