Commit a6547fce authored by Yaroslav Gevorkov's avatar Yaroslav Gevorkov
Browse files

eigen update

parent 6caeec18
...@@ -12,7 +12,7 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) ...@@ -12,7 +12,7 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
"MinSizeRel" "RelWithDebInfo") "MinSizeRel" "RelWithDebInfo")
endif() endif()
find_package(Eigen3 3.3.4 NO_MODULE) find_package(Eigen3 3.3.7 NO_MODULE)
find_package(OpenMP) find_package(OpenMP)
include_directories(include) include_directories(include)
......
include(RegexUtils)
test_escape_string_as_regex()
file(GLOB Eigen_directory_files "*")
escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
foreach(f ${Eigen_directory_files})
if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/src")
list(APPEND Eigen_directory_files_to_install ${f})
endif()
endforeach(f ${Eigen_directory_files})
install(FILES
${Eigen_directory_files_to_install}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel
)
install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#define EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H
#include "Core" #include "Core"
#include "Jacobi"
#include "src/Core/util/DisableStupidWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
...@@ -31,7 +32,11 @@ ...@@ -31,7 +32,11 @@
#include "src/Cholesky/LLT.h" #include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h" #include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/Cholesky/LLT_LAPACKE.h" #include "src/Cholesky/LLT_LAPACKE.h"
#endif #endif
......
...@@ -14,6 +14,22 @@ ...@@ -14,6 +14,22 @@
// first thing Eigen does: stop the compiler from committing suicide // first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDACC __CUDACC__
#endif
#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDA_ARCH __CUDA_ARCH__
#endif
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
#elif defined(__CUDACC_VER__)
#define EIGEN_CUDACC_VER __CUDACC_VER__
#else
#define EIGEN_CUDACC_VER 0
#endif
// Handle NVCC/CUDA/SYCL // Handle NVCC/CUDA/SYCL
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL! // Do not try asserts on CUDA and SYCL!
...@@ -37,9 +53,9 @@ ...@@ -37,9 +53,9 @@
#endif #endif
#define EIGEN_DEVICE_FUNC __host__ __device__ #define EIGEN_DEVICE_FUNC __host__ __device__
// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro // We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
// works properly on the device side // works properly on the device side
#include <math_functions.hpp> #include <cuda_runtime.h>
#else #else
#define EIGEN_DEVICE_FUNC #define EIGEN_DEVICE_FUNC
#endif #endif
...@@ -155,6 +171,9 @@ ...@@ -155,6 +171,9 @@
#ifdef __AVX512DQ__ #ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ #define EIGEN_VECTORIZE_AVX512DQ
#endif #endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#endif #endif
// include files // include files
...@@ -229,7 +248,7 @@ ...@@ -229,7 +248,7 @@
#if defined __CUDACC__ #if defined __CUDACC__
#define EIGEN_VECTORIZE_CUDA #define EIGEN_VECTORIZE_CUDA
#include <vector_types.h> #include <vector_types.h>
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #if EIGEN_CUDACC_VER >= 70500
#define EIGEN_HAS_CUDA_FP16 #define EIGEN_HAS_CUDA_FP16
#endif #endif
#endif #endif
...@@ -352,6 +371,7 @@ using std::ptrdiff_t; ...@@ -352,6 +371,7 @@ using std::ptrdiff_t;
#include "src/Core/MathFunctions.h" #include "src/Core/MathFunctions.h"
#include "src/Core/GenericPacketMath.h" #include "src/Core/GenericPacketMath.h"
#include "src/Core/MathFunctionsImpl.h" #include "src/Core/MathFunctionsImpl.h"
#include "src/Core/arch/Default/ConjHelper.h"
#if defined EIGEN_VECTORIZE_AVX512 #if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
...@@ -367,6 +387,7 @@ using std::ptrdiff_t; ...@@ -367,6 +387,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h"
#include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/Complex.h"
#include "src/Core/arch/AVX/TypeCasting.h" #include "src/Core/arch/AVX/TypeCasting.h"
#include "src/Core/arch/SSE/TypeCasting.h"
#elif defined EIGEN_VECTORIZE_SSE #elif defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/MathFunctions.h"
......
...@@ -45,7 +45,11 @@ ...@@ -45,7 +45,11 @@
#include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/GeneralizedEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/Eigenvalues/RealSchur_LAPACKE.h" #include "src/Eigenvalues/RealSchur_LAPACKE.h"
#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"
......
...@@ -28,7 +28,11 @@ ...@@ -28,7 +28,11 @@
#include "src/LU/FullPivLU.h" #include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h" #include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/LU/PartialPivLU_LAPACKE.h" #include "src/LU/PartialPivLU_LAPACKE.h"
#endif #endif
#include "src/LU/Determinant.h" #include "src/LU/Determinant.h"
......
...@@ -36,7 +36,11 @@ ...@@ -36,7 +36,11 @@
#include "src/QR/ColPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h"
#include "src/QR/CompleteOrthogonalDecomposition.h" #include "src/QR/CompleteOrthogonalDecomposition.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/HouseholderQR_LAPACKE.h"
#include "src/QR/ColPivHouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
#endif #endif
......
...@@ -27,7 +27,7 @@ void qFree(void *ptr) ...@@ -27,7 +27,7 @@ void qFree(void *ptr)
void *qRealloc(void *ptr, std::size_t size) void *qRealloc(void *ptr, std::size_t size)
{ {
void* newPtr = Eigen::internal::aligned_malloc(size); void* newPtr = Eigen::internal::aligned_malloc(size);
memcpy(newPtr, ptr, size); std::memcpy(newPtr, ptr, size);
Eigen::internal::aligned_free(ptr); Eigen::internal::aligned_free(ptr);
return newPtr; return newPtr;
} }
......
...@@ -37,7 +37,11 @@ ...@@ -37,7 +37,11 @@
#include "src/SVD/JacobiSVD.h" #include "src/SVD/JacobiSVD.h"
#include "src/SVD/BDCSVD.h" #include "src/SVD/BDCSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/SVD/JacobiSVD_LAPACKE.h" #include "src/SVD/JacobiSVD_LAPACKE.h"
#endif #endif
......
...@@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT ...@@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *
* \returns \c Success if computation was succesful, * \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative. * \c NumericalIssue if the factorization failed because of a zero pivot.
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {
...@@ -305,7 +305,8 @@ template<> struct ldlt_inplace<Lower> ...@@ -305,7 +305,8 @@ template<> struct ldlt_inplace<Lower>
if (size <= 1) if (size <= 1)
{ {
transpositions.setIdentity(); transpositions.setIdentity();
if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef; if(size==0) sign = ZeroSign;
else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef; else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
else sign = ZeroSign; else sign = ZeroSign;
return true; return true;
...@@ -376,6 +377,8 @@ template<> struct ldlt_inplace<Lower> ...@@ -376,6 +377,8 @@ template<> struct ldlt_inplace<Lower>
if((rs>0) && pivot_is_valid) if((rs>0) && pivot_is_valid)
A21 /= realAkk; A21 /= realAkk;
else if(rs>0)
ret = ret && (A21.array()==Scalar(0)).all();
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
else if(!pivot_is_valid) found_zero_pivot = true; else if(!pivot_is_valid) found_zero_pivot = true;
...@@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons ...@@ -568,13 +571,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
// more precisely, use pseudo-inverse of D (see bug 241) // more precisely, use pseudo-inverse of D (see bug 241)
using std::abs; using std::abs;
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD()); const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
// as motivated by LAPACK's xGELSS: // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
// diagonal element is not well justified and leads to numerical issues in some cases. // diagonal element is not well justified and leads to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance. // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); // Using numeric_limits::min() gives us more robustness to denormals.
RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
for (Index i = 0; i < vecD.size(); ++i) for (Index i = 0; i < vecD.size(); ++i)
{ {
......
...@@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits; ...@@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* *
* \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition
* \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper.
* The other triangular part won't be read. * The other triangular part won't be read.
* *
* This class performs a LL^T Cholesky decomposition of a symmetric, positive definite * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite
* matrix A such that A = LL^* = U^*U, where L is lower triangular. * matrix A such that A = LL^* = U^*U, where L is lower triangular.
...@@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits; ...@@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* Example: \include LLT_example.cpp * Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out * Output: \verbinclude LLT_example.out
* *
* \b Performance: for best performance, it is recommended to use a column-major storage format
* with the Lower triangular part (the default), or, equivalently, a row-major storage format
* with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
* step, and rank-updates can be up to 3 times slower.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
* *
* Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
* Therefore, the strict lower part does not have to store correct values.
*
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
*/ */
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LLT template<typename _MatrixType, int _UpLo> class LLT
{ {
public: public:
...@@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT ...@@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
} }
template<typename Derived> template<typename Derived>
void solveInPlace(MatrixBase<Derived> &bAndX) const; void solveInPlace(const MatrixBase<Derived> &bAndX) const;
template<typename InputType> template<typename InputType>
LLT& compute(const EigenBase<InputType>& matrix); LLT& compute(const EigenBase<InputType>& matrix);
...@@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT ...@@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *
* \returns \c Success if computation was succesful, * \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative. * \c NumericalIssue if the matrix.appears not to be positive definite.
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {
...@@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType> ...@@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
eigen_assert(a.rows()==a.cols()); eigen_assert(a.rows()==a.cols());
const Index size = a.rows(); const Index size = a.rows();
m_matrix.resize(size, size); m_matrix.resize(size, size);
m_matrix = a.derived(); if (!internal::is_same_dense(m_matrix, a.derived()))
m_matrix = a.derived();
// Compute matrix L1 norm = max abs column sum. // Compute matrix L1 norm = max abs column sum.
m_l1_norm = RealScalar(0); m_l1_norm = RealScalar(0);
...@@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const ...@@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
* *
* This version avoids a copy when the right hand side matrix b is not needed anymore. * This version avoids a copy when the right hand side matrix b is not needed anymore.
* *
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
*
* \sa LLT::solve(), MatrixBase::llt() * \sa LLT::solve(), MatrixBase::llt()
*/ */
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
template<typename Derived> template<typename Derived>
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
{ {
eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==bAndX.rows()); eigen_assert(m_matrix.rows()==bAndX.rows());
......
...@@ -153,8 +153,6 @@ class Array ...@@ -153,8 +153,6 @@ class Array
: Base(std::move(other)) : Base(std::move(other))
{ {
Base::_check_template_params(); Base::_check_template_params();
if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
Base::_set_noalias(other);
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value) Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
......
...@@ -39,7 +39,7 @@ public: ...@@ -39,7 +39,7 @@ public:
enum { enum {
DstAlignment = DstEvaluator::Alignment, DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit, DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
}; };
...@@ -83,7 +83,7 @@ private: ...@@ -83,7 +83,7 @@ private:
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll, /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */ so it's only good for large enough sizes. */
......
...@@ -84,7 +84,8 @@ class vml_assign_traits ...@@ -84,7 +84,8 @@ class vml_assign_traits
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \ struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
...@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) ...@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \ typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \ const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \ VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
......
...@@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco ...@@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco
{ {
typedef typename Decomposition::RealScalar RealScalar; typedef typename Decomposition::RealScalar RealScalar;
eigen_assert(dec.rows() == dec.cols()); eigen_assert(dec.rows() == dec.cols());
if (dec.rows() == 0) return RealScalar(1); if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
if (matrix_norm == RealScalar(0)) return RealScalar(0); if (matrix_norm == RealScalar(0)) return RealScalar(0);
if (dec.rows() == 1) return RealScalar(1); if (dec.rows() == 1) return RealScalar(1);
const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);
......
...@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > ...@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret) ? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret), : int(inner_stride_at_compile_time<ArgType>::ret),
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit, FlagsRowMajorBit = XprType::Flags&RowMajorBit,
...@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > ...@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment, PacketAlignment = unpacket_traits<PacketScalar>::alignment,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
&& (OuterStrideAtCompileTime!=0)
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
}; };
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
...@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa ...@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
: m_argImpl(block.nestedExpression()), : m_argImpl(block.nestedExpression()),
m_startRow(block.startRow()), m_startRow(block.startRow()),
m_startCol(block.startCol()) m_startCol(block.startCol()),
m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
{ } { }
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
enum { enum {
RowsAtCompileTime = XprType::RowsAtCompileTime RowsAtCompileTime = XprType::RowsAtCompileTime,
ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
...@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa ...@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const CoeffReturnType coeff(Index index) const
{ {
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); if (ForwardLinearAccess)
return m_argImpl.coeff(m_linear_offset.value() + index);
else
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
...@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa ...@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index) Scalar& coeffRef(Index index)
{ {
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); if (ForwardLinearAccess)
return m_argImpl.coeffRef(m_linear_offset.value() + index);
else
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
template<int LoadMode, typename PacketType> template<int LoadMode, typename PacketType>
...@