Diffstat (limited to 'eigen/Eigen/src')
93 files changed, 1243 insertions, 3405 deletions
diff --git a/eigen/Eigen/src/Cholesky/LDLT.h b/eigen/Eigen/src/Cholesky/LDLT.h index 9b4fdb4..fcee7b2 100644 --- a/eigen/Eigen/src/Cholesky/LDLT.h +++ b/eigen/Eigen/src/Cholesky/LDLT.h @@ -258,6 +258,7 @@ template<typename _MatrixType, int _UpLo> class LDLT #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/Cholesky/LLT.h b/eigen/Eigen/src/Cholesky/LLT.h index e6c02d8..87ca8d4 100644 --- a/eigen/Eigen/src/Cholesky/LLT.h +++ b/eigen/Eigen/src/Cholesky/LLT.h @@ -200,6 +200,7 @@ template<typename _MatrixType, int _UpLo> class LLT #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h index 61faf43..5719720 100644 --- a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -32,7 +32,7 @@ template<> struct cholmod_configure_matrix<std::complex<double> > { } }; -// Other scalar types are not yet supported by Cholmod +// Other scalar types are not yet supported by Cholmod // template<> struct cholmod_configure_matrix<float> { // template<typename CholmodType> // static void run(CholmodType& mat) { @@ -124,9 +124,6 @@ cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Sca if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; - // swap stype for rowmajor matrices (only works for real matrices) - EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - if(_Options & RowMajorBit) res.stype *=-1; return res; } @@ -162,44 +159,6 @@ MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm) static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) ); } -namespace internal { - -// template specializations for int and long that call the correct cholmod method - -#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ - template<typename _StorageIndex> ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ - template<> ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } - -#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ - template<typename _StorageIndex> ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ - template<> ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } - -EIGEN_CHOLMOD_SPECIALIZE0(int, start) -EIGEN_CHOLMOD_SPECIALIZE0(int, finish) - -EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) - -EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) - -template<typename _StorageIndex> cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } -template<> cholmod_dense* cm_solve<long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } - -template<typename _StorageIndex> cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) {
return cholmod_spsolve (sys, &L, &B, &Common); } -template<> cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } - -template<typename _StorageIndex> -int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } -template<> -int cm_factorize_p<long> (cholmod_sparse* A, double beta[2], long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } - -#undef EIGEN_CHOLMOD_SPECIALIZE0 -#undef EIGEN_CHOLMOD_SPECIALIZE1 - -} // namespace internal - - enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; @@ -236,7 +195,7 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start<StorageIndex>(m_cholmod); + cholmod_start(&m_cholmod); } explicit CholmodBase(const MatrixType& matrix) @@ -244,15 +203,15 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start<StorageIndex>(m_cholmod); + cholmod_start(&m_cholmod); compute(matrix); } ~CholmodBase() { if(m_cholmodFactor) - internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); - internal::cm_finish<StorageIndex>(m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); } inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); } @@ -260,7 +219,7 @@ class CholmodBase : public SparseSolverBase<Derived> /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was successful, + * \returns \c Success if computation was successful, * \c NumericalIssue if the matrix appears to be negative. */ ComputationInfo info() const @@ -287,11 +246,11 @@ class CholmodBase : public SparseSolverBase<Derived> { if(m_cholmodFactor) { - internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); m_cholmodFactor = 0; } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); this->m_isInitialized = true; this->m_info = Success; @@ -309,7 +268,7 @@ class CholmodBase : public SparseSolverBase<Derived> { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); @@ -330,20 +289,19 @@ class CholmodBase : public SparseSolverBase<Derived> EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+ // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = internal::cm_solve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols()); - internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod); + cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ @@ -358,16 +316,15 @@ class CholmodBase : public SparseSolverBase<Derived> // note: cs stands for Cholmod Sparse Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived()); cholmod_sparse b_cs = viewAsCholmod(b_ref); - cholmod_sparse* x_cs = internal::cm_spsolve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver) dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs); - internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod); + cholmod_free_sparse(&x_cs, &m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/eigen/Eigen/src/Core/ArithmeticSequence.h b/eigen/Eigen/src/Core/ArithmeticSequence.h deleted file mode 100644 index ada1571..0000000 --- a/eigen/Eigen/src/Core/ArithmeticSequence.h +++ /dev/null @@ -1,350 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
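The `internal::cm_*` helpers deleted above existed to dispatch between CHOLMOD's two index-type APIs: the `cholmod_*` entry points take `int` indices, while the `cholmod_l_*` variants take `long` (`SuiteSparse_long`) indices. After this revert, `CholmodBase` calls the `int` API directly, so only the int-index entry points remain reachable. A minimal sketch of the dispatch idea the removed wrappers implemented (illustrative only, not Eigen's exact code):

```cpp
#include <cholmod.h>

// Generic template forwards to the int-index CHOLMOD API; the `long`
// specialization forwards to the cholmod_l_* variant, so callers can be
// written once against a StorageIndex type.
template<typename StorageIndex>
int cm_start(cholmod_common& c) { return cholmod_start(&c); }

template<>
int cm_start<long>(cholmod_common& c) { return cholmod_l_start(&c); }
```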
- -#ifndef EIGEN_ARITHMETIC_SEQUENCE_H -#define EIGEN_ARITHMETIC_SEQUENCE_H - -namespace Eigen { - -namespace internal { - -#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) -template<typename T> struct aseq_negate {}; - -template<> struct aseq_negate<Index> { - typedef Index type; -}; - -template<int N> struct aseq_negate<FixedInt<N> > { - typedef FixedInt<-N> type; -}; - -// Compilation error in the following case: -template<> struct aseq_negate<FixedInt<DynamicIndex> > {}; - -template<typename FirstType,typename SizeType,typename IncrType, - bool FirstIsSymbolic=Symbolic::is_symbolic<FirstType>::value, - bool SizeIsSymbolic =Symbolic::is_symbolic<SizeType>::value> -struct aseq_reverse_first_type { - typedef Index type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,true> { - typedef Symbolic::AddExpr<FirstType, - Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> > - > type; -}; - -template<typename SizeType,typename IncrType,typename EnableIf = void> -struct aseq_reverse_first_type_aux { - typedef Index type; -}; - -template<typename SizeType,typename IncrType> -struct aseq_reverse_first_type_aux<SizeType,IncrType,typename internal::enable_if<bool((SizeType::value+IncrType::value)|0x1)>::type> { - typedef FixedInt<(SizeType::value-1)*IncrType::value> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,false> { - typedef typename aseq_reverse_first_type_aux<SizeType,IncrType>::type Aux; - typedef Symbolic::AddExpr<FirstType,Symbolic::ValueExpr<Aux> > type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,false,true> { - typedef Symbolic::AddExpr<Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> >, - Symbolic::ValueExpr<> > type; -}; -#endif - -// Helper to cleanup the type of the increment: -template<typename T> struct cleanup_seq_incr { - typedef typename cleanup_index_type<T,DynamicIndex>::type type; -}; - -} - -//-------------------------------------------------------------------------------- -// seq(first,last,incr) and seqN(first,size,incr) -//-------------------------------------------------------------------------------- - -template<typename FirstType=Index,typename SizeType=Index,typename IncrType=internal::FixedInt<1> > -class ArithmeticSequence; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - typename internal::cleanup_index_type<SizeType>::type, - typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr); - -/** \class ArithmeticSequence - * \ingroup Core_Module - * - * This class represents an arithmetic progression \f$ a_0, a_1, a_2, ..., a_{n-1}\f$ defined by - * its \em first value \f$ a_0 \f$, its \em size (aka length) \em n, and the \em increment (aka stride) - * that is equal to \f$ a_{i+1}-a_{i}\f$ for any \em i. - * - * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments - * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the - * only way it is used. 
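For context, the `seq`/`seqN` functions declared in this deleted file are the slicing API of Eigen's development branch, consumed by `DenseBase::operator()` from the IndexedView machinery that this diff also removes. A hedged usage sketch, assuming a tree where those headers are still present (`Eigen::last` is the symbolic last-index placeholder from the same API):

```cpp
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 6);
  // seqN(first, size, incr) -> rows 0, 2, 4
  // seq(first, last)        -> columns 1 through 5
  auto view = A(Eigen::seqN(0, 3, 2), Eigen::seq(1, Eigen::last));
  return (view.rows() == 3 && view.cols() == 5) ? 0 : 1;
}
```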
- * - * \tparam FirstType type of the first element, usually an Index, - * but internally it can be a symbolic expression - * \tparam SizeType type representing the size of the sequence, usually an Index - * or a compile time integral constant. Internally, it can also be a symbolic expression - * \tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is compile-time 1) - * - * \sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView - */ -template<typename FirstType,typename SizeType,typename IncrType> -class ArithmeticSequence -{ -public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} - - enum { - SizeAtCompileTime = internal::get_fixed_value<SizeType>::value, - IncrAtCompileTime = internal::get_fixed_value<IncrType,DynamicIndex>::value - }; - - /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } - - /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } - - /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */ - Index operator[](Index i) const { return m_first + i * m_incr; } - - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } - -protected: - FirstType m_first; - SizeType m_size; - IncrType m_incr; - -public: - -#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) - auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#else -protected: - typedef typename internal::aseq_negate<IncrType>::type ReverseIncrType; - typedef typename internal::aseq_reverse_first_type<FirstType,SizeType,IncrType>::type ReverseFirstType; -public: - ArithmeticSequence<ReverseFirstType,SizeType,ReverseIncrType> - reverse() const { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#endif -}; - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type>(first,size,incr); -} - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and unit increment - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */ -template<typename FirstType,typename SizeType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type > -seqN(FirstType first, SizeType size) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN - -/** \returns an ArithmeticSequence 
starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr - * - * It is essentially an alias to: - * \code - * seqN(f, (l-f+incr)/incr, incr); - * \endcode - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) - */ -template<typename FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr); - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment - * - * It is essentially an alias to: - * \code - * seqN(f,l-f+1); - * \endcode - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) - */ -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l); - -#else // EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX11 -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr) - -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+typename internal::cleanup_seq_incr<IncrType>::type(incr) - ) / typename internal::cleanup_seq_incr<IncrType>::type(incr), - typename internal::cleanup_seq_incr<IncrType>::type(incr))) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr)) / CleanedIncrType(incr), - CleanedIncrType(incr)); -} -#else - -template<typename FirstType,typename LastType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index> >::type -seq(FirstType f, LastType l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))); -} - -template<typename FirstTypeDerived,typename LastType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l) -{ - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+fix<1>())); -} - -template<typename FirstType,typename LastTypeDerived> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(FirstType f, const 
Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f),(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename FirstTypeDerived,typename LastTypeDerived> -ArithmeticSequence<FirstTypeDerived, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::NegateExpr<FirstTypeDerived> >,Symbolic::ValueExpr<internal::FixedInt<1> > > > -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(f.derived(),(l.derived()-f.derived()+fix<1>())); -} - - -template<typename FirstType,typename LastType, typename IncrType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index,typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr); -} - -template<typename FirstTypeDerived,typename LastType, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>, - Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstType,typename LastTypeDerived, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstTypeDerived,typename LastTypeDerived, typename IncrType> -ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived, - Symbolic::NegateExpr<FirstTypeDerived> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> -seq(const 
Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} -#endif - -#endif // EIGEN_PARSED_BY_DOXYGEN - -namespace internal { - -// Convert a symbolic span into a usable one (i.e., remove last/end "keywords") -template<typename T> -struct make_size_type { - typedef typename internal::conditional<Symbolic::is_symbolic<T>::value, Index, T>::type type; -}; - -template<typename FirstType,typename SizeType,typename IncrType,int XprSize> -struct IndexedViewCompatibleType<ArithmeticSequence<FirstType,SizeType,IncrType>, XprSize> { - typedef ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> -makeIndexedViewCompatible(const ArithmeticSequence<FirstType,SizeType,IncrType>& ids, Index size,SpecializedType) { - return ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType>( - eval_expr_given_size(ids.firstObject(),size),eval_expr_given_size(ids.sizeObject(),size),ids.incrObject()); -} - -template<typename FirstType,typename SizeType,typename IncrType> -struct get_compile_time_incr<ArithmeticSequence<FirstType,SizeType,IncrType> > { - enum { value = get_fixed_value<IncrType,DynamicIndex>::value }; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ARITHMETIC_SEQUENCE_H diff --git a/eigen/Eigen/src/Core/Array.h b/eigen/Eigen/src/Core/Array.h index 0d34269..e10020d 100644 --- a/eigen/Eigen/src/Core/Array.h +++ b/eigen/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other, + typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/eigen/Eigen/src/Core/ArrayBase.h b/eigen/Eigen/src/Core/ArrayBase.h index 9da960f..3dbc708 100644 --- a/eigen/Eigen/src/Core/ArrayBase.h +++ b/eigen/Eigen/src/Core/ArrayBase.h @@ -69,7 +69,6 @@ template<typename Derived> class ArrayBase using Base::coeff; using Base::coeffRef; using Base::lazyAssign; - using Base::operator-; using Base::operator=; using Base::operator+=; using Base::operator-=; @@ -89,6 +88,7 @@ template<typename Derived> class ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/ArrayWrapper.h b/eigen/Eigen/src/Core/ArrayWrapper.h index a04521a..688aadd 100644 --- a/eigen/Eigen/src/Core/ArrayWrapper.h +++ b/eigen/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? 
LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/eigen/Eigen/src/Core/Assign.h b/eigen/Eigen/src/Core/Assign.h index 655412e..53806ba 100644 --- a/eigen/Eigen/src/Core/Assign.h +++ b/eigen/Eigen/src/Core/Assign.h @@ -16,7 +16,7 @@ namespace Eigen { template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived> +EIGEN_STRONG_INLINE Derived& DenseBase<Derived> ::lazyAssign(const DenseBase<OtherDerived>& other) { enum{ diff --git a/eigen/Eigen/src/Core/BooleanRedux.h b/eigen/Eigen/src/Core/BooleanRedux.h index ccf5190..8409d87 100644 --- a/eigen/Eigen/src/Core/BooleanRedux.h +++ b/eigen/Eigen/src/Core/BooleanRedux.h @@ -14,54 +14,56 @@ namespace Eigen { namespace internal { -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col); + return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, 0, Rows> +template<typename Derived> +struct all_unroller<Derived, 0> { static inline bool run(const Derived &/*mat*/) { return true; } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct all_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col); + return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, 0, Rows> +template<typename Derived> +struct any_unroller<Derived, 0> { static inline bool run(const Derived & /*mat*/) { return false; } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct any_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; @@ -76,7 +78,7 @@ struct any_unroller<Derived, Dynamic, Rows> * \sa any(), Cwise::operator<() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const +inline bool DenseBase<Derived>::all() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -85,7 +87,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const }; Evaluator evaluator(derived()); if(unroll) - 
return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -100,7 +102,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const * \sa all() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const +inline bool DenseBase<Derived>::any() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -109,7 +111,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -124,7 +126,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const * \sa all(), any() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const +inline Eigen::Index DenseBase<Derived>::count() const { return derived().template cast<bool>().template cast<Index>().sum(); } diff --git a/eigen/Eigen/src/Core/CommaInitializer.h b/eigen/Eigen/src/Core/CommaInitializer.h index 35fdbb8..d218e98 100644 --- a/eigen/Eigen/src/Core/CommaInitializer.h +++ b/eigen/Eigen/src/Core/CommaInitializer.h @@ -141,7 +141,7 @@ struct CommaInitializer * \sa CommaInitializer::finished(), class CommaInitializer */ template<typename Derived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) +inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) { return CommaInitializer<Derived>(*static_cast<Derived*>(this), s); } @@ -149,7 +149,7 @@ EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator< /** \sa operator<<(const Scalar&) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> +inline CommaInitializer<Derived> DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other) { return CommaInitializer<Derived>(*static_cast<Derived *>(this), other); diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index 15b361b..f7c1eff 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator<const T> // ---------- base class for all evaluators ---------- template<typename ExpressionType> -struct evaluator_base +struct evaluator_base : public noncopyable { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; @@ -114,14 +114,6 @@ struct evaluator_base enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and makes complex evaluators much larger than they should be.
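The comment deleted in the hunk above carries the key rationale: deriving `evaluator_base` from `noncopyable` defeats the Empty Base Optimization (EBO), so evaluators that aggregate other evaluators grow in size. A self-contained illustration of the effect (sizes are typical for a 64-bit ABI, not guaranteed):

```cpp
#include <cstdio>

struct Empty {};                 // stateless, like a captureless functor

struct WithEBO : private Empty { // empty base: EBO lets it occupy no storage
  double* data;
};

struct WithMember {              // empty member: still costs 1 byte plus padding
  Empty e;
  double* data;
};

int main() {
  // On a typical 64-bit ABI this prints "8 16".
  std::printf("%zu %zu\n", sizeof(WithEBO), sizeof(WithMember));
}
```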
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} -private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -131,27 +123,6 @@ private: // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. -// this helper permits to completely eliminate m_outerStride if it is known at compiletime. -template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) - { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); - eigen_internal_assert(outerStride==OuterStride); - } - EIGEN_DEVICE_FUNC Index outerStride() const { return OuterStride; } - const Scalar *data; -}; - -template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC Index outerStride() const { return m_outerStride; } - const Scalar *data; -protected: - Index m_outerStride; -}; - template<typename Derived> struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> @@ -170,21 +141,18 @@ struct evaluator<PlainObjectBase<Derived> > Flags = traits<Derived>::EvaluatorFlags, Alignment = traits<Derived>::Alignment }; - enum { - // We do not need to know the outer stride for vectors - OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime - }; - + EIGEN_DEVICE_FUNC evaluator() - : m_d(0,OuterStrideAtCompileTime) + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 
0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -193,30 +161,30 @@ struct evaluator<PlainObjectBase<Derived> > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; + return m_data[row * m_outerStride.value() + col]; else - return m_d.data[row + col * m_d.outerStride()]; + return m_data[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.data[index]; + return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col]; + return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; else - return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()]; + return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast<Scalar*>(m_d.data)[index]; + return const_cast<Scalar*>(m_data)[index]; } template<int LoadMode, typename PacketType> @@ -224,16 +192,16 @@ struct evaluator<PlainObjectBase<Derived> > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col); + return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col); else - return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride()); + return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value()); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt<PacketType, LoadMode>(m_d.data + index); + return ploadt<PacketType, LoadMode>(m_data + index); } template<int StoreMode,typename PacketType> @@ -242,22 +210,26 @@ struct evaluator<PlainObjectBase<Derived> > { if (IsRowMajor) return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x); + (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x); else return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x); + (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x); } template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x); + return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); } protected: + const Scalar *m_data; - plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d; + // We do not need to know the outer stride for vectors + variable_if_dynamic<Index, IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? 
ColsAtCompileTime + : RowsAtCompileTime> m_outerStride; }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -555,7 +527,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) : m_d(op) + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -566,43 +540,32 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_functor(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_functor(m_argImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - class Data : private UnaryOp - { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_functor; + evaluator<ArgType> m_argImpl; }; // -------------------- CwiseTernaryOp -------------------- @@ -646,7 +609,11 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased evaluator<Arg3>::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -657,47 +624,38 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col), - m_d.arg2Impl.template 
packet<LoadMode,PacketType>(row, col), - m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col), + m_arg2Impl.template packet<LoadMode,PacketType>(row, col), + m_arg3Impl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index), - m_d.arg2Impl.template packet<LoadMode,PacketType>(index), - m_d.arg3Impl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index), + m_arg2Impl.template packet<LoadMode,PacketType>(index), + m_arg3Impl.template packet<LoadMode,PacketType>(index)); } protected: - // this helper permits to completely eliminate the functor if it is empty - struct Data : private TernaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); } - evaluator<Arg1> arg1Impl; - evaluator<Arg2> arg2Impl; - evaluator<Arg3> arg3Impl; - }; - - Data m_d; + const TernaryOp m_functor; + evaluator<Arg1> m_arg1Impl; + evaluator<Arg2> m_arg2Impl; + evaluator<Arg3> m_arg3Impl; }; // -------------------- CwiseBinaryOp -------------------- @@ -738,7 +696,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -749,45 +710,35 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col), - m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), + m_rhsImpl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index), - m_d.rhsImpl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), + m_rhsImpl.template packet<LoadMode,PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private BinaryOp - { - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); } - evaluator<Lhs> lhsImpl; - evaluator<Rhs> rhsImpl; - }; - - Data m_d; + const BinaryOp m_functor; + evaluator<Lhs> m_lhsImpl; + evaluator<Rhs> m_rhsImpl; }; // -------------------- CwiseUnaryView -------------------- @@ -806,7 +757,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -818,40 +771,30 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_unaryOp(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_unaryOp(m_argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_d.func()(m_d.argImpl.coeffRef(row, col)); + return m_unaryOp(m_argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_d.func()(m_d.argImpl.coeffRef(index)); + return m_unaryOp(m_argImpl.coeffRef(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private UnaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_unaryOp; + evaluator<ArgType> m_argImpl; }; // -------------------- Map -------------------- diff --git a/eigen/Eigen/src/Core/CoreIterators.h b/eigen/Eigen/src/Core/CoreIterators.h index b967196..4eb42b9 100644 --- a/eigen/Eigen/src/Core/CoreIterators.h +++ b/eigen/Eigen/src/Core/CoreIterators.h @@ -48,11 +48,6 @@ public: * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView */ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } - EIGEN_STRONG_INLINE InnerIterator& operator+=(Index i) { m_iter.operator+=(i); return *this; } - EIGEN_STRONG_INLINE InnerIterator operator+(Index i) - { InnerIterator result(*this); result+=i; return result; } - - /// \returns the column or row index of the current coefficient. EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } /// \returns the row index of the current coefficient. 
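The `operator+=`/`operator+` overloads removed from `InnerIterator` just above were conveniences; the canonical traversal pattern needs only the `operator++` that remains. A minimal sketch against the stable sparse API:

```cpp
#include <Eigen/SparseCore>

// Sum the stored coefficients of one outer vector (a column, for the
// default column-major SparseMatrix) using only operator++.
double innerSum(const Eigen::SparseMatrix<double>& m, int outer) {
  double s = 0.0;
  for (Eigen::SparseMatrix<double>::InnerIterator it(m, outer); it; ++it)
    s += it.value();  // it.index(), it.row(), it.col() are also available
  return s;
}
```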
diff --git a/eigen/Eigen/src/Core/CwiseBinaryOp.h b/eigen/Eigen/src/Core/CwiseBinaryOp.h index bf2632d..a36765e 100644 --- a/eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -158,7 +158,7 @@ public: */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -171,7 +171,7 @@ MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -181,3 +181,4 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/eigen/Eigen/src/Core/CwiseNullaryOp.h b/eigen/Eigen/src/Core/CwiseNullaryOp.h index 144608e..ddd607e 100644 --- a/eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -131,7 +131,7 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -170,7 +170,7 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) { return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value)); diff --git a/eigen/Eigen/src/Core/DenseBase.h b/eigen/Eigen/src/Core/DenseBase.h index fd933ee..90066ae 100644 --- a/eigen/Eigen/src/Core/DenseBase.h +++ b/eigen/Eigen/src/Core/DenseBase.h @@ -570,17 +570,13 @@ template<typename Derived> class DenseBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) -#define EIGEN_DOC_UNARY_ADDONS(X,Y) -# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/BlockMethods.h" -# include "../plugins/IndexedViewMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF -#undef EIGEN_DOC_UNARY_ADDONS // disable the use of evalTo for dense objects with a nice compilation error template<typename Dest> diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index c62f5ff..49e7112 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -184,7 +184,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal * * \sa class Diagonal */ template<typename Derived> 
-EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType +inline typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() { return DiagonalReturnType(derived()); @@ -192,7 +192,7 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal(). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType +inline typename MatrixBase<Derived>::ConstDiagonalReturnType MatrixBase<Derived>::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -210,7 +210,7 @@ MatrixBase<Derived>::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) { return DiagonalDynamicIndexReturnType(derived(), index); @@ -218,7 +218,7 @@ MatrixBase<Derived>::diagonal(Index index) /** This is the const version of diagonal(Index). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) const { return ConstDiagonalDynamicIndexReturnType(derived(), index); @@ -237,7 +237,6 @@ MatrixBase<Derived>::diagonal(Index index) const * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() { @@ -247,7 +246,6 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal<int>(). */ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() const { diff --git a/eigen/Eigen/src/Core/DiagonalMatrix.h b/eigen/Eigen/src/Core/DiagonalMatrix.h index 4e8297e..ecfdce8 100644 --- a/eigen/Eigen/src/Core/DiagonalMatrix.h +++ b/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase<Derived> EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -273,7 +273,7 @@ class DiagonalWrapper * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template<typename Derived> -EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived> +inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const { return DiagonalWrapper<const Derived>(derived()); diff --git a/eigen/Eigen/src/Core/DiagonalProduct.h b/eigen/Eigen/src/Core/DiagonalProduct.h index 7911d1c..d372b93 100644 --- a/eigen/Eigen/src/Core/DiagonalProduct.h +++ b/eigen/Eigen/src/Core/DiagonalProduct.h @@ -17,7 +17,7 @@ namespace Eigen { */ template<typename Derived> template<typename DiagonalDerived> -EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct> +inline const Product<Derived, DiagonalDerived, LazyProduct> MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const { return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived()); diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index bb8e3fe..06ef18b 100644 --- 
a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -90,7 +90,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const * \sa dot(), norm(), lpNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const +EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const { return numext::real((*this).cwiseAbs2().sum()); } @@ -102,7 +102,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::trai * \sa lpNorm(), dot(), squaredNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +117,7 @@ EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>:: * \sa norm(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { typedef typename internal::nested_eval<Derived,2>::type _Nested; @@ -139,7 +139,7 @@ MatrixBase<Derived>::normalized() const * \sa norm(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::normalize() +inline void MatrixBase<Derived>::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +160,7 @@ EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::stableNormalized() const { typedef typename internal::nested_eval<Derived,3>::type _Nested; @@ -185,7 +185,7 @@ MatrixBase<Derived>::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::stableNormalize() +inline void MatrixBase<Derived>::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); @@ -257,9 +257,9 @@ struct lpNorm_selector<Derived, Infinity> template<typename Derived> template<int p> #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real #else -EIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar +MatrixBase<Derived>::RealScalar #endif MatrixBase<Derived>::lpNorm() const { diff --git a/eigen/Eigen/src/Core/EigenBase.h b/eigen/Eigen/src/Core/EigenBase.h index ccc122c..b195506 100644 --- a/eigen/Eigen/src/Core/EigenBase.h +++ b/eigen/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). 
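As the sentence above says, `EigenBase` is the widest base class: a function templated on it accepts every Eigen type, including ones like `DiagonalMatrix` that are not `MatrixBase` expressions. A small usage sketch:

```cpp
#include <Eigen/Core>
#include <cstdio>

// EigenBase only promises sizes and derived(); it is the widest interface
// a generic function can take while still excluding non-Eigen types.
template<typename Derived>
void printShape(const Eigen::EigenBase<Derived>& x) {
  std::printf("%ld x %ld\n", (long)x.rows(), (long)x.cols());
}

int main() {
  Eigen::Matrix3f m;
  Eigen::DiagonalMatrix<float, 3> d;
  printShape(m);  // 3 x 3
  printShape(d);  // 3 x 3 (DiagonalMatrix is an EigenBase but not a MatrixBase)
}
```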
* diff --git a/eigen/Eigen/src/Core/Fuzzy.h b/eigen/Eigen/src/Core/Fuzzy.h index 43aa49b..3e403a0 100644 --- a/eigen/Eigen/src/Core/Fuzzy.h +++ b/eigen/Eigen/src/Core/Fuzzy.h @@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector<Derived, true> */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( +bool DenseBase<Derived>::isApprox( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const @@ -122,7 +122,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const */ template<typename Derived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const typename NumTraits<Scalar>::Real& other, const RealScalar& prec ) const @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index b206b0a..0f16cd8 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -428,7 +428,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const template<typename Derived> template<typename OtherDerived> const Product<Derived,OtherDerived,LazyProduct> -EIGEN_DEVICE_FUNC MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const +MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic diff --git a/eigen/Eigen/src/Core/GenericPacketMath.h b/eigen/Eigen/src/Core/GenericPacketMath.h index d19d5bb..029f8ac 100644 --- a/eigen/Eigen/src/Core/GenericPacketMath.h +++ b/eigen/Eigen/src/Core/GenericPacketMath.h @@ -61,7 +61,6 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, - HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -402,10 +401,6 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } -/** \internal \returns the expm1 of \a a (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pexpm1(const Packet& a) { return numext::expm1(a); } - /** \internal \returns the log of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } diff --git a/eigen/Eigen/src/Core/GlobalFunctions.h b/eigen/Eigen/src/Core/GlobalFunctions.h index 12828a7..769dc25 100644 --- a/eigen/Eigen/src/Core/GlobalFunctions.h +++ b/eigen/Eigen/src/Core/GlobalFunctions.h @@ -71,7 +71,6 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) 
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/eigen/Eigen/src/Core/IndexedView.h b/eigen/Eigen/src/Core/IndexedView.h deleted file mode 100644 index 8c57a27..0000000 --- a/eigen/Eigen/src/Core/IndexedView.h +++ /dev/null @@ -1,207 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INDEXED_VIEW_H -#define EIGEN_INDEXED_VIEW_H - -namespace Eigen { - -namespace internal { - -template<typename XprType, typename RowIndices, typename ColIndices> -struct traits<IndexedView<XprType, RowIndices, ColIndices> > - : traits<XprType> -{ - enum { - RowsAtCompileTime = int(array_size<RowIndices>::value), - ColsAtCompileTime = int(array_size<ColIndices>::value), - MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : int(traits<XprType>::MaxRowsAtCompileTime), - MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), - - XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0, - IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : XprTypeIsRowMajor, - - RowIncr = int(get_compile_time_incr<RowIndices>::value), - ColIncr = int(get_compile_time_incr<ColIndices>::value), - InnerIncr = IsRowMajor ? ColIncr : RowIncr, - OuterIncr = IsRowMajor ? RowIncr : ColIncr, - - HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), - XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time<XprType>::ret) : int(outer_stride_at_compile_time<XprType>::ret), - XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), - - InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, - IsBlockAlike = InnerIncr==1 && OuterIncr==1, - IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,typename conditional<XprTypeIsRowMajor,ColIndices,RowIndices>::type>::value, - - InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr, - OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr, - - ReturnAsScalar = is_same<RowIndices,SingleRange>::value && is_same<ColIndices,SingleRange>::value, - ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, - ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), - - // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, - // but this is too strict regarding negative strides... - DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, - FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - FlagsLvalueBit = is_lvalue<XprType>::value ? 
LvalueBit : 0, - Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit - }; - - typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType; -}; - -} - -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl; - - -/** \class IndexedView - * \ingroup Core_Module - * - * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices - * - * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns - * \tparam RowIndices the type of the object defining the sequence of row indices - * \tparam ColIndices the type of the object defining the sequence of column indices - * - * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection - * of sub-sets of rows and columns, that are themselves defined by generic sequences of row indices \f$ \{r_0,r_1,..r_{m-1}\} \f$ - * and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. Let \f$ A \f$ be the nested matrix, then the resulting matrix \f$ B \f$ has \c m - * rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j) \f$. - * - * The \c RowIndices and \c ColIndices types must be compatible with the following API: - * \code - * <integral type> operator[](Index) const; - * Index size() const; - * \endcode - * - * Typical supported types thus include: - * - std::vector<int> - * - std::valarray<int> - * - std::array<int,N> - * - Plain C arrays: int[N] - * - Eigen::ArrayXi - * - decltype(ArrayXi::LinSpaced(...)) - * - Any view/expressions of the previous types - * - Eigen::ArithmeticSequence - * - Eigen::internal::AllRange (helper for Eigen::all) - * - Eigen::internal::SingleRange (helper for single index) - * - etc. - * - * In typical usages of %Eigen, this class should never be used directly. It is the return type of - * DenseBase::operator()(const RowIndices&, const ColIndices&).
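For illustration, a minimal usage sketch (not part of this patch) of the generic DenseBase::operator() mentioned above; it relies on the IndexedView API that this change removes, and all names and sizes here are arbitrary:

\code
#include <Eigen/Dense>
#include <vector>

Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
std::vector<int> ri{2,0};            // row indices {r_0, r_1}
Eigen::ArrayXi ci(3); ci << 1,1,3;   // column indices {c_0, c_1, c_2}
auto B = A(ri, ci);                  // 2x3 expression with B(i,j) == A(ri[i], ci[j])
\endcode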
- * - * \sa class Block - */ -template<typename XprType, typename RowIndices, typename ColIndices> -class IndexedView : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind> -{ -public: - typedef typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) - - typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested; - typedef typename internal::remove_all<XprType>::type NestedExpression; - - template<typename T0, typename T1> - IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) - : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) - {} - - /** \returns number of rows */ - Index rows() const { return internal::size(m_rowIndices); } - - /** \returns number of columns */ - Index cols() const { return internal::size(m_colIndices); } - - /** \returns the nested expression */ - const typename internal::remove_all<XprType>::type& - nestedExpression() const { return m_xpr; } - - /** \returns the nested expression */ - typename internal::remove_reference<XprType>::type& - nestedExpression() { return m_xpr.const_cast_derived(); } - - /** \returns a const reference to the object storing/generating the row indices */ - const RowIndices& rowIndices() const { return m_rowIndices; } - - /** \returns a const reference to the object storing/generating the column indices */ - const ColIndices& colIndices() const { return m_colIndices; } - -protected: - MatrixTypeNested m_xpr; - RowIndices m_rowIndices; - ColIndices m_colIndices; -}; - - -// Generic API dispatcher -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl - : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type -{ -public: - typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type Base; -}; - -namespace internal { - - -template<typename ArgType, typename RowIndices, typename ColIndices> -struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased> - : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices> > -{ - typedef IndexedView<ArgType, RowIndices, ColIndices> XprType; - - enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */, - - Flags = (evaluator<ArgType>::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)), - - Alignment = 0 - }; - - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) - { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const - { - return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Scalar& coeffRef(Index row, Index col) - { - return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - -protected: - - evaluator<ArgType> m_argImpl; - const XprType& m_xpr; - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_H diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index 5ec6c39..a648aa0 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h 
+++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -14,6 +14,7 @@ // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: @@ -412,7 +413,7 @@ inline NewType cast(const OldType& x) static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(round); + using std::round; return round(x); } }; @@ -482,55 +483,6 @@ struct arg_retval }; /**************************************************************************** -* Implementation of expm1 * -****************************************************************************/ - -// This implementation is based on GSL Math's expm1. -namespace std_fallback { - // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, - // or that there is no suitable std::expm1 function available. Implementation - // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. - template<typename Scalar> - EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_USING_STD_MATH(exp); - Scalar u = exp(x); - if (u == Scalar(1)) { - return x; - } - Scalar um1 = u - RealScalar(1); - if (um1 == Scalar(-1)) { - return RealScalar(-1); - } - - EIGEN_USING_STD_MATH(log); - return (u - RealScalar(1)) * x / log(u); - } -} - -template<typename Scalar> -struct expm1_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH - using std::expm1; - #endif - using std_fallback::expm1; - return expm1(x); - } -}; - - -template<typename Scalar> -struct expm1_retval -{ - typedef Scalar type; -}; - -/**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -549,7 +501,7 @@ namespace std_fallback { template<typename Scalar> struct log1p_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) #if EIGEN_HAS_CXX11_MATH @@ -688,7 +640,7 @@ template<typename Scalar> struct random_default_impl<Scalar, false, true> { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y<x) return x; @@ -874,7 +826,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -890,84 +842,6 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) EIGEN_USING_STD_MATH(max); return max EIGEN_NOT_A_MACRO (x,y); } - - -#elif defined(__SYCL_DEVICE_ONLY__) -template<typename T> -EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) -{ - - return y < x ? y : x; -} - -template<typename T> -EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) -{ - - return x < y ? 
y : x; -} - -EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE long mini(const long & x, const long & y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE long maxi(const long & x, const long & y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::max(x,y); -} - - -EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) -{ - return cl::sycl::fmax(x,y); -} - -EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) -{ - return cl::sycl::fmax(x,y); -} - #else template<typename T> EIGEN_DEVICE_FUNC @@ -1080,11 +954,6 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } -EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -1100,24 +969,10 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala return internal::pow_impl<ScalarX,ScalarY>::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } -EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } -EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -1125,11 +980,6 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } -EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } -#endif // 
defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC T (floor)(const T& x) @@ -1138,11 +988,6 @@ T (floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } -EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) { return ::floorf(x); } @@ -1159,11 +1004,6 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } -EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1204,11 +1044,6 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } -EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { @@ -1216,12 +1051,6 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } -EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1232,11 +1061,19 @@ double log(const double &x) { return ::log(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits<T>::Real abs(const T &x) { +typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type +abs(const T &x) { + return x; +} + #if defined(__SYCL_DEVICE_ONLY__) EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } @@ -1267,11 +1104,6 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); } -EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } @@ -1280,26 +1112,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) -{ - return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); -} - -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } -EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - -#ifdef __CUDACC__ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float expm1(const float &x) { return ::expm1f(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double expm1(const double &x) { return ::expm1(x); } 
-#endif - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { @@ -1307,11 +1119,6 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); } -EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1327,11 +1134,6 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } -EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1347,11 +1149,6 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); } -EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1367,11 +1164,6 @@ T acos(const T &x) { return acos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } -EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1387,11 +1179,6 @@ T asin(const T &x) { return asin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } -EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1407,11 +1194,6 @@ T atan(const T &x) { return atan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } -EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1428,11 +1210,6 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } -EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1448,11 +1225,6 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); } -EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1468,10 +1240,7 @@ T tanh(const T &x) { return tanh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } -EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } -#elif (!defined(__CUDACC__)) && 
EIGEN_FAST_MATH +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif @@ -1491,11 +1260,6 @@ T fmod(const T& a, const T& b) { return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } -EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1638,13 +1402,13 @@ template<> struct random_impl<bool> template<> struct scalar_fuzzy_impl<bool> { typedef bool RealScalar; - + template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1656,10 +1420,10 @@ template<> struct scalar_fuzzy_impl<bool> { return (!x) || y; } - + }; - + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index ae1386b..3c9ef22 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -29,7 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f in single-precision. const T plus_9 = pset1<T>(9.f); const T minus_9 = pset1<T>(-9.f); - const T x = pmax(pmin(a_x, plus_9), minus_9); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). 
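// For illustration (not part of the patch): x86 (v)minps/(v)maxps compute
// (a < b) ? a : b, so when either input is NaN they return the *second* operand.
// With pmin/pmax lowered to these instructions, keeping a_x in the second slot
// of both calls lets a NaN input survive the clamp; a scalar sketch of the idea:
//   float isa_min(float a, float b) { return (a < b) ? a : b; }      // NaN -> b
//   float isa_max(float a, float b) { return (a > b) ? a : b; }      // NaN -> b
//   float clamp9(float x) { return isa_max(-9.f, isa_min(9.f, x)); } // clamp9(NaN) stays NaN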
const T alpha_1 = pset1<T>(4.89352455891786e-03f); const T alpha_3 = pset1<T>(6.37261928875436e-04f); diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index 200e577..ce41218 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -76,7 +76,6 @@ template<typename Derived> class MatrixBase using Base::coeffRef; using Base::lazyAssign; using Base::eval; - using Base::operator-; using Base::operator+=; using Base::operator-=; using Base::operator*=; @@ -123,6 +122,7 @@ template<typename Derived> class MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/NestByValue.h b/eigen/Eigen/src/Core/NestByValue.h index 01cf192..13adf07 100644 --- a/eigen/Eigen/src/Core/NestByValue.h +++ b/eigen/Eigen/src/Core/NestByValue.h @@ -67,25 +67,25 @@ template<typename ExpressionType> class NestByValue } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index row, Index col) const + inline const PacketScalar packet(Index row, Index col) const { return m_expression.template packet<LoadMode>(row, col); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index row, Index col, const PacketScalar& x) + inline void writePacket(Index row, Index col, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index index) const + inline const PacketScalar packet(Index index) const { return m_expression.template packet<LoadMode>(index); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& x) + inline void writePacket(Index index, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); } @@ -99,7 +99,7 @@ template<typename ExpressionType> class NestByValue /** \returns an expression of the temporary version of *this. */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const NestByValue<Derived> +inline const NestByValue<Derived> DenseBase<Derived>::nestByValue() const { return NestByValue<Derived>(derived()); diff --git a/eigen/Eigen/src/Core/NumTraits.h b/eigen/Eigen/src/Core/NumTraits.h index aebc0c2..daf4898 100644 --- a/eigen/Eigen/src/Core/NumTraits.h +++ b/eigen/Eigen/src/Core/NumTraits.h @@ -71,7 +71,7 @@ struct default_digits10_impl<T,false,true> // Integer * and to \c 0 otherwise. * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. - * Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * Stay vague here. No need to do architecture-specific stuff. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. 
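For illustration, a hedged sketch of a NumTraits specialization that satisfies the list above; the scalar type MyFloat and all cost/precision constants below are hypothetical placeholders, not values taken from Eigen:

\code
namespace Eigen {
template<> struct NumTraits<MyFloat> : GenericNumTraits<MyFloat>
{
  typedef MyFloat Real;
  enum {
    IsComplex = 0,
    IsInteger = 0,
    IsSigned = 1,
    RequireInitialization = 1, // MyFloat's constructor must be called
    ReadCost = 1,
    AddCost = 2,  // rough cycle estimates only: stay vague,
    MulCost = 4   // no architecture-specific tuning
  };
  static inline Real epsilon()         { return MyFloat(1e-7f); }
  static inline Real dummy_precision() { return MyFloat(1e-5f); }
  static inline int  digits10()        { return 7; }
};
}
\endcode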
@@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } + + static inline int digits10() { return NumTraits<Scalar>::digits10(); } }; template<> struct NumTraits<std::string> diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index 583b7f5..c42725d 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename static const bool value = true; }; +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; + template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> struct assignment_from_xpr_op_product { diff --git a/eigen/Eigen/src/Core/Random.h b/eigen/Eigen/src/Core/Random.h index 486e9ed..6faf789 100644 --- a/eigen/Eigen/src/Core/Random.h +++ b/eigen/Eigen/src/Core/Random.h @@ -128,7 +128,7 @@ DenseBase<Derived>::Random() * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom() +inline Derived& DenseBase<Derived>::setRandom() { return *this = Random(rows(), cols()); } diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index 2b5b73b..b6e8f88 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template<typename Derived> template<typename Func> -EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar +typename internal::traits<Derived>::Scalar DenseBase<Derived>::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); @@ -422,7 +422,7 @@ DenseBase<Derived>::redux(const Func& func) const * \warning the result is undefined if \c *this contains NaN. */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff() const { return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>()); @@ -432,7 +432,7 @@ DenseBase<Derived>::minCoeff() const * \warning the result is undefined if \c *this contains NaN. 
*/ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff() const { return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>()); @@ -445,7 +445,7 @@ DenseBase<Derived>::maxCoeff() const * \sa trace(), prod(), mean() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -458,7 +458,7 @@ DenseBase<Derived>::sum() const * \sa trace(), prod(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::mean() const { #ifdef __INTEL_COMPILER @@ -479,7 +479,7 @@ DenseBase<Derived>::mean() const * \sa sum(), mean(), trace() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -494,7 +494,7 @@ DenseBase<Derived>::prod() const * \sa diagonal(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar MatrixBase<Derived>::trace() const { return derived().diagonal().sum(); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index abb1e51..bdf24f5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -184,8 +184,6 @@ protected: * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); } * \endcode * - * See also the following stackoverflow questions for further references: - * - <a href="http://stackoverflow.com/questions/21132538/correct-usage-of-the-eigenref-class">Correct usage of the Eigen::Ref<> class</a> * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ diff --git a/eigen/Eigen/src/Core/Replicate.h b/eigen/Eigen/src/Core/Replicate.h index 0b2d6d7..9960ef8 100644 --- a/eigen/Eigen/src/Core/Replicate.h +++ b/eigen/Eigen/src/Core/Replicate.h @@ -115,7 +115,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate */ template<typename Derived> template<int RowFactor, int ColFactor> -EIGEN_DEVICE_FUNC const Replicate<Derived,RowFactor,ColFactor> +const Replicate<Derived,RowFactor,ColFactor> DenseBase<Derived>::replicate() const { return Replicate<Derived,RowFactor,ColFactor>(derived()); @@ -130,7 +130,7 @@ DenseBase<Derived>::replicate() const * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType +const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const { return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType diff --git a/eigen/Eigen/src/Core/ReturnByValue.h b/eigen/Eigen/src/Core/ReturnByValue.h index 11dc86d..c44b767 100644 --- a/eigen/Eigen/src/Core/ReturnByValue.h +++ b/eigen/Eigen/src/Core/ReturnByValue.h @@ -79,7 +79,7 @@ template<typename Derived> class ReturnByValue template<typename 
Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) +Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) { other.evalTo(derived()); return derived(); diff --git a/eigen/Eigen/src/Core/Reverse.h b/eigen/Eigen/src/Core/Reverse.h index 8b6b3ab..0640cda 100644 --- a/eigen/Eigen/src/Core/Reverse.h +++ b/eigen/Eigen/src/Core/Reverse.h @@ -114,7 +114,7 @@ template<typename MatrixType, int Direction> class Reverse * */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType +inline typename DenseBase<Derived>::ReverseReturnType DenseBase<Derived>::reverse() { return ReverseReturnType(derived()); @@ -136,7 +136,7 @@ DenseBase<Derived>::reverse() * * \sa VectorwiseOp::reverseInPlace(), reverse() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace() +inline void DenseBase<Derived>::reverseInPlace() { if(cols()>rows()) { @@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl<Horizontal> * * \sa DenseBase::reverseInPlace(), reverse() */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() +void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() { internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived()); } diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 7e71fe3..504c98f 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -322,7 +322,7 @@ public: /** This is the const version of MatrixBase::selfadjointView() */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() const { return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived()); @@ -339,7 +339,7 @@ MatrixBase<Derived>::selfadjointView() const */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() { return typename SelfAdjointViewReturnType<UpLo>::Type(derived()); diff --git a/eigen/Eigen/src/Core/Solve.h b/eigen/Eigen/src/Core/Solve.h index 960a585..a8daea5 100644 --- a/eigen/Eigen/src/Core/Solve.h +++ b/eigen/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s template<typename Decomposition, typename RhsType> struct solve_traits<Decomposition,RhsType,Dense> { - typedef Matrix<typename RhsType::Scalar, + typedef typename make_proper_matrix_type<typename RhsType::Scalar, Decomposition::ColsAtCompileTime, RhsType::ColsAtCompileTime, RhsType::PlainObject::Options, Decomposition::MaxColsAtCompileTime, - RhsType::MaxColsAtCompileTime> PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template<typename Decomposition, typename RhsType> diff --git a/eigen/Eigen/src/Core/SolveTriangular.h b/eigen/Eigen/src/Core/SolveTriangular.h index a0011d4..049890b 100644 --- a/eigen/Eigen/src/Core/SolveTriangular.h +++ b/eigen/Eigen/src/Core/SolveTriangular.h @@ -164,7 +164,7 @@ struct 
triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatrixType, unsigned int Mode> template<int Side, typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const +void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const { OtherDerived& other = _other.const_cast_derived(); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); diff --git a/eigen/Eigen/src/Core/Transpose.h b/eigen/Eigen/src/Core/Transpose.h index ba7d6e6..79b767b 100644 --- a/eigen/Eigen/src/Core/Transpose.h +++ b/eigen/Eigen/src/Core/Transpose.h @@ -168,7 +168,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Transpose<Derived> +inline Transpose<Derived> DenseBase<Derived>::transpose() { return TransposeReturnType(derived()); @@ -180,7 +180,7 @@ DenseBase<Derived>::transpose() * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ConstTransposeReturnType +inline typename DenseBase<Derived>::ConstTransposeReturnType DenseBase<Derived>::transpose() const { return ConstTransposeReturnType(derived()); @@ -206,7 +206,7 @@ DenseBase<Derived>::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType +inline const typename MatrixBase<Derived>::AdjointReturnType MatrixBase<Derived>::adjoint() const { return AdjointReturnType(this->transpose()); @@ -281,7 +281,7 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ * * \sa transpose(), adjoint(), adjointInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() +inline void DenseBase<Derived>::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -312,7 +312,7 @@ EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace() +inline void MatrixBase<Derived>::adjointInPlace() { derived() = adjoint().eval(); } diff --git a/eigen/Eigen/src/Core/TriangularMatrix.h b/eigen/Eigen/src/Core/TriangularMatrix.h index ed80da3..667ef09 100644 --- a/eigen/Eigen/src/Core/TriangularMatrix.h +++ b/eigen/Eigen/src/Core/TriangularMatrix.h @@ -488,6 +488,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat * \sa TriangularView::solveInPlace() */ template<int Side, typename Other> + EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval<Side,TriangularViewType, Other> solve(const MatrixBase<Other>& other) const; @@ -553,7 +554,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline 
TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -563,7 +564,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDer // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>()); } @@ -572,7 +573,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); @@ -582,7 +583,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<Othe template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); @@ -597,7 +598,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c * If the matrix is triangular, the opposite part is set to zero. */ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const { evalToLazy(other.derived()); } @@ -623,7 +624,6 @@ EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() { @@ -633,7 +633,6 @@ MatrixBase<Derived>::triangularView() /** This is the const version of MatrixBase::triangularView() */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() const { @@ -931,7 +930,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite> * If the matrix is triangular, the opposite part is set to zero. 
*/ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const { other.derived().resize(this->rows(), this->cols()); internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression()); diff --git a/eigen/Eigen/src/Core/VectorwiseOp.h b/eigen/Eigen/src/Core/VectorwiseOp.h index 893bc79..4fe267e 100644 --- a/eigen/Eigen/src/Core/VectorwiseOp.h +++ b/eigen/Eigen/src/Core/VectorwiseOp.h @@ -670,7 +670,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType +inline typename DenseBase<Derived>::ColwiseReturnType DenseBase<Derived>::colwise() { return ColwiseReturnType(derived()); @@ -684,7 +684,7 @@ DenseBase<Derived>::colwise() * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType +inline typename DenseBase<Derived>::RowwiseReturnType DenseBase<Derived>::rowwise() { return RowwiseReturnType(derived()); diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 6362309..195d40f 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -183,22 +183,12 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& } #endif -template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_pd(b,a); -} -template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. - return _mm256_max_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. 
- return _mm256_max_pd(b,a); -} +template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); } + +template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } @@ -235,7 +225,7 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from) // Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from)); // tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1); // return _mm256_unpacklo_ps(tmp,tmp); - + // _mm256_insertf128_ps is very slow on Haswell, thus: Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); // mimic an "inplace" permutation of the lower 128bits using a blend diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index 12b8975..f6500a1 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -59,8 +59,8 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, + HasSqrt = 1, + HasRsqrt = 1, #endif HasDiv = 1 }; @@ -75,7 +75,7 @@ template<> struct packet_traits<double> : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) - HasSqrt = EIGEN_FAST_MATH, + HasSqrt = 1, HasRsqrt = EIGEN_FAST_MATH, #endif HasDiv = 1 @@ -230,27 +230,23 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, template <> EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_ps(b, a); + return _mm512_min_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_pd(b, a); + return _mm512_min_pd(a, b); } template <> EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm512_max_ps(b, a); + return _mm512_max_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. 
- return _mm512_max_pd(b, a); + return _mm512_max_pd(a, b); } template <> @@ -465,21 +461,53 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { // {a0, a0, a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); - __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); - __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); - return pairs; + Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane0 = _mm256_blend_ps( + lane0, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); + + Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane1 = _mm256_blend_ps( + lane1, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); + +#ifdef EIGEN_VECTORIZE_AVX512DQ + Packet16f res = _mm512_undefined_ps(); + // accumulate both 256-bit lanes into res before returning (a double early + // return here would leave the upper lane unset) + res = _mm512_insertf32x8(res, lane0, 0); + res = _mm512_insertf32x8(res, lane1, 1); + return res; +#else + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); + return res; +#endif } // Loads 4 doubles from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); - return x; + Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); + lane0 = _mm256_permute_pd(lane0, 3 << 2); + + Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); + lane1 = _mm256_permute_pd(lane1, 3 << 2); + + Packet8d res = _mm512_undefined_pd(); + res = _mm512_insertf64x4(res, lane0, 0); + return _mm512_insertf64x4(res, lane1, 1); } // Loads 4 floats from memory and returns the packet @@ -497,11 +525,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) { // {a0, a0, a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) { - __m128d tmp0 = _mm_load_pd1(from); - __m256d lane0 = _mm256_broadcastsd_pd(tmp0); - __m128d tmp1 = _mm_load_pd1(from + 1); - __m256d lane1 = _mm256_broadcastsd_pd(tmp1); - __m512d tmp = _mm512_undefined_pd(); + Packet8d tmp = _mm512_undefined_pd(); + Packet2d tmp0 = _mm_load_pd1(from); + Packet2d tmp1 = _mm_load_pd1(from + 1); + Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); + Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); tmp = _mm512_insertf64x4(tmp,
lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } @@ -632,8 +660,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512 #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ - __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ - __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); __m256 OUTPUT##_1 = \ + _mm512_extractf32x8_ps(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -723,7 +751,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0)); + final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0)); hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0); hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0); @@ -773,7 +801,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); + final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); __m512 final_output; @@ -823,7 +851,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); @@ -839,7 +867,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0); @@ -848,52 +876,55 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) template <> EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - Packet8f x = _mm256_add_ps(lane0, lane1); - return predux<Packet8f>(x); + //#ifdef EIGEN_VECTORIZE_AVX512DQ +#if 0 + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + Packet8f sum = padd(lane0, lane1); + Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1)); + tmp0 = _mm256_hadd_ps(tmp0, tmp0); + return pfirst(_mm256_hadd_ps(tmp0, tmp0)); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3)); sum = _mm_hadd_ps(sum, sum); sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return pfirst(sum); #endif } template <> EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) { - __m256d
lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d sum = _mm256_add_pd(lane0, lane1); - __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d sum = padd(lane0, lane1); + Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); + return pfirst(_mm256_hadd_pd(tmp0, tmp0)); } template <> EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - return _mm256_add_ps(lane0, lane1); + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + return padd(lane0, lane1); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum0 = _mm_add_ps(lane0, lane2); - __m128 sum1 = _mm_add_ps(lane1, lane3); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum0 = padd(lane0, lane2); + Packet4f sum1 = padd(lane1, lane3); return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1); #endif } template <> EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_add_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = padd(lane0, lane1); return res; } @@ -908,59 +939,58 @@ EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) { res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #endif } template <> EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = pmul(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = pmul(lane0, lane1); res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res 
= _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } template <> EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_min_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_min_pd(lane0, lane1); res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } - template <> EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_max_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_max_pd(lane0, lane1); res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 67518da..294c517 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -53,7 +53,7 @@ namespace half_impl { // Make our own __half definition that is similar to CUDA's. 
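The raw_uint16_to_half constants used by the numeric_limits<Eigen::half> specialization further down in this file's diff follow the IEEE binary16 layout (1 sign bit, 5 exponent bits, 10 mantissa bits). A reference decoder for checking them (an illustrative sketch only; the helper name is hypothetical):

  #include <cmath>
  #include <cstdint>

  // Decode an IEEE binary16 bit pattern to double.
  double half_bits_to_double(std::uint16_t h) {
    const int sign = (h >> 15) & 0x1;
    const int exp  = (h >> 10) & 0x1f;
    const int man  = h & 0x3ff;
    double v;
    if (exp == 0)       v = std::ldexp(man, -24);             // subnormal: man * 2^-24
    else if (exp == 31) v = man ? NAN : INFINITY;             // 0x7c00 -> inf, 0x7e00 -> NaN
    else                v = std::ldexp(1024 + man, exp - 25); // normal: (1 + man/1024) * 2^(exp-15)
    return sign ? -v : v;
  }
  // e.g. 0x7bff -> 65504 (max), 0xfbff -> -65504 (lowest),
  //      0x0400 -> 2^-14 (min normal), 0x0001 -> 2^-24 (denorm_min)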
struct __half { - EIGEN_DEVICE_FUNC __half() : x(0) {} + EIGEN_DEVICE_FUNC __half() {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -386,18 +386,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hexp(a)); -#else - return half(::expf(float(a))); -#endif -} -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { - return half(numext::expm1(float(a))); + return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return half(::hlog(a)); + return Eigen::half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -409,11 +402,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hsqrt(a)); -#else - return half(::sqrtf(float(a))); -#endif + return half(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -431,18 +420,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hfloor(a)); -#else return half(::floorf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hceil(a)); -#else return half(::ceilf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { 
return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a529..0348b41 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -57,18 +57,6 @@ double2 pexp<double2>(const double2& a) } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pexpm1<float4>(const float4& a) -{ - return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pexpm1<double2>(const double2& a) -{ - return make_double2(expm1(a.x), expm1(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) { return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h index 8c46af0..4dda631 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const d return make_double2(from[0], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { +template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { return make_float4(from[0], from[0], from[1], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { +template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { return make_double2(from[0], from[0]); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index b9a125b..ae54225 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,6 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -276,14 +275,6 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } -template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = expm1f(a1); - float r2 = expm1f(a2); - return __floats2half2_rn(r1, r2); -} - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index 
aede4a6..836fbc0 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -116,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 03c8a2c..3832de1 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -45,7 +45,7 @@ struct eigen_packet_wrapper m_val = v; return *this; } - + T m_val; }; typedef eigen_packet_wrapper<__m128> Packet4f; @@ -69,7 +69,7 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; }; #define vec2d_swizzle1(v,p,q) \ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2))))) - + #define vec4f_swizzle2(a,b,p,q,r,s) \ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) @@ -190,7 +190,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } #endif - + template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); } template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); } @@ -250,34 +250,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } #endif -template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("minps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm_min_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("minpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. 
- return _mm_min_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -289,34 +263,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const #endif } -template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("maxps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("maxpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 diff --git a/eigen/Eigen/src/Core/functors/NullaryFunctors.h b/eigen/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466..b03be02 100644 --- a/eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset<Packet>(0)), m_flip(numext::abs(high)<numext::abs(low)) {} template<typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits<Scalar>::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? 
m_high : (m_low + RealScalar(i)*m_step); } template<typename IndexType> @@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i-m_size1)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> } else { - Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); if(i==m_size1-unpacket_traits<Packet>::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/eigen/Eigen/src/Core/functors/UnaryFunctors.h b/eigen/Eigen/src/Core/functors/UnaryFunctors.h index bfc0465..2e6a00f 100644 --- a/eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -264,26 +264,6 @@ struct functor_traits<scalar_exp_op<Scalar> > { /** \internal * - * \brief Template functor to compute the exponential of a scalar - 1. - * - * \sa class CwiseUnaryOp, ArrayBase::expm1() - */ -template<typename Scalar> struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } - template <typename Packet> - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } -}; -template <typename Scalar> -struct functor_traits<scalar_expm1_op<Scalar> > { - enum { - PacketAccess = packet_traits<Scalar>::HasExpm1, - Cost = functor_traits<scalar_exp_op<Scalar> >::Cost // TODO measure cost of expm1 - }; -}; - -/** \internal - * * \brief Template functor to compute the logarithm of a scalar * * \sa class CwiseUnaryOp, ArrayBase::log() @@ -698,13 +678,7 @@ struct functor_traits<scalar_ceil_op<Scalar> > template<typename Scalar> struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isnan(a); -#else - return (numext::isnan)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); } }; template<typename Scalar> struct functor_traits<scalar_isnan_op<Scalar> > @@ -722,13 +696,7 @@ struct functor_traits<scalar_isnan_op<Scalar> > template<typename Scalar> struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isinf(a); -#else - return (numext::isinf)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); } }; template<typename Scalar> struct functor_traits<scalar_isinf_op<Scalar> > @@ -746,13 +714,7 @@ struct functor_traits<scalar_isinf_op<Scalar> > template<typename Scalar> struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type 
operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isfinite(a); -#else - return (numext::isfinite)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); } }; template<typename Scalar> struct functor_traits<scalar_isfinite_op<Scalar> > diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ad38bcf..e844e37 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, @@ -283,21 +286,23 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 
1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template<typename MatrixType, unsigned int UpLo> template<typename ProductType> -EIGEN_DEVICE_FUNC TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) +TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - + general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); - + return derived(); } diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15c..41e18ff 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 41d8242..3c1a7fc 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,8 +15,10 @@ namespace Eigen { namespace internal { /* Optimized col-major matrix * vector product: - * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * This algorithm processes 4 columns at once, which both reduces + * the number of loads/stores of the result by a factor of 4 and reduces + * instruction dependencies. Moreover, we know that all bands have the + * same alignment pattern. * * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments * |real |cplx |real | alpha is converted to a cplx when calling the run function, no vectorization * |real |cplx |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul * + * Accesses to the matrix coefficients follow the following logic: + * + * - if all columns have the same alignment then + * - if the columns have the same alignment as the result vector, then easy!
(-> AllAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise + * - if even columns have the same alignment then + * // odd columns are guaranteed to have the same alignment too + * - if even or odd columns have the same alignment as the result, then + * // for a register size of 2 scalars, this is guaranteed to be the case (e.g., SSE with double) + * - perform half aligned and half unaligned loads (-> EvenAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise, if the register size is 4 scalars (e.g., SSE with float) then + * - one out of 4 consecutive columns is guaranteed to be aligned with the result vector, + * perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case) + * // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h + * - otherwise, + * // if we get here, this means the register size is greater than 4 (e.g., AVX with floats), + * // we currently fall back to the NoneAligned case + * + * The same reasoning applies to the transposed case. + * + * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet... + * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment + * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on an 8-byte boundary are not too slow + * compared to unaligned loads on a 4-byte boundary. + * */ template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version> @@ -59,145 +87,238 @@ EIGEN_DONT_INLINE static void run( template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, RhsScalar alpha) { EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr==1); - - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code.
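Stripped of packets, peeling, and alignment handling, both the old and the new version of this kernel compute the same thing. A plain scalar reference of the column-major case (an illustrative sketch; the function name is hypothetical):

  // res += alpha * lhs * rhs for a column-major lhs.
  template <typename Scalar, typename Index>
  void gemv_col_major_reference(Index rows, Index cols,
                                const Scalar* lhs, Index lhsStride,
                                const Scalar* rhs, Scalar* res, Scalar alpha) {
    for (Index j = 0; j < cols; ++j) {
      const Scalar b = alpha * rhs[j];        // one rhs coefficient per column
      for (Index i = 0; i < rows; ++i)
        res[i] += lhs[i + j * lhsStride] * b; // accumulate the scaled column j
    }
  }

The restored kernel below folds alpha into ptmp0..ptmp3 in the same way, four columns per pass.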
- LhsMapper lhs(alhs); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \ + pstore(&res[j], \ + padd(pload<ResPacket>(&res[j]), \ + padd( \ + padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \ + pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \ + padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \ + pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) ))) + + typedef typename LhsMapper::VectorMapper LhsScalars; conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + const Index lhsStride = lhs.stride(); - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; - const Index n8 = rows-8*ResPacketSize+1; - const Index n4 = rows-4*ResPacketSize+1; - const Index n3 = rows-3*ResPacketSize+1; - const Index n2 = rows-2*ResPacketSize+1; - const Index n1 = rows-1*ResPacketSize+1; + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_default_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; - // TODO: improve the following heuristic: - const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4); - ResPacket palpha = pset1<ResPacket>(alpha); + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(size); - for(Index j2=0; j2<cols; j2+=block_cols) + // find how many columns do we have to skip to be aligned with the result (if possible) + Index skipColumns = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) ) { - Index jend = numext::mini(j2+block_cols,cols); - Index i=0; - for(; i<n8; i+=ResPacketSize*8) + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; + } + else if(LhsPacketSize > 4) + { + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. 
+ // Currently, it seems to be better to perform unaligned loads anyway + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize); + + while (skipColumns<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize)) + ++skipColumns; + if (skipColumns==LhsPacketSize) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*7,j),b0,c7); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); - pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu<ResPacket>(res+i+ResPacketSize*4))); - pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu<ResPacket>(res+i+ResPacketSize*5))); - pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu<ResPacket>(res+i+ResPacketSize*6))); - pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu<ResPacket>(res+i+ResPacketSize*7))); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipColumns = 0; } - if(i<n4) + else { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + skipColumns = (std::min)(skipColumns,cols); + // note that the skipped columns are processed later.
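The AllAligned/EvenAligned/FirstAligned/NoneAligned choice above depends only on how the column stride shifts alignment from one column to the next. A stand-alone sketch of that classification (illustrative only; it mirrors the alignmentStep computation, assuming a positive stride and folding in the LhsPacketSize > 4 fallback):

  #include <cstddef>

  enum Pattern { AllAligned, EvenAligned, FirstAligned, NoneAligned };

  Pattern classify_columns(std::ptrdiff_t lhsStride, int packetSize) {
    const std::ptrdiff_t step = packetSize > 1
        ? (packetSize - lhsStride % packetSize) & (packetSize - 1) : 0;
    if (step == 0)              return AllAligned;   // every column equally aligned
    if (step == packetSize / 2) return EvenAligned;  // every other column aligned
    if (packetSize <= 4)        return FirstAligned; // one column in packetSize aligned
    return NoneAligned;                              // wide packets: unaligned loads
  }

For example, SSE doubles (packetSize==2) with an odd stride give EvenAligned, and SSE floats with stride%4==1 give FirstAligned.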
+ } - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); + /* eigen_internal_assert( (alignmentPattern==NoneAligned) + || (skipColumns + columnsAtOnce >= cols) + || LhsPacketSize > size + || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/ + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } - i+=ResPacketSize*4; - } - if(i<n3) - { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); + Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; + for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) + { + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)), + ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)), + ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)), + ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0)); - i+=ResPacketSize*3; - } - if(i<n2) + // this helps a lot generating better binary code + const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1), + lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3); + + if (Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + /* explicit vectorization */ + // process initial unaligned coeffs + for (Index j=0; j<alignedStart; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); + } - for(Index j=j2; j<jend; j+=1) + if (alignedSize>alignedStart) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + 
_EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if(peels>1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*ResPacketSize) + { + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + A00 = lhs0.template load<LhsPacket, Aligned>(j); + A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize); + T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, T0); + pstore(&res[j],T0); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - i+=ResPacketSize*2; + } // end explicit vectorization + + /* process remaining coeffs (or all if there is no explicit vectorization) */ + for (Index j=alignedSize; j<size; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); } - if(i<n1) + } + + // process remaining first and last columns (at most columnsAtOnce-1) + Index end = cols; + Index start = columnBound; + do + { + for (Index k=start; k<end; ++k) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - for(Index j=j2; j<jend; j+=1) + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0)); + const LhsScalars lhs0 = lhs.getVectorMapper(0, k); + + if (Vectorizable) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j<alignedStart; ++j) + res[j] += cj.pmul(lhs0(j), pfirst(ptmp0)); + // process aligned result's coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i]))); + else + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, 
Unaligned>(i), ptmp0, pload<ResPacket>(&res[i]))); } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - i+=ResPacketSize; + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i<size; ++i) + res[i] += cj.pmul(lhs0(i), pfirst(ptmp0)); } - for(;i<rows;++i) + if (skipColumns) { - ResScalar c0(0); - for(Index j=j2; j<jend; j+=1) - c0 += cj.pmul(lhs(i,j), rhs(j,0)); - res[i] += alpha*c0; + start = 0; + end = skipColumns; + skipColumns = 0; } - } + else + break; + } while(Vectorizable); + #undef _EIGEN_ACCUMULATE_PACKETS } /* Optimized row-major matrix * vector product: @@ -242,160 +363,253 @@ EIGEN_DONT_INLINE static void run( template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, ResScalar alpha) { - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); - eigen_internal_assert(rhs.stride()==1); + + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\ + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \ + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \ + ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \ + ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \ + ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); } + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; - // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, - // processing 8 rows at once might be counter productive wrt cache. - const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7; - const Index n4 = rows-3; - const Index n2 = rows-1; + typedef typename LhsMapper::VectorMapper LhsScalars; - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index depth = cols; + const Index lhsStride = lhs.stride(); - Index i=0; - for(; i<n8; i+=8) + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type + // if that's not the case then vectorization is discarded, see below. + Index alignedStart = rhs.firstAligned(depth); + Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? 
EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(depth); + const Index rhsAlignmentOffset = rhs.firstAligned(rows); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || + (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) || + (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) ) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+7,j),b0,c7); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - ResScalar cc4 = predux(c4); - ResScalar cc5 = predux(c5); - ResScalar cc6 = predux(c6); - ResScalar cc7 = predux(c7); - for(; j<cols; ++j) - { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); - cc4 += cj.pmul(lhs(i+4,j), b0); - cc5 += cj.pmul(lhs(i+5,j), b0); - cc6 += cj.pmul(lhs(i+6,j), b0); - cc7 += cj.pmul(lhs(i+7,j), b0); - } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; - res[(i+4)*resIncr] += alpha*cc4; - res[(i+5)*resIncr] += alpha*cc5; - res[(i+6)*resIncr] += alpha*cc6; - res[(i+7)*resIncr] += alpha*cc7; + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; } - for(; i<n4; i+=4) + else if(LhsPacketSize > 4) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. 
+ alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + while (skipRows<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize)) + ++skipRows; + if (skipRows==LhsPacketSize) { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); + // nothing can be aligned, no need to skip any row + alignmentPattern = NoneAligned; + skipRows = 0; } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - for(; j<cols; ++j) + else { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); + skipRows = (std::min)(skipRows,Index(rows)); + // note that the skipped rows are processed later. } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; + /* eigen_internal_assert( alignmentPattern==NoneAligned + || LhsPacketSize==1 + || (skipRows + rowsAtOnce >= rows) + || LhsPacketSize > depth + || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/ } - for(; i<n2; i+=2) + else if(Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + alignedStart = 0; + alignedSize = depth; + alignmentPattern = AllAligned; + } - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - for(; j<cols; ++j) + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) + { + // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ?? + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0); + + // this helps the compiler generate good binary code + const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0), + lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0); + + if (Vectorizable) { - RhsScalar b0 = rhs(j,0); + /* explicit vectorization */ + ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)), + ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop gets vectorized by the compiler!
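As with the column-major kernel, the row-major path is easiest to read against its scalar semantics: every output coefficient is one dot product, which the code accumulates four rows at a time in packet registers. A reference sketch (illustrative only; the function name is hypothetical):

  // res += alpha * lhs * rhs for a row-major lhs.
  template <typename Scalar, typename Index>
  void gemv_row_major_reference(Index rows, Index cols,
                                const Scalar* lhs, Index lhsStride,
                                const Scalar* rhs, Scalar* res, Index resIncr,
                                Scalar alpha) {
    for (Index i = 0; i < rows; ++i) {
      Scalar acc = Scalar(0);
      for (Index j = 0; j < cols; ++j)
        acc += lhs[i * lhsStride + j] * rhs[j]; // dot(lhs row i, rhs)
      res[i * resIncr] += alpha * acc;
    }
  }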
+ for (Index j=0; j<alignedStart; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); + } + + if (alignedSize>alignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if (peels>1) + { + /* Here we process 4 rows with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlapping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. + */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*RhsPacketSize) + { + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + + b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0); + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } + tmp0 += predux(ptmp0); + tmp1 += predux(ptmp1); + tmp2 += predux(ptmp2); + tmp3 += predux(ptmp3); + } + } // end explicit vectorization - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); + // process remaining coeffs (or all if no explicit vectorization) + // FIXME this loop gets vectorized by the compiler!
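The FirstAligned peeling path above synthesizes unaligned lhs loads from pairs of aligned packets via palign<Offset>. A hedged SSE illustration of the same trick with SSSE3's palignr intrinsic, assuming p points exactly one float past a 16-byte boundary (not Eigen's code):

    #include <tmmintrin.h> // SSSE3, for _mm_alignr_epi8

    static inline __m128 load_one_past_aligned(const float* p)
    {
      const float* base = p - 1;         // 16-byte aligned by assumption
      __m128 lo = _mm_load_ps(base);     // p[-1], p[0], p[1], p[2]
      __m128 hi = _mm_load_ps(base + 4); // p[3], p[4], p[5], p[6]
      // Concatenate hi:lo and shift right by 4 bytes -> p[0], p[1], p[2], p[3].
      return _mm_castsi128_ps(
          _mm_alignr_epi8(_mm_castps_si128(hi), _mm_castps_si128(lo), 4));
    }

Two aligned loads plus a register shift are typically much cheaper than a boundary-crossing unaligned load on the SSE-era hardware this kernel targets, which is what the comment in the peeled loop is pointing out.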
+ for (Index j=alignedSize; j<depth; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; + res[i*resIncr] += alpha*tmp0; + res[(i+offset1)*resIncr] += alpha*tmp1; + res[(i+2)*resIncr] += alpha*tmp2; + res[(i+offset3)*resIncr] += alpha*tmp3; } - for(; i<rows; ++i) + + // process remaining first and last rows (at most rowsAtOnce-1) + Index end = rows; + Index start = rowBound; + do { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index i=start; i<end; ++i) { - RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0); + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResPacket ptmp0 = pset1<ResPacket>(tmp0); + const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); + // process first unaligned result's coeffs + // FIXME this loop gets vectorized by the compiler! + for (Index j=0; j<alignedStart; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + + if (alignedSize>alignedStart) + { + // process aligned rhs coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + else + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop gets vectorized by the compiler!
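Each packet accumulator is folded into its scalar counterpart with predux, a horizontal sum across lanes. A sketch of what predux amounts to for a 4-float packet, assuming SSE3 (illustrative; Eigen specializes predux per architecture):

    #include <pmmintrin.h> // SSE3, for _mm_hadd_ps

    static inline float horizontal_sum(__m128 v)
    {
      __m128 t = _mm_hadd_ps(v, v); // (v0+v1, v2+v3, v0+v1, v2+v3)
      t = _mm_hadd_ps(t, t);        // total sum in every lane
      return _mm_cvtss_f32(t);      // extract lane 0
    }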
+ for (Index j=alignedSize; j<depth; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + res[i*resIncr] += alpha*tmp0; } - ResScalar cc0 = predux(c0); - for(; j<cols; ++j) + if (skipRows) { - cc0 += cj.pmul(lhs(i,j), rhs(j,0)); + start = 0; + end = skipRows; + skipRows = 0; } - res[i*resIncr] += alpha*cc0; - } + else + break; + } while(Vectorizable); + + #undef _EIGEN_ACCUMULATE_PACKETS } } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/SelfadjointProduct.h b/eigen/Eigen/src/Core/products/SelfadjointProduct.h index 39c5b59..f038d68 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointProduct.h +++ b/eigen/Eigen/src/Core/products/SelfadjointProduct.h @@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> template<typename MatrixType, unsigned int UpLo> template<typename DerivedU> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha) { selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha); diff --git a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h index d395888..2ae3641 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -57,7 +57,7 @@ template<bool Cond, typename T> struct conj_expr_if template<typename MatrixType, unsigned int UpLo> template<typename DerivedU, typename DerivedV> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha) { typedef internal::blas_traits<DerivedU> UBlasTraits; diff --git a/eigen/Eigen/src/Core/util/BlasUtil.h b/eigen/Eigen/src/Core/util/BlasUtil.h index b1791fb..6e6ee11 100644 --- a/eigen/Eigen/src/Core/util/BlasUtil.h +++ b/eigen/Eigen/src/Core/util/BlasUtil.h @@ -222,11 +222,6 @@ class blas_data_mapper { return ploadt<Packet, AlignmentType>(&operator()(i, j)); } - template <typename PacketT, int AlignmentT> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { - return ploadt<PacketT, AlignmentT>(&operator()(i, j)); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); } diff --git a/eigen/Eigen/src/Core/util/Constants.h b/eigen/Eigen/src/Core/util/Constants.h index 5d37e5d..7587d68 100644 --- a/eigen/Eigen/src/Core/util/Constants.h +++ b/eigen/Eigen/src/Core/util/Constants.h @@ -25,10 +25,6 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; -/** This value means that the increment to go from one value to another in a sequence is not constant for each step. - */ -const int UndefinedIncr = 0xfffffe; - /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>(). * The value Infinity there means the L-infinity norm. */ diff --git a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h index 4431f2f..7559e12 100644 --- a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,6 +4,7 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. 
in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,9 +42,6 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/eigen/Eigen/src/Core/util/ForwardDeclarations.h index 1a48cff..ea10739 100644 --- a/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -83,7 +83,6 @@ template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class SwapWrapper; template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block; -template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView; template<typename MatrixType, int Size=Dynamic> class VectorBlock; template<typename MatrixType> class Transpose; diff --git a/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/eigen/Eigen/src/Core/util/IndexedViewHelper.h deleted file mode 100644 index ab01c85..0000000 --- a/eigen/Eigen/src/Core/util/IndexedViewHelper.h +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INDEXED_VIEW_HELPER_H -#define EIGEN_INDEXED_VIEW_HELPER_H -
-namespace Eigen { - -/** \namespace Eigen::placeholders - * \ingroup Core_Module - * - * Namespace containing symbolic placeholders and identifiers - */ -namespace placeholders { - -namespace internal { -struct symbolic_last_tag {}; -} - -/** \var last - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/column - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder supports standard arithmetic operations. - * - * A typical usage example would be: - * \code - * using namespace Eigen; - * using Eigen::placeholders::last; - * VectorXd v(n); - * v(seq(2,last-2)).setOnes(); - * \endcode - * - * \sa end - */ -static const Symbolic::SymbolExpr<internal::symbolic_last_tag> last; - -/** \var end - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last+1 element/row/column - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&).
- * - * This symbolic placeholder supports standard arithmetic operations. - * It is essentially an alias for last+1 - * - * \sa last - */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto end = last+1; -#else -// Using a FixedExpr<1> expression is important here to make sure the compiler -// can fully optimize the computation of starting indices with zero overhead. -static const Symbolic::AddExpr<Symbolic::SymbolExpr<internal::symbolic_last_tag>,Symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end(last+fix<1>()); -#endif - -} // end namespace placeholders - -namespace internal { - - // Replace symbolic last/end "keywords" by their true runtime value -inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } - -template<int N> -FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) { return x; } - -template<typename Derived> -Index eval_expr_given_size(const Symbolic::BaseExpr<Derived> &x, Index size) -{ - return x.derived().eval(placeholders::last=size-1); } - -// Extract increment/step at compile time -template<typename T, typename EnableIf = void> struct get_compile_time_incr { - enum { value = UndefinedIncr }; -}; - -// Analogue of std::get<0>(x), but tailored for our needs. -template<typename T> -Index first(const T& x) { return x.first(); } - -// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice -// The generic implementation is a no-op -template<typename T,int XprSize,typename EnableIf=void> -struct IndexedViewCompatibleType { - typedef T type; -}; - -template<typename T,typename Q> -const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } - -//-------------------------------------------------------------------------------- -// Handling of a single Index -//-------------------------------------------------------------------------------- - -struct SingleRange { - enum { - SizeAtCompileTime = 1 - }; - SingleRange(Index val) : m_value(val) {} - Index operator[](Index) const { return m_value; } - Index size() const { return 1; } - Index first() const { return m_value; } - Index m_value; -}; - -template<> struct get_compile_time_incr<SingleRange> { - enum { value = 1 }; // 1 or 0 ??
-}; - -// Turn a single index into something that looks like an array (i.e., that exposes .size() and operator[](int) methods) -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> { - // Here we could simply use Array, but maybe it's less work for the compiler to use - // a simpler wrapper such as SingleRange - //typedef Eigen::Array<Index,1,1> type; - typedef SingleRange type; -}; - -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T, XprSize, typename enable_if<Symbolic::is_symbolic<T>::value>::type> { - typedef SingleRange type; -}; - - -template<typename T> -typename enable_if<Symbolic::is_symbolic<T>::value,SingleRange>::type -makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { - return eval_expr_given_size(id,size); -} - -//-------------------------------------------------------------------------------- -// Handling of all -//-------------------------------------------------------------------------------- - -struct all_t { all_t() {} }; - -// Convert a symbolic 'all' into a usable range type -template<int XprSize> -struct AllRange { - enum { SizeAtCompileTime = XprSize }; - AllRange(Index size = XprSize) : m_size(size) {} - Index operator[](Index i) const { return i; } - Index size() const { return m_size.value(); } - Index first() const { return 0; } - variable_if_dynamic<Index,XprSize> m_size; -}; - -template<int XprSize> -struct IndexedViewCompatibleType<all_t,XprSize> { - typedef AllRange<XprSize> type; -}; - -template<typename XprSizeType> -inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { - return AllRange<get_fixed_value<XprSizeType>::value>(size); -} - -template<int Size> struct get_compile_time_incr<AllRange<Size> > { - enum { value = 1 }; -}; - -} // end namespace internal - - -namespace placeholders { - -/** \var all - * \ingroup Core_Module - * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns - */ -static const Eigen::internal::all_t all; - -} - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/eigen/Eigen/src/Core/util/IntegralConstant.h b/eigen/Eigen/src/Core/util/IntegralConstant.h deleted file mode 100644 index 78a4705..0000000 --- a/eigen/Eigen/src/Core/util/IntegralConstant.h +++ /dev/null @@ -1,270 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INTEGRAL_CONSTANT_H -#define EIGEN_INTEGRAL_CONSTANT_H - -namespace Eigen { - -namespace internal { - -template<int N> class FixedInt; -template<int N> class VariableAndFixedInt; - -/** \internal - * \class FixedInt - * - * This class embeds a compile-time integer \c N. - * - * It is similar to c++11 std::integral_constant<int,N> but with some additional features - * such as: - * - implicit conversion to int - * - arithmetic and some bitwise operators: -, +, *, /, %, &, | - * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants. - * - * It is strongly discouraged to directly deal with this class FixedInt.
Instances are expected to - * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does - * not create a FixedInt<N> instance but rather a pointer to function that needs to be \em cleaned-up - * using the generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can be a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>, class VariableAndFixedInt - */ -template<int N> class FixedInt -{ -public: - static const int value = N; - operator int() const { return value; } - FixedInt() {} - FixedInt( VariableAndFixedInt<N> other) { - EIGEN_ONLY_USED_FOR_DEBUG(other); - eigen_internal_assert(int(other)==N); - } - - FixedInt<-N> operator-() const { return FixedInt<-N>(); } - template<int M> - FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); } - template<int M> - FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); } - template<int M> - FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); } - template<int M> - FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); } - template<int M> - FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); } - template<int M> - FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); } - template<int M> - FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); } - -#if EIGEN_HAS_CXX14 - // Needed in C++14 to allow fix<N>(): - FixedInt operator() () const { return *this; } - - VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); } -#else - FixedInt ( FixedInt<N> (*)() ) {} -#endif - -#if EIGEN_HAS_CXX11 - FixedInt(std::integral_constant<int,N>) {} -#endif -}; - -/** \internal - * \class VariableAndFixedInt - * - * This class embeds both a compile-time integer \c N and a runtime integer. - * Both values are supposed to be equal unless the compile-time value \c N has a special - * value meaning that the runtime-value should be used. Depending on the context, this special - * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for - * quantities that can be negative). - * - * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only - * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. - * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert - * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index) - * using the following generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.
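The compile-time arithmetic implemented by FixedInt's operators above can be mimicked with std::integral_constant; a minimal C++11 analogue of the idea (illustration only, not the Eigen class):

    #include <type_traits>

    template<int N> struct Fixed : std::integral_constant<int, N> {};

    // The sum of two compile-time integers is again a compile-time integer.
    template<int N, int M>
    constexpr Fixed<N + M> operator+(Fixed<N>, Fixed<M>) { return {}; }

    static_assert(decltype(Fixed<3>{} + Fixed<4>{})::value == 7,
                  "evaluated entirely at compile time");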
- * - * For convenience, you can also extract the compile-time value \c N using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>(int), class FixedInt - */ -template<int N> class VariableAndFixedInt -{ -public: - static const int value = N; - operator int() const { return m_value; } - VariableAndFixedInt(int val) { m_value = val; } -protected: - int m_value; -}; - -template<typename T, int Default=Dynamic> struct get_fixed_value { - static const int value = Default; -}; - -template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> { - static const int value = N; -}; - -#if !EIGEN_HAS_CXX14 -template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> { - static const int value = N; -}; -#endif - -template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> { - static const int value = N ; -}; - -template<typename T, int N, int Default> -struct get_fixed_value<variable_if_dynamic<T,N>,Default> { - static const int value = N; -}; - -template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } -#if !EIGEN_HAS_CXX14 -template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; } -#endif - -// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: - -// By default, no cleanup: -template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; }; - -// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index -template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; }; - -#if !EIGEN_HAS_CXX14 -// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>: -template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; }; -#endif - -// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: -template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; }; -// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): -template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; }; - -#if EIGEN_HAS_CXX11 -template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; }; -#endif - -} // end namespace internal - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX14 -template<int N> -static const internal::FixedInt<N> fix{}; -#else -template<int N> -inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); } - -// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload. -// This way a code like fix<N> can only refer to the previous function. -template<int N,typename T> -inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(val); } -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** \var fix<N>() - * \ingroup Core_Module - * - * This \em identifier makes it possible to construct an object embedding a compile-time integer \c N.
- * - * \tparam N the compile-time integer value - * - * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: - * \code - * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] - * \endcode - * - * See also the function fix(int) to pass both a compile-time and runtime value. - * - * In c++14, it is implemented as: - * \code - * template<int N> static const internal::FixedInt<N> fix{}; - * \endcode - * where internal::FixedInt<N> is an internal template class similar to - * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt> - * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>. - * - * In c++98/11, it is implemented as a function: - * \code - * template<int N> inline internal::FixedInt<N> fix(); - * \endcode - * Here internal::FixedInt<N> is thus a pointer to function. - * - * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14. - * - * \sa fix<N>(int), seq, seqN - */ -template<int N> -static const auto fix(); - -/** \fn fix<N>(int) - * \ingroup Core_Module - * - * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val. - * - * \tparam N the compile-time integer value - * \param val the fallback runtime integer value - * - * This function is a more general version of the \ref fix identifier/function that can be used in template code - * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers - * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers - * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val - * will be used as a fallback. - * - * A typical use case would be: - * \code - * template<typename Derived> void foo(const MatrixBase<Derived> &mat) { - * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; - * const int n = mat.rows()/2; - * ... mat( seqN(0,fix<N>(n) ) ...; - * } - * \endcode - * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. - * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dismissed, and converted to an Eigen::Index of value \c n. - * Otherwise, the runtime-value \c n will be dismissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>.
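The Dynamic-fallback behavior described above can be pictured with a small stand-alone analogue in which N == -1 plays the role of Eigen::Dynamic (a sketch, not Eigen's implementation):

    #include <iostream>

    template<int N> struct SizeHint {
      int runtime;              // fallback, consulted only when N == -1
      explicit SizeHint(int n) : runtime(n) {}
      int value() const { return N == -1 ? runtime : N; }
    };

    int main() {
      SizeHint<4>  fixed(10);   // compile-time value wins: value() == 4
      SizeHint<-1> dynamic(10); // runtime fallback:        value() == 10
      std::cout << fixed.value() << " " << dynamic.value() << "\n";
    }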
- * - * \sa fix, seqN, class ArithmeticSequence - */ -template<int N> -static const auto fix(int val); - -#endif // EIGEN_PARSED_BY_DOXYGEN - -} // end namespace Eigen - -#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 14ec87d..38d6ddb 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 90 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -23,7 +23,7 @@ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) + #define EIGEN_COMP_GNUC 1 #else #define EIGEN_COMP_GNUC 0 #endif @@ -349,14 +349,6 @@ # define __has_feature(x) 0 #endif -// Some old compilers do not support template specializations like: -// template<typename T,int N> void foo(const T x[N]); -#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 -#else -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 -#endif - // Upperbound on the C++ version to use. // Expected values are 03, 11, 14, 17, etc. // By default, let's use an arbitrarily large C++ version. @@ -370,11 +362,6 @@ #define EIGEN_HAS_CXX11 0 #endif -#if EIGEN_MAX_CPP_VER>=14 && (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910) -#define EIGEN_HAS_CXX14 1 -#else -#define EIGEN_HAS_CXX14 0 -#endif // Do we support r-value references? #ifndef EIGEN_HAS_RVALUE_REFERENCES @@ -393,8 +380,7 @@ #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -413,12 +399,10 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -427,14 +411,13 @@ // Does the compiler fully support const expressions? 
(as in c++14) #ifndef EIGEN_HAS_CONSTEXPR -#if defined(__CUDACC__) +#ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ - (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif @@ -542,8 +525,8 @@ // - static is not very good because it prevents definitions in different object files from being merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. -#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -641,14 +624,6 @@ namespace Eigen { #endif -#if EIGEN_COMP_MSVC - // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. - // This workaround is ugly, but it does the job. -# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond -#else -# define EIGEN_CONST_CONDITIONAL(cond) cond -#endif - //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // @@ -878,8 +853,7 @@ namespace Eigen { typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \ - enum CompileTimeTraits \ - { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ + enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index 7d90534..c634d7e 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -63,7 +63,7 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS @@ -114,7 +114,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -471,8 +471,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /**
\internal Returns the smallest integer multiple of \a base and greater than or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -502,7 +502,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -522,15 +522,15 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; @@ -603,7 +603,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal @@ -622,7 +622,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -645,7 +645,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ?
NAME : 0,SIZE,true) - + #endif @@ -701,7 +701,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 8de6055..7f63707 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -97,22 +97,17 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; template<> struct is_arithmetic<signed long> { enum { value = true }; }; template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; -#if EIGEN_HAS_CXX11 -using std::is_integral; -#else -template<typename T> struct is_integral { enum { value = false }; }; -template<> struct is_integral<bool> { enum { value = true }; }; -template<> struct is_integral<char> { enum { value = true }; }; -template<> struct is_integral<signed char> { enum { value = true }; }; -template<> struct is_integral<unsigned char> { enum { value = true }; }; -template<> struct is_integral<signed short> { enum { value = true }; }; -template<> struct is_integral<unsigned short> { enum { value = true }; }; -template<> struct is_integral<signed int> { enum { value = true }; }; -template<> struct is_integral<unsigned int> { enum { value = true }; }; -template<> struct is_integral<signed long> { enum { value = true }; }; -template<> struct is_integral<unsigned long> { enum { value = true }; }; -#endif - +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; }; template <typename T> struct add_const { typedef const T type; }; template <typename T> struct add_const<T&> { typedef T& type; }; @@ -284,59 +279,6 @@ protected: }; /** \internal - * Provides access to the number of elements in the object as a compile-time constant expression. - * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). - * - * Similar to std::tuple_size, but more general. - * - * It currently supports: - * - any types T defining T::SizeAtCompileTime - * - plain C arrays as T[N] - * - std::array (c++11) - * - some internal types such as SingleRange and AllRange - * - * The second template parameter eases SFINAE-based specializations.
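The deleted trait follows a classic default-plus-partial-specialization pattern, with the second template parameter left open for enable_if-style hooks. A self-contained analogue covering the C-array and std::array cases (an assumed simplification of the code removed just below, not the Eigen code itself):

    #include <array>
    #include <cstddef>

    enum { Dynamic = -1 };

    // Default: size not resolvable at compile time.
    template<typename T, typename EnableIf = void>
    struct array_size { enum { value = Dynamic }; };

    // Plain C arrays.
    template<typename T, std::size_t N>
    struct array_size<T[N], void> { enum { value = N }; };

    // std::array (C++11).
    template<typename T, std::size_t N>
    struct array_size<std::array<T, N>, void> { enum { value = N }; };

    static_assert(array_size<int[5]>::value == 5, "");
    static_assert(array_size<std::array<float, 3> >::value == 3, "");
    static_assert(array_size<double>::value == Dynamic, "");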
- */ -template<typename T, typename EnableIf = void> struct array_size { - enum { value = Dynamic }; -}; - -template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> { - enum { value = T::SizeAtCompileTime }; -}; - -template<typename T, int N> struct array_size<const T (&)[N]> { - enum { value = N }; -}; -template<typename T, int N> struct array_size<T (&)[N]> { - enum { value = N }; -}; - -#if EIGEN_HAS_CXX11 -template<typename T, std::size_t N> struct array_size<const std::array<T,N> > { - enum { value = N }; -}; -template<typename T, std::size_t N> struct array_size<std::array<T,N> > { - enum { value = N }; -}; -#endif - -/** \internal - * Analogue of the std::size free function. - * It returns the size of the container or view \a x of type \c T - * - * It currently supports: - * - any types T defining a member T::size() const - * - plain C arrays as T[N] - * - */ -template<typename T> -Index size(const T& x) { return x.size(); } - -template<typename T,std::size_t N> -Index size(const T (&) [N]) { return N; } - -/** \internal * Convenient struct to get the result type of a unary or binary functor. * * It supports both the current STL mechanism (using the result_type member) as well as @@ -433,10 +375,10 @@ struct meta_no { char a[2]; }; template <typename T> struct has_ReturnType { - template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); - template <typename C> static meta_no testFunctor(...); + template <typename C> static meta_yes testFunctor(typename C::ReturnType const *); + template <typename C> static meta_no testFunctor(...); - enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) }; + enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) }; }; template<typename T> const T* return_ptr(); diff --git a/eigen/Eigen/src/Core/util/SymbolicIndex.h b/eigen/Eigen/src/Core/util/SymbolicIndex.h deleted file mode 100644 index bb6349e..0000000 --- a/eigen/Eigen/src/Core/util/SymbolicIndex.h +++ /dev/null @@ -1,300 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SYMBOLIC_INDEX_H -#define EIGEN_SYMBOLIC_INDEX_H - -namespace Eigen { - -/** \namespace Eigen::Symbolic - * \ingroup Core_Module - * - * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. - * Here is a simple example: - * - * \code - * // First step, defines symbols: - * struct x_tag {}; static const Symbolic::SymbolExpr<x_tag> x; - * struct y_tag {}; static const Symbolic::SymbolExpr<y_tag> y; - * struct z_tag {}; static const Symbolic::SymbolExpr<z_tag> z; - * - * // Defines an expression: - * auto expr = (x+3)/y+z; - * - * // And evaluate it: (c++14) - * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; - * - * // In c++98/11, only one symbol per expression is supported for now: - * auto expr98 = (3-x)/2; - * std::cout << expr98.eval(x=6) << "\n"; - * \endcode - * - * It is currently only used internally to define and manipulate the placeholders::last and placeholders::end symbols in Eigen::seq and Eigen::seqN.
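Under the hood the deleted file builds such expressions with expression templates; a single-symbol toy version conveys the mechanism (a deliberately simplified sketch, none of these names are Eigen's):

    #include <iostream>

    struct Sym { long eval(long v) const { return v; } };       // the symbol "x"

    struct Lit { long c; long eval(long) const { return c; } }; // a literal value

    template<class A, class B> struct Sum {
      A a; B b;
      long eval(long v) const { return a.eval(v) + b.eval(v); } // recurse into operands
    };

    template<class A> Sum<A, Lit> operator+(A a, long c) { return { a, { c } }; }

    int main() {
      Sym x;
      auto expr = x + 3;                  // the expression's structure lives in its type
      std::cout << expr.eval(6) << "\n";  // prints 9
    }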
- * - */ -namespace Symbolic { - -template<typename Tag> class Symbol; -template<typename Arg0> class NegateExpr; -template<typename Arg1,typename Arg2> class AddExpr; -template<typename Arg1,typename Arg2> class ProductExpr; -template<typename Arg1,typename Arg2> class QuotientExpr; - -// A simple wrapper around an integral value to provide the eval method. -// We could also use a free-function symbolic_eval... -template<typename IndexType=Index> -class ValueExpr { -public: - ValueExpr(IndexType val) : m_value(val) {} - template<typename T> - IndexType eval_impl(const T&) const { return m_value; } -protected: - IndexType m_value; -}; - -// Specialization for compile-time value, -// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. -template<int N> -class ValueExpr<internal::FixedInt<N> > { -public: - ValueExpr() {} - template<typename T> - Index eval_impl(const T&) const { return N; } -}; - - -/** \class BaseExpr - * \ingroup Core_Module - * Common base class of any symbolic expressions - */ -template<typename Derived> -class BaseExpr -{ -public: - const Derived& derived() const { return *static_cast<const Derived*>(this); } - - /** Evaluate the expression given the \a values of the symbols. - * - * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue - * as constructed by SymbolExpr::operator= operator. - * - */ - template<typename T> - Index eval(const T& values) const { return derived().eval_impl(values); } - -#if EIGEN_HAS_CXX14 - template<typename... Types> - Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } -#endif - - NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); } - - AddExpr<Derived,ValueExpr<> > operator+(Index b) const - { return AddExpr<Derived,ValueExpr<> >(derived(), b); } - AddExpr<Derived,ValueExpr<> > operator-(Index a) const - { return AddExpr<Derived,ValueExpr<> >(derived(), -a); } - ProductExpr<Derived,ValueExpr<> > operator*(Index a) const - { return ProductExpr<Derived,ValueExpr<> >(derived(),a); } - QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const - { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); } - - friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); } - friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); } - friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b) - { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); } - friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b) - { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); } - - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - 
QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - -#if (!EIGEN_HAS_CXX14) - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } -#endif - - - template<typename OtherDerived> - AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const 
BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); } - - template<typename OtherDerived> - ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const - { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const - { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); } -}; - -template<typename T> -struct is_symbolic { - // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>. - enum { value = internal::is_convertible<T,BaseExpr<T> >::value }; -}; - -// Specialization for functions, because is_convertible fails in this case. -// Useful in c++98/11 mode when testing is_symbolic<decltype(fix<N>)> -template<typename T> -struct is_symbolic<T (*)()> { - enum { value = false }; -}; - -/** Represents the actual value of a symbol identified by its tag - * - * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used. - */ -template<typename Tag> -class SymbolValue -{ -public: - /** Default constructor from the value \a val */ - SymbolValue(Index val) : m_value(val) {} - - /** \returns the stored value of the symbol */ - Index value() const { return m_value; } -protected: - Index m_value; -}; - -/** Expression of a symbol uniquely identified by the template parameter type \c tag */ -template<typename tag> -class SymbolExpr : public BaseExpr<SymbolExpr<tag> > -{ -public: - /** Alias to the template parameter \c tag */ - typedef tag Tag; - - SymbolExpr() {} - - /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. - * - * The returned object should be passed to BaseExpr::eval() to evaluate a given expression with this specified runtime value. - */ - SymbolValue<Tag> operator=(Index val) const { - return SymbolValue<Tag>(val); - } - - Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); } - -#if EIGEN_HAS_CXX14 - // C++14 versions suitable for multiple symbols - template<typename...
Types> - Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); } -#endif -}; - -template<typename Arg0> -class NegateExpr : public BaseExpr<NegateExpr<Arg0> > -{ -public: - NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} - - template<typename T> - Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } -protected: - Arg0 m_arg0; -}; - -template<typename Arg0, typename Arg1> -class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> > -{ -public: - AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> > -{ -public: - ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> > -{ -public: - QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -} // end namespace Symbolic - -} // end namespace Eigen - -#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/eigen/Eigen/src/Core/util/XprHelper.h b/eigen/Eigen/src/Core/util/XprHelper.h index 4b337f2..ba5bd18 100644 --- a/eigen/Eigen/src/Core/util/XprHelper.h +++ b/eigen/Eigen/src/Core/util/XprHelper.h @@ -109,7 +109,6 @@ template<typename T, int Value> class variable_if_dynamic EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; @@ -120,7 +119,6 @@ template<typename T> class variable_if_dynamic<T, Dynamic> public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; @@ -673,7 +671,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces // Internal helper defining the cost of a scalar division for the type T. // The default heuristic can be specialized for each scalar type and architecture. 
-template<typename T,bool Vectorized=false,typename EnableIf = void> +template<typename T,bool Vectorized=false,typename EnaleIf = void> struct scalar_div_cost { enum { value = 8*NumTraits<T>::MulCost }; }; diff --git a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index dbbd480..4fec8af 100644 --- a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -85,7 +85,7 @@ MatrixBase<Derived>::eigenvalues() const * \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues() */ template<typename MatrixType, unsigned int UpLo> -EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType +inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType SelfAdjointView<MatrixType, UpLo>::eigenvalues() const { typedef typename SelfAdjointView<MatrixType, UpLo>::PlainObject PlainObject; @@ -149,7 +149,7 @@ MatrixBase<Derived>::operatorNorm() const * \sa eigenvalues(), MatrixBase::operatorNorm() */ template<typename MatrixType, unsigned int UpLo> -EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar +inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar SelfAdjointView<MatrixType, UpLo>::operatorNorm() const { return eigenvalues().cwiseAbs().maxCoeff(); diff --git a/eigen/Eigen/src/Geometry/AlignedBox.h b/eigen/Eigen/src/Geometry/AlignedBox.h index c902d8f..066eae4 100644 --- a/eigen/Eigen/src/Geometry/AlignedBox.h +++ b/eigen/Eigen/src/Geometry/AlignedBox.h @@ -63,7 +63,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ EIGEN_DEVICE_FUNC inline AlignedBox() - { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); } + { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. */ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) diff --git a/eigen/Eigen/src/Geometry/ParametrizedLine.h b/eigen/Eigen/src/Geometry/ParametrizedLine.h index 3929ca8..1e985d8 100644 --- a/eigen/Eigen/src/Geometry/ParametrizedLine.h +++ b/eigen/Eigen/src/Geometry/ParametrizedLine.h @@ -104,44 +104,7 @@ public: template <int OtherOptions> EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - */ - template<typename XprType> - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - direction() = (mat * direction()).normalized(); - else if (traits==Isometry) - direction() = mat * direction(); - else - { - eigen_assert(0 && "invalid traits value in ParametrizedLine::transform()"); - } - origin() = mat * origin(); - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. - * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - * Other kinds of transformations are not supported.
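The two removed transform() overloads boil down to mapping the origin by the full transform and the direction by its linear part, re-normalizing in the generic affine case. A sketch of the same operation written against Eigen's public Geometry API (a free function for illustration, not the removed member itself):

    #include <Eigen/Geometry>

    // Transform a line given by (origin, direction) in place by an affine map t.
    void transform_line(Eigen::Vector3f& origin, Eigen::Vector3f& direction,
                        const Eigen::Affine3f& t)
    {
      direction = (t.linear() * direction).normalized(); // affine: renormalize
      origin    = t * origin;                            // linear part + translation
    }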
- */ - template<int TrOptions> - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - origin() += t.translation(); - return *this; - } - -/** \returns \c *this with scalar type casted to \a NewScalarType + /** \returns \c *this with scalar type casted to \a NewScalarType * * Note that if \a NewScalarType is equal to the current scalar type of \c *this * then this function smartly returns a const reference to \c *this. diff --git a/eigen/Eigen/src/Geometry/Quaternion.h b/eigen/Eigen/src/Geometry/Quaternion.h index f6ef1bc..3e5a9ba 100644 --- a/eigen/Eigen/src/Geometry/Quaternion.h +++ b/eigen/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. namespace internal { -template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product +template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ return Quaternion<Scalar> @@ -446,8 +446,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product<Architecture::Target, Derived, OtherDerived, - typename internal::traits<Derived>::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other); + typename internal::traits<Derived>::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> // Generic conjugate of a Quaternion namespace internal { -template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj +template<int Arch, class Derived, typename Scalar> struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::conjugate() const { return internal::quat_conj<Architecture::Target, Derived, - typename internal::traits<Derived>::Scalar, - internal::traits<Derived>::Alignment>::run(*this); + typename internal::traits<Derived>::Scalar>::run(*this); } diff --git a/eigen/Eigen/src/Geometry/Transform.h b/eigen/Eigen/src/Geometry/Transform.h index 2d36dfa..3f31ee4 100644 --- a/eigen/Eigen/src/Geometry/Transform.h +++ b/eigen/Eigen/src/Geometry/Transform.h @@ -335,7 +335,7 @@ public: OtherModeIsAffineCompact = OtherMode == int(AffineCompact) }; - if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact)) + if(ModeIsAffineCompact == OtherModeIsAffineCompact) { // We need the block expression because the code is compiled for all // combinations of transformations and will trigger a compile time error @@ -343,7 +343,7 @@ public: m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template 
block<Dim,Dim+1>(0,0); makeAffine(); } - else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact)) + else if(OtherModeIsAffineCompact) { typedef typename Transform<Scalar,Dim,OtherMode,OtherOptions>::MatrixType OtherMatrixType; internal::transform_construct_from_matrix<OtherMatrixType,Mode,Options,Dim,HDim>::run(this, other.matrix()); @@ -481,7 +481,7 @@ public: TransformTimeDiagonalReturnType res; res.linear().noalias() = a*b.linear(); res.translation().noalias() = a*b.translation(); - if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact))) + if (Mode!=int(AffineCompact)) res.matrix().row(Dim) = b.matrix().row(Dim); return res; } @@ -755,7 +755,7 @@ template<typename Scalar, int Dim, int Mode,int Options> Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other) { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -801,7 +801,7 @@ Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator { check_template_params(); EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -819,7 +819,7 @@ template<typename Scalar, int Dim, int Mode, int Options> QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); @@ -912,7 +912,7 @@ EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective))) + if(int(Mode)==int(Projective)) affine() += other * m_matrix.row(Dim); else translation() += other; diff --git a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff8..f68cab5 100644 --- a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template<class Derived, class OtherDerived> -struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> +struct quat_product<Architecture::SSE, Derived, OtherDerived, float> { + enum { + AAlignment = traits<Derived>::Alignment, + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet<Aligned16>(0); - __m128 b = _b.coeffs().template packet<Aligned16>(0); + __m128 a = _a.coeffs().template packet<AAlignment>(0); + __m128 b = _b.coeffs().template packet<BAlignment>(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + 
pstoret<float,Packet4f,ResAlignment>( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, float, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, float> { + enum { + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& q) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); + pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment> template<typename VectorLhs,typename VectorRhs> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> { + enum { + ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment + }; static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type<VectorLhs>::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> -template<class Derived, class OtherDerived, int Alignment> -struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> +template<class Derived, class OtherDerived> +struct quat_product<Architecture::SSE, Derived, OtherDerived, double> { + enum { + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<double> >::Alignment + }; + static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> Quaternion<double> res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); - Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); + Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); + Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_zz = pset1<Packet2d>(a[2]); @@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = padd(pmul(a_zz, b_zw), 
pmul(a_xx, b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, double, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, double> { + enum { + ResAlignment = traits<Quaternion<double> >::Alignment + }; static inline Quaternion<double> run(const QuaternionBase<Derived>& q) { Quaternion<double> res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); return res; } }; diff --git a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444a..facdaf8 100644 --- a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j<mat.outerSize(); ++j) + if(MatType::IsRowMajor) { - RealScalar sum = mat.innerVector(j).squaredNorm(); - if(sum>0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; j<mat.outerSize(); ++j) + { + for(typename MatType::InnerIterator it(mat,j); it; ++it) + m_invdiag(it.index()) += numext::abs2(it.value()); + } + for(Index j=0; j<mat.cols(); ++j) + if(numext::real(m_invdiag(j))>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; j<mat.outerSize(); ++j) + { + RealScalar sum = mat.innerVector(j).squaredNorm(); + if(sum>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/eigen/Eigen/src/Jacobi/Jacobi.h b/eigen/Eigen/src/Jacobi/Jacobi.h index d25af8e..c30326e 100644 --- a/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/eigen/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar> void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x 
if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; for(Index i=0; i<alignedStart; ++i) { @@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Packet xi1 = ploadu<Packet>(px+PacketSize); Packet yi = pload <Packet>(py); Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = ploadu<Packet>(x+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, 
padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/eigen/Eigen/src/LU/FullPivLU.h b/eigen/Eigen/src/LU/FullPivLU.h index ec61086..03b6af7 100644 --- a/eigen/Eigen/src/LU/FullPivLU.h +++ b/eigen/Eigen/src/LU/FullPivLU.h @@ -411,9 +411,11 @@ template<typename _MatrixType> class FullPivLU #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; template<bool Conjugate, typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/QR/ColPivHouseholderQR.h b/eigen/Eigen/src/QR/ColPivHouseholderQR.h index d35395d..a7b47d5 100644 --- a/eigen/Eigen/src/QR/ColPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/ColPivHouseholderQR.h @@ -416,6 +416,7 @@ template<typename _MatrixType> class ColPivHouseholderQR #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif @@ -505,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); + RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -552,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. 
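The ColPivHouseholderQR hunk above keeps every intermediate of the column-norm downdate in RealScalar rather than Scalar, which avoids mixed real/complex arithmetic for complex matrices and for custom scalar types with explicit conversions. For reference, the downdating test it implements (the running column-norm update of LAPACK Working Note 176) looks like this in isolation; a minimal sketch with double scalars and illustrative names, not code from the patch:

#include <algorithm>
#include <cmath>
#include <limits>

// After eliminating row k, decide whether column j's running norm estimate
// `updated` can be cheaply downdated by removing the pivot contribution
// `pivot` (= |qr(k,j)|), or must be recomputed from scratch; `direct` is
// the last norm computed directly for that column.
bool downdate_column_norm(double& updated, double direct, double pivot)
{
  const double eps = std::numeric_limits<double>::epsilon();
  double temp = pivot / updated;
  temp = std::max(0.0, (1.0 + temp) * (1.0 - temp)); // 1 - temp^2, clamped at zero
  const double ratio = updated / direct;
  if (temp * ratio * ratio <= std::sqrt(eps))
    return false;               // cancellation too severe: caller recomputes the norm
  updated *= std::sqrt(temp);   // otherwise apply the cheap O(1) downdate
  return true;
}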
diff --git a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 13b61fc..34c637b 100644 --- a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -367,7 +367,7 @@ class CompleteOrthogonalDecomposition { #ifndef EIGEN_PARSED_BY_DOXYGEN template <typename RhsType, typename DstType> - void _solve_impl(const RhsType& rhs, DstType& dst) const; + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const; #endif protected: diff --git a/eigen/Eigen/src/QR/FullPivHouseholderQR.h b/eigen/Eigen/src/QR/FullPivHouseholderQR.h index c31e47c..e489bdd 100644 --- a/eigen/Eigen/src/QR/FullPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/FullPivHouseholderQR.h @@ -392,21 +392,22 @@ template<typename _MatrixType> class FullPivHouseholderQR * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; diff --git a/eigen/Eigen/src/QR/HouseholderQR.h b/eigen/Eigen/src/QR/HouseholderQR.h index 762b21c..3513d99 100644 --- a/eigen/Eigen/src/QR/HouseholderQR.h +++ b/eigen/Eigen/src/QR/HouseholderQR.h @@ -204,27 +204,28 @@ template<typename _MatrixType> class HouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. * * For advanced uses only. 
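* Each coefficient \f$ \tau_i \f$ enters an elementary reflector \f$ H_i = I - \tau_i v_i v_i^* \f$; the unitary factor exposed by householderQ() is the product of these reflectors.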
*/ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; diff --git a/eigen/Eigen/src/SVD/BDCSVD.h b/eigen/Eigen/src/SVD/BDCSVD.h index 25fca6f..d7a4271 100644 --- a/eigen/Eigen/src/SVD/BDCSVD.h +++ b/eigen/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co Index k1=0, k2=0; for(Index j=0; j<n; ++j) { - if( (A.col(j).head(n1).array()!=0).any() ) + if( (A.col(j).head(n1).array()!=Literal(0)).any() ) { A1.col(k1) = A.col(j).head(n1); B1.row(k1) = B.row(j); ++k1; } - if( (A.col(j).tail(n2).array()!=0).any() ) + if( (A.col(j).tail(n2).array()!=Literal(0)).any() ) { A2.col(k2) = A.col(j).tail(n2); B2.row(k2) = B.row(j); @@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0<considerZero) { - c0 = 1; - s0 = 0; + c0 = Literal(1); + s0 = Literal(0); } else { @@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. 
// To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;k<actual_n;++k) if(abs(col0(k))>considerZero) @@ -691,7 +692,7 @@ template <typename MatrixType> typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i<m; ++i) { Index j = perm(i); @@ -710,16 +711,16 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } + while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); } right = diag(l); } // first decide whether it's closer to the left end or the right end - RealScalar mid = left + (right-left) / 2; - RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); + RealScalar mid = left + (right-left) / Literal(2); + RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0)); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << right-left << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; @@ -755,7 +756,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; // measure everything relative to shift Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. 
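// Subtracting the two interpolation conditions fPrev = a/muPrev + b and
// fCur = a/muCur + b gives a = (fCur - fPrev)/(1/muCur - 1/muPrev), hence
// b = fCur - a/muCur, and the next iterate is the interpolant's zero muZero = -a/b.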
- RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD<MatrixType>::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD<MatrixType>::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? 
tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD<MatrixType>::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation<RealScalar> J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = Literal(0); JacobiRotation<RealScalar> J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/eigen/Eigen/src/SVD/SVDBase.h b/eigen/Eigen/src/SVD/SVDBase.h index 4294147..cc90a3b 100644 --- a/eigen/Eigen/src/SVD/SVDBase.h +++ b/eigen/Eigen/src/SVD/SVDBase.h @@ -212,6 +212,7 @@ public: #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename 
RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/SVD/UpperBidiagonalization.h b/eigen/Eigen/src/SVD/UpperBidiagonalization.h index 0b14608..11ac847 100644 --- a/eigen/Eigen/src/SVD/UpperBidiagonalization.h +++ b/eigen/Eigen/src/SVD/UpperBidiagonalization.h @@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, traits<MatrixType>::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h index e0b3c22..5ccb466 100644 --- a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h @@ -185,14 +185,6 @@ class SparseCompressedBase<Derived>::InnerIterator } inline InnerIterator& operator++() { m_id++; return *this; } - inline InnerIterator& operator+=(Index i) { m_id += i ; return *this; } - - inline InnerIterator operator+(Index i) - { - InnerIterator result = *this; - result += i; - return result; - } inline const Scalar& value() const { return m_values[m_id]; } inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id]); } @@ -253,14 +245,6 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator } inline ReverseInnerIterator& operator--() { --m_id; return *this; } - inline ReverseInnerIterator& operator-=(Index i) { m_id -= i; return *this; } - - inline ReverseInnerIterator operator-(Index i) - { - ReverseInnerIterator result = *this; - result -= i; - return result; - } inline const Scalar& value() const { return m_values[m_id-1]; } inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id-1]); } diff --git a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c41c07a..e315e35 100644 --- a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -212,7 +212,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -299,7 +300,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -531,7 +533,8 @@ public: enum { CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + 
evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) @@ -605,7 +608,8 @@ public: enum { CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) diff --git a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be7..5ab64f1 100644 --- a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr // transpose everything Transpose<Dest> dstT(dst); - internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h index 9568cc1..91c09ab 100644 --- a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -320,7 +320,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa umfpackControl() */ - void printUmfpackControl() + void umfpackReportControl() { umfpack_report_control(m_control.data(), Scalar()); } @@ -329,7 +329,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa analyzePattern(), compute() */ - void printUmfpackInfo() + void umfpackReportInfo() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); @@ -339,7 +339,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa analyzePattern(), compute() */ - void printUmfpackStatus() { + void umfpackReportStatus() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); } diff --git a/eigen/Eigen/src/misc/lapacke.h b/eigen/Eigen/src/misc/lapacke.h index 3d8e24f..8c7e79b 100644 --- a/eigen/Eigen/src/misc/lapacke.h +++ b/eigen/Eigen/src/misc/lapacke.h @@ -43,6 +43,10 @@ #include "lapacke_config.h" #endif +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + #include <stdlib.h> #ifndef lapack_int @@ -104,11 +108,6 @@ lapack_complex_double lapack_make_complex_double( double re, double im ); #endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - #ifndef LAPACKE_malloc #define LAPACKE_malloc( size ) malloc( size ) #endif diff --git a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 43615bd..ebaa3f1 100644 --- 
a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -10,7 +10,6 @@ typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> Inverse typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType; typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType; -typedef CwiseUnaryOp<internal::scalar_expm1_op<Scalar>, const Derived> Expm1ReturnType; typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType; typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType; typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType; @@ -91,20 +90,6 @@ exp() const return ExpReturnType(derived()); } -/** \returns an expression of the coefficient-wise exponential of *this minus 1. - * - * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, - * however, with finite precision, this function is much more accurate when \c x is close to zero. - * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_expm1">Math functions</a>, exp() - */ -EIGEN_DEVICE_FUNC -inline const Expm1ReturnType -expm1() const -{ - return Expm1ReturnType(derived()); -} - /** \returns an expression of the coefficient-wise logarithm of *this. * * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the @@ -113,7 +98,7 @@ expm1() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log">Math functions</a>, log() + * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log">Math functions</a>, exp() */ EIGEN_DEVICE_FUNC inline const LogReturnType diff --git a/eigen/Eigen/src/plugins/BlockMethods.h b/eigen/Eigen/src/plugins/BlockMethods.h index 5caf144..ac35a00 100644 --- a/eigen/Eigen/src/plugins/BlockMethods.h +++ b/eigen/Eigen/src/plugins/BlockMethods.h @@ -42,116 +42,66 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc #endif // not EIGEN_PARSED_BY_DOXYGEN -/// \returns an expression of a block in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a block in *this. /// -/// \param startRow the first row in the block -/// \param startCol the first column in the block -/// \param blockRows number of rows in the block, specified at either run-time or compile-time -/// \param blockCols number of columns in the block, specified at either run-time or compile-time -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows the number of rows in the block +/// \param blockCols the number of columns in the block /// -/// Example using runtime (aka dynamic) sizes: \include MatrixBase_block_int_int_int_int.cpp +/// Example: \include MatrixBase_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int_int_int.out /// -/// \newin{3.4}: -/// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. In the later case, \c n plays the role of a runtime fallback value in case \c N equals Eigen::Dynamic. 
-/// Here is an example with a fixed number of rows \c NRows and dynamic number of columns \c cols: -/// \code -/// mat.block(i,j,fix<NRows>,cols) -/// \endcode -/// -/// This function thus fully covers the features offered by the following overloads block<NRows,NCols>(Index, Index), -/// and block<NRows,NCols>(Index, Index, Index, Index) that are thus obsolete. Indeed, this generic version avoids -/// redundancy, it preserves the argument order, and prevents the need to rely on the template keyword in templated code. -/// -/// but with less redundancy and more consistency as it does not modify the argument order -/// and seamlessly enable hybrid fixed/dynamic sizes. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size matrix, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa class Block, fix, fix<N>(int) +/// \sa class Block, block(Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) +inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// This is the const version of block(Index,Index,NRowsType,NColsType) -template<typename NRowsType, typename NColsType> +/// This is the const version of block(Index,Index,Index,Index). */ EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) const +inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns a expression of a top-right corner of \c *this with either dynamic or fixed sizes. + +/// \returns a dynamic-size expression of a top-right corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. 
/// -/// Example with dynamic sizes: \include MatrixBase_topRightCorner_int_int.cpp +/// Example: \include MatrixBase_topRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// This is the const version of topRightCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of topRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size top-right corner of \c *this. +/// \returns an expression of a fixed-size top-right corner of *this. /// /// \tparam CRows the number of rows in the corner /// \tparam CCols the number of columns in the corner @@ -178,7 +128,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() con return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } -/// \returns an expression of a top-right corner of \c *this. +/// \returns an expression of a top-right corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -212,51 +162,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index -/// \returns an expression of a top-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a top-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. 
-/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_topLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, 0, cRows, cCols); } /// This is the const version of topLeftCorner(Index, Index). -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } -/// \returns an expression of a fixed-size top-left corner of \c *this. +/// \returns an expression of a fixed-size top-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -265,7 +196,7 @@ topLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -282,7 +213,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() cons return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } -/// \returns an expression of a top-left corner of \c *this. +/// \returns an expression of a top-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -316,53 +247,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index -/// \returns an expression of a bottom-right corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-right corner of *this. 
/// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// This is the const version of bottomRightCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of bottomRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-right corner of \c *this. +/// \returns an expression of a fixed-size bottom-right corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -371,7 +281,7 @@ bottomRightCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -388,7 +298,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } -/// \returns an expression of a bottom-right corner of \c *this. +/// \returns an expression of a bottom-right corner of *this. 
/// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -422,53 +332,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(In -/// \returns an expression of a bottom-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// This is the const version of bottomLeftCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of bottomLeftCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-left corner of \c *this. +/// \returns an expression of a fixed-size bottom-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. 
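bottomLeftCorner() follows the same pattern with the block anchored at (rows()-cRows, 0); for instance:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Matrix3i m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  std::cout << m.bottomLeftCorner(2, 2) << "\n";  // prints the 2x2 block: 4 5 / 7 8
}
\endcode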
/// @@ -477,7 +366,7 @@ bottomLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -494,7 +383,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() c return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } -/// \returns an expression of a bottom-left corner of \c *this. +/// \returns an expression of a bottom-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -528,50 +417,31 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Ind -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_topRows_int.cpp /// Output: \verbinclude MatrixBase_topRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) +inline RowsBlockXpr topRows(Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), 0, 0, n, cols()); } -/// This is the const version of topRows(NRowsType). -template<typename NRowsType> +/// This is the const version of topRows(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) const +inline ConstRowsBlockXpr topRows(Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), 0, 0, n, cols()); } -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -584,7 +454,7 @@ topRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -603,50 +473,31 @@ inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. 
/// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_bottomRows_int.cpp /// Output: \verbinclude MatrixBase_bottomRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) +inline RowsBlockXpr bottomRows(Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// This is the const version of bottomRows(NRowsType). -template<typename NRowsType> +/// This is the const version of bottomRows(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) const +inline ConstRowsBlockXpr bottomRows(Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -659,7 +510,7 @@ bottomRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -678,51 +529,32 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. /// /// \param startRow the index of the first row in the block /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include DenseBase_middleRows_int.cpp /// Output: \verbinclude DenseBase_middleRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. 
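The row-panel accessors topRows() and bottomRows() are whole-width blocks, so the inner-panel note applies (a contiguous memory range for row-major storage); both run-time and compile-time row counts are available:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(5, 3);
  m.topRows(2).setZero();       // rows 0-1, count chosen at run time
  m.bottomRows<2>().setOnes();  // rows 3-4, count fixed at compile time
}
\endcode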
-/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) +inline RowsBlockXpr middleRows(Index startRow, Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// This is the const version of middleRows(Index,NRowsType). -template<typename NRowsType> +/// This is the const version of middleRows(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) const +inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param startRow the index of the first row in the block @@ -736,7 +568,7 @@ middleRows(Index startRow, NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -755,50 +587,31 @@ inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_leftCols_int.cpp /// Output: \verbinclude MatrixBase_leftCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) +inline ColsBlockXpr leftCols(Index n) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, 0, rows(), n); } -/// This is the const version of leftCols(NColsType). -template<typename NColsType> +/// This is the const version of leftCols(Index). 
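middleRows(), shown just above, addresses an arbitrary run-time row range, which is handy for sliding-window passes; a small sketch:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(6, 2);
  for (Eigen::Index i = 0; i + 3 <= m.rows(); ++i)
    std::cout << m.middleRows(i, 3).sum() << "\n";  // sum over a 3-row window
}
\endcode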
EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) const +inline ConstColsBlockXpr leftCols(Index n) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, 0, rows(), n); } -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -811,7 +624,7 @@ leftCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -830,50 +643,31 @@ inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_rightCols_int.cpp /// Output: \verbinclude MatrixBase_rightCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) +inline ColsBlockXpr rightCols(Index n) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// This is the const version of rightCols(NColsType). -template<typename NColsType> +/// This is the const version of rightCols(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) const +inline ConstColsBlockXpr rightCols(Index n) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. 
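leftCols() and rightCols() are the column-wise counterparts, convenient for splitting a matrix; a minimal sketch, where the augmented system [A | b] is an illustrative assumption:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd Ab = Eigen::MatrixXd::Random(4, 5);  // augmented matrix [A | b]
  Eigen::MatrixXd A = Ab.leftCols(4);                  // first 4 columns
  Eigen::VectorXd b = Ab.rightCols(1);                 // last column
  Eigen::VectorXd x = A.fullPivLu().solve(b);
}
\endcode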
/// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -886,7 +680,7 @@ rightCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -905,51 +699,32 @@ inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \param startCol the index of the first column in the block /// \param numCols the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include DenseBase_middleCols_int.cpp /// Output: \verbinclude DenseBase_middleCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) +inline ColsBlockXpr middleCols(Index startCol, Index numCols) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// This is the const version of middleCols(Index,NColsType). -template<typename NColsType> +/// This is the const version of middleCols(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) const +inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param startCol the index of the first column in the block @@ -963,7 +738,7 @@ middleCols(Index startCol, NColsType numCols) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -982,7 +757,7 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = -/// \returns a fixed-size expression of a block of \c *this. +/// \returns a fixed-size expression of a block in *this. 
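middleCols() selects a column range at run time, while the template form middleCols<N>(j) fixes the width at compile time; both address the same coefficients:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 6);
  auto a = m.middleCols(1, 2);   // columns 1-2, width chosen at run time
  auto b = m.middleCols<2>(1);   // same columns, width fixed at compile time
  bool same = (a - b).isZero();  // true: the two views alias the same data
  (void)same;
}
\endcode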
/// /// The template parameters \a NRows and \a NCols are the number of /// rows and columns in the block. @@ -993,18 +768,12 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = /// Example: \include MatrixBase_block_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int.out /// -/// \note The use of this overload is discouraged from %Eigen 3.4 on; better to use the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one equivalence: -/// \code -/// mat.template block<NRows,NCols>(i,j) <--> mat.block(i,j,fix<NRows>,fix<NCols>) -/// \endcode -/// /// \note since block is a templated member, the keyword template has to be used /// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int NRows, int NCols> EIGEN_DEVICE_FUNC @@ -1021,7 +790,7 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } -/// \returns an expression of a block of \c *this. +/// \returns an expression of a block in *this. /// /// \tparam NRows number of rows in block as specified at compile-time /// \tparam NCols number of columns in block as specified at compile-time @@ -1038,19 +807,9 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow /// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.out /// -/// \note The use of this overload is discouraged from %Eigen 3.4 on; better to use the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one complete equivalence: -/// \code -/// mat.template block<NRows,NCols>(i,j,rows,cols) <--> mat.block(i,j,fix<NRows>(rows),fix<NCols>(cols)) -/// \endcode -/// If we know that, e.g., NRows==Dynamic and NCols!=Dynamic, then the equivalence becomes: -/// \code -/// mat.template block<Dynamic,NCols>(i,j,rows,NCols) <--> mat.block(i,j,rows,fix<NCols>) -/// \endcode -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int NRows, int NCols> inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, @@ -1067,7 +826,7 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns an expression of the \a i-th column of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_col.cpp /// Output: \verbinclude MatrixBase_col.out @@ -1088,7 +847,7 @@ inline ConstColXpr col(Index i) const return ConstColXpr(derived(), i); } -/// \returns an expression of the \a i-th row of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_row.cpp /// Output: \verbinclude MatrixBase_row.out @@ -1109,153 +868,96 @@ inline ConstRowXpr row(Index i) const return ConstRowXpr(derived(), i); } -/// \returns an expression of a segment (i.e.
a vector block) in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. /// /// \only_for_vectors /// /// \param start the first coefficient in the segment /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_segment_int_int.cpp /// Output: \verbinclude MatrixBase_segment_int_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// -/// \sa block(Index,Index,NRowsType,NColsType), fix<N>, fix<N>(int), class Block +/// \sa class Block, segment(Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) +inline SegmentReturnType segment(Index start, Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), start, n); } -/// This is the const version of segment(Index,NType). -template<typename NType> +/// This is the const version of segment(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) const +inline ConstSegmentReturnType segment(Index start, Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), start, n); } -/// \returns an expression of the first coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the first coefficients of *this. /// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_start_int.cpp /// Output: \verbinclude MatrixBase_start_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. 
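As the note above says, a dynamic-size segment of a fixed-size vector keeps a fixed maximal size, so evaluating it never allocates; sketch:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::Vector4f v(1.f, 2.f, 3.f, 4.f);
  // Maximal size is known at compile time (4), so no heap allocation occurs.
  Eigen::Vector2f s = v.segment(1, 2);  // coefficients v(1) and v(2)
  (void)s;
}
\endcode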
/// /// \sa class Block, block(Index,Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -head(NType n) +inline SegmentReturnType head(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), 0, n); } -/// This is the const version of head(NType). -template<typename NType> +/// This is the const version of head(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -head(NType n) const +inline ConstSegmentReturnType head(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), 0, n); } -/// \returns an expression of a last coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the last coefficients of *this. /// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_end_int.cpp /// Output: \verbinclude MatrixBase_end_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// /// \sa class Block, block(Index,Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -tail(NType n) +inline SegmentReturnType tail(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return SegmentReturnType(derived(), this->size() - n, n); } /// This is the const version of tail(Index). 
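head() and tail() are the endpoint shorthands; tail(n) is equivalent to segment(size()-n, n), matching the constructor call above:
\code
#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::VectorXi v(5);
  v << 10, 20, 30, 40, 50;
  std::cout << v.head(2).transpose() << "\n";  // 10 20
  std::cout << v.tail(2).transpose() << "\n";  // 40 50, same as v.segment(3, 2)
}
\endcode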
-template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -tail(NType n) const +inline ConstSegmentReturnType tail(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), this->size() - n, n); } /// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this @@ -1272,7 +974,7 @@ tail(NType n) const /// Example: \include MatrixBase_template_int_segment.cpp /// Output: \verbinclude MatrixBase_template_int_segment.out /// -/// \sa segment(Index,NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC @@ -1291,7 +993,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n); } -/// \returns a fixed-size expression of the first coefficients of \c *this. +/// \returns a fixed-size expression of the first coefficients of *this. /// /// \only_for_vectors /// @@ -1304,7 +1006,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index /// Example: \include MatrixBase_template_int_start.cpp /// Output: \verbinclude MatrixBase_template_int_start.out /// -/// \sa head(NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC @@ -1323,7 +1025,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n); } -/// \returns a fixed-size expression of the last coefficients of \c *this. +/// \returns a fixed-size expression of the last coefficients of *this. /// /// \only_for_vectors /// @@ -1336,7 +1038,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const /// Example: \include MatrixBase_template_int_end.cpp /// Output: \verbinclude MatrixBase_template_int_end.out /// -/// \sa tail(NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC diff --git a/eigen/Eigen/src/plugins/IndexedViewMethods.h b/eigen/Eigen/src/plugins/IndexedViewMethods.h deleted file mode 100644 index 22c1666..0000000 --- a/eigen/Eigen/src/plugins/IndexedViewMethods.h +++ /dev/null @@ -1,267 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
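For context, the plugin deleted here implemented the generalized operator() indexing introduced in upstream Eigen 3.4; after this patch the vendored Eigen no longer accepts calls like the following, a sketch that only compiles against an Eigen that still ships IndexedViewMethods.h:
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 6);
  auto view = A(Eigen::seq(1, 3), Eigen::all);  // rows 1-3, all columns
  auto rows = A(Eigen::seqN(0, 3, 2), 0);       // rows 0, 2, 4 of column 0
  (void)view; (void)rows;
}
\endcode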
- -#if !defined(EIGEN_PARSED_BY_DOXYGEN) - -// This file is automatically included twice to generate const and non-const versions - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_CONST const -#define EIGEN_INDEXED_VIEW_METHOD_TYPE ConstIndexedViewType -#else -#define EIGEN_INDEXED_VIEW_METHOD_CONST -#define EIGEN_INDEXED_VIEW_METHOD_TYPE IndexedViewType -#endif - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -protected: - -// define some aliases to ease readability - -template<typename Indices> -struct IvcRowType : public internal::IndexedViewCompatibleType<Indices,RowsAtCompileTime> {}; - -template<typename Indices> -struct IvcColType : public internal::IndexedViewCompatibleType<Indices,ColsAtCompileTime> {}; - -template<typename Indices> -struct IvcType : public internal::IndexedViewCompatibleType<Indices,SizeAtCompileTime> {}; - -typedef typename internal::IndexedViewCompatibleType<Index,1>::type IvcIndex; - -template<typename Indices> -typename IvcRowType<Indices>::type -ivcRow(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,RowsAtCompileTime>(derived().rows()),Specialized); -} - -template<typename Indices> -typename IvcColType<Indices>::type -ivcCol(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,ColsAtCompileTime>(derived().cols()),Specialized); -} - -template<typename Indices> -typename IvcColType<Indices>::type -ivcSize(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,SizeAtCompileTime>(derived().size()),Specialized); -} - -template<typename RowIndices, typename ColIndices> -struct valid_indexed_view_overload { - // Here we use is_convertible to Index instead of is_integral in order to treat enums as Index. - // In c++11 we could use is_integral<T> && is_enum<T> if is_convertible appears to be too permissive. 
- enum { value = !(internal::is_convertible<RowIndices,Index>::value && internal::is_convertible<ColIndices,Index>::value) }; -}; - -public: - -#endif - -template<typename RowIndices, typename ColIndices> -struct EIGEN_INDEXED_VIEW_METHOD_TYPE { - typedef IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived, - typename IvcRowType<RowIndices>::type, - typename IvcColType<ColIndices>::type> type; -}; - -// This is the generic version - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsIndexedView, - typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type - (derived(), ivcRow(rowIndices), ivcCol(colIndices)); -} - -// The following overload returns a Block<> object - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsBlock, - typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType>::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - typedef typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType BlockType; - typename IvcRowType<RowIndices>::type actualRowIndices = ivcRow(rowIndices); - typename IvcColType<ColIndices>::type actualColIndices = ivcCol(colIndices); - return BlockType(derived(), - internal::first(actualRowIndices), - internal::first(actualColIndices), - internal::size(actualRowIndices), - internal::size(actualColIndices)); -} - -// The following overload returns a Scalar - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsScalar, - CoeffReturnType >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// The following three overloads are needed to handle raw Index[N] arrays.
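The three enable_if overloads above dispatch on the index types: integers together with unit-increment arithmetic sequences select the Block overload, more general index lists select the IndexedView overload, and symbolic indices resolve to a single scalar coefficient. A sketch against upstream Eigen 3.4 naming (an assumption, since this vendored copy predates the release):
\code
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(6, 6);
  auto b = A(Eigen::seq(1, 3), Eigen::all);     // increment 1 -> ReturnAsBlock
  auto i = A(Eigen::seq(0, 4, 2), Eigen::all);  // increment 2 -> ReturnAsIndexedView
  double x = A(Eigen::last, 2);                 // symbolic index -> ReturnAsScalar
  (void)b; (void)i; (void)x;
}
\endcode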
- -template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndices> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type> -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type> - (derived(), rowIndices, ivcCol(colIndices)); -} - -template<typename RowIndices, typename ColIndicesT, std::size_t ColIndicesN> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type, const ColIndicesT (&)[ColIndicesN]> -operator()(const RowIndices& rowIndices, const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type,const ColIndicesT (&)[ColIndicesN]> - (derived(), ivcRow(rowIndices), colIndices); -} - -template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndicesT, std::size_t ColIndicesN> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN], const ColIndicesT (&)[ColIndicesN]> -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],const ColIndicesT (&)[ColIndicesN]> - (derived(), rowIndices, colIndices); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// Overloads for 1D vectors/arrays - -template<typename Indices> -typename internal::enable_if< - IsRowMajor && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_integral<Indices>::value)), - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> - (derived(), IvcIndex(0), ivcCol(indices)); -} - -template<typename Indices> -typename internal::enable_if< - (!IsRowMajor) && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_integral<Indices>::value)), - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> - (derived(), ivcRow(indices), IvcIndex(0)); -} - -template<typename Indices> -typename internal::enable_if< - (internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1) && (!internal::is_integral<Indices>::value) && (!Symbolic::is_symbolic<Indices>::value), - VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - typename IvcType<Indices>::type actualIndices = ivcSize(indices); - return VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> - (derived(), internal::first(actualIndices), internal::size(actualIndices)); -} - -template<typename IndexType> -typename 
internal::enable_if<Symbolic::is_symbolic<IndexType>::value, CoeffReturnType >::type -operator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(id,size())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -template<typename IndicesT, std::size_t IndicesN> -typename internal::enable_if<IsRowMajor, - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> - (derived(), IvcIndex(0), indices); -} - -template<typename IndicesT, std::size_t IndicesN> -typename internal::enable_if<!IsRowMajor, - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> - (derived(), indices, IvcIndex(0)); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -#undef EIGEN_INDEXED_VIEW_METHOD_CONST -#undef EIGEN_INDEXED_VIEW_METHOD_TYPE - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#include "IndexedViewMethods.h" -#undef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** - * \returns a generic submatrix view defined by the rows and columns indexed \a rowIndices and \a colIndices respectively. - * - * Each parameter must either be: - * - An integer indexing a single row or column - * - Eigen::all indexing the full set of respective rows or columns in increasing order - * - An ArithmeticSequence as returned by the Eigen::seq and Eigen::seqN functions - * - Any %Eigen vector/array of integers or expressions - * - Plain C arrays: \c int[N] - * - And more generally any type exposing the following two member functions: - * \code - * <integral type> operator[](<integral type>) const; - * <integral type> size() const; - * \endcode - * where \c <integral \c type> stands for any integer type compatible with Eigen::Index (i.e. \c std::ptrdiff_t). - * - * The last statement implies compatibility with \c std::vector, \c std::valarray, \c std::array, many of Range-v3's ranges, etc. - * - * If the submatrix can be represented using a starting position \c (i,j) and positive sizes \c (rows,columns), then this - * method will return a Block object after extraction of the relevant information from the passed arguments. This is the case - * when all arguments are either: - * - An integer - * - Eigen::all - * - An ArithmeticSequence with compile-time increment strictly equal to 1, as returned by Eigen::seq(a,b), and Eigen::seqN(a,N). - * - * Otherwise a more general IndexedView<Derived,RowIndices',ColIndices'> object will be returned, after conversion of the inputs - * to more suitable types \c RowIndices' and \c ColIndices'. - * - * For 1D vectors and arrays, it is better to use the operator()(const Indices&) overload, which behaves the same way but takes a single parameter.
- * - * \sa operator()(const Indices&), class Block, class IndexedView, DenseBase::block(Index,Index,Index,Index) - */ -template<typename RowIndices, typename ColIndices> -IndexedView_or_Block -operator()(const RowIndices& rowIndices, const ColIndices& colIndices); -/** This is an overload of operator()(const RowIndices&, const ColIndices&) for 1D vectors or arrays - * - * \only_for_vectors - */ -template<typename Indices> -IndexedView_or_VectorBlock -operator()(const Indices& indices); -#endif // EIGEN_PARSED_BY_DOXYGEN
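The 1D overload documented above behaves the same way for vectors; under the same assumption of an Eigen that still provides these methods:
\code
#include <Eigen/Dense>
#include <vector>

int main()
{
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0.0, 9.0);
  std::vector<int> idx{0, 3, 7};
  auto picked = v(idx);               // arbitrary indices -> IndexedView
  auto slice  = v(Eigen::seq(2, 5));  // unit increment -> VectorBlock
  (void)picked; (void)slice;
}
\endcode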